Merge branch 'android16-6.12' into branch 'android16-6.12-lts'
Catch the -lts branch up with all of the updates and kmi break that happened in the android16-6.12 branch. Changes included in here are:

* 147b27e8d6 ANDROID: nvmet: Use the bdev_is_zone_start() function
* 392bbaffdc ANDROID: scsi: ufs: Support I/O tracing for zoned block devices
* e395d18c8b ANDROID: scsi: scsi_debug: Support npo2 zone sizes
* 497ca126dd ANDROID: scsi/sd_zbc: Support npo2 zone sizes
* 355dfccf9d ANDROID: dm-table: allow zoned devices with non power-of-2 zone sizes
* d6f0f66569 ANDROID: block: Do not set the I/O priority for zoned writes
* 30ce6652ee ANDROID: block: Support npo2 zone sizes
* 4a77dbe5c5 UPSTREAM: loop: fix queue freeze vs limits lock order
* d2eefa734f UPSTREAM: loop: refactor queue limits updates
* b0477a0759 UPSTREAM: loop: Fix ABBA locking race
* 49d8530dfe UPSTREAM: loop: Simplify discard granularity calc
* f1aac3cfaf UPSTREAM: loop: Use bdev limit helpers for configuring discard
* 02cf51391e UPSTREAM: usb-storage: fix queue freeze vs limits lock order
* 96dfef3be8 UPSTREAM: nbd: fix queue freeze vs limits lock order
* 1bf8be0b4e UPSTREAM: nvme: fix queue freeze vs limits lock order
* 32ab5e2dd9 UPSTREAM: block: fix queue freeze vs limits lock order in sysfs store methods
* e4eb47a3ec BACKPORT: block: add a store_limit operations for sysfs entries
* 574e0848d2 UPSTREAM: block: add a queue_limits_commit_update_frozen helper
* 65ad590076 FROMGIT: genirq: Retain depth for managed IRQs across CPU hotplug
* 1bc40b53aa FROMGIT: ufs: core: support updating device command timeout
* 5e97c36004 ANDROID: Build Rust Binder as a GKI module
* daae469749 ANDROID: rust_binder: handle read/write_consumed > read/write_size
* b23e338263 ANDROID: rust_binder: add Rust Binder to Makefile
* 7163533526 ANDROID: rust_binder: fixups for 6.12.19 LTS
* bf40001347 ANDROID: rust_binder: add back tracepoints
* dac7c66bc9 ANDROID: rust_binder: move Rust Binder in preparation for GKI module
* 8313296331 FROMGIT: rust: alloc: add Vec::insert_within_capacity
* c28afde01d FROMGIT: rust: alloc: add Vec::remove
* e1da60354a FROMGIT: rust: alloc: add Vec::retain
* 1e01dcf3be FROMGIT: rust: alloc: add Vec::drain_all
* 1a17ca097d FROMGIT: rust: alloc: add Vec::push_within_capacity
* 75c0948156 FROMGIT: rust: alloc: add Vec::pop
* ed2019e2c4 FROMGIT: rust: alloc: add Vec::clear
* 04d685ecf9 FROMGIT: rust: alloc: replace `Vec::set_len` with `inc_len`
* 597ebe7c32 FROMGIT: rust: alloc: refactor `Vec::truncate` using `dec_len`
* 8a1546ee71 FROMGIT: rust: alloc: add `Vec::dec_len`
* 48080570b0 FROMGIT: rust: alloc: add Vec::len() <= Vec::capacity invariant
* 7907fdcba6 FROMGIT: rust: alloc: allow coercion from `Box<T>` to `Box<dyn U>` if T implements U
* 9de29f7183 FROMGIT: rust: alloc: use `spare_capacity_mut` to reduce unsafe
* c40401d665 FROMGIT: rust: alloc: add Vec::resize method
* 9d37907c65 FROMGIT: rust: alloc: add Vec::truncate method
* f037ab7a73 FROMGIT: rust: alloc: add missing invariant in Vec::set_len()
* 025e0fc417 UPSTREAM: rust: kunit: allow to know if we are in a test
* 86603276f4 UPSTREAM: rust: macros: add macro to easily run KUnit tests
* f8c704efd6 BACKPORT: rust: kunit: add KUnit case and suite macros
* 1b461575a8 UPSTREAM: rust: add kunitconfig
* 615a5b6d7e UPSTREAM: rust: uaccess: generalize userSliceReader to support any Vec
* 0690b3438b ANDROID: 2025/05/15 KMI update
* daf75d7717 FROMLIST: mmc: sdhci-msm: Enable force hw reset during cqe recovery
* 794391e0e8 FROMLIST: mmc: core: Introduce new flag to force hardware reset
* 84e14946eb ANDROID: GKI: add ANDROID_OEM_DATA in struct bio
* e7b9281897 ANDROID: rust: allow zero init for KABI members
* 9027c8ec43 ANDROID: 16K: Add VMA padding size to smaps output
* 307be4b887 ANDROID: 16K: Don't copy data vma for maps/smaps output
* d378f3ab39 ANDROID: 16K: Fixup padding vm_flags bits on VMA splits
* c0d7f9802a ANDROID: 16K: Introduce pgsize_migration_inline.h
* 6fd1ed47f5 ANDROID: 16K: Fix vm_flags conflicts from mseal
* 2a651ea884 ANDROID: 16K: Don't set padding vm_flags on 32-bit archs
* 81734e02c6 ANDROID: 16K: Avoid mmap lock assertions for padding VMAs
* 4199eaf23e ANDROID: 16K: Only check basename of linker context
* 6050c4b129 ANDROID: 16K: Avoid and document padding madvise lock warning
* 6e64e9ce1f ANDROID: 16K: Fix show maps CFI failure
* 95d0b11a65 ANDROID: 16K: Handle pad VMA splits and merges
* 29dc8b580b ANDROID: 16K: madvise_vma_pad_pages: Remove filemap_fault check
* bcbb9d3c85 ANDROID: 16K: Only madvise padding from dynamic linker context
* 2feb999649 ANDROID: 16K: Separate padding from ELF LOAD segment mappings
* 092ff7e5b4 ANDROID: 16K: Exclude ELF padding for fault around range
* a3b4e8f698 ANDROID: 16K: Use MADV_DONTNEED to save VMA padding pages.
* 0d793cde88 ANDROID: 16K: Introduce ELF padding representation for VMAs
* 918c98f267 ANDROID: 16K: Introduce /sys/kernel/mm/pgsize_migration/enabled
* e9420a4582 ANDROID: 16K: rust: ashmem: __page_align VMA size check
* d44ff7a3ed ANDROID: 16K: Introduce rust __page_*() helpers
* e39fcef01c ANDROID: 16K: Duplicate command line for parsing page_shift
* 44a6882cc7 ANDROID: 16K: Init page_shift param in a pure_initcall()
* 68ba0f4dfb ANDROID: 16K: __PAGE_ALIGN() virtio gpu dumb buffers
* 1022438243 ANDROID: 16K: Avoid conflicting __PAGE_SIZE in bpf/core
* 778a447513 ANDROID: 16K: Emulate cachestat counters
* 0d44e1eb0e ANDROID: 16K: Disable kernel APIs indexed by PFNs
* d684b3125e ANDROID: 16K: Emulate pread() for pagemap
* 669f0c4355 ANDROID: 16K: Emulate /proc/pid/pagemap
* 3c9a39c770 ANDROID: 16K: Fix mincore emulation
* 8aab407984 ANDROID: 16K: Emulate mincore() syscall
* 596774b15c ANDROID: 16K: x86_64: Disable userfaultfd
* c94c31e526 ANDROID: 16K: Update sysctl_perf_event_mlock if PERF_EVENTS enabled
* 13ba0aec9c ANDROID: 16K: Fixup perf_mmap check for metadata page
* 03ce5534fc ANDROID: 16K: Fix swapfile header
* 53ab86eb55 ANDROID: 16K: Fix SIGBUS semantics and document __filemap_fixup()
* 50a96587af ANDROID: 16K: [s]maps: Fold fixup entries into the parent entry
* 57bbcef534 ANDROID: 16K: Ensure mseal start and len are 16kB multiples
* 5c1d7ef671 ANDROID: 16K: Handle pgoff > file_size for shmem and file backed VMAs
* cd48f9a1f7 ANDROID: 16K: Ensure stack expansion size is __PAGE_SIZE multiple
* a8df614576 ANDROID: 16K: Only support page size emulation for x86_64
* ac98b230db ANDROID: 16K: Use bit 59 for __VM_NO_COMPAT
* eb54f19663 ANDROID: 16K: Fix __MAP_NO_COMPAT overflow
* 36157a52cd ANDROID: 16K: __PAGE_ALIGN dma-bufs size from heap allocations
* 65df6a39b7 ANDROID: 16K: Align vsyscall mapping size to a 16kB multiple
* 4395898bf5 ANDROID: 16K: Align vdso mapping size to a 16kB multiple
* 37ebd01b5a ANDROID: 16K: Make the x86 vdso layout 16kB compatible
* c64a15a595 ANDROID: 16K: Introduce __MAX_PAGE_SIZE macros
* 60b3135822 ANDROID: 16K: Remove androidboot from page_shift kernel param
* 5e32ba9023 ANDROID: 16K: Remove unescessary err log in randomize_page()
* 1ae0864980 ANDROID: 16K Prevent non-__PAGE_ALIGNED() VMA splits by anon names
* 68e0528b38 ANDROID: 16K: Remove anon name for fixup VMA
* f7f25a5b1a ANDROID: 16K: Add page_compat[_enabled] to symbol list
* 93bfe702cd ANDROID: 16K: Export page compat symbols
* 181bc19bef ANDROID: 16K: x86_64: Allow stack randomization of twice page-size
* f51703f4c1 ANDROID: 16K: x86_64: __PAGE_ALIGN mmap randomization
* 4daa4c1fec ANDROID: 16K: brk: __PAGE_ALIGN brk
* 7852452429 ANDROID: 16K: mlock: __PAGE_ALIGN addr and len
* 4956d7c6c4 ANDROID: 16K: msync: __PAGE_ALIGN addr and len
* 5d8eb7f9e0 ANDROID: 16K: madvise: __PAGE_ALIGN addr and len
* a52b76b874 ANDROID: 16K: mremap: __PAGE_ALIGN addr and len
* 2d3fed3a43 ANDROID: 16K: mprotect: __PAGE_ALIGN addr and len
* 397425965f ANDROID: 16K: munmap: __PAGE_ALIGN addr and len
* a9e38ff89a ANDROID: 16K: __PAGE_ALIGN stack_[top|base]
* ba166bce2c ANDROID: 16K: __PAGE_ALIGN randomize_stack_top() address
* 9ba9a0891b ANDROID: 16K: __PAGE_ALIGN randomize_page() address
* 81e0928547 ANDROID: 16K: __PAGE_ALIGN mmap hint address
* a1e630ea0d ANDROID: 16K: ashmem: Fix size check
* df9123472f ANDROID: 16K: Fix selinux mmap size check
* 7dea17008f ANDROID: 16K: procfs: maps: Don't show fixup VMAs
* e076e9ff2c ANDROID: 16K: Handle filemap faults
* a9ccc1128e ANDROID: 16K: Introduce __VM_NO_COMPAT vma flag
* e7f83d4d4b ANDROID: 16K: Ensure unmapped_area returns a __PAGE_ALIGNED address
* 796be8fd27 ANDROID: 16K: Reduce mmap rand bits
* 80e2a42d97 ANDROID: 16K: x86_64: Set ELF_EXEC_PAGESIZE to __PAGE_SIZE
* 58e2fa4ec4 ANDROID: 16K: Remove build time dependencies on ELF_EXEC_PAGESIZE
* d09cd43b3f ANDROID: 16K: Log unaligned operations
* 1fb2de0c3d ANDROID: 16K: Add page-compat helper macros
* a052d19e1c ANDROID: GKI: Pad vendor properties to power_supply_property enum
* 61de19b772 ANDROID: drivers/iommu: Pad iommu structs
* 6cb1db877d ANDROID: KVM: arm64: Pad more pKVM structs
* b3c31c9b21 ANDROID: KVM: arm64: Drop struct pkvm_mapping from KMI
* ef10b442e4 ANDROID: KVM: arm64: Remove struct kvm_cpu_context from the KMI
* 15bf9aa274 ANDROID: GKI: Add ABI padding for kcompressd feature
* e80ed6bcfb ANDROID: GKI: Add memory reclaim ABI padding
* 9e96103d83 ANDROID: GKI: Add dmabuf ABI padding
* 4bd97e7a02 ANDROID: GKI: Add cgroup ABI padding
* b209d55c0e ANDROID: GKI: Add cpuset ABI padding
* 060da33ae4 ANDROID: GKI: Add memcg ABI padding
* d48d0d0892 FROMLIST: scsi: core: Implement reserved command handling
* 26febb7cde UPSTREAM: block: track queue dying state automatically for modeling queue freeze lockdep
* df5f9ab297 UPSTREAM: block: don't verify queue freeze manually in elevator_init_mq()
* 752dff69ae UPSTREAM: block: track disk DEAD state automatically for modeling queue freeze lockdep
* 225f2e16ad UPSTREAM: block: don't reorder requests in blk_mq_add_to_batch
* bdcd6a28fd UPSTREAM: block: don't reorder requests in blk_add_rq_to_plug
* 24f685a927 UPSTREAM: block: add a rq_list type
* bbce2aa253 UPSTREAM: block: remove rq_list_move
* 128144da22 ANDROID: KVM: arm64: Add smc64 trap handling for protected guests
* 2c1385ae0e ANDROID: Modify android_rvh_find_lowest_rq hook
* bad3ca6c52 ANDROID: GKI: add vendor padding variable in struct nf_conn
* ef3d16e0e0 ANDROID: vendor_hooks: add a field in pglist_data
* 0dd21f133b ANDROID: scsi: ufs: add UFSHCD_ANDROID_QUIRK_SET_IID_TO_ONE
* 75adb09e2f ANDROID: GKI: the "reusachtig" padding sync with android16-6.12
* 20159aa0ac UPSTREAM: PCI: Check BAR index for validity
* 46f484fa4d UPSTREAM: perf: Fix hang while freeing sigtrap event
* f295287ed4 UPSTREAM: perf/core: Simplify the perf_event_alloc() error path
* 748bd1ca17 UPSTREAM: perf/core: Add aux_pause, aux_resume, aux_start_paused
* 887fb3f16c ANDROID: KVM: arm64: Add __pkvm_host_donate_sglist_hyp
* a7667808d9 UPSTREAM: tools/selftests: add guard region test for /proc/$pid/pagemap
* dd6e353d71 UPSTREAM: fs/proc/task_mmu: add guard region bit to pagemap
* df3e8432fa UPSTREAM: tools/selftests: add file/shmem-backed mapping guard region tests
* bc91eb889e UPSTREAM: tools/selftests: expand all guard region tests to file-backed
* 458e4dbd0b UPSTREAM: selftests/mm: rename guard-pages to guard-regions
* 8261d30079 UPSTREAM: mm: allow guard regions in file-backed and read-only mappings
* ca6b245e10 UPSTREAM: selftests/mm: use PIDFD_SELF in guard pages test
* 99b3bb2022 BACKPORT: selftests/pidfd: add tests for PIDFD_SELF_*
* 7a879200c9 UPSTREAM: selftests/pidfd: add new PIDFD_SELF* defines
* 1734a4ad6b BACKPORT: pidfd: add PIDFD_SELF* sentinels to refer to own thread/process
* b00dca6fb7 UPSTREAM: selftests/mm: add fork CoW guard page test
* 5367c0eacc BACKPORT: selftests/mm: add self tests for guard page feature
* 86f861b42e UPSTREAM: tools: testing: update tools UAPI header for mman-common.h
* b9ee6db5a8 BACKPORT: mm: madvise: implement lightweight guard page mechanism
* c14f85307d UPSTREAM: mm: add PTE_MARKER_GUARD PTE marker
* c5be90ae70 UPSTREAM: mm: pagewalk: add the ability to install PTEs
* 3306eb50a4 FROMGIT: docs: core-api: document the IOVA-based API
* 26405baef4 FROMGIT: dma-mapping: add a dma_need_unmap helper
* 66bc206d64 FROMGIT: dma-mapping: Implement link/unlink ranges API
* 59a15e3bf1 FROMGIT: iommu/dma: Factor out a iommu_dma_map_swiotlb helper
* 0f2253b2b1 FROMGIT: dma-mapping: Provide an interface to allow allocate IOVA
* c64f83e1d6 FROMGIT: iommu: add kernel-doc for iommu_unmap_fast
* 5c59ff3809 FROMGIT: iommu: generalize the batched sync after map interface
* 15ad0760b8 FROMGIT: dma-mapping: move the PCI P2PDMA mapping helpers to pci-p2pdma.h
* 661e6bda0e FROMGIT: PCI/P2PDMA: Refactor the p2pdma mapping helpers
* e44dfa62df Reapply "ANDROID: enable memory allocation profiling configs"
* 60372b88d2 ANDROID: binder: add OEM data to struct binder_alloc
* 31f62a008e ANDROID: Limit vfs-only namespace to GKI builds
* e2c81a7fa3 ANDROID: Fix incorrect namespacing for ANDROID_GKI_VFS_EXPORT_ONLY
* 7af261fc12 ANDROID: KVM: arm64: Use smccc 1.2 for direct FF-A calls
* 996a35040a FROMLIST: dm-zone: Use bdev_*() helper functions where applicable
* 1d1b2e8d63 FROMGIT: perf/aux: Allocate non-contiguous AUX pages by default
* 6e0b046d59 UPSTREAM: wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation
* 617a8cdb8d ANDROID: GKI: add OEM data to struct scan_control for XM OGKI
* acc91ef94b FROMGIT: dma-buf: insert memory barrier before updating num_fences
* 85856ec8b2 ANDROID: gunyah: Fix potential use-after-free in gunyah_rm_notifier_register
* e48193bfcf ANDROID: KVM: arm64: Reserve all args for req_mmio
* 42cfdfb46c ANDROID: GKI: Add reservation and use macros for non-LTS backports
* e1cdedc5db FROMGIT: mm/memcg: use kmem_cache when alloc memcg pernode info
* 65c043e1ca FROMGIT: mm/memcg: use kmem_cache when alloc memcg
* 434e2d5481 FROMGIT: mm/memcg: move mem_cgroup_init() ahead of cgroup_init()
* 4e16895056 ANDROID: GKI: Update oplus symbol list
* 28cbf47bba ANDROID: GKI: Export css_task_iter_start()
* 84849bc819 Revert "ANDROID: arm64: Forcefully disable SME at runtime"
* 0aaf2786fa FROMGIT: arm64/fpsimd: ptrace: Gracefully handle errors
* a51c741bb6 FROMGIT: arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state
* 1d05f8264a FROMGIT: arm64/fpsimd: ptrace: Do not present register data for inactive mode
* 958a94681f FROMGIT: arm64/fpsimd: ptrace: Save task state before generating SVE header
* 3baa9071c3 FROMGIT: arm64/fpsimd: ptrace/prctl: Ensure VL changes leave task in a valid state
* ccf055346e FROMGIT: arm64/fpsimd: ptrace/prctl: Ensure VL changes do not resurrect stale data
* e18a498a2f FROMGIT: BACKPORT: arm64/fpsimd: Make clone() compatible with ZA lazy saving
* a6267d4bf5 FROMGIT: arm64/fpsimd: Clear PSTATE.SM during clone()
* 370e80e212 FROMGIT: arm64/fpsimd: Consistently preserve FPSIMD state during clone()
* f5db1f9a3b FROMGIT: arm64/fpsimd: Remove redundant task->mm check
* 57f5b387c4 FROMGIT: arm64/fpsimd: signal: Use SMSTOP behaviour in setup_return()
* f940d322b6 FROMGIT: arm64/fpsimd: Add task_smstop_sm()
* 73106ecef5 FROMGIT: arm64/fpsimd: Factor out {sve,sme}_state_size() helpers
* f0f4be3921 FROMGIT: arm64/fpsimd: Clarify sve_sync_*() functions
* 49bba8e1e8 FROMGIT: arm64/fpsimd: ptrace: Consistently handle partial writes to NT_ARM_(S)SVE
* b2853208b1 FROMGIT: arm64/fpsimd: signal: Consistently read FPSIMD context
* bed5006f4a FROMGIT: arm64/fpsimd: signal: Mandate SVE payload for streaming-mode state
* 63897a249f FROMGIT: arm64/fpsimd: signal: Clear PSTATE.SM when restoring FPSIMD frame only
* 37749ff2f7 FROMGIT: arm64/fpsimd: Do not discard modified SVE state
* f01e49470a FROMGIT: arm64/fpsimd: Avoid warning when sve_to_fpsimd() is unused
* 787c2bf09b FROMGIT: arm64/fpsimd: signal: Clear TPIDR2 when delivering signals
* dd9f8f02e9 FROMGIT: arm64/fpsimd: signal: Simplify preserve_tpidr2_context()
* 9592e13c60 FROMGIT: arm64/fpsimd: signal: Always save+flush state early
* 14383c6162 FROMGIT: arm64/fpsimd: signal32: Always save+flush state early
* 0c377582f6 FROMGIT: arm64/fpsimd: Add fpsimd_save_and_flush_current_state()
* acd59f18f3 FROMGIT: arm64/fpsimd: Fix merging of FPSIMD state during signal return
* f78acfcc31 FROMGIT: arm64/fpsimd: Reset FPMR upon exec()
* 32dbf4add0 FROMGIT: arm64/fpsimd: Avoid clobbering kernel FPSIMD state with SMSTOP
* 2d33087d98 FROMGIT: arm64/fpsimd: Don't corrupt FPMR when streaming mode changes
* c757f1bcc8 FROMGIT: arm64/fpsimd: Discard stale CPU state when handling SME traps
* 64c0feb892 FROMGIT: arm64/fpsimd: Remove opportunistic freeing of SME state
* f55fc6340b FROMGIT: arm64/fpsimd: Remove redundant SVE trap manipulation
* 2ccf10f4a6 FROMGIT: arm64/fpsimd: Remove unused fpsimd_force_sync_to_sve()
* 1e380d1c0e FROMGIT: arm64/fpsimd: Avoid RES0 bits in the SME trap handler
* 6cf85d6ca1 BACKPORT: KVM: arm64: Eagerly switch ZCR_EL{1,2}
* 6c0394f0ef BACKPORT: KVM: arm64: Mark some header functions as inline
* 66762de87f BACKPORT: KVM: arm64: Refactor exit handlers
* d09c293b5b BACKPORT: KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN
* 5f2af6c19e BACKPORT: KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN
* f012246148 BACKPORT: KVM: arm64: Remove host FPSIMD saving for non-protected KVM
* 3aa13c0fd1 BACKPORT: KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state
* 5f1b9561a1 ANDROID: KVM: arm64: Eagerly restore host ZCR_EL2 after vcpu run in pKVM
* 86622b5452 ANDROID: ABI: update symbol list for honor
* 5addce7b33 ANDROID: GKI:Add VendorHook for ProbeTimeout
* e8df77b867 ANDROID: GKI: Update symbol list for qcom
* 6c6bf93463 ANDROID: GKI: update symbol list for xiaomi
* e8da2c8c48 ANDROID: Export cgroup function to allow module to remove control files
* f2c750c9f8 ANDROID: Update symbols list for imx
* c206f26b28 ANDROID: Update symbols to oplus symbol list.
* 55ac0abda4 ANDROID: Export the necessary symbols for the implementation of the BPF scheduler.
* de6714dc48 ANDROID: Drop tests_zip_arm64 from TV target.
* 7f12a7bda3 ANDROID: GKI: Update RTK STB KMI symbol list
* ba364a2340 ANDROID: vendor_hooks: add vendor hook in cma_alloc()
* 21de8f00f4 ANDROID: vendor hooks: use DECLARE_RESTRICTED_HOOK for android_rvh
* d574cb3cc1 ANDROID: GKI: update symbol list for xiaomi
* 41763ef33d ANDROID: GKI: Update symbols list file for oplus
* b62718ba86 ANDROID: vendor_hooks: add hook in __alloc_workqueue()
* c7b71fcb6f FROMLIST: xfrm: Migrate offload configuration
* 564d5ceda6 ANDROID: KVM: arm64: Fix relinquish filtering
* d9d550aef0 Revert "ANDROID: Revert^2 "KVM: arm64: iommu: Allow to boot without IOMMU driver""
* 8d139a5479 ANDROID: GKI: Update symbols list file for honor
* f3b22c7868 ANDROID: fs/proc: Perform priority inheritance around access_remote_vm()
* 06a574beb9 ANDROID: fix incorrect #ifdef for CONFIG_ANDROID_VENDOR_OEM_DATA
* d52356998b FROMLIST: scsi: ufs: core: Increase the UIC command timeout further
* 17f5bd09ee ANDROID: sched/psi: disable the privilege check if CONFIG_DEFAULT_SECURITY_SELINUX is enabled
* ad2761e088 ANDROID: ABI: Update pixel symbol list
* 86f6711a2d ANDROID: scsi: ufs: add complete init vendor hook
* 273b99c30a ANDROID: scsi: ufs: add vendor hook to override key reprogramming
* 05c9b03f4c FROMGIT: dm-verity: use softirq context only when !need_resched()
* a8027abd1e ANDROID: KVM: arm64: Redirect modprobe to /dev/kmsg
* 078ef75fa4 ANDROID: gki_defconfig: Enable CONFIG_ARM_SDE_INTERFACE
* f982a6b573 ANDROID: arm64: SDEI: Export SDEI related symbols
* b145782bbd FROMGIT: firmware: SDEI: Allow sdei initialization without ACPI_APEI_GHES
* cbd7c4caa9 ANDROID: KVM: arm64: Do not pkvm_init_devices() when no registered devices
* 1fad370b9e ANDROID: KVM: arm64: iommu: Do not remap on iommu_atomic_pool reclaim
* 890428fb57 ANDROID: Update symbols list for imx
* 776eedb13c ANDROID: ABI: Update symbol list for mtk
* ac8b302ab0 ANDROID: mm: Add vendor hook before rmqueue_bulk
* 34fe71fe24 ANDROID: GKI: Update symbol list file for xiaomi
* 88cb3505eb ANDROID: mm: export __pte_offset_map/unuse_swap_pte/read_swap_cache_async
* 46aa903098 ANDROID: Disable check_defconfig for kernel_aarch64_tv.
* 88680fe19e ANDROID: fuse-bpf: Fix recursion in fuse_copy_file_range
* 5838b5ac0a ANDROID: turn off KMI strict mode for TV builds
* e680506fe0 ANDROID: KVM: iommu: Allow IOMMU mapping in carveouts
* 4089d8be3f ANDROID: GKI: Update symbol list file for xiaomi
* 20adcab29c UPSTREAM: codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog()
* 4e4b0bdf85 ANDROID: GKI: Update QCOM symbol list
* b791ce76d1 ANDROID: GKI: Update the ABI symbol list for qcom
* 6690013277 FROMLIST: mm: add nr_free_highatomic in show_free_areas
* cedbc9e5ec ANDROID: GKI: Update qcom symbol list
* 2145149a38 ANDROID: implement wrapper for reverse migration
* dfc83778aa ANDROID: GKI: Update symbols list file for honor
* 1213a4027a ANDROID: ABI: Update pixel symbol list
* a546b31e53 BACKPORT: FROMGIT: coresight: core: Disable helpers for devices that fail to enable
* bdda915529 FROMGIT: coresight: catu: Introduce refcount and spinlock for enabling/disabling
* 2366a0bf75 UPSTREAM: firmware: arm_ffa: Upgrade FF-A version to v1.2 in the driver
* e5ea70aa2d ANDROID: gki_defconfig: do not use FineIBT on x86
* b73e9bfc92 FROMGIT: sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks

Change-Id: Ie76eebb5d135e428f1c0986639fca0d1ead2aa51
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
@@ -290,20 +290,18 @@ common_kernel(
        additional_kmi_symbol_lists = [":aarch64_additional_kmi_symbol_lists"],
        arch = "arm64",
        build_gki_artifacts = True,
        check_defconfig = "disabled",
        ddk_headers_archive = ":kernel_aarch64_ddk_headers_archive",
        ddk_module_headers = [":all_headers_aarch64"],
        defconfig = "arch/arm64/configs/gki_defconfig",
        extra_dist = [
            ":test_mappings_zip",
            ":tests_zip_arm64",
        ],
        extra_dist = [],
        gki_boot_img_sizes = _GKI_AARCH64_BOOT_IMAGE_SIZES,
        gki_system_dlkm_modules = ":gki_system_dlkm_modules_arm64",
        kcflags = COMMON_KCFLAGS,
        # We don't guarantee ABI stability for TV target
        kmi_enforced = False,
        kmi_symbol_list = "gki/aarch64/symbols/base",
        kmi_symbol_list_strict_mode = True,
        kmi_symbol_list_strict_mode = False,
        make_goals = _GKI_AARCH64_MAKE_GOALS,
        makefile = ":Makefile",
        module_implicit_outs = get_gki_modules_list("arm64") + get_kunit_modules_list("arm64"),

@@ -21,7 +21,8 @@ There are four components to pagemap:
* Bit 56 page exclusively mapped (since 4.2)
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
  Documentation/admin-guide/mm/userfaultfd.rst)
* Bits 58-60 zero
* Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page)
* Bits 59-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
* Bit 62 page swapped
* Bit 63 page present

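As a small userspace sketch (an editor's illustration, not part of the patch above), the new guard-region bit can be read back from /proc/self/pagemap roughly like this::

	/* Illustrative only: test pagemap bit 58 (guard region) for one address. */
	#include <fcntl.h>
	#include <stdint.h>
	#include <unistd.h>

	static int vaddr_is_guard_region(const void *addr)
	{
		uint64_t entry;
		long page_size = sysconf(_SC_PAGESIZE);
		off_t off = ((uintptr_t)addr / page_size) * sizeof(entry);
		int fd = open("/proc/self/pagemap", O_RDONLY);

		if (fd < 0)
			return -1;
		if (pread(fd, &entry, sizeof(entry), off) != (ssize_t)sizeof(entry)) {
			close(fd);
			return -1;
		}
		close(fd);
		return (entry >> 58) & 1;	/* bit 58: pte is a guard region */
	}
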
@@ -69,8 +69,8 @@ model features for SME is included in Appendix A.
  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
  used for SVE vectors.

* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
  in which case it is set to 0.
* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
  is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.

2. Vector lengths
------------------

@@ -115,7 +115,7 @@ be zeroed.
5. Signal handling
-------------------

* Signal handlers are invoked with streaming mode and ZA disabled.
* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0.

* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
  tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.

@@ -241,7 +241,7 @@ prctl(PR_SME_SET_VL, unsigned long arg)
  length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
  does not constitute a change to the vector length for this purpose.

* Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
* Changing the vector length causes PSTATE.ZA to be cleared.
  Calling PR_SME_SET_VL with vl equal to the thread's current vector
  length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
  does not constitute a change to the vector length for this purpose.

@@ -530,6 +530,77 @@ routines, e.g.:::
		....
	}

Part Ie - IOVA-based DMA mappings
---------------------------------

These APIs allow a very efficient mapping when using an IOMMU. They are an
optional path that requires extra code and are only recommended for drivers
where DMA mapping performance, or the space usage for storing the DMA addresses
matter. All the considerations from the previous section apply here as well.

::

	bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
			phys_addr_t phys, size_t size);

Is used to try to allocate IOVA space for mapping operation. If it returns
false this API can't be used for the given device and the normal streaming
DMA mapping API should be used. The ``struct dma_iova_state`` is allocated
by the driver and must be kept around until unmap time.

::

	static inline bool dma_use_iova(struct dma_iova_state *state)

Can be used by the driver to check if the IOVA-based API is used after a
call to dma_iova_try_alloc. This can be useful in the unmap path.

::

	int dma_iova_link(struct device *dev, struct dma_iova_state *state,
			phys_addr_t phys, size_t offset, size_t size,
			enum dma_data_direction dir, unsigned long attrs);

Is used to link ranges to the IOVA previously allocated. The start of all
but the first call to dma_iova_link for a given state must be aligned
to the DMA merge boundary returned by ``dma_get_merge_boundary())``, and
the size of all but the last range must be aligned to the DMA merge boundary
as well.

::

	int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
			size_t offset, size_t size);

Must be called to sync the IOMMU page tables for IOVA-range mapped by one or
more calls to ``dma_iova_link()``.

For drivers that use a one-shot mapping, all ranges can be unmapped and the
IOVA freed by calling:

::

	void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
			size_t mapped_len, enum dma_data_direction dir,
			unsigned long attrs);

Alternatively drivers can dynamically manage the IOVA space by unmapping
and mapping individual regions. In that case

::

	void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
			size_t offset, size_t size, enum dma_data_direction dir,
			unsigned long attrs);

is used to unmap a range previously mapped, and

::

	void dma_iova_free(struct device *dev, struct dma_iova_state *state);

is used to free the IOVA space. All regions must have been unmapped using
``dma_iova_unlink()`` before calling ``dma_iova_free()``.

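As a rough usage sketch (an editor's illustration, not part of the patch; the device, buffer, and fallback details are assumed), a driver doing a one-shot mapping with only the calls documented above might look like::

	/*
	 * Illustrative sketch only: map one physically contiguous buffer with
	 * the IOVA-based API, falling back when it is unavailable.
	 */
	static int example_one_shot_map(struct device *dev,
					struct dma_iova_state *state,
					phys_addr_t phys, size_t size,
					enum dma_data_direction dir)
	{
		int ret;

		if (!dma_iova_try_alloc(dev, state, phys, size))
			return -EOPNOTSUPP;	/* use the streaming API instead */

		/* Single range at offset 0; multi-range callers must respect
		 * the dma_get_merge_boundary() alignment rules above. */
		ret = dma_iova_link(dev, state, phys, 0, size, dir, 0);
		if (ret) {
			dma_iova_free(dev, state);
			return ret;
		}

		ret = dma_iova_sync(dev, state, 0, size);
		if (ret) {
			dma_iova_destroy(dev, state, size, dir, 0);
			return ret;
		}

		/* Tear down later with dma_iova_destroy(dev, state, size, dir, 0). */
		return 0;
	}
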

Part II - Non-coherent DMA allocations
--------------------------------------

@@ -12477,6 +12477,7 @@ F: Documentation/dev-tools/kunit/
F: include/kunit/
F: lib/kunit/
F: rust/kernel/kunit.rs
F: rust/macros/kunit.rs
F: scripts/rustdoc_test_*
F: tools/testing/kunit/

@@ -78,6 +78,9 @@

#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */

#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
#define MADV_GUARD_REMOVE 103 /* unguard range */

/* compatibility flags */
#define MAP_FILE 0

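For context, a minimal userspace sketch (an editor's illustration, not part of the patch) of the guard flags added above::

	/* Illustrative only: make the first page of an anonymous mapping a
	 * guard region, so any access to it raises a fatal signal. */
	#include <sys/mman.h>

	#ifndef MADV_GUARD_INSTALL
	#define MADV_GUARD_INSTALL 102		/* values from the hunk above */
	#define MADV_GUARD_REMOVE  103
	#endif

	static int guard_first_page(void *buf, long page_size)
	{
		/* MADV_GUARD_REMOVE on the same range undoes the guard. */
		return madvise(buf, page_size, MADV_GUARD_INSTALL);
	}
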
@@ -329,6 +329,7 @@ CONFIG_REGMAP_KUNIT=m
CONFIG_ARM_SCMI_PROTOCOL=y
CONFIG_ARM_SCMI_TRANSPORT_VIRTIO=y
CONFIG_ARM_SCPI_PROTOCOL=y
CONFIG_ARM_SDE_INTERFACE=y
# CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
CONFIG_GNSS=m
CONFIG_ZRAM=m

@@ -651,6 +652,7 @@ CONFIG_POWERCAP=y
CONFIG_IDLE_INJECT=y
CONFIG_ANDROID_BINDER_IPC=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_BINDER_IPC_RUST=m
CONFIG_ANDROID_VENDOR_HOOKS=y
CONFIG_ANDROID_DEBUG_KINFO=y
CONFIG_LIBNVDIMM=y

@@ -789,6 +791,8 @@ CONFIG_UBSAN_TRAP=y
CONFIG_PAGE_OWNER=y
CONFIG_PAGE_PINNER=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEM_ALLOC_PROFILING=y
# CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT is not set
CONFIG_KASAN=y
CONFIG_KASAN_HW_TAGS=y
CONFIG_KFENCE=y

@@ -370,12 +370,14 @@
/*
 * ISS values for SME traps
 */
#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0)
#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK)

#define ESR_ELx_SME_ISS_SME_DISABLED 0
#define ESR_ELx_SME_ISS_ILL 1
#define ESR_ELx_SME_ISS_SM_DISABLED 2
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
#define ESR_ELx_SME_ISS_ZT_DISABLED 4
#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0
#define ESR_ELx_SME_ISS_SMTC_ILL 1
#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2
#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3
#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4

/* ISS field definitions for MOPS exceptions */
#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)

@@ -6,6 +6,7 @@
|
||||
#define __ASM_FP_H
|
||||
|
||||
#include <asm/errno.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/sigcontext.h>
|
||||
@@ -76,7 +77,6 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state);
|
||||
extern void fpsimd_thread_switch(struct task_struct *next);
|
||||
extern void fpsimd_flush_thread(void);
|
||||
|
||||
extern void fpsimd_signal_preserve_current_state(void);
|
||||
extern void fpsimd_preserve_current_state(void);
|
||||
extern void fpsimd_restore_current_state(void);
|
||||
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
|
||||
@@ -94,9 +94,12 @@ struct cpu_fp_state {
|
||||
enum fp_type to_save;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
|
||||
|
||||
extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
|
||||
|
||||
extern void fpsimd_flush_task_state(struct task_struct *target);
|
||||
extern void fpsimd_save_and_flush_current_state(void);
|
||||
extern void fpsimd_save_and_flush_cpu_state(void);
|
||||
|
||||
static inline bool thread_sm_enabled(struct thread_struct *thread)
|
||||
@@ -109,6 +112,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread)
|
||||
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
|
||||
}
|
||||
|
||||
extern void task_smstop_sm(struct task_struct *task);
|
||||
|
||||
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
|
||||
#define VL_ARCH_MAX 0x100
|
||||
|
||||
@@ -196,10 +201,8 @@ struct vl_info {
|
||||
|
||||
extern void sve_alloc(struct task_struct *task, bool flush);
|
||||
extern void fpsimd_release_task(struct task_struct *task);
|
||||
extern void fpsimd_sync_to_sve(struct task_struct *task);
|
||||
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
|
||||
extern void sve_sync_to_fpsimd(struct task_struct *task);
|
||||
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
|
||||
extern void fpsimd_sync_from_effective_state(struct task_struct *task);
|
||||
extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);
|
||||
|
||||
extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
|
||||
unsigned long vl, unsigned long flags);
|
||||
@@ -293,14 +296,29 @@ static inline bool sve_vq_available(unsigned int vq)
|
||||
return vq_available(ARM64_VEC_SVE, vq);
|
||||
}
|
||||
|
||||
size_t sve_state_size(struct task_struct const *task);
|
||||
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
|
||||
{
|
||||
unsigned int vl = max(sve_vl, sme_vl);
|
||||
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
}
|
||||
|
||||
/*
|
||||
* Return how many bytes of memory are required to store the full SVE
|
||||
* state for task, given task's currently configured vector length.
|
||||
*/
|
||||
static inline size_t sve_state_size(struct task_struct const *task)
|
||||
{
|
||||
unsigned int sve_vl = task_get_sve_vl(task);
|
||||
unsigned int sme_vl = task_get_sme_vl(task);
|
||||
return __sve_state_size(sve_vl, sme_vl);
|
||||
}
|
||||
|
||||
#else /* ! CONFIG_ARM64_SVE */
|
||||
|
||||
static inline void sve_alloc(struct task_struct *task, bool flush) { }
|
||||
static inline void fpsimd_release_task(struct task_struct *task) { }
|
||||
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
|
||||
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
|
||||
static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
|
||||
static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }
|
||||
|
||||
static inline int sve_max_virtualisable_vl(void)
|
||||
{
|
||||
@@ -334,6 +352,11 @@ static inline void vec_update_vq_map(enum vec_type t) { }
|
||||
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
|
||||
static inline void sve_setup(void) { }
|
||||
|
||||
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline size_t sve_state_size(struct task_struct const *task)
|
||||
{
|
||||
return 0;
|
||||
@@ -386,6 +409,16 @@ extern int sme_set_current_vl(unsigned long arg);
|
||||
extern int sme_get_current_vl(void);
|
||||
extern void sme_suspend_exit(void);
|
||||
|
||||
static inline size_t __sme_state_size(unsigned int sme_vl)
|
||||
{
|
||||
size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
|
||||
|
||||
if (system_supports_sme2())
|
||||
size += ZT_SIG_REG_SIZE;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return how many bytes of memory are required to store the full SME
|
||||
* specific state for task, given task's currently configured vector
|
||||
@@ -393,15 +426,7 @@ extern void sme_suspend_exit(void);
|
||||
*/
|
||||
static inline size_t sme_state_size(struct task_struct const *task)
|
||||
{
|
||||
unsigned int vl = task_get_sme_vl(task);
|
||||
size_t size;
|
||||
|
||||
size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
|
||||
if (system_supports_sme2())
|
||||
size += ZT_SIG_REG_SIZE;
|
||||
|
||||
return size;
|
||||
return __sme_state_size(task_get_sme_vl(task));
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -422,6 +447,11 @@ static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
|
||||
static inline int sme_get_current_vl(void) { return -EINVAL; }
|
||||
static inline void sme_suspend_exit(void) { }
|
||||
|
||||
static inline size_t __sme_state_size(unsigned int sme_vl)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline size_t sme_state_size(struct task_struct const *task)
|
||||
{
|
||||
return 0;
|
||||
|
||||
@@ -88,7 +88,9 @@ struct kvm_hyp_memcache {
|
||||
phys_addr_t head;
|
||||
unsigned long nr_pages;
|
||||
unsigned long flags;
|
||||
struct pkvm_mapping *mapping; /* only used from EL1 */
|
||||
void *mapping; /* struct pkvm_mapping *, only used from EL1 */
|
||||
ANDROID_KABI_RESERVE(1);
|
||||
ANDROID_KABI_RESERVE(2);
|
||||
};
|
||||
|
||||
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
|
||||
@@ -299,6 +301,7 @@ struct kvm_protected_vm {
|
||||
gpa_t pvmfw_load_addr;
|
||||
bool enabled;
|
||||
u32 ffa_support;
|
||||
bool smc_forwarded;
|
||||
};
|
||||
|
||||
struct kvm_mpidr_data {
|
||||
@@ -660,23 +663,13 @@ struct kvm_host_data {
|
||||
struct kvm_cpu_context host_ctxt;
|
||||
|
||||
/*
|
||||
* All pointers in this union are hyp VA.
|
||||
* Hyp VA.
|
||||
* sve_state is only used in pKVM and if system_supports_sve().
|
||||
*/
|
||||
union {
|
||||
struct user_fpsimd_state *fpsimd_state;
|
||||
struct cpu_sve_state *sve_state;
|
||||
};
|
||||
struct cpu_sve_state *sve_state;
|
||||
|
||||
union {
|
||||
/* HYP VA pointer to the host storage for FPMR */
|
||||
u64 *fpmr_ptr;
|
||||
/*
|
||||
* Used by pKVM only, as it needs to provide storage
|
||||
* for the host
|
||||
*/
|
||||
u64 fpmr;
|
||||
};
|
||||
/* Used by pKVM only. */
|
||||
u64 fpmr;
|
||||
|
||||
/* Ownership of the FP regs */
|
||||
enum {
|
||||
@@ -1070,10 +1063,6 @@ struct kvm_vcpu_arch {
|
||||
/* pKVM host vcpu state is dirty, needs resync (nVHE-only) */
|
||||
#define PKVM_HOST_STATE_DIRTY __vcpu_single_flag(iflags, BIT(7))
|
||||
|
||||
/* SVE enabled for host EL0 */
|
||||
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
|
||||
/* SME enabled for EL0 */
|
||||
#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
|
||||
/* Physical CPU not in supported_cpus */
|
||||
#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
|
||||
/* WFIT instruction trapped */
|
||||
|
||||
@@ -26,7 +26,7 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm);
|
||||
bool pkvm_is_hyp_created(struct kvm *kvm);
|
||||
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
|
||||
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa);
|
||||
|
||||
int pvkm_enable_smc_forwarding(struct file *kvm_file);
|
||||
/*
|
||||
* This functions as an allow-list of protected VM capabilities.
|
||||
* Features not explicitly allowed by this function are denied.
|
||||
|
||||
@@ -20,6 +20,11 @@ enum pkvm_psci_notification {
|
||||
PKVM_PSCI_CPU_ENTRY,
|
||||
};
|
||||
|
||||
struct pkvm_sglist_page {
|
||||
u64 pfn : 40;
|
||||
u8 order;
|
||||
} __packed;
|
||||
|
||||
/**
|
||||
* struct pkvm_module_ops - pKVM modules callbacks
|
||||
* @create_private_mapping: Map a memory region into the hypervisor private
|
||||
@@ -107,6 +112,10 @@ enum pkvm_psci_notification {
|
||||
* order depends on the registration order. If no
|
||||
* handler return True, the SMC is forwarded to
|
||||
* EL3.
|
||||
* @register_guest_smc_handler: @cb is called when guest identified by the
|
||||
* pkvm_handle issues an SMC that pKVM couldn't
|
||||
* handle. If @cb returns false, then unsupported
|
||||
* operation error is returned back to the guest.
|
||||
* @register_default_trap_handler:
|
||||
* @cb is called whenever EL2 traps EL1 and pKVM
|
||||
* has not handled it. If @cb returns false, the
|
||||
@@ -132,6 +141,9 @@ enum pkvm_psci_notification {
|
||||
* full control is given to the hypervisor.
|
||||
* @host_donate_hyp_prot: As host_donate_hyp_prot, but this variant sets
|
||||
* the prot of the hyp.
|
||||
* @host_donate_sglist_hyp: Similar to host_donate_hyp but take an array of PFNs
|
||||
* (kvm_sglist_page) as an argument. This intends to
|
||||
* batch IOMMU updates.
|
||||
* @hyp_donate_host: The page @pfn whom control has previously been
|
||||
* given to the hypervisor (@host_donate_hyp) is
|
||||
* given back to the host.
|
||||
@@ -218,6 +230,9 @@ struct pkvm_module_ops {
|
||||
int (*host_stage2_enable_lazy_pte)(u64 addr, u64 nr_pages);
|
||||
int (*host_stage2_disable_lazy_pte)(u64 addr, u64 nr_pages);
|
||||
int (*register_host_smc_handler)(bool (*cb)(struct user_pt_regs *));
|
||||
int (*register_guest_smc_handler)(bool (*cb)(struct arm_smccc_1_2_regs *regs,
|
||||
struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle));
|
||||
int (*register_default_trap_handler)(bool (*cb)(struct user_pt_regs *));
|
||||
int (*register_illegal_abt_notifier)(void (*cb)(struct user_pt_regs *));
|
||||
int (*register_psci_notifier)(void (*cb)(enum pkvm_psci_notification, struct user_pt_regs *));
|
||||
@@ -225,6 +240,7 @@ struct pkvm_module_ops {
|
||||
int (*register_unmask_serror)(bool (*unmask)(void), void (*mask)(void));
|
||||
int (*host_donate_hyp)(u64 pfn, u64 nr_pages, bool accept_mmio);
|
||||
int (*host_donate_hyp_prot)(u64 pfn, u64 nr_pages, bool accept_mmio, enum kvm_pgtable_prot prot);
|
||||
int (*host_donate_sglist_hyp)(struct pkvm_sglist_page *sglist, size_t nr_pages);
|
||||
int (*hyp_donate_host)(u64 pfn, u64 nr_pages);
|
||||
int (*host_share_hyp)(u64 pfn);
|
||||
int (*host_unshare_hyp)(u64 pfn);
|
||||
|
||||
@@ -396,20 +396,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
|
||||
* As per the ABI exit SME streaming mode and clear the SVE state not
|
||||
* shared with FPSIMD on syscall entry.
|
||||
*/
|
||||
static inline void fp_user_discard(void)
|
||||
static inline void fpsimd_syscall_enter(void)
|
||||
{
|
||||
/*
|
||||
* If SME is active then exit streaming mode. If ZA is active
|
||||
* then flush the SVE registers but leave userspace access to
|
||||
* both SVE and SME enabled, otherwise disable SME for the
|
||||
* task and fall through to disabling SVE too. This means
|
||||
* that after a syscall we never have any streaming mode
|
||||
* register state to track, if this changes the KVM code will
|
||||
* need updating.
|
||||
*/
|
||||
/* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
|
||||
if (system_supports_sme())
|
||||
sme_smstop_sm();
|
||||
|
||||
/*
|
||||
* The CPU is not in streaming mode. If non-streaming SVE is not
|
||||
* supported, there is no SVE state that needs to be discarded.
|
||||
*/
|
||||
if (!system_supports_sve())
|
||||
return;
|
||||
|
||||
@@ -419,6 +415,33 @@ static inline void fp_user_discard(void)
|
||||
sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
|
||||
sve_flush_live(true, sve_vq_minus_one);
|
||||
}
|
||||
|
||||
/*
|
||||
* Any live non-FPSIMD SVE state has been zeroed. Allow
|
||||
* fpsimd_save_user_state() to lazily discard SVE state until either
|
||||
* the live state is unbound or fpsimd_syscall_exit() is called.
|
||||
*/
|
||||
__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
|
||||
}
|
||||
|
||||
static __always_inline void fpsimd_syscall_exit(void)
|
||||
{
|
||||
if (!system_supports_sve())
|
||||
return;
|
||||
|
||||
/*
|
||||
* The current task's user FPSIMD/SVE/SME state is now bound to this
|
||||
* CPU. The fpsimd_last_state.to_save value is either:
|
||||
*
|
||||
* - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
|
||||
* since fpsimd_syscall_enter().
|
||||
*
|
||||
* - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
|
||||
* any point.
|
||||
*
|
||||
* Reset this to FP_STATE_CURRENT to stop lazy discarding.
|
||||
*/
|
||||
__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
|
||||
}
|
||||
|
||||
UNHANDLED(el1t, 64, sync)
|
||||
@@ -710,10 +733,11 @@ static void noinstr el0_svc(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode(regs);
|
||||
cortex_a76_erratum_1463225_svc_handler();
|
||||
fp_user_discard();
|
||||
fpsimd_syscall_enter();
|
||||
local_daif_restore(DAIF_PROCCTX);
|
||||
do_el0_svc(regs);
|
||||
exit_to_user_mode(regs);
|
||||
fpsimd_syscall_exit();
|
||||
}
|
||||
|
||||
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
|
||||
|
||||
@@ -119,7 +119,7 @@
|
||||
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
|
||||
*/
|
||||
|
||||
static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
|
||||
DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
|
||||
|
||||
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
|
||||
#ifdef CONFIG_ARM64_SVE
|
||||
@@ -359,20 +359,15 @@ static void task_fpsimd_load(void)
|
||||
WARN_ON(preemptible());
|
||||
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
|
||||
|
||||
if (system_supports_fpmr())
|
||||
write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
|
||||
|
||||
if (system_supports_sve() || system_supports_sme()) {
|
||||
switch (current->thread.fp_type) {
|
||||
case FP_STATE_FPSIMD:
|
||||
/* Stop tracking SVE for this task until next use. */
|
||||
if (test_and_clear_thread_flag(TIF_SVE))
|
||||
sve_user_disable();
|
||||
clear_thread_flag(TIF_SVE);
|
||||
break;
|
||||
case FP_STATE_SVE:
|
||||
if (!thread_sm_enabled(¤t->thread) &&
|
||||
!WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
|
||||
sve_user_enable();
|
||||
if (!thread_sm_enabled(¤t->thread))
|
||||
WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
|
||||
|
||||
if (test_thread_flag(TIF_SVE))
|
||||
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
|
||||
@@ -413,6 +408,9 @@ static void task_fpsimd_load(void)
|
||||
restore_ffr = system_supports_fa64();
|
||||
}
|
||||
|
||||
if (system_supports_fpmr())
|
||||
write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
|
||||
|
||||
if (restore_sve_regs) {
|
||||
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
|
||||
sve_load_state(sve_pffr(¤t->thread),
|
||||
@@ -453,12 +451,15 @@ static void fpsimd_save_user_state(void)
|
||||
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
|
||||
|
||||
/*
|
||||
* If a task is in a syscall the ABI allows us to only
|
||||
* preserve the state shared with FPSIMD so don't bother
|
||||
* saving the full SVE state in that case.
|
||||
* Save SVE state if it is live.
|
||||
*
|
||||
* The syscall ABI discards live SVE state at syscall entry. When
|
||||
* entering a syscall, fpsimd_syscall_enter() sets to_save to
|
||||
* FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
|
||||
* either new SVE state is loaded+bound or fpsimd_syscall_exit() is
|
||||
* called prior to a return to userspace.
|
||||
*/
|
||||
if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
|
||||
!in_syscall(current_pt_regs())) ||
|
||||
if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
|
||||
last->to_save == FP_STATE_SVE) {
|
||||
save_sve_regs = true;
|
||||
save_ffr = true;
|
||||
@@ -651,7 +652,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
|
||||
* task->thread.uw.fpsimd_state must be up to date before calling this
|
||||
* function.
|
||||
*/
|
||||
static void fpsimd_to_sve(struct task_struct *task)
|
||||
static inline void fpsimd_to_sve(struct task_struct *task)
|
||||
{
|
||||
unsigned int vq;
|
||||
void *sst = task->thread.sve_state;
|
||||
@@ -675,7 +676,7 @@ static void fpsimd_to_sve(struct task_struct *task)
|
||||
* bytes of allocated kernel memory.
|
||||
* task->thread.sve_state must be up to date before calling this function.
|
||||
*/
|
||||
static void sve_to_fpsimd(struct task_struct *task)
|
||||
static inline void sve_to_fpsimd(struct task_struct *task)
|
||||
{
|
||||
unsigned int vq, vl;
|
||||
void const *sst = task->thread.sve_state;
|
||||
@@ -694,6 +695,28 @@ static void sve_to_fpsimd(struct task_struct *task)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
|
||||
{
|
||||
memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
|
||||
}
|
||||
|
||||
/*
|
||||
* Simulate the effects of an SMSTOP SM instruction.
|
||||
*/
|
||||
void task_smstop_sm(struct task_struct *task)
|
||||
{
|
||||
if (!thread_sm_enabled(&task->thread))
|
||||
return;
|
||||
|
||||
__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
|
||||
task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
|
||||
if (system_supports_fpmr())
|
||||
task->thread.uw.fpmr = 0;
|
||||
|
||||
task->thread.svcr &= ~SVCR_SM_MASK;
|
||||
task->thread.fp_type = FP_STATE_FPSIMD;
|
||||
}
|
||||
|
||||
void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
|
||||
{
|
||||
write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
|
||||
@@ -701,39 +724,12 @@ void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_SVE
|
||||
/*
|
||||
* Call __sve_free() directly only if you know task can't be scheduled
|
||||
* or preempted.
|
||||
*/
|
||||
static void __sve_free(struct task_struct *task)
|
||||
static void sve_free(struct task_struct *task)
|
||||
{
|
||||
kfree(task->thread.sve_state);
|
||||
task->thread.sve_state = NULL;
|
||||
}
|
||||
|
||||
static void sve_free(struct task_struct *task)
|
||||
{
|
||||
WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
|
||||
|
||||
__sve_free(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return how many bytes of memory are required to store the full SVE
|
||||
* state for task, given task's currently configured vector length.
|
||||
*/
|
||||
size_t sve_state_size(struct task_struct const *task)
|
||||
{
|
||||
unsigned int vl = 0;
|
||||
|
||||
if (system_supports_sve())
|
||||
vl = task_get_sve_vl(task);
|
||||
if (system_supports_sme())
|
||||
vl = max(vl, task_get_sme_vl(task));
|
||||
|
||||
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that task->thread.sve_state is allocated and sufficiently large.
|
||||
*
|
||||
@@ -758,69 +754,34 @@ void sve_alloc(struct task_struct *task, bool flush)
|
||||
kzalloc(sve_state_size(task), GFP_KERNEL);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Force the FPSIMD state shared with SVE to be updated in the SVE state
|
||||
* even if the SVE state is the current active state.
|
||||
* Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
|
||||
* task's currently effective FPSIMD/SVE state.
|
||||
*
|
||||
* This should only be called by ptrace. task must be non-runnable.
|
||||
* task->thread.sve_state must point to at least sve_state_size(task)
|
||||
* bytes of allocated kernel memory.
|
||||
* The task's FPSIMD/SVE/SME state must not be subject to concurrent
|
||||
* manipulation.
|
||||
*/
|
||||
void fpsimd_force_sync_to_sve(struct task_struct *task)
|
||||
{
|
||||
fpsimd_to_sve(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that task->thread.sve_state is up to date with respect to
|
||||
* the user task, irrespective of when SVE is in use or not.
|
||||
*
|
||||
* This should only be called by ptrace. task must be non-runnable.
|
||||
* task->thread.sve_state must point to at least sve_state_size(task)
|
||||
* bytes of allocated kernel memory.
|
||||
*/
|
||||
void fpsimd_sync_to_sve(struct task_struct *task)
|
||||
{
|
||||
if (!test_tsk_thread_flag(task, TIF_SVE) &&
|
||||
!thread_sm_enabled(&task->thread))
|
||||
fpsimd_to_sve(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that task->thread.uw.fpsimd_state is up to date with respect to
|
||||
* the user task, irrespective of whether SVE is in use or not.
|
||||
*
|
||||
* This should only be called by ptrace. task must be non-runnable.
|
||||
* task->thread.sve_state must point to at least sve_state_size(task)
|
||||
* bytes of allocated kernel memory.
|
||||
*/
|
||||
void sve_sync_to_fpsimd(struct task_struct *task)
|
||||
void fpsimd_sync_from_effective_state(struct task_struct *task)
|
||||
{
|
||||
if (task->thread.fp_type == FP_STATE_SVE)
|
||||
sve_to_fpsimd(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that task->thread.sve_state is up to date with respect to
|
||||
* the task->thread.uw.fpsimd_state.
|
||||
* Ensure that the task's currently effective FPSIMD/SVE state is up to date
|
||||
* with respect to task->thread.uw.fpsimd_state, zeroing any effective
|
||||
* non-FPSIMD (S)SVE state.
|
||||
*
|
||||
* This should only be called by ptrace to merge new FPSIMD register
|
||||
* values into a task for which SVE is currently active.
|
||||
* task must be non-runnable.
|
||||
* task->thread.sve_state must point to at least sve_state_size(task)
|
||||
* bytes of allocated kernel memory.
|
||||
* task->thread.uw.fpsimd_state must already have been initialised with
|
||||
* the new FPSIMD register values to be merged in.
|
||||
* The task's FPSIMD/SVE/SME state must not be subject to concurrent
|
||||
* manipulation.
|
||||
*/
|
||||
void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
|
||||
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
|
||||
{
|
||||
unsigned int vq;
|
||||
void *sst = task->thread.sve_state;
|
||||
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
|
||||
|
||||
if (!test_tsk_thread_flag(task, TIF_SVE) &&
|
||||
!thread_sm_enabled(&task->thread))
|
||||
if (task->thread.fp_type != FP_STATE_SVE)
|
||||
return;
|
||||
|
||||
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
|
||||
@@ -829,10 +790,73 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
|
||||
__fpsimd_to_sve(sst, fst, vq);
|
||||
}
|
||||
|
||||
static int change_live_vector_length(struct task_struct *task,
|
||||
enum vec_type type,
|
||||
unsigned long vl)
|
||||
{
|
||||
unsigned int sve_vl = task_get_sve_vl(task);
|
||||
unsigned int sme_vl = task_get_sme_vl(task);
|
||||
void *sve_state = NULL, *sme_state = NULL;
|
||||
|
||||
if (type == ARM64_VEC_SME)
|
||||
sme_vl = vl;
|
||||
else
|
||||
sve_vl = vl;
|
||||
|
||||
/*
|
||||
* Allocate the new sve_state and sme_state before freeing the old
|
||||
* copies so that allocation failure can be handled without needing to
|
||||
* mutate the task's state in any way.
|
||||
*
|
||||
* Changes to the SVE vector length must not discard live ZA state or
|
||||
* clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
|
||||
* ZA lazy saving scheme may attempt to change the SVE vector length
|
||||
* while unsaved/dormant ZA state exists.
|
||||
*/
|
||||
sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
|
||||
if (!sve_state)
|
||||
goto out_mem;
|
||||
|
||||
if (type == ARM64_VEC_SME) {
|
||||
sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
|
||||
if (!sme_state)
|
||||
goto out_mem;
|
||||
}
|
||||
|
||||
if (task == current)
|
||||
fpsimd_save_and_flush_current_state();
|
||||
else
|
||||
fpsimd_flush_task_state(task);
|
||||
|
||||
/*
|
||||
* Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
|
||||
* other SVE state.
|
||||
*/
|
||||
fpsimd_sync_from_effective_state(task);
|
||||
task_set_vl(task, type, vl);
|
||||
kfree(task->thread.sve_state);
|
||||
task->thread.sve_state = sve_state;
|
||||
fpsimd_sync_to_effective_state_zeropad(task);
|
||||
|
||||
if (type == ARM64_VEC_SME) {
|
||||
task->thread.svcr &= ~SVCR_ZA_MASK;
|
||||
kfree(task->thread.sme_state);
|
||||
task->thread.sme_state = sme_state;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_mem:
|
||||
kfree(sve_state);
|
||||
kfree(sme_state);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
|
||||
unsigned long vl, unsigned long flags)
|
||||
{
|
||||
bool free_sme = false;
|
||||
bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
|
||||
bool inherit = flags & PR_SVE_VL_INHERIT;
|
||||
|
||||
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
|
||||
PR_SVE_SET_VL_ONEXEC))
|
||||
@@ -852,71 +876,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
|
||||
|
||||
vl = find_supported_vector_length(type, vl);
|
||||
|
||||
if (flags & (PR_SVE_VL_INHERIT |
|
||||
PR_SVE_SET_VL_ONEXEC))
|
||||
if (!onexec && vl != task_get_vl(task, type)) {
|
||||
if (change_live_vector_length(task, type, vl))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (onexec || inherit)
|
||||
task_set_vl_onexec(task, type, vl);
|
||||
else
|
||||
/* Reset VL to system default on next exec: */
|
||||
task_set_vl_onexec(task, type, 0);
|
||||
|
||||
/* Only actually set the VL if not deferred: */
|
||||
if (flags & PR_SVE_SET_VL_ONEXEC)
|
||||
goto out;
|
||||
|
||||
if (vl == task_get_vl(task, type))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* To ensure the FPSIMD bits of the SVE vector registers are preserved,
|
||||
* write any live register state back to task_struct, and convert to a
|
||||
* regular FPSIMD thread.
|
||||
*/
|
||||
if (task == current) {
|
||||
get_cpu_fpsimd_context();
|
||||
|
||||
fpsimd_save_user_state();
|
||||
}
|
||||
|
||||
fpsimd_flush_task_state(task);
|
||||
if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
|
||||
thread_sm_enabled(&task->thread)) {
|
||||
sve_to_fpsimd(task);
|
||||
task->thread.fp_type = FP_STATE_FPSIMD;
|
||||
}
|
||||
|
||||
if (system_supports_sme()) {
|
||||
if (type == ARM64_VEC_SME ||
|
||||
!(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
|
||||
/*
|
||||
* We are changing the SME VL or weren't using
|
||||
* SME anyway, discard the state and force a
|
||||
* reallocation.
|
||||
*/
|
||||
task->thread.svcr &= ~(SVCR_SM_MASK |
|
||||
SVCR_ZA_MASK);
|
||||
clear_tsk_thread_flag(task, TIF_SME);
|
||||
free_sme = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (task == current)
|
||||
put_cpu_fpsimd_context();
|
||||
|
||||
task_set_vl(task, type, vl);
|
||||
|
||||
/*
|
||||
* Free the changed states if they are not in use, SME will be
|
||||
* reallocated to the correct size on next use and we just
|
||||
* allocate SVE now in case it is needed for use in streaming
|
||||
* mode.
|
||||
*/
|
||||
sve_free(task);
|
||||
sve_alloc(task, true);
|
||||
|
||||
if (free_sme)
|
||||
sme_free(task);
|
||||
|
||||
out:
|
||||
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
|
||||
flags & PR_SVE_VL_INHERIT);
|
||||
|
||||
@@ -1212,7 +1182,7 @@ void __init sve_setup(void)
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	__sve_free(dead_task);
	sve_free(dead_task);
	sme_free(dead_task);
}

@@ -1436,7 +1406,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
	 * If this not a trap due to SME being disabled then something
	 * is being used in the wrong mode, report as SIGILL.
	 */
	if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
	if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}
@@ -1460,6 +1430,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
		sme_set_vq(vq_minus_one);

		fpsimd_bind_task_to_cpu();
	} else {
		fpsimd_flush_task_state(current);
	}

	put_cpu_fpsimd_context();
@@ -1573,8 +1545,8 @@ void fpsimd_thread_switch(struct task_struct *next)
	fpsimd_save_user_state();

	if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
		fpsimd_load_kernel_state(next);
		fpsimd_flush_cpu_state();
		fpsimd_load_kernel_state(next);
	} else {
		/*
		 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
@@ -1661,6 +1633,9 @@ void fpsimd_flush_thread(void)
		current->thread.svcr = 0;
	}

	if (system_supports_fpmr())
		current->thread.uw.fpmr = 0;

	current->thread.fp_type = FP_STATE_FPSIMD;

	put_cpu_fpsimd_context();
@@ -1682,43 +1657,6 @@ void fpsimd_preserve_current_state(void)
	put_cpu_fpsimd_context();
}

/*
 * Like fpsimd_preserve_current_state(), but ensure that
 * current->thread.uw.fpsimd_state is updated so that it can be copied to
 * the signal frame.
 */
void fpsimd_signal_preserve_current_state(void)
{
	fpsimd_preserve_current_state();
	if (current->thread.fp_type == FP_STATE_SVE)
		sve_to_fpsimd(current);
}

/*
 * Called by KVM when entering the guest.
 */
void fpsimd_kvm_prepare(void)
{
	if (!system_supports_sve())
		return;

	/*
	 * KVM does not save host SVE state since we can only enter
	 * the guest from a syscall so the ABI means that only the
	 * non-saved SVE state needs to be saved. If we have left
	 * SVE enabled for performance reasons then update the task
	 * state to be FPSIMD only.
	 */
	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_SVE)) {
		sve_to_fpsimd(current);
		current->thread.fp_type = FP_STATE_FPSIMD;
	}

	put_cpu_fpsimd_context();
}

/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
@@ -1811,30 +1749,14 @@ void fpsimd_restore_current_state(void)
	put_cpu_fpsimd_context();
}

/*
 * Load an updated userland FPSIMD state for 'current' from memory and set the
 * flag that indicates that the FPSIMD register contents are the most recent
 * FPSIMD state of 'current'. This is used by the signal code to restore the
 * register state when returning from a signal handler in FPSIMD only cases,
 * any SVE context will be discarded.
 */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	get_cpu_fpsimd_context();

	current->thread.uw.fpsimd_state = *state;
	if (test_thread_flag(TIF_SVE))
	if (current->thread.fp_type == FP_STATE_SVE)
		fpsimd_to_sve(current);

	task_fpsimd_load();
	fpsimd_bind_task_to_cpu();

	clear_thread_flag(TIF_FOREIGN_FPSTATE);

	put_cpu_fpsimd_context();
}

/*
@@ -1864,6 +1786,17 @@ void fpsimd_flush_task_state(struct task_struct *t)
	barrier();
}

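/*
 * Save current's user FPSIMD/SVE/SME state to the task struct and invalidate
 * the CPU's view of it, so that the state is reloaded from memory on the
 * next return to userspace.
 */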
void fpsimd_save_and_flush_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save_user_state();
	fpsimd_flush_task_state(current);
	put_cpu_fpsimd_context();
}

/*
 * Save the FPSIMD state to memory and invalidate cpu view.
 * This function must be called with preemption disabled.

@@ -399,17 +399,6 @@ void __init init_feature_override(u64 boot_status, const void *fdt,

	parse_cmdline(fdt, chosen);

	/*
	 * ANDROID: Forcefully disable SME at runtime until it is fixed
	 * upstream (b/393087661). We prefer this to disabling
	 * CONFIG_ARM64_SME so that the impact of the fixes on KMI is
	 * minimised.
	 */
	id_aa64pfr1_override.mask |= ID_AA64PFR1_EL1_SME;
	id_aa64pfr1_override.val &= ~ID_AA64PFR1_EL1_SME;
	id_aa64smfr0_override.mask = GENMASK(63, 0);
	id_aa64smfr0_override.val = 0;

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		reg = prel64_pointer(regs[i].reg);
		override = prel64_pointer(reg->override);

@@ -299,50 +299,34 @@ void arch_release_task_struct(struct task_struct *tsk)

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	if (current->mm)
		fpsimd_preserve_current_state();
	/*
	 * The current/src task's FPSIMD state may or may not be live, and may
	 * have been altered by ptrace after entry to the kernel. Save the
	 * effective FPSIMD state so that this will be copied into dst.
	 */
	fpsimd_save_and_flush_current_state();
	fpsimd_sync_from_effective_state(src);

	*dst = *src;

	/*
	 * Detach src's sve_state (if any) from dst so that it does not
	 * get erroneously used or freed prematurely. dst's copies
	 * will be allocated on demand later on if dst uses SVE.
	 * For consistency, also clear TIF_SVE here: this could be done
	 * later in copy_process(), but to avoid tripping up future
	 * maintainers it is best not to leave TIF flags and buffers in
	 * an inconsistent state, even temporarily.
	 * Drop stale reference to src's sve_state and convert dst to
	 * non-streaming FPSIMD mode.
	 */
	dst->thread.fp_type = FP_STATE_FPSIMD;
	dst->thread.sve_state = NULL;
	clear_tsk_thread_flag(dst, TIF_SVE);
	task_smstop_sm(dst);

	/*
	 * In the unlikely event that we create a new thread with ZA
	 * enabled we should retain the ZA and ZT state so duplicate
	 * it here. This may be shortly freed if we exec() or if
	 * CLONE_SETTLS but it's simpler to do it here. To avoid
	 * confusing the rest of the code ensure that we have a
	 * sve_state allocated whenever sme_state is allocated.
	 * Drop stale reference to src's sme_state and ensure dst has ZA
	 * disabled.
	 *
	 * When necessary, ZA will be inherited later in copy_thread_za().
	 */
	if (thread_za_enabled(&src->thread)) {
		dst->thread.sve_state = kzalloc(sve_state_size(src),
						GFP_KERNEL);
		if (!dst->thread.sve_state)
			return -ENOMEM;

		dst->thread.sme_state = kmemdup(src->thread.sme_state,
						sme_state_size(src),
						GFP_KERNEL);
		if (!dst->thread.sme_state) {
			kfree(dst->thread.sve_state);
			dst->thread.sve_state = NULL;
			return -ENOMEM;
		}
	} else {
		dst->thread.sme_state = NULL;
		clear_tsk_thread_flag(dst, TIF_SME);
	}

	dst->thread.fp_type = FP_STATE_FPSIMD;
	dst->thread.sme_state = NULL;
	clear_tsk_thread_flag(dst, TIF_SME);
	dst->thread.svcr &= ~SVCR_ZA_MASK;

	/* clear any pending asynchronous tag fault raised by the parent */
	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -350,6 +334,31 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
	return 0;
}

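/*
 * Duplicate the parent's live ZA/ZT state into a newly created child task:
 * backing sve_state and sme_state buffers are allocated for the child,
 * TIF_SME is set and ZA is marked enabled in the child's saved SVCR.
 * Returns 0 if the parent has no live ZA state or on success, and -ENOMEM
 * if allocation fails.
 */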
static int copy_thread_za(struct task_struct *dst, struct task_struct *src)
{
	if (!thread_za_enabled(&src->thread))
		return 0;

	dst->thread.sve_state = kzalloc(sve_state_size(src),
					GFP_KERNEL);
	if (!dst->thread.sve_state)
		return -ENOMEM;

	dst->thread.sme_state = kmemdup(src->thread.sme_state,
					sme_state_size(src),
					GFP_KERNEL);
	if (!dst->thread.sme_state) {
		kfree(dst->thread.sve_state);
		dst->thread.sve_state = NULL;
		return -ENOMEM;
	}

	set_tsk_thread_flag(dst, TIF_SME);
	dst->thread.svcr |= SVCR_ZA_MASK;

	return 0;
}

asmlinkage void ret_from_fork(void) asm("ret_from_fork");

int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -358,6 +367,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
	unsigned long stack_start = args->stack;
	unsigned long tls = args->tls;
	struct pt_regs *childregs = task_pt_regs(p);
	int ret;

	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

@@ -381,8 +391,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
		 * out-of-sync with the saved value.
		 */
		*task_user_tls(p) = read_sysreg(tpidr_el0);
		if (system_supports_tpidr2())
			p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);

		if (system_supports_poe())
			p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);

@@ -395,13 +403,39 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
		}

		/*
		 * If a TLS pointer was passed to clone, use it for the new
		 * thread. We also reset TPIDR2 if it's in use.
		 * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and
		 * TPIDR2 need to be manipulated as a pair, and either both
		 * need to be inherited or both need to be reset.
		 *
		 * Within a process, child threads must not inherit their
		 * parent's TPIDR2 value or they may clobber their parent's
		 * stack at some later point.
		 *
		 * When a process is fork()'d, the child must inherit ZA and
		 * TPIDR2 from its parent in case there was dormant ZA state.
		 *
		 * Use CLONE_VM to determine when the child will share the
		 * address space with the parent, and cannot safely inherit the
		 * state.
		 */
		if (clone_flags & CLONE_SETTLS) {
			p->thread.uw.tp_value = tls;
			p->thread.tpidr2_el0 = 0;
		if (system_supports_sme()) {
			if (!(clone_flags & CLONE_VM)) {
				p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
				ret = copy_thread_za(p, current);
				if (ret)
					return ret;
			} else {
				p->thread.tpidr2_el0 = 0;
				WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK);
			}
		}
|
||||
/*
|
||||
* If a TLS pointer was passed to clone, use it for the new
|
||||
* thread.
|
||||
*/
|
||||
if (clone_flags & CLONE_SETTLS)
|
||||
p->thread.uw.tp_value = tls;
|
||||
} else {
|
||||
/*
|
||||
* A kthread has no context to ERET to, so ensure any buggy
|
||||
|
||||
@@ -595,7 +595,7 @@ static int __fpr_get(struct task_struct *target,
|
||||
{
|
||||
struct user_fpsimd_state *uregs;
|
||||
|
||||
sve_sync_to_fpsimd(target);
|
||||
fpsimd_sync_from_effective_state(target);
|
||||
|
||||
uregs = &target->thread.uw.fpsimd_state;
|
||||
|
||||
@@ -627,7 +627,7 @@ static int __fpr_set(struct task_struct *target,
|
||||
* Ensure target->thread.uw.fpsimd_state is up to date, so that a
|
||||
* short copyin can't resurrect stale data.
|
||||
*/
|
||||
sve_sync_to_fpsimd(target);
|
||||
fpsimd_sync_from_effective_state(target);
|
||||
|
||||
newstate = target->thread.uw.fpsimd_state;
|
||||
|
||||
@@ -654,7 +654,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
sve_sync_from_fpsimd_zeropad(target);
|
||||
fpsimd_sync_to_effective_state_zeropad(target);
|
||||
fpsimd_flush_task_state(target);
|
||||
|
||||
return ret;
|
||||
@@ -776,6 +776,11 @@ static void sve_init_header_from_task(struct user_sve_header *header,
|
||||
task_type = ARM64_VEC_SVE;
|
||||
active = (task_type == type);
|
||||
|
||||
if (active && target->thread.fp_type == FP_STATE_SVE)
|
||||
header->flags = SVE_PT_REGS_SVE;
|
||||
else
|
||||
header->flags = SVE_PT_REGS_FPSIMD;
|
||||
|
||||
switch (type) {
|
||||
case ARM64_VEC_SVE:
|
||||
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
|
||||
@@ -790,19 +795,14 @@ static void sve_init_header_from_task(struct user_sve_header *header,
|
||||
return;
|
||||
}
|
||||
|
||||
if (active) {
|
||||
if (target->thread.fp_type == FP_STATE_FPSIMD) {
|
||||
header->flags |= SVE_PT_REGS_FPSIMD;
|
||||
} else {
|
||||
header->flags |= SVE_PT_REGS_SVE;
|
||||
}
|
||||
}
|
||||
|
||||
header->vl = task_get_vl(target, type);
|
||||
vq = sve_vq_from_vl(header->vl);
|
||||
|
||||
header->max_vl = vec_max_vl(type);
|
||||
header->size = SVE_PT_SIZE(vq, header->flags);
|
||||
if (active)
|
||||
header->size = SVE_PT_SIZE(vq, header->flags);
|
||||
else
|
||||
header->size = sizeof(header);
|
||||
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
|
||||
SVE_PT_REGS_SVE);
|
||||
}
|
||||
@@ -821,18 +821,25 @@ static int sve_get_common(struct task_struct *target,
|
||||
unsigned int vq;
|
||||
unsigned long start, end;
|
||||
|
||||
if (target == current)
|
||||
fpsimd_preserve_current_state();
|
||||
|
||||
/* Header */
|
||||
sve_init_header_from_task(&header, target, type);
|
||||
vq = sve_vq_from_vl(header.vl);
|
||||
|
||||
membuf_write(&to, &header, sizeof(header));
|
||||
|
||||
if (target == current)
|
||||
fpsimd_preserve_current_state();
|
||||
|
||||
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
|
||||
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
|
||||
|
||||
/*
|
||||
* When the requested vector type is not active, do not present data
|
||||
* from the other mode to userspace.
|
||||
*/
|
||||
if (header.size == sizeof(header))
|
||||
return 0;
|
||||
|
||||
switch ((header.flags & SVE_PT_REGS_MASK)) {
|
||||
case SVE_PT_REGS_FPSIMD:
|
||||
return __fpr_get(target, regset, to);
|
||||
@@ -860,7 +867,7 @@ static int sve_get_common(struct task_struct *target,
|
||||
return membuf_zero(&to, end - start);
|
||||
|
||||
default:
|
||||
return 0;
|
||||
BUILD_BUG();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -884,6 +891,9 @@ static int sve_set_common(struct task_struct *target,
|
||||
struct user_sve_header header;
|
||||
unsigned int vq;
|
||||
unsigned long start, end;
|
||||
bool fpsimd;
|
||||
|
||||
fpsimd_flush_task_state(target);
|
||||
|
||||
/* Header */
|
||||
if (count < sizeof(header))
|
||||
@@ -891,7 +901,16 @@ static int sve_set_common(struct task_struct *target,
|
||||
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
|
||||
0, sizeof(header));
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Streaming SVE data is always stored and presented in SVE format.
|
||||
* Require the user to provide SVE formatted data for consistency, and
|
||||
* to avoid the risk that we configure the task into an invalid state.
|
||||
*/
|
||||
fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD;
|
||||
if (fpsimd && type == ARM64_VEC_SME)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
|
||||
@@ -900,88 +919,68 @@ static int sve_set_common(struct task_struct *target,
|
||||
ret = vec_set_vector_length(target, type, header.vl,
|
||||
((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
/* Allocate SME storage if necessary, preserving any existing ZA/ZT state */
|
||||
if (type == ARM64_VEC_SME) {
|
||||
sme_alloc(target, false);
|
||||
if (!target->thread.sme_state)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Allocate SVE storage if necessary, zeroing any existing SVE state */
|
||||
if (!fpsimd) {
|
||||
sve_alloc(target, true);
|
||||
if (!target->thread.sve_state)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Actual VL set may be less than the user asked for: */
|
||||
vq = sve_vq_from_vl(task_get_vl(target, type));
|
||||
|
||||
/* Enter/exit streaming mode */
|
||||
if (system_supports_sme()) {
|
||||
u64 old_svcr = target->thread.svcr;
|
||||
|
||||
switch (type) {
|
||||
case ARM64_VEC_SVE:
|
||||
target->thread.svcr &= ~SVCR_SM_MASK;
|
||||
set_tsk_thread_flag(target, TIF_SVE);
|
||||
break;
|
||||
case ARM64_VEC_SME:
|
||||
target->thread.svcr |= SVCR_SM_MASK;
|
||||
|
||||
/*
|
||||
* Disable traps and ensure there is SME storage but
|
||||
* preserve any currently set values in ZA/ZT.
|
||||
*/
|
||||
sme_alloc(target, false);
|
||||
set_tsk_thread_flag(target, TIF_SME);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we switched then invalidate any existing SVE
|
||||
* state and ensure there's storage.
|
||||
*/
|
||||
if (target->thread.svcr != old_svcr)
|
||||
sve_alloc(target, true);
|
||||
}
|
||||
|
||||
/* Always zero V regs, FPSR, and FPCR */
|
||||
memset(¤t->thread.uw.fpsimd_state, 0,
|
||||
sizeof(current->thread.uw.fpsimd_state));
|
||||
|
||||
/* Registers: FPSIMD-only case */
|
||||
|
||||
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
|
||||
if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
|
||||
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
|
||||
SVE_PT_FPSIMD_OFFSET);
|
||||
if (fpsimd) {
|
||||
clear_tsk_thread_flag(target, TIF_SVE);
|
||||
target->thread.fp_type = FP_STATE_FPSIMD;
|
||||
goto out;
|
||||
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
|
||||
SVE_PT_FPSIMD_OFFSET);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise: no registers or full SVE case. For backwards
|
||||
* compatibility reasons we treat empty flags as SVE registers.
|
||||
*/
|
||||
/* Otherwise: no registers or full SVE case. */
|
||||
|
||||
target->thread.fp_type = FP_STATE_SVE;
|
||||
|
||||
/*
|
||||
* If setting a different VL from the requested VL and there is
|
||||
* register data, the data layout will be wrong: don't even
|
||||
* try to set the registers in this case.
|
||||
*/
|
||||
if (count && vq != sve_vq_from_vl(header.vl)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sve_alloc(target, true);
|
||||
if (!target->thread.sve_state) {
|
||||
ret = -ENOMEM;
|
||||
clear_tsk_thread_flag(target, TIF_SVE);
|
||||
target->thread.fp_type = FP_STATE_FPSIMD;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure target->thread.sve_state is up to date with target's
|
||||
* FPSIMD regs, so that a short copyin leaves trailing
|
||||
* registers unmodified. Only enable SVE if we are
|
||||
* configuring normal SVE, a system with streaming SVE may not
|
||||
* have normal SVE.
|
||||
*/
|
||||
fpsimd_sync_to_sve(target);
|
||||
if (type == ARM64_VEC_SVE)
|
||||
set_tsk_thread_flag(target, TIF_SVE);
|
||||
target->thread.fp_type = FP_STATE_SVE;
|
||||
if (count && vq != sve_vq_from_vl(header.vl))
|
||||
return -EIO;
|
||||
|
||||
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
|
||||
start = SVE_PT_SVE_OFFSET;
|
||||
@@ -990,7 +989,7 @@ static int sve_set_common(struct task_struct *target,
|
||||
target->thread.sve_state,
|
||||
start, end);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
start = end;
|
||||
end = SVE_PT_SVE_FPSR_OFFSET(vq);
|
||||
@@ -1006,8 +1005,6 @@ static int sve_set_common(struct task_struct *target,
|
||||
&target->thread.uw.fpsimd_state.fpsr,
|
||||
start, end);
|
||||
|
||||
out:
|
||||
fpsimd_flush_task_state(target);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -250,6 +250,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
|
||||
¤t->thread.uw.fpsimd_state;
|
||||
int err;
|
||||
|
||||
fpsimd_sync_from_effective_state(current);
|
||||
|
||||
/* copy the FP and status/control registers */
|
||||
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
|
||||
__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
|
||||
@@ -262,37 +264,46 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int restore_fpsimd_context(struct user_ctxs *user)
|
||||
static int read_fpsimd_context(struct user_fpsimd_state *fpsimd,
|
||||
struct user_ctxs *user)
|
||||
{
|
||||
struct user_fpsimd_state fpsimd;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
/* check the size information */
|
||||
if (user->fpsimd_size != sizeof(struct fpsimd_context))
|
||||
return -EINVAL;
|
||||
|
||||
/* copy the FP and status/control registers */
|
||||
err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
|
||||
sizeof(fpsimd.vregs));
|
||||
__get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
|
||||
__get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
|
||||
err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs),
|
||||
sizeof(fpsimd->vregs));
|
||||
__get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err);
|
||||
__get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err);
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int restore_fpsimd_context(struct user_ctxs *user)
|
||||
{
|
||||
struct user_fpsimd_state fpsimd;
|
||||
int err;
|
||||
|
||||
err = read_fpsimd_context(&fpsimd, user);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
clear_thread_flag(TIF_SVE);
|
||||
current->thread.svcr &= ~SVCR_SM_MASK;
|
||||
current->thread.fp_type = FP_STATE_FPSIMD;
|
||||
|
||||
/* load the hardware registers from the fpsimd_state structure */
|
||||
if (!err)
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int preserve_fpmr_context(struct fpmr_context __user *ctx)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
|
||||
|
||||
__put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
|
||||
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
|
||||
__put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
|
||||
@@ -310,7 +321,7 @@ static int restore_fpmr_context(struct user_ctxs *user)
|
||||
|
||||
__get_user_error(fpmr, &user->fpmr->fpmr, err);
|
||||
if (!err)
|
||||
write_sysreg_s(fpmr, SYS_FPMR);
|
||||
current->thread.uw.fpmr = fpmr;
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -372,11 +383,6 @@ static int preserve_sve_context(struct sve_context __user *ctx)
|
||||
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
|
||||
|
||||
if (vq) {
|
||||
/*
|
||||
* This assumes that the SVE state has already been saved to
|
||||
* the task struct by calling the function
|
||||
* fpsimd_signal_preserve_current_state().
|
||||
*/
|
||||
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
|
||||
current->thread.sve_state,
|
||||
SVE_SIG_REGS_SIZE(vq));
|
||||
@@ -391,6 +397,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
|
||||
unsigned int vl, vq;
|
||||
struct user_fpsimd_state fpsimd;
|
||||
u16 user_vl, flags;
|
||||
bool sm;
|
||||
|
||||
if (user->sve_size < sizeof(*user->sve))
|
||||
return -EINVAL;
|
||||
@@ -400,7 +407,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (flags & SVE_SIG_FLAG_SM) {
|
||||
sm = flags & SVE_SIG_FLAG_SM;
|
||||
if (sm) {
|
||||
if (!system_supports_sme())
|
||||
return -EINVAL;
|
||||
|
||||
@@ -420,28 +428,23 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
|
||||
if (user_vl != vl)
|
||||
return -EINVAL;
|
||||
|
||||
if (user->sve_size == sizeof(*user->sve)) {
|
||||
clear_thread_flag(TIF_SVE);
|
||||
current->thread.svcr &= ~SVCR_SM_MASK;
|
||||
current->thread.fp_type = FP_STATE_FPSIMD;
|
||||
goto fpsimd_only;
|
||||
}
|
||||
/*
|
||||
* Non-streaming SVE state may be preserved without an SVE payload, in
|
||||
* which case the SVE context only has a header with VL==0, and all
|
||||
* state can be restored from the FPSIMD context.
|
||||
*
|
||||
* Streaming SVE state is always preserved with an SVE payload. For
|
||||
* consistency and robustness, reject restoring streaming SVE state
|
||||
* without an SVE payload.
|
||||
*/
|
||||
if (!sm && user->sve_size == sizeof(*user->sve))
|
||||
return restore_fpsimd_context(user);
|
||||
|
||||
vq = sve_vq_from_vl(vl);
|
||||
|
||||
if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Careful: we are about __copy_from_user() directly into
|
||||
* thread.sve_state with preemption enabled, so protection is
|
||||
* needed to prevent a racing context switch from writing stale
|
||||
* registers back over the new data.
|
||||
*/
|
||||
|
||||
fpsimd_flush_task_state(current);
|
||||
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
|
||||
|
||||
sve_alloc(current, true);
|
||||
if (!current->thread.sve_state) {
|
||||
clear_thread_flag(TIF_SVE);
|
||||
@@ -461,19 +464,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
|
||||
set_thread_flag(TIF_SVE);
|
||||
current->thread.fp_type = FP_STATE_SVE;
|
||||
|
||||
fpsimd_only:
|
||||
/* copy the FP and status/control registers */
|
||||
/* restore_sigframe() already checked that user->fpsimd != NULL. */
|
||||
err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
|
||||
sizeof(fpsimd.vregs));
|
||||
__get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
|
||||
__get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
|
||||
err = read_fpsimd_context(&fpsimd, user);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* load the hardware registers from the fpsimd_state structure */
|
||||
if (!err)
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
/* Merge the FPSIMD registers into the SVE state */
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* ! CONFIG_ARM64_SVE */
|
||||
@@ -493,13 +491,12 @@ extern int preserve_sve_context(void __user *ctx);
|
||||
|
||||
static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
|
||||
{
|
||||
u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
|
||||
int err = 0;
|
||||
|
||||
current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
|
||||
|
||||
__put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
|
||||
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
|
||||
__put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
|
||||
__put_user_error(tpidr2_el0, &ctx->tpidr2, err);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -541,11 +538,6 @@ static int preserve_za_context(struct za_context __user *ctx)
|
||||
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
|
||||
|
||||
if (vq) {
|
||||
/*
|
||||
* This assumes that the ZA state has already been saved to
|
||||
* the task struct by calling the function
|
||||
* fpsimd_signal_preserve_current_state().
|
||||
*/
|
||||
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
|
||||
current->thread.sme_state,
|
||||
ZA_SIG_REGS_SIZE(vq));
|
||||
@@ -580,16 +572,6 @@ static int restore_za_context(struct user_ctxs *user)
|
||||
if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Careful: we are about __copy_from_user() directly into
|
||||
* thread.sme_state with preemption enabled, so protection is
|
||||
* needed to prevent a racing context switch from writing stale
|
||||
* registers back over the new data.
|
||||
*/
|
||||
|
||||
fpsimd_flush_task_state(current);
|
||||
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
|
||||
|
||||
sme_alloc(current, true);
|
||||
if (!current->thread.sme_state) {
|
||||
current->thread.svcr &= ~SVCR_ZA_MASK;
|
||||
@@ -627,11 +609,6 @@ static int preserve_zt_context(struct zt_context __user *ctx)
|
||||
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
|
||||
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
|
||||
|
||||
/*
|
||||
* This assumes that the ZT state has already been saved to
|
||||
* the task struct by calling the function
|
||||
* fpsimd_signal_preserve_current_state().
|
||||
*/
|
||||
err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
|
||||
thread_zt_state(¤t->thread),
|
||||
ZT_SIG_REGS_SIZE(1));
|
||||
@@ -657,16 +634,6 @@ static int restore_zt_context(struct user_ctxs *user)
|
||||
if (nregs != 1)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Careful: we are about __copy_from_user() directly into
|
||||
* thread.zt_state with preemption enabled, so protection is
|
||||
* needed to prevent a racing context switch from writing stale
|
||||
* registers back over the new data.
|
||||
*/
|
||||
|
||||
fpsimd_flush_task_state(current);
|
||||
/* From now, fpsimd_thread_switch() won't touch ZT in thread state */
|
||||
|
||||
err = __copy_from_user(thread_zt_state(¤t->thread),
|
||||
(char __user const *)user->zt +
|
||||
ZT_SIG_REGS_OFFSET,
|
||||
@@ -929,6 +896,8 @@ static int restore_sigframe(struct pt_regs *regs,
|
||||
*/
|
||||
forget_syscall(regs);
|
||||
|
||||
fpsimd_save_and_flush_current_state();
|
||||
|
||||
err |= !valid_user_regs(®s->user_regs, current);
|
||||
if (err == 0)
|
||||
err = parse_user_sigframe(&user, sf);
|
||||
@@ -1280,21 +1249,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
|
||||
|
||||
/* Signal handlers are invoked with ZA and streaming mode disabled */
|
||||
if (system_supports_sme()) {
|
||||
/*
|
||||
* If we were in streaming mode the saved register
|
||||
* state was SVE but we will exit SM and use the
|
||||
* FPSIMD register state - flush the saved FPSIMD
|
||||
* register state in case it gets loaded.
|
||||
*/
|
||||
if (current->thread.svcr & SVCR_SM_MASK) {
|
||||
memset(¤t->thread.uw.fpsimd_state, 0,
|
||||
sizeof(current->thread.uw.fpsimd_state));
|
||||
current->thread.fp_type = FP_STATE_FPSIMD;
|
||||
}
|
||||
|
||||
current->thread.svcr &= ~(SVCR_ZA_MASK |
|
||||
SVCR_SM_MASK);
|
||||
sme_smstop();
|
||||
task_smstop_sm(current);
|
||||
current->thread.svcr &= ~SVCR_ZA_MASK;
|
||||
write_sysreg_s(0, SYS_TPIDR2_EL0);
|
||||
}
|
||||
|
||||
if (ka->sa.sa_flags & SA_RESTORER)
|
||||
@@ -1313,7 +1270,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
|
||||
struct user_access_state ua_state;
|
||||
int err = 0;
|
||||
|
||||
fpsimd_signal_preserve_current_state();
|
||||
fpsimd_save_and_flush_current_state();
|
||||
|
||||
if (get_sigframe(&user, ksig, regs))
|
||||
return 1;
|
||||
|
||||
@@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
|
||||
* Note that this also saves V16-31, which aren't visible
|
||||
* in AArch32.
|
||||
*/
|
||||
fpsimd_signal_preserve_current_state();
|
||||
fpsimd_save_and_flush_current_state();
|
||||
|
||||
/* Place structure header on the stack */
|
||||
__put_user_error(magic, &frame->magic, err);
|
||||
@@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
|
||||
fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
|
||||
fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
|
||||
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
|
||||
/*
|
||||
* We don't need to touch the exception register, so
|
||||
* reload the hardware state.
|
||||
*/
|
||||
if (!err)
|
||||
fpsimd_update_current_state(&fpsimd);
|
||||
fpsimd_save_and_flush_current_state();
|
||||
current->thread.uw.fpsimd_state = fpsimd;
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int compat_restore_sigframe(struct pt_regs *regs,
|
||||
|
||||
@@ -2556,14 +2556,6 @@ static void finalize_init_hyp_mode(void)
|
||||
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
|
||||
kern_hyp_va(sve_state);
|
||||
}
|
||||
} else {
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct user_fpsimd_state *fpsimd_state;
|
||||
|
||||
fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
|
||||
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
|
||||
kern_hyp_va(fpsimd_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,43 +54,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
|
||||
fpsimd_kvm_prepare();
|
||||
|
||||
/*
|
||||
* We will check TIF_FOREIGN_FPSTATE just before entering the
|
||||
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
|
||||
* FP_STATE_FREE if the flag set.
|
||||
* Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
|
||||
* that the host kernel is responsible for restoring this state upon
|
||||
* return to userspace, and the hyp code doesn't need to save anything.
|
||||
*
|
||||
* When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
|
||||
* that PSTATE.{SM,ZA} == {0,0}.
|
||||
*/
|
||||
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
|
||||
*host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state);
|
||||
*host_data_ptr(fpmr_ptr) = kern_hyp_va(¤t->thread.uw.fpmr);
|
||||
|
||||
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
|
||||
|
||||
if (system_supports_sme()) {
|
||||
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
|
||||
vcpu_set_flag(vcpu, HOST_SME_ENABLED);
|
||||
|
||||
/*
|
||||
* If PSTATE.SM is enabled then save any pending FP
|
||||
* state and disable PSTATE.SM. If we leave PSTATE.SM
|
||||
* enabled and the guest does not enable SME via
|
||||
* CPACR_EL1.SMEN then operations that should be valid
|
||||
* may generate SME traps from EL1 to EL1 which we
|
||||
* can't intercept and which would confuse the guest.
|
||||
*
|
||||
* Do the same for PSTATE.ZA in the case where there
|
||||
* is state in the registers which has not already
|
||||
* been saved, this is very unlikely to happen.
|
||||
*/
|
||||
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
|
||||
*host_data_ptr(fp_owner) = FP_STATE_FREE;
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
}
|
||||
}
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
*host_data_ptr(fp_owner) = FP_STATE_FREE;
|
||||
|
||||
/*
|
||||
* If normal guests gain SME support, maintain this behavior for pKVM
|
||||
@@ -162,52 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* If we have VHE then the Hyp code will reset CPACR_EL1 to
|
||||
* the default value and we need to reenable SME.
|
||||
*/
|
||||
if (has_vhe() && system_supports_sme()) {
|
||||
/* Also restore EL0 state seen on entry */
|
||||
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
|
||||
sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
|
||||
else
|
||||
sysreg_clear_set(CPACR_EL1,
|
||||
CPACR_EL1_SMEN_EL0EN,
|
||||
CPACR_EL1_SMEN_EL1EN);
|
||||
isb();
|
||||
}
|
||||
|
||||
if (guest_owns_fp_regs()) {
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
u64 zcr = read_sysreg_el1(SYS_ZCR);
|
||||
|
||||
/*
|
||||
* If the vCPU is in the hyp context then ZCR_EL1 is
|
||||
* loaded with its vEL2 counterpart.
|
||||
*/
|
||||
__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr;
|
||||
|
||||
/*
|
||||
* Restore the VL that was saved when bound to the CPU,
|
||||
* which is the maximum VL for the guest. Because the
|
||||
* layout of the data when saving the sve state depends
|
||||
* on the VL, we need to use a consistent (i.e., the
|
||||
* maximum) VL.
|
||||
* Note that this means that at guest exit ZCR_EL1 is
|
||||
* not necessarily the same as on guest entry.
|
||||
*
|
||||
* ZCR_EL2 holds the guest hypervisor's VL when running
|
||||
* a nested guest, which could be smaller than the
|
||||
* max for the vCPU. Similar to above, we first need to
|
||||
* switch to a VL consistent with the layout of the
|
||||
* vCPU's SVE state. KVM support for NV implies VHE, so
|
||||
* using the ZCR_EL1 alias is safe.
|
||||
*/
|
||||
if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)))
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
|
||||
SYS_ZCR_EL1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush (save and invalidate) the fpsimd/sve state so that if
|
||||
* the host tries to use fpsimd/sve, it's not using stale data
|
||||
@@ -219,18 +147,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
* when needed.
|
||||
*/
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
} else if (has_vhe() && system_supports_sve()) {
|
||||
/*
|
||||
* The FPSIMD/SVE state in the CPU has not been touched, and we
|
||||
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
|
||||
* reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE
|
||||
* for EL0. To avoid spurious traps, restore the trap state
|
||||
* seen by kvm_arch_vcpu_load_fp():
|
||||
*/
|
||||
if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
|
||||
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
|
||||
else
|
||||
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
|
||||
alternative_else_nop_endif
|
||||
mrs x1, isr_el1
|
||||
cbz x1, 1f
|
||||
|
||||
// Ensure that __guest_enter() always provides a context
|
||||
// synchronization event so that callers don't need ISBs for anything
|
||||
// that would usually be synchonized by the ERET.
|
||||
isb
|
||||
mov x0, #ARM_EXCEPTION_IRQ
|
||||
ret
|
||||
|
||||
|
||||
@@ -327,7 +327,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
|
||||
arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
|
||||
@@ -376,7 +376,86 @@ static inline void __hyp_sve_save_host(void)
|
||||
true);
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
|
||||
static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 zcr_el1, zcr_el2;
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
return;
|
||||
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
/* A guest hypervisor may restrict the effective max VL. */
|
||||
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
|
||||
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
|
||||
else
|
||||
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
|
||||
zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu));
|
||||
write_sysreg_el1(zcr_el1, SYS_ZCR);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 zcr_el1, zcr_el2;
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
return;
|
||||
|
||||
/*
|
||||
* When the guest owns the FP regs, we know that guest+hyp traps for
|
||||
* any FPSIMD/SVE/SME features exposed to the guest have been disabled
|
||||
* by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
|
||||
* prior to __guest_entry(). As __guest_entry() guarantees a context
|
||||
* synchronization event, we don't need an ISB here to avoid taking
|
||||
* traps for anything that was exposed to the guest.
|
||||
*/
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
zcr_el1 = read_sysreg_el1(SYS_ZCR);
|
||||
__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
|
||||
|
||||
/*
|
||||
* The guest's state is always saved using the guest's max VL.
|
||||
* Ensure that the host has the guest's max VL active such that
|
||||
* the host can save the guest's state lazily, but don't
|
||||
* artificially restrict the host to the guest's max VL.
|
||||
*/
|
||||
if (has_vhe()) {
|
||||
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
} else {
|
||||
zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
|
||||
write_sysreg_el2(zcr_el2, SYS_ZCR);
|
||||
|
||||
zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
|
||||
write_sysreg_el1(zcr_el1, SYS_ZCR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Non-protected kvm relies on the host restoring its sve state.
|
||||
* Protected kvm restores the host's sve state as not to reveal that
|
||||
* fpsimd was used by a guest nor leak upper sve bits.
|
||||
*/
|
||||
if (system_supports_sve()) {
|
||||
__hyp_sve_save_host();
|
||||
|
||||
/* Re-enable SVE traps if not supported for the guest vcpu. */
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
cpacr_clear_set(CPACR_ELx_ZEN, 0);
|
||||
|
||||
} else {
|
||||
__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
|
||||
}
|
||||
|
||||
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
|
||||
*host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
|
||||
}
|
||||
|
||||
/*
|
||||
* We trap the first access to the FP/SIMD to save the host context and
|
||||
@@ -384,7 +463,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
|
||||
* If FP/SIMD is not implemented, handle the trap and inject an undefined
|
||||
* instruction exception to the guest. Similarly for trapped SVE accesses.
|
||||
*/
|
||||
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
bool sve_guest;
|
||||
u8 esr_ec;
|
||||
@@ -426,7 +505,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
isb();
|
||||
|
||||
/* Write out the host state if it's in the registers */
|
||||
if (host_owns_fp_regs())
|
||||
if (is_protected_kvm_enabled() && host_owns_fp_regs())
|
||||
kvm_hyp_save_fpsimd_host(vcpu);
|
||||
|
||||
/* Restore the guest state */
|
||||
@@ -575,7 +654,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
|
||||
handle_tx2_tvm(vcpu))
|
||||
@@ -595,7 +674,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
|
||||
__vgic_v3_perform_cpuif_access(vcpu) == 1)
|
||||
@@ -604,19 +683,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
|
||||
u64 *exit_code)
|
||||
{
|
||||
if (!__populate_fault_info(vcpu))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
__alias(kvm_hyp_handle_memory_fault);
|
||||
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
__alias(kvm_hyp_handle_memory_fault);
|
||||
#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
|
||||
#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault
|
||||
|
||||
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
|
||||
return true;
|
||||
@@ -646,23 +724,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
|
||||
typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
|
||||
|
||||
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
|
||||
|
||||
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
|
||||
/*
|
||||
* Allow the hypervisor to handle the exit with an exit handler if it has one.
|
||||
*
|
||||
* Returns true if the hypervisor handled the exit, and control should go back
|
||||
* to the guest, or false if it hasn't.
|
||||
*/
|
||||
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
|
||||
const exit_handler_fn *handlers)
|
||||
{
|
||||
const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
|
||||
exit_handler_fn fn;
|
||||
|
||||
fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
|
||||
|
||||
exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
|
||||
if (fn)
|
||||
return fn(vcpu, exit_code);
|
||||
|
||||
@@ -692,20 +763,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
|
||||
* the guest, false when we should restore the host state and return to the
|
||||
* main run loop.
|
||||
*/
|
||||
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
|
||||
const exit_handler_fn *handlers)
|
||||
{
|
||||
/*
|
||||
* Save PSTATE early so that we can evaluate the vcpu mode
|
||||
* early on.
|
||||
*/
|
||||
synchronize_vcpu_pstate(vcpu, exit_code);
|
||||
|
||||
/*
|
||||
* Check whether we want to repaint the state one way or
|
||||
* another.
|
||||
*/
|
||||
early_exit_filter(vcpu, exit_code);
|
||||
|
||||
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
|
||||
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
|
||||
|
||||
@@ -735,7 +795,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
goto exit;
|
||||
|
||||
/* Check if there's an exit handler and allow it to handle the exit. */
|
||||
if (kvm_hyp_handle_exit(vcpu, exit_code))
|
||||
if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
|
||||
goto guest;
|
||||
exit:
|
||||
/* Return to the host kernel and handle the exit */
|
||||
|
||||
@@ -69,7 +69,7 @@ struct kvm_iommu_ops {
|
||||
phys_addr_t (*iova_to_phys)(struct kvm_hyp_iommu_domain *domain, unsigned long iova);
|
||||
void (*iotlb_sync)(struct kvm_hyp_iommu_domain *domain,
|
||||
struct iommu_iotlb_gather *gather);
|
||||
bool (*dabt_handler)(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr);
|
||||
bool (*dabt_handler)(struct user_pt_regs *regs, u64 esr, u64 addr);
|
||||
void (*host_stage2_idmap)(struct kvm_hyp_iommu_domain *domain,
|
||||
phys_addr_t start, phys_addr_t end, int prot);
|
||||
void (*host_stage2_idmap_complete)(bool map);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/kvm_pkvm_module.h>
|
||||
#include <asm/virt.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
@@ -45,6 +46,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
|
||||
int ___pkvm_host_donate_hyp(u64 pfn, u64 nr_pages, bool accept_mmio);
|
||||
int ___pkvm_host_donate_hyp_prot(u64 pfn, u64 nr_pages,
|
||||
bool accept_mmio, enum kvm_pgtable_prot prot);
|
||||
int __pkvm_host_donate_sglist_hyp(struct pkvm_sglist_page *sglist, size_t nr_pages);
|
||||
int __pkvm_host_donate_hyp_locked(u64 pfn, u64 nr_pages, enum kvm_pgtable_prot prot);
|
||||
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_guest_share_hyp_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 *hyp_va);
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
|
||||
#define HCALL_HANDLED 0
|
||||
#define HCALL_UNHANDLED -1
|
||||
@@ -19,6 +21,8 @@ int handle_host_dynamic_hcall(struct user_pt_regs *regs, int id);
|
||||
void __pkvm_close_module_registration(void);
|
||||
bool module_handle_host_perm_fault(struct user_pt_regs *regs, u64 esr, u64 addr);
|
||||
bool module_handle_host_smc(struct user_pt_regs *regs);
|
||||
bool module_handle_guest_smc(struct arm_smccc_1_2_regs *regs, struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle);
|
||||
#else
|
||||
static inline int __pkvm_init_module(void *module_init) { return -EOPNOTSUPP; }
|
||||
static inline int
|
||||
@@ -31,4 +35,9 @@ handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt, int id)
|
||||
static inline void __pkvm_close_module_registration(void) { }
|
||||
bool module_handle_host_perm_fault(struct user_pt_regs *regs, u64 esr, u64 addr) { return false; }
|
||||
bool module_handle_host_smc(struct user_pt_regs *regs) { return false; }
|
||||
bool module_handle_guest_smc(struct arm_smccc_1_2_regs *regs, struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -143,6 +143,8 @@ void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
|
||||
bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
bool kvm_hyp_handle_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
|
||||
bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
|
||||
struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *vm, u64 mpidr);
|
||||
|
||||
static inline bool pkvm_hyp_vm_has_pvmfw(struct pkvm_hyp_vm *vm)
|
||||
|
||||
@@ -10,7 +10,8 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
|
||||
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o \
|
||||
serial.o alloc_mgt.o iommu/iommu.o power/hvc.o power/scmi.o device/device.o
|
||||
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o iommu/pviommu-host.o iommu/pviommu.o
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o iommu/pviommu-host.o iommu/pviommu.o \
|
||||
../../../kernel/smccc-call.o
|
||||
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
|
||||
hyp-obj-$(CONFIG_TRACING) += clock.o events.o trace.o
|
||||
hyp-obj-$(CONFIG_PROTECTED_NVHE_FTRACE) += ftrace.o
|
||||
|
||||
@@ -31,6 +31,9 @@ int pkvm_init_devices(void)
|
||||
size_t dev_sz;
|
||||
int ret;
|
||||
|
||||
if (!registered_devices_nr)
|
||||
return 0;
|
||||
|
||||
registered_devices = kern_hyp_va(registered_devices);
|
||||
dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device),
|
||||
registered_devices_nr));
|
||||
@@ -277,8 +280,9 @@ bool pkvm_device_request_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
|
||||
u64 token;
|
||||
s8 level;
|
||||
|
||||
/* arg2 and arg3 reserved for future use. */
|
||||
if (smccc_get_arg2(vcpu) || smccc_get_arg3(vcpu) || !PAGE_ALIGNED(ipa))
|
||||
/* args 2..6 reserved for future use. */
|
||||
if (smccc_get_arg2(vcpu) || smccc_get_arg3(vcpu) || smccc_get_arg4(vcpu) ||
|
||||
smccc_get_arg5(vcpu) || smccc_get_arg6(vcpu) || !PAGE_ALIGNED(ipa))
|
||||
goto out_inval;
|
||||
|
||||
ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, PAGE_SIZE,
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/arm_ffa.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
#include <kvm/arm_hypercalls.h>
|
||||
|
||||
@@ -1107,8 +1108,7 @@ out_unlock:
|
||||
hyp_spin_unlock(&kvm_ffa_hyp_lock);
|
||||
}
|
||||
|
||||
static void do_ffa_direct_msg(struct arm_smccc_res *res,
|
||||
struct kvm_cpu_context *ctxt,
|
||||
static void do_ffa_direct_msg(struct kvm_cpu_context *ctxt,
|
||||
u64 vm_handle)
|
||||
{
|
||||
DECLARE_REG(u32, func_id, ctxt, 0);
|
||||
@@ -1120,14 +1120,38 @@ static void do_ffa_direct_msg(struct arm_smccc_res *res,
|
||||
DECLARE_REG(u32, w6, ctxt, 6);
|
||||
DECLARE_REG(u32, w7, ctxt, 7);
|
||||
|
||||
struct arm_smccc_1_2_regs req, resp;
|
||||
|
||||
if (FIELD_GET(FFA_SRC_ENDPOINT_MASK, endp) != vm_handle) {
|
||||
ffa_to_smccc_res(res, FFA_RET_INVALID_PARAMETERS);
|
||||
resp = (struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_ERROR,
|
||||
.a2 = FFA_RET_INVALID_PARAMETERS,
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
arm_smccc_1_1_smc(func_id, endp, msg_flags, w3,
|
||||
w4, w5, w6, w7,
|
||||
res);
|
||||
req = (struct arm_smccc_1_2_regs) {
|
||||
.a0 = func_id,
|
||||
.a1 = endp,
|
||||
.a2 = msg_flags,
|
||||
.a3 = w3,
|
||||
.a4 = w4,
|
||||
.a5 = w5,
|
||||
.a6 = w6,
|
||||
.a7 = w7,
|
||||
};
|
||||
|
||||
/*
|
||||
* In case SMCCC 1.2 is not supported we should preserve the
|
||||
* host registers.
|
||||
*/
|
||||
memcpy(&resp, &ctxt->regs.regs[0], sizeof(resp));
|
||||
|
||||
__hyp_exit();
|
||||
arm_smccc_1_2_smc(&req, &resp);
|
||||
__hyp_enter();
|
||||
|
||||
memcpy(&ctxt->regs.regs[0], &resp, sizeof(resp));
|
||||
}
|
||||
|
||||
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
|
||||
@@ -1198,8 +1222,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
|
||||
goto out_handled;
|
||||
case FFA_MSG_SEND_DIRECT_REQ:
|
||||
case FFA_FN64_MSG_SEND_DIRECT_REQ:
|
||||
do_ffa_direct_msg(&res, host_ctxt, HOST_FFA_ID);
|
||||
goto out_handled;
|
||||
do_ffa_direct_msg(host_ctxt, HOST_FFA_ID);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ffa_call_supported(func_id))
|
||||
@@ -1273,8 +1297,8 @@ bool kvm_guest_ffa_handler(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
|
||||
goto out_guest;
|
||||
case FFA_MSG_SEND_DIRECT_REQ:
|
||||
case FFA_FN64_MSG_SEND_DIRECT_REQ:
|
||||
do_ffa_direct_msg(&res, ctxt, hyp_vcpu_to_ffa_handle(hyp_vcpu));
|
||||
goto out_guest;
|
||||
do_ffa_direct_msg(ctxt, hyp_vcpu_to_ffa_handle(hyp_vcpu));
|
||||
return true;
|
||||
default:
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <kvm/arm_hypercalls.h>
|
||||
|
||||
#include <hyp/adjust_pc.h>
|
||||
#include <hyp/switch.h>
|
||||
|
||||
#include <asm/pgtable-types.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
@@ -545,7 +546,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
|
||||
if (system_supports_sve())
|
||||
__hyp_sve_restore_host();
|
||||
else
|
||||
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
|
||||
__fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
|
||||
|
||||
if (has_fpmr)
|
||||
write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
|
||||
@@ -900,30 +901,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
|
||||
hyp_vcpu->exit_code = *exit_code;
|
||||
}
|
||||
|
||||
static void fpsimd_host_restore(void)
|
||||
{
|
||||
cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
|
||||
isb();
|
||||
|
||||
|
||||
if (unlikely(is_protected_kvm_enabled())) {
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
|
||||
if (vcpu_has_sve(&hyp_vcpu->vcpu))
|
||||
__hyp_sve_save_guest(&hyp_vcpu->vcpu);
|
||||
else
|
||||
__fpsimd_save_state(&hyp_vcpu->vcpu.arch.ctxt.fp_regs);
|
||||
|
||||
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
|
||||
|
||||
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
|
||||
}
|
||||
|
||||
if (system_supports_sve())
|
||||
sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
|
||||
SYS_ZCR_EL2);
|
||||
}
|
||||
|
||||
static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
@@ -952,8 +929,6 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
|
||||
*last_ran = hyp_vcpu->vcpu.vcpu_id;
|
||||
}
|
||||
|
||||
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
|
||||
|
||||
if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
|
||||
/* Propagate WFx trapping flags */
|
||||
hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
|
||||
@@ -972,9 +947,6 @@ static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
|
||||
if (hyp_vcpu) {
|
||||
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
|
||||
|
||||
if (guest_owns_fp_regs())
|
||||
fpsimd_host_restore();
|
||||
|
||||
if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu) &&
|
||||
!vcpu_get_flag(host_vcpu, PKVM_HOST_STATE_DIRTY)) {
|
||||
__sync_hyp_vcpu(hyp_vcpu);
|
||||
@@ -995,9 +967,6 @@ static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt)
|
||||
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
|
||||
return;
|
||||
|
||||
if (guest_owns_fp_regs())
|
||||
fpsimd_host_restore();
|
||||
|
||||
__sync_hyp_vcpu(hyp_vcpu);
|
||||
}
|
||||
|
||||
@@ -1064,17 +1033,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
|
||||
goto out;
|
||||
|
||||
flush_hyp_vcpu(hyp_vcpu);
|
||||
|
||||
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
|
||||
|
||||
sync_hyp_vcpu(hyp_vcpu, &ret);
|
||||
|
||||
/* Trap host fpsimd/sve if the guest has used fpsimd/sve. */
|
||||
if (guest_owns_fp_regs())
|
||||
cpacr_clear_set(CPACR_ELx_FPEN | CPACR_ELx_ZEN, 0);
|
||||
} else {
|
||||
/* The host is fully trusted, run its vCPU directly. */
|
||||
fpsimd_lazy_switch_to_guest(host_vcpu);
|
||||
ret = __kvm_vcpu_run(host_vcpu);
|
||||
fpsimd_lazy_switch_to_host(host_vcpu);
|
||||
}
|
||||
out:
|
||||
cpu_reg(host_ctxt, 1) = ret;
|
||||
@@ -2038,13 +2003,8 @@ inval:
|
||||
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, func_id, host_ctxt, 0);
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
bool handled;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (hyp_vcpu && guest_owns_fp_regs())
|
||||
fpsimd_host_restore();
|
||||
|
||||
func_id &= ~ARM_SMCCC_CALL_HINTS;
|
||||
|
||||
handled = kvm_host_psci_handler(host_ctxt, func_id);
|
||||
@@ -2079,11 +2039,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
||||
case ESR_ELx_EC_SMC64:
|
||||
handle_host_smc(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_FP_ASIMD:
|
||||
case ESR_ELx_EC_SVE:
|
||||
case ESR_ELx_EC_SME:
|
||||
fpsimd_host_restore();
|
||||
break;
|
||||
case ESR_ELx_EC_IABT_LOW:
|
||||
case ESR_ELx_EC_DABT_LOW:
|
||||
handle_host_mem_abort(host_ctxt);
|
||||
|
||||
@@ -178,7 +178,7 @@ void *kvm_iommu_donate_pages_atomic(u8 order)
|
||||
|
||||
void kvm_iommu_reclaim_pages_atomic(void *p, u8 order)
|
||||
{
|
||||
__kvm_iommu_reclaim_pages(&iommu_atomic_pool, p, order);
|
||||
hyp_put_page(&iommu_atomic_pool, p);
|
||||
}
|
||||
|
||||
static struct kvm_hyp_iommu_domain *
|
||||
@@ -274,7 +274,7 @@ int kvm_iommu_init(struct kvm_iommu_ops *ops,
|
||||
!ops->alloc_domain ||
|
||||
!ops->free_domain ||
|
||||
!ops->get_iommu_by_id)
|
||||
return 0;
|
||||
return -ENODEV;
|
||||
|
||||
ret = hyp_pool_init_empty(&iommu_host_pool, 64);
|
||||
if (ret)
|
||||
@@ -609,7 +609,7 @@ bool kvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u64 esr, u64
|
||||
bool ret = false;
|
||||
|
||||
if (kvm_iommu_ops && kvm_iommu_ops->dabt_handler)
|
||||
ret = kvm_iommu_ops->dabt_handler(host_ctxt, esr, addr);
|
||||
ret = kvm_iommu_ops->dabt_handler(&host_ctxt->regs, esr, addr);
|
||||
|
||||
if (ret)
|
||||
kvm_skip_host_instr();
|
||||
|
||||
@@ -1898,8 +1898,16 @@ static int __pkvm_use_dma_locked(phys_addr_t phys_addr, size_t size,
|
||||
} else {
|
||||
/* For VMs, we know if we reach this point the VM has access to the page. */
|
||||
if (!hyp_vcpu) {
|
||||
ret = ___host_check_page_state_range(phys_addr, size,
|
||||
PKVM_PAGE_OWNED, reg, false);
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
enum pkvm_page_state state;
|
||||
phys_addr_t this_addr = phys_addr + i * PAGE_SIZE;
|
||||
|
||||
state = hyp_phys_to_page(this_addr)->host_state;
|
||||
if (state != PKVM_PAGE_OWNED) {
|
||||
ret = -EPERM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@@ -2346,6 +2354,83 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
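/* Donate a list of host-owned page ranges to the hypervisor; on failure the whole donation is rolled back. */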
int __pkvm_host_donate_sglist_hyp(struct pkvm_sglist_page *sglist, size_t nr_pages)
|
||||
{
|
||||
int p, ret;
|
||||
|
||||
host_lock_component();
|
||||
hyp_lock_component();
|
||||
|
||||
/* Check that we are reading hyp private memory. */
|
||||
if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
|
||||
WARN_ON(__hyp_check_page_state_range((u64)sglist, nr_pages * sizeof(*sglist),
|
||||
PKVM_PAGE_OWNED));
|
||||
|
||||
for (p = 0; p < nr_pages; p++) {
|
||||
u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
|
||||
size_t size;
|
||||
|
||||
if (check_shl_overflow(PAGE_SIZE, sglist[p].order, &size)) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (!addr_is_memory(phys)) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
|
||||
ret = __hyp_check_page_state_range((u64)__hyp_va(phys), size, PKVM_NOPAGE);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
for (p = 0; p < nr_pages; p++) {
|
||||
size_t size = PAGE_SIZE << sglist[p].order;
|
||||
u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
|
||||
enum kvm_pgtable_prot prot;
|
||||
|
||||
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
|
||||
ret = pkvm_create_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size, prot);
|
||||
if (ret) {
|
||||
WARN_ON(ret != -ENOMEM);
|
||||
|
||||
kvm_iommu_host_stage2_idmap_complete(false);
|
||||
|
||||
/* Rollback */
|
||||
for (; p >= 0; p--) {
|
||||
phys = hyp_pfn_to_phys(sglist[p].pfn);
|
||||
size = PAGE_SIZE << sglist[p].order;
|
||||
|
||||
WARN_ON(host_stage2_idmap_locked(phys, size,
|
||||
PKVM_HOST_MEM_PROT, false));
|
||||
kvm_iommu_host_stage2_idmap(phys, phys + size, PKVM_HOST_MEM_PROT);
|
||||
pkvm_remove_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size);
|
||||
}
|
||||
kvm_iommu_host_stage2_idmap_complete(true);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
WARN_ON(__host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP, true, 0, false));
|
||||
kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
|
||||
}
|
||||
|
||||
kvm_iommu_host_stage2_idmap_complete(false);
|
||||
|
||||
unlock:
|
||||
hyp_unlock_component();
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void hyp_poison_page(phys_addr_t phys, size_t size)
|
||||
{
|
||||
WARN_ON(!PAGE_ALIGNED(size));
|
||||
|
||||
@@ -134,6 +134,7 @@ static int __hyp_smp_processor_id(void)
|
||||
enum mod_handler_type {
|
||||
HOST_FAULT_HANDLER = 0,
|
||||
HOST_SMC_HANDLER,
|
||||
GUEST_SMC_HANDLER,
|
||||
NUM_MOD_HANDLER_TYPES,
|
||||
};
|
||||
|
||||
@@ -180,6 +181,13 @@ static int __register_host_smc_handler(bool (*cb)(struct user_pt_regs *))
|
||||
return mod_handler_register(HOST_SMC_HANDLER, cb);
|
||||
}
|
||||
|
||||
static int __register_guest_smc_handler(bool (*cb)(struct arm_smccc_1_2_regs *regs,
|
||||
struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle))
|
||||
{
|
||||
return mod_handler_register(GUEST_SMC_HANDLER, cb);
|
||||
}
|
||||
|
||||
bool module_handle_host_perm_fault(struct user_pt_regs *regs, u64 esr, u64 addr)
|
||||
{
|
||||
int (*cb)(struct user_pt_regs *regs, u64 esr, u64 addr);
|
||||
@@ -206,6 +214,21 @@ bool module_handle_host_smc(struct user_pt_regs *regs)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool module_handle_guest_smc(struct arm_smccc_1_2_regs *regs, struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle)
|
||||
{
|
||||
bool (*cb)(struct arm_smccc_1_2_regs *regs, struct arm_smccc_1_2_regs *res,
|
||||
pkvm_handle_t handle);
|
||||
int i;
|
||||
|
||||
for_each_mod_handler(GUEST_SMC_HANDLER, cb, i) {
|
||||
if (cb(regs, res, handle))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const struct pkvm_module_ops module_ops = {
|
||||
.create_private_mapping = __pkvm_create_private_mapping,
|
||||
.alloc_module_va = __pkvm_alloc_module_va,
|
||||
@@ -229,6 +252,7 @@ const struct pkvm_module_ops module_ops = {
|
||||
.host_stage2_enable_lazy_pte = host_stage2_enable_lazy_pte,
|
||||
.host_stage2_disable_lazy_pte = host_stage2_disable_lazy_pte,
|
||||
.register_host_smc_handler = __register_host_smc_handler,
|
||||
.register_guest_smc_handler = __register_guest_smc_handler,
|
||||
.register_default_trap_handler = __pkvm_register_default_trap_handler,
|
||||
.register_illegal_abt_notifier = __pkvm_register_illegal_abt_notifier,
|
||||
.register_psci_notifier = __pkvm_register_psci_notifier,
|
||||
@@ -236,6 +260,7 @@ const struct pkvm_module_ops module_ops = {
|
||||
.register_unmask_serror = __pkvm_register_unmask_serror,
|
||||
.host_donate_hyp = ___pkvm_host_donate_hyp,
|
||||
.host_donate_hyp_prot = ___pkvm_host_donate_hyp_prot,
|
||||
.host_donate_sglist_hyp = __pkvm_host_donate_sglist_hyp,
|
||||
.hyp_donate_host = __pkvm_hyp_donate_host,
|
||||
.host_share_hyp = __pkvm_host_share_hyp,
|
||||
.host_unshare_hyp = __pkvm_host_unshare_hyp,
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <nvhe/ffa.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/modules.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/pviommu.h>
|
||||
@@ -593,6 +594,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
|
||||
hyp_vm->kvm.arch.pkvm.pvmfw_load_addr = pvmfw_load_addr;
|
||||
|
||||
hyp_vm->kvm.arch.pkvm.ffa_support = READ_ONCE(host_kvm->arch.pkvm.ffa_support);
|
||||
hyp_vm->kvm.arch.pkvm.smc_forwarded = READ_ONCE(host_kvm->arch.pkvm.smc_forwarded);
|
||||
hyp_vm->kvm.arch.mmu.last_vcpu_ran = (int __percpu *)last_ran;
|
||||
memset(last_ran, -1, pkvm_get_last_ran_size());
|
||||
pkvm_init_features_from_host(hyp_vm, host_kvm);
|
||||
@@ -1676,6 +1678,43 @@ static bool pkvm_forward_trng(struct kvm_vcpu *vcpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
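/* Calls in the standard PSCI/SMCCC and CCA service ranges are never forwarded to module SMC handlers. */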
static bool is_standard_secure_service_call(u64 func_id)
|
||||
{
|
||||
return (func_id >= PSCI_0_2_FN_BASE && func_id <= ARM_CCA_FUNC_END) ||
|
||||
(func_id >= PSCI_0_2_FN64_BASE && func_id <= ARM_CCA_64BIT_FUNC_END);
|
||||
}
|
||||
|
||||
bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
bool handled = false;
|
||||
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
|
||||
struct pkvm_hyp_vm *vm;
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
struct arm_smccc_1_2_regs regs;
|
||||
struct arm_smccc_1_2_regs res;
|
||||
DECLARE_REG(u64, func_id, ctxt, 0);
|
||||
|
||||
hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
|
||||
vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
|
||||
|
||||
if (is_standard_secure_service_call(func_id))
|
||||
return false;
|
||||
|
||||
if (!vm->kvm.arch.pkvm.smc_forwarded)
|
||||
return false;
|
||||
|
||||
memcpy(&regs, &ctxt->regs, sizeof(regs));
|
||||
handled = module_handle_guest_smc(&regs, &res, vm->kvm.arch.pkvm.handle);
|
||||
if (handled)
|
||||
memcpy(&ctxt->regs.regs[0], &res, sizeof(res));
|
||||
else
|
||||
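/* SMCCC_RET_NOT_SUPPORTED: tell the guest the call was not handled. */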
ctxt->regs.regs[0] = -1;
|
||||
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for protected VM HVC calls.
|
||||
*
|
||||
|
||||
@@ -112,6 +112,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
__activate_traps_fpsimd32(vcpu);
|
||||
|
||||
if (has_hvhe()) {
|
||||
val |= CPACR_ELx_TTA;
|
||||
|
||||
@@ -120,6 +123,8 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
|
||||
if (vcpu_has_sve(vcpu))
|
||||
val |= CPACR_ELx_ZEN;
|
||||
}
|
||||
|
||||
write_sysreg(val, cpacr_el1);
|
||||
} else {
|
||||
val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
|
||||
|
||||
@@ -134,12 +139,32 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
val |= CPTR_EL2_TFP;
|
||||
|
||||
write_sysreg(val, cptr_el2);
|
||||
}
|
||||
}
|
||||
|
||||
if (!guest_owns_fp_regs())
|
||||
__activate_traps_fpsimd32(vcpu);
|
||||
static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (has_hvhe()) {
|
||||
u64 val = CPACR_ELx_FPEN;
|
||||
|
||||
kvm_write_cptr_el2(val);
|
||||
if (cpus_have_final_cap(ARM64_SVE))
|
||||
val |= CPACR_ELx_ZEN;
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
val |= CPACR_ELx_SMEN;
|
||||
|
||||
write_sysreg(val, cpacr_el1);
|
||||
} else {
|
||||
u64 val = CPTR_NVHE_EL2_RES1;
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_SVE))
|
||||
val |= CPTR_EL2_TZ;
|
||||
if (!cpus_have_final_cap(ARM64_SME))
|
||||
val |= CPTR_EL2_TSM;
|
||||
|
||||
write_sysreg(val, cptr_el2);
|
||||
}
|
||||
}
|
||||
|
||||
static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
@@ -205,7 +230,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
|
||||
|
||||
kvm_reset_cptr_el2(vcpu);
|
||||
__deactivate_cptr_traps(vcpu);
|
||||
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
|
||||
}
|
||||
|
||||
@@ -278,34 +303,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
kvm_handle_pvm_sysreg(vcpu, exit_code));
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Non-protected kvm relies on the host restoring its sve state.
|
||||
* Protected kvm restores the host's sve state so as not to reveal that
|
||||
* fpsimd was used by a guest nor leak upper sve bits.
|
||||
*/
|
||||
if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
|
||||
__hyp_sve_save_host();
|
||||
|
||||
/* Re-enable SVE traps if not supported for the guest vcpu. */
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
cpacr_clear_set(CPACR_ELx_ZEN, 0);
|
||||
|
||||
} else {
|
||||
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
|
||||
}
|
||||
|
||||
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
|
||||
u64 val = read_sysreg_s(SYS_FPMR);
|
||||
|
||||
if (unlikely(is_protected_kvm_enabled()))
|
||||
*host_data_ptr(fpmr) = val;
|
||||
else
|
||||
**host_data_ptr(fpmr_ptr) = val;
|
||||
}
|
||||
}
|
||||
|
||||
static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
|
||||
@@ -321,6 +318,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
static const exit_handler_fn pvm_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_HVC64] = kvm_handle_pvm_hvc64,
|
||||
[ESR_ELx_EC_SMC64] = kvm_handle_pvm_smc64,
|
||||
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
|
||||
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
|
||||
[ESR_ELx_EC_SME] = kvm_handle_pvm_restricted,
|
||||
@@ -354,18 +352,25 @@ void vcpu_illegal_trap(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
*exit_code |= ARM_EXCEPTION_IL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some guests (e.g., protected VMs) are not allowed to run in AArch32.
|
||||
* The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
|
||||
* guest from dropping to AArch32 EL0 if implemented by the CPU. If the
|
||||
* hypervisor spots a guest in such a state ensure it is handled, and don't
|
||||
* trust the host to spot or fix it. The check below is based on the one in
|
||||
* kvm_arch_vcpu_ioctl_run().
|
||||
*/
|
||||
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
|
||||
|
||||
synchronize_vcpu_pstate(vcpu, exit_code);
|
||||
|
||||
/*
|
||||
* Some guests (e.g., protected VMs) are not allowed to run in
|
||||
* AArch32. The ARMv8 architecture does not give the hypervisor a
|
||||
* mechanism to prevent a guest from dropping to AArch32 EL0 if
|
||||
* implemented by the CPU. If the hypervisor spots a guest in such a
|
||||
* state ensure it is handled, and don't trust the host to spot or fix
|
||||
* it. The check below is based on the one in
|
||||
* kvm_arch_vcpu_ioctl_run().
|
||||
*/
|
||||
if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu)))
|
||||
vcpu_illegal_trap(vcpu, exit_code);
|
||||
|
||||
return __fixup_guest_exit(vcpu, exit_code, handlers);
|
||||
}
|
||||
|
||||
/* Switch to the guest for legacy non-VHE systems */
|
||||
|
||||
@@ -312,14 +312,6 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
|
||||
|
||||
if (kvm_has_fpmr(vcpu->kvm))
|
||||
**host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
|
||||
}
|
||||
|
||||
static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
@@ -434,13 +426,10 @@ static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
|
||||
};
|
||||
|
||||
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
|
||||
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
return hyp_exit_handlers;
|
||||
}
|
||||
synchronize_vcpu_pstate(vcpu, exit_code);
|
||||
|
||||
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
/*
|
||||
* If we were in HYP context on entry, adjust the PSTATE view
|
||||
* so that the usual helpers work correctly.
|
||||
@@ -460,6 +449,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
*vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
|
||||
*vcpu_cpsr(vcpu) |= mode;
|
||||
}
|
||||
|
||||
return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
|
||||
}
|
||||
|
||||
/* Switch to the guest for VHE systems running in EL2 */
|
||||
@@ -474,6 +465,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
|
||||
|
||||
sysreg_save_host_state_vhe(host_ctxt);
|
||||
|
||||
fpsimd_lazy_switch_to_guest(vcpu);
|
||||
|
||||
/*
|
||||
* Note that ARM erratum 1165522 requires us to configure both stage 1
|
||||
* and stage 2 translation for the guest context before we clear
|
||||
@@ -498,6 +491,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
|
||||
|
||||
__deactivate_traps(vcpu);
|
||||
|
||||
fpsimd_lazy_switch_to_host(vcpu);
|
||||
|
||||
sysreg_restore_host_state_vhe(host_ctxt);
|
||||
|
||||
if (guest_owns_fp_regs())
|
||||
|
||||
@@ -79,6 +79,7 @@ static bool kvm_smccc_default_allowed(u32 func_id)
|
||||
case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_RELINQUISH_FUNC_ID:
|
||||
return true;
|
||||
default:
|
||||
/* PSCI 0.2 and up is in the 0:0x1f range */
|
||||
|
||||
@@ -84,7 +84,7 @@ int kvm_iommu_init_driver(void)
|
||||
{
|
||||
if (!smp_load_acquire(&iommu_driver) || !iommu_driver->get_iommu_id_by_of) {
|
||||
kvm_err("pKVM enabled without an IOMMU driver, do not run confidential workloads in virtual machines\n");
|
||||
return 0;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
kvm_hyp_iommu_domains = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
|
||||
@@ -753,6 +753,25 @@ void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
|
||||
kfree(ppage);
|
||||
}
|
||||
|
||||
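/* Opt this VM in to forwarding guest SMCs to pKVM module SMC handlers. */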
int pkvm_enable_smc_forwarding(struct file *kvm_file)
|
||||
{
|
||||
struct kvm *host_kvm;
|
||||
|
||||
if (!file_is_kvm(kvm_file))
|
||||
return -EINVAL;
|
||||
|
||||
if (!kvm_get_kvm_safe(kvm_file->private_data))
|
||||
return -EINVAL;
|
||||
|
||||
host_kvm = kvm_file->private_data;
|
||||
if (!host_kvm)
|
||||
return -EINVAL;
|
||||
|
||||
host_kvm->arch.pkvm.smc_forwarded = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
|
||||
const char *reason)
|
||||
{
|
||||
@@ -927,11 +946,28 @@ static int __init early_pkvm_modules_cfg(char *arg)
|
||||
}
|
||||
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
|
||||
|
||||
static void free_modprobe_argv(struct subprocess_info *info)
|
||||
static void __init free_modprobe_argv(struct subprocess_info *info)
|
||||
{
|
||||
kfree(info->argv);
|
||||
}
|
||||
|
||||
static int __init init_modprobe(struct subprocess_info *info, struct cred *new)
|
||||
{
|
||||
struct file *file = filp_open("/dev/kmsg", O_RDWR, 0);
|
||||
|
||||
if (IS_ERR(file)) {
|
||||
pr_warn("Warning: unable to open /dev/kmsg, modprobe will be silent.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
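/* Duplicate /dev/kmsg onto fds 0, 1 and 2 so the modprobe helper's output ends up in the kernel log. */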
init_dup(file);
|
||||
init_dup(file);
|
||||
init_dup(file);
|
||||
fput(file);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Heavily inspired by request_module(). The latter couldn't be reused though as
|
||||
* the feature can be disabled depending on umh configuration. Here some
|
||||
@@ -974,7 +1010,7 @@ static int __init __pkvm_request_early_module(char *module_name,
|
||||
argv[idx++] = NULL;
|
||||
|
||||
info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
|
||||
NULL, free_modprobe_argv, NULL);
|
||||
init_modprobe, free_modprobe_argv, NULL);
|
||||
if (!info)
|
||||
goto err;
|
||||
|
||||
|
||||
@@ -107,3 +107,23 @@ WORKAROUND_REPEAT_TLBI
|
||||
WORKAROUND_SPECULATIVE_AT
|
||||
WORKAROUND_SPECULATIVE_SSBS
|
||||
WORKAROUND_SPECULATIVE_UNPRIV_LOAD
|
||||
ANDROID_KABI_RESERVE_01
|
||||
ANDROID_KABI_RESERVE_02
|
||||
ANDROID_KABI_RESERVE_03
|
||||
ANDROID_KABI_RESERVE_04
|
||||
ANDROID_KABI_RESERVE_05
|
||||
ANDROID_KABI_RESERVE_06
|
||||
ANDROID_KABI_RESERVE_07
|
||||
ANDROID_KABI_RESERVE_08
|
||||
ANDROID_KABI_RESERVE_09
|
||||
ANDROID_KABI_RESERVE_10
|
||||
ANDROID_KABI_RESERVE_11
|
||||
ANDROID_KABI_RESERVE_12
|
||||
ANDROID_KABI_RESERVE_13
|
||||
ANDROID_KABI_RESERVE_14
|
||||
ANDROID_KABI_RESERVE_15
|
||||
ANDROID_KABI_RESERVE_16
|
||||
ANDROID_KABI_RESERVE_17
|
||||
ANDROID_KABI_RESERVE_18
|
||||
ANDROID_KABI_RESERVE_19
|
||||
ANDROID_KABI_RESERVE_20
|
||||
|
||||
@@ -105,6 +105,9 @@
|
||||
|
||||
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
|
||||
|
||||
#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
|
||||
#define MADV_GUARD_REMOVE 103 /* unguard range */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
||||
@@ -75,6 +75,9 @@
|
||||
#define MADV_HWPOISON 100 /* poison a page for testing */
|
||||
#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
|
||||
|
||||
#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
|
||||
#define MADV_GUARD_REMOVE 103 /* unguard range */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ CONFIG_NR_CPUS=32
|
||||
CONFIG_EFI=y
|
||||
CONFIG_CMDLINE_BOOL=y
|
||||
CONFIG_CMDLINE="console=ttynull stack_depot_disable=on cgroup_disable=pressure bootconfig"
|
||||
# CONFIG_CFI_AUTO_DEFAULT is not set
|
||||
CONFIG_HIBERNATION=y
|
||||
CONFIG_PM_WAKELOCKS=y
|
||||
CONFIG_PM_WAKELOCKS_LIMIT=0
|
||||
@@ -561,6 +562,7 @@ CONFIG_POWERCAP=y
|
||||
CONFIG_IDLE_INJECT=y
|
||||
CONFIG_ANDROID_BINDER_IPC=y
|
||||
CONFIG_ANDROID_BINDERFS=y
|
||||
CONFIG_ANDROID_BINDER_IPC_RUST=m
|
||||
CONFIG_ANDROID_VENDOR_HOOKS=y
|
||||
CONFIG_LIBNVDIMM=y
|
||||
CONFIG_INTERCONNECT=y
|
||||
@@ -696,6 +698,8 @@ CONFIG_UBSAN_TRAP=y
|
||||
CONFIG_PAGE_OWNER=y
|
||||
CONFIG_PAGE_PINNER=y
|
||||
CONFIG_DEBUG_MEMORY_INIT=y
|
||||
CONFIG_MEM_ALLOC_PROFILING=y
|
||||
# CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT is not set
|
||||
CONFIG_KFENCE=y
|
||||
CONFIG_KFENCE_SAMPLE_INTERVAL=500
|
||||
CONFIG_KFENCE_NUM_OBJECTS=63
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <asm/vdso.h>
|
||||
|
||||
#include <linux/page_size_compat.h>
|
||||
|
||||
/*
|
||||
* Linker script for vDSO. This is an ELF shared object prelinked to
|
||||
* its virtual address, and with only one read-only segment.
|
||||
@@ -16,7 +18,7 @@ SECTIONS
|
||||
* segment.
|
||||
*/
|
||||
|
||||
vvar_start = . - 4 * PAGE_SIZE;
|
||||
vvar_start = . - 4 * __MAX_PAGE_SIZE;
|
||||
vvar_page = vvar_start;
|
||||
|
||||
/* Place all vvars at the offsets in asm/vvar.h. */
|
||||
@@ -24,9 +26,9 @@ SECTIONS
|
||||
#include <asm/vvar.h>
|
||||
#undef EMIT_VVAR
|
||||
|
||||
pvclock_page = vvar_start + PAGE_SIZE;
|
||||
hvclock_page = vvar_start + 2 * PAGE_SIZE;
|
||||
timens_page = vvar_start + 3 * PAGE_SIZE;
|
||||
pvclock_page = vvar_start + __MAX_PAGE_SIZE;
|
||||
hvclock_page = vvar_start + 2 * __MAX_PAGE_SIZE;
|
||||
timens_page = vvar_start + 3 * __MAX_PAGE_SIZE;
|
||||
|
||||
#undef _ASM_X86_VVAR_H
|
||||
/* Place all vvars in timens too at the offsets in asm/vvar.h. */
|
||||
|
||||
@@ -5,6 +5,16 @@
|
||||
* are built for 32-bit userspace.
|
||||
*/
|
||||
|
||||
/*
|
||||
* For x86_64 16kB page size emulation
|
||||
*
|
||||
* The redefinition is needed here since vdso2c is a program that runs
|
||||
* on the host.
|
||||
*
|
||||
* It converts the vdso shared lib to a C array.
|
||||
*/
|
||||
#define __MAX_PAGE_SIZE 16384
|
||||
|
||||
static void BITSFUNC(copy)(FILE *outfile, const unsigned char *data, size_t len)
|
||||
{
|
||||
size_t i;
|
||||
@@ -175,7 +185,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
return;
|
||||
}
|
||||
|
||||
mapping_size = (stripped_len + 4095) / 4096 * 4096;
|
||||
mapping_size = (stripped_len + __MAX_PAGE_SIZE-1) / __MAX_PAGE_SIZE * __MAX_PAGE_SIZE;
|
||||
|
||||
fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
|
||||
fprintf(outfile, "#include <linux/linkage.h>\n");
|
||||
@@ -184,8 +194,8 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
fprintf(outfile, "#include <asm/vdso.h>\n");
|
||||
fprintf(outfile, "\n");
|
||||
fprintf(outfile,
|
||||
"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
|
||||
mapping_size);
|
||||
"static unsigned char raw_data[%lu] __ro_after_init __aligned(%d) = {",
|
||||
mapping_size, __MAX_PAGE_SIZE);
|
||||
for (i = 0; i < stripped_len; i++) {
|
||||
if (i % 10 == 0)
|
||||
fprintf(outfile, "\n\t");
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/page_size_compat.h>
|
||||
|
||||
#include <asm/vsyscall.h>
|
||||
#include <asm/unistd.h>
|
||||
@@ -284,7 +285,7 @@ static const struct vm_operations_struct gate_vma_ops = {
|
||||
};
|
||||
static struct vm_area_struct gate_vma __ro_after_init = {
|
||||
.vm_start = VSYSCALL_ADDR,
|
||||
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
|
||||
.vm_end = VSYSCALL_ADDR + __MAX_PAGE_SIZE,
|
||||
.vm_page_prot = PAGE_READONLY_EXEC,
|
||||
.vm_flags = VM_READ | VM_EXEC,
|
||||
.vm_ops = &gate_vma_ops,
|
||||
|
||||
@@ -1797,6 +1797,8 @@ static __init int pt_init(void)
|
||||
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
|
||||
else
|
||||
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;
|
||||
|
||||
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
|
||||
pt_pmu.pmu.attr_groups = pt_attr_groups;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
/*
|
||||
* ELF register definitions..
|
||||
*/
|
||||
#include <linux/page_size_compat.h>
|
||||
#include <linux/thread_info.h>
|
||||
|
||||
#include <asm/ia32.h>
|
||||
@@ -228,7 +229,7 @@ extern int force_personality32;
|
||||
#endif /* !CONFIG_X86_32 */
|
||||
|
||||
#define CORE_DUMP_USE_REGSET
|
||||
#define ELF_EXEC_PAGESIZE 4096
|
||||
#define ELF_EXEC_PAGESIZE __PAGE_SIZE
|
||||
|
||||
/*
|
||||
* This is the base location for PIE (ET_DYN with INTERP) loads. On
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/page_size_compat.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/prctl.h>
|
||||
@@ -1006,7 +1007,7 @@ early_param("idle", idle_setup);
|
||||
unsigned long arch_align_stack(unsigned long sp)
|
||||
{
|
||||
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
|
||||
sp -= get_random_u32_below(8192);
|
||||
sp -= get_random_u32_below(__PAGE_SIZE << 1);
|
||||
return sp & ~0xf;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
* Copyright 2007 Jiri Kosina, SUSE Labs.
|
||||
*/
|
||||
|
||||
#include <linux/page_size_compat.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/random.h>
|
||||
@@ -71,7 +72,7 @@ static unsigned long arch_rnd(unsigned int rndbits)
|
||||
{
|
||||
if (!(current->flags & PF_RANDOMIZE))
|
||||
return 0;
|
||||
return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
|
||||
return (get_random_long() & ((1UL << rndbits) - 1)) << __PAGE_SHIFT;
|
||||
}
|
||||
|
||||
unsigned long arch_mmap_rnd(void)
|
||||
|
||||
@@ -113,6 +113,9 @@
|
||||
|
||||
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
|
||||
|
||||
#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
|
||||
#define MADV_GUARD_REMOVE 103 /* unguard range */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
||||
@@ -882,8 +882,41 @@ end_io:
|
||||
}
|
||||
EXPORT_SYMBOL(submit_bio_noacct);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
/**
|
||||
* blk_bio_is_seq_zoned_write() - Check if @bio requires write serialization.
|
||||
* @bio: Bio to examine.
|
||||
*
|
||||
* Note: REQ_OP_ZONE_APPEND bios do not require serialization.
|
||||
* Note: this function treats conventional zones on a zoned block device as
|
||||
* sequential zones. This is fine since zoned UFS devices have no conventional
|
||||
* zones.
|
||||
*/
|
||||
static bool blk_bio_is_seq_zoned_write(struct bio *bio)
|
||||
{
|
||||
if (!bdev_is_zoned(bio->bi_bdev))
|
||||
return false;
|
||||
|
||||
return bio_op(bio) == REQ_OP_WRITE ||
|
||||
bio_op(bio) == REQ_OP_WRITE_ZEROES;
|
||||
}
|
||||
#else
|
||||
static bool blk_bio_is_seq_zoned_write(struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void bio_set_ioprio(struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* Do not set the I/O priority of sequential zoned write bios because
|
||||
* this could lead to reordering by the mq-deadline I/O scheduler and
|
||||
* hence to unaligned write errors.
|
||||
*/
|
||||
if (blk_bio_is_seq_zoned_write(bio))
|
||||
return;
|
||||
|
||||
/* Nobody set ioprio so far? Initialize it based on task's nice value */
|
||||
if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
|
||||
bio->bi_ioprio = get_current_ioprio();
|
||||
@@ -1127,8 +1160,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
|
||||
return;
|
||||
|
||||
plug->cur_ktime = 0;
|
||||
plug->mq_list = NULL;
|
||||
plug->cached_rq = NULL;
|
||||
rq_list_init(&plug->mq_list);
|
||||
rq_list_init(&plug->cached_rqs);
|
||||
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
|
||||
plug->rq_count = 0;
|
||||
plug->multiple_queues = false;
|
||||
@@ -1224,7 +1257,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
|
||||
* queue for cached requests, we don't want a blocked task holding
|
||||
* up a queue freeze/quiesce event.
|
||||
*/
|
||||
if (unlikely(!rq_list_empty(plug->cached_rq)))
|
||||
if (unlikely(!rq_list_empty(&plug->cached_rqs)))
|
||||
blk_mq_free_plug_rqs(plug);
|
||||
|
||||
plug->cur_ktime = 0;
|
||||
|
||||
@@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count,
|
||||
else
|
||||
lim.integrity.flags |= flag;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
err = queue_limits_commit_update_frozen(q, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return count;
|
||||
|
||||
@@ -1141,7 +1141,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
struct blk_plug *plug = current->plug;
|
||||
struct request *rq;
|
||||
|
||||
if (!plug || rq_list_empty(plug->mq_list))
|
||||
if (!plug || rq_list_empty(&plug->mq_list))
|
||||
return false;
|
||||
|
||||
rq_list_for_each(&plug->mq_list, rq) {
|
||||
|
||||
@@ -133,6 +133,10 @@ static bool blk_freeze_set_owner(struct request_queue *q,
|
||||
if (!q->mq_freeze_depth) {
|
||||
q->mq_freeze_owner = owner;
|
||||
q->mq_freeze_owner_depth = 1;
|
||||
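/* Snapshot disk/queue state for the lockdep annotations in blk_freeze_acquire_lock() and blk_unfreeze_release_lock(). */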
q->mq_freeze_disk_dead = !q->disk ||
|
||||
test_bit(GD_DEAD, &q->disk->state) ||
|
||||
!blk_queue_registered(q);
|
||||
q->mq_freeze_queue_dying = blk_queue_dying(q);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -191,7 +195,7 @@ bool __blk_freeze_queue_start(struct request_queue *q,
|
||||
void blk_freeze_queue_start(struct request_queue *q)
|
||||
{
|
||||
if (__blk_freeze_queue_start(q, current))
|
||||
blk_freeze_acquire_lock(q, false, false);
|
||||
blk_freeze_acquire_lock(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
|
||||
|
||||
@@ -259,7 +263,7 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
|
||||
void blk_mq_unfreeze_queue(struct request_queue *q)
|
||||
{
|
||||
if (__blk_mq_unfreeze_queue(q, false))
|
||||
blk_unfreeze_release_lock(q, false, false);
|
||||
blk_unfreeze_release_lock(q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
|
||||
|
||||
@@ -508,7 +512,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
|
||||
prefetch(tags->static_rqs[tag]);
|
||||
tag_mask &= ~(1UL << i);
|
||||
rq = blk_mq_rq_ctx_init(data, tags, tag);
|
||||
rq_list_add(data->cached_rq, rq);
|
||||
rq_list_add_head(data->cached_rqs, rq);
|
||||
nr++;
|
||||
}
|
||||
if (!(data->rq_flags & RQF_SCHED_TAGS))
|
||||
@@ -517,7 +521,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
|
||||
percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
|
||||
data->nr_tags -= nr;
|
||||
|
||||
return rq_list_pop(data->cached_rq);
|
||||
return rq_list_pop(data->cached_rqs);
|
||||
}
|
||||
|
||||
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
@@ -614,7 +618,7 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
|
||||
.flags = flags,
|
||||
.cmd_flags = opf,
|
||||
.nr_tags = plug->nr_ios,
|
||||
.cached_rq = &plug->cached_rq,
|
||||
.cached_rqs = &plug->cached_rqs,
|
||||
};
|
||||
struct request *rq;
|
||||
|
||||
@@ -639,14 +643,14 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
|
||||
if (!plug)
|
||||
return NULL;
|
||||
|
||||
if (rq_list_empty(plug->cached_rq)) {
|
||||
if (rq_list_empty(&plug->cached_rqs)) {
|
||||
if (plug->nr_ios == 1)
|
||||
return NULL;
|
||||
rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
|
||||
if (!rq)
|
||||
return NULL;
|
||||
} else {
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
rq = rq_list_peek(&plug->cached_rqs);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
|
||||
@@ -655,7 +659,7 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
|
||||
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
|
||||
return NULL;
|
||||
|
||||
plug->cached_rq = rq_list_next(rq);
|
||||
rq_list_pop(&plug->cached_rqs);
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
}
|
||||
|
||||
@@ -832,7 +836,7 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
|
||||
{
|
||||
struct request *rq;
|
||||
|
||||
while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
|
||||
while ((rq = rq_list_pop(&plug->cached_rqs)) != NULL)
|
||||
blk_mq_free_request(rq);
|
||||
}
|
||||
|
||||
@@ -1388,8 +1392,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
|
||||
*/
|
||||
if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
|
||||
plug->has_elevator = true;
|
||||
rq->rq_next = NULL;
|
||||
rq_list_add(&plug->mq_list, rq);
|
||||
rq_list_add_tail(&plug->mq_list, rq);
|
||||
plug->rq_count++;
|
||||
}
|
||||
|
||||
@@ -2801,7 +2804,7 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
while ((rq = rq_list_pop(&plug->mq_list))) {
|
||||
bool last = rq_list_empty(plug->mq_list);
|
||||
bool last = rq_list_empty(&plug->mq_list);
|
||||
|
||||
if (hctx != rq->mq_hctx) {
|
||||
if (hctx) {
|
||||
@@ -2844,8 +2847,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
|
||||
{
|
||||
struct blk_mq_hw_ctx *this_hctx = NULL;
|
||||
struct blk_mq_ctx *this_ctx = NULL;
|
||||
struct request *requeue_list = NULL;
|
||||
struct request **requeue_lastp = &requeue_list;
|
||||
struct rq_list requeue_list = {};
|
||||
unsigned int depth = 0;
|
||||
bool is_passthrough = false;
|
||||
LIST_HEAD(list);
|
||||
@@ -2859,12 +2861,12 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
|
||||
is_passthrough = blk_rq_is_passthrough(rq);
|
||||
} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx ||
|
||||
is_passthrough != blk_rq_is_passthrough(rq)) {
|
||||
rq_list_add_tail(&requeue_lastp, rq);
|
||||
rq_list_add_tail(&requeue_list, rq);
|
||||
continue;
|
||||
}
|
||||
list_add(&rq->queuelist, &list);
|
||||
list_add_tail(&rq->queuelist, &list);
|
||||
depth++;
|
||||
} while (!rq_list_empty(plug->mq_list));
|
||||
} while (!rq_list_empty(&plug->mq_list));
|
||||
|
||||
plug->mq_list = requeue_list;
|
||||
trace_block_unplug(this_hctx->queue, depth, !from_sched);
|
||||
@@ -2919,19 +2921,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
if (q->mq_ops->queue_rqs) {
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
__blk_mq_flush_plug_list(q, plug));
|
||||
if (rq_list_empty(plug->mq_list))
|
||||
if (rq_list_empty(&plug->mq_list))
|
||||
return;
|
||||
}
|
||||
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
blk_mq_plug_issue_direct(plug));
|
||||
if (rq_list_empty(plug->mq_list))
|
||||
if (rq_list_empty(&plug->mq_list))
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
blk_mq_dispatch_plug_list(plug, from_schedule);
|
||||
} while (!rq_list_empty(plug->mq_list));
|
||||
} while (!rq_list_empty(&plug->mq_list));
|
||||
}
|
||||
|
||||
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
@@ -2996,7 +2998,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
if (plug) {
|
||||
data.nr_tags = plug->nr_ios;
|
||||
plug->nr_ios = 1;
|
||||
data.cached_rq = &plug->cached_rq;
|
||||
data.cached_rqs = &plug->cached_rqs;
|
||||
}
|
||||
|
||||
rq = __blk_mq_alloc_requests(&data);
|
||||
@@ -3019,7 +3021,7 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
|
||||
|
||||
if (!plug)
|
||||
return NULL;
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
rq = rq_list_peek(&plug->cached_rqs);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
if (type != rq->mq_hctx->type &&
|
||||
@@ -3033,14 +3035,14 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
|
||||
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
struct bio *bio)
|
||||
{
|
||||
WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
|
||||
if (rq_list_pop(&plug->cached_rqs) != rq)
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
/*
|
||||
* If any qos ->throttle() end up blocking, we will have flushed the
|
||||
* plug and hence killed the cached_rq list as well. Pop this entry
|
||||
* before we throttle.
|
||||
*/
|
||||
plug->cached_rq = rq_list_next(rq);
|
||||
rq_qos_throttle(rq->q, bio);
|
||||
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
|
||||
@@ -155,7 +155,7 @@ struct blk_mq_alloc_data {
|
||||
|
||||
/* allocate multiple requests/tags in one go */
|
||||
unsigned int nr_tags;
|
||||
struct request **cached_rq;
|
||||
struct rq_list *cached_rqs;
|
||||
|
||||
/* input & output parameter */
|
||||
struct blk_mq_ctx *ctx;
|
||||
|
||||
@@ -434,6 +434,30 @@ out_unlock:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_commit_update);
|
||||
|
||||
/**
|
||||
* queue_limits_commit_update_frozen - commit an atomic update of queue limits
|
||||
* @q: queue to update
|
||||
* @lim: limits to apply
|
||||
*
|
||||
* Apply the limits in @lim that were obtained from queue_limits_start_update()
|
||||
* and updated with the new values by the caller to @q. Freezes the queue
|
||||
* before the update and unfreezes it after.
|
||||
*
|
||||
* Returns 0 if successful, else a negative error code.
|
||||
*/
|
||||
int queue_limits_commit_update_frozen(struct request_queue *q,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
int ret;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
ret = queue_limits_commit_update(q, lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_commit_update_frozen);
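For reference, the calling pattern this helper enables looks roughly like the sketch below (a hypothetical driver helper, not part of this series; the function name and the choice of max_user_sectors are illustrative only):

	static int example_set_user_max_sectors(struct request_queue *q,
						unsigned int sectors)
	{
		/* Snapshot the current limits; this takes the limits lock. */
		struct queue_limits lim = queue_limits_start_update(q);

		lim.max_user_sectors = sectors;

		/*
		 * Freezes the queue, commits the update (releasing the limits
		 * lock), then unfreezes the queue.
		 */
		return queue_limits_commit_update_frozen(q, &lim);
	}

Several callers in this series, such as the sysfs store paths and disk_update_zone_resources(), are converted to exactly this pattern.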
|
||||
|
||||
/**
|
||||
* queue_limits_set - apply queue limits to queue
|
||||
* @q: queue to update
|
||||
|
||||
@@ -25,6 +25,8 @@ struct queue_sysfs_entry {
|
||||
ssize_t (*show)(struct gendisk *disk, char *page);
|
||||
int (*load_module)(struct gendisk *disk, const char *page, size_t count);
|
||||
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
|
||||
int (*store_limit)(struct gendisk *disk, const char *page,
|
||||
size_t count, struct queue_limits *lim);
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
@@ -152,13 +154,11 @@ QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
|
||||
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
|
||||
QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
|
||||
|
||||
static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
|
||||
const char *page, size_t count)
|
||||
static int queue_max_discard_sectors_store(struct gendisk *disk,
|
||||
const char *page, size_t count, struct queue_limits *lim)
|
||||
{
|
||||
unsigned long max_discard_bytes;
|
||||
struct queue_limits lim;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
|
||||
ret = queue_var_store(&max_discard_bytes, page, count);
|
||||
if (ret < 0)
|
||||
@@ -170,12 +170,8 @@ static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
|
||||
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
|
||||
err = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -190,30 +186,24 @@ static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
|
||||
SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
|
||||
static int
|
||||
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
unsigned long max_sectors_kb;
|
||||
struct queue_limits lim;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
|
||||
ret = queue_var_store(&max_sectors_kb, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
lim.max_user_sectors = max_sectors_kb << 1;
|
||||
err = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
lim->max_user_sectors = max_sectors_kb << 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
|
||||
size_t count, blk_features_t feature)
|
||||
size_t count, struct queue_limits *lim, blk_features_t feature)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
unsigned long val;
|
||||
ssize_t ret;
|
||||
|
||||
@@ -221,15 +211,11 @@ static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
if (val)
|
||||
lim.features |= feature;
|
||||
lim->features |= feature;
|
||||
else
|
||||
lim.features &= ~feature;
|
||||
ret = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (ret)
|
||||
return ret;
|
||||
return count;
|
||||
lim->features &= ~feature;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define QUEUE_SYSFS_FEATURE(_name, _feature) \
|
||||
@@ -238,10 +224,10 @@ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
return sprintf(page, "%u\n", \
|
||||
!!(disk->queue->limits.features & _feature)); \
|
||||
} \
|
||||
static ssize_t queue_##_name##_store(struct gendisk *disk, \
|
||||
const char *page, size_t count) \
|
||||
static int queue_##_name##_store(struct gendisk *disk, \
|
||||
const char *page, size_t count, struct queue_limits *lim) \
|
||||
{ \
|
||||
return queue_feature_store(disk, page, count, _feature); \
|
||||
return queue_feature_store(disk, page, count, lim, _feature); \
|
||||
}
|
||||
|
||||
QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
|
||||
@@ -381,12 +367,10 @@ static ssize_t queue_wc_show(struct gendisk *disk, char *page)
|
||||
return sprintf(page, "write through\n");
|
||||
}
|
||||
|
||||
static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
|
||||
size_t count)
|
||||
static int queue_wc_store(struct gendisk *disk, const char *page,
|
||||
size_t count, struct queue_limits *lim)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
bool disable;
|
||||
int err;
|
||||
|
||||
if (!strncmp(page, "write back", 10)) {
|
||||
disable = false;
|
||||
@@ -397,15 +381,11 @@ static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
if (disable)
|
||||
lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
|
||||
lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
|
||||
else
|
||||
lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
|
||||
err = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return count;
|
||||
lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define QUEUE_RO_ENTRY(_prefix, _name) \
|
||||
@@ -421,6 +401,13 @@ static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.store = _prefix##_store, \
|
||||
};
|
||||
|
||||
#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
.show = _prefix##_show, \
|
||||
.store_limit = _prefix##_store, \
|
||||
}
|
||||
|
||||
#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
|
||||
static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
.attr = { .name = _name, .mode = 0644 }, \
|
||||
@@ -431,7 +418,7 @@ static struct queue_sysfs_entry _prefix##_entry = { \
|
||||
|
||||
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
|
||||
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
|
||||
QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
|
||||
QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
|
||||
QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
|
||||
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
|
||||
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
|
||||
@@ -447,7 +434,7 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
|
||||
QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
|
||||
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
|
||||
QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
|
||||
QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
|
||||
QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
|
||||
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
|
||||
|
||||
QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
|
||||
@@ -470,7 +457,7 @@ QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
|
||||
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
|
||||
QUEUE_RW_ENTRY(queue_poll, "io_poll");
|
||||
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
|
||||
QUEUE_RW_ENTRY(queue_wc, "write_cache");
|
||||
QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
|
||||
QUEUE_RO_ENTRY(queue_fua, "fua");
|
||||
QUEUE_RO_ENTRY(queue_dax, "dax");
|
||||
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
|
||||
@@ -483,10 +470,10 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
|
||||
.show = queue_logical_block_size_show,
|
||||
};
|
||||
|
||||
QUEUE_RW_ENTRY(queue_rotational, "rotational");
|
||||
QUEUE_RW_ENTRY(queue_iostats, "iostats");
|
||||
QUEUE_RW_ENTRY(queue_add_random, "add_random");
|
||||
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
|
||||
QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
|
||||
QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
|
||||
QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
|
||||
QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");
|
||||
|
||||
#ifdef CONFIG_BLK_WBT
|
||||
static ssize_t queue_var_store64(s64 *var, const char *page)
|
||||
@@ -683,7 +670,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
struct request_queue *q = disk->queue;
|
||||
ssize_t res;
|
||||
|
||||
if (!entry->store)
|
||||
if (!entry->store_limit && !entry->store)
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
@@ -697,11 +684,26 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
|
||||
return res;
|
||||
}
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
if (entry->store_limit) {
|
||||
struct queue_limits lim = queue_limits_start_update(q);
|
||||
|
||||
res = entry->store_limit(disk, page, length, &lim);
|
||||
if (res < 0) {
|
||||
queue_limits_cancel_update(q);
|
||||
return res;
|
||||
}
|
||||
|
||||
res = queue_limits_commit_update_frozen(q, &lim);
|
||||
if (res)
|
||||
return res;
|
||||
return length;
|
||||
}
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
blk_mq_freeze_queue(q);
|
||||
res = entry->store(disk, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -1520,7 +1520,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||
unsigned int nr_seq_zones, nr_conv_zones;
|
||||
unsigned int pool_size;
|
||||
struct queue_limits lim;
|
||||
int ret;
|
||||
|
||||
disk->nr_zones = args->nr_zones;
|
||||
disk->zone_capacity = args->zone_capacity;
|
||||
@@ -1571,11 +1570,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
|
||||
}
|
||||
|
||||
commit:
|
||||
blk_mq_freeze_queue(q);
|
||||
ret = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
return ret;
|
||||
return queue_limits_commit_update_frozen(q, &lim);
|
||||
}
|
||||
|
||||
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
|
||||
@@ -1678,6 +1673,25 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (zone->start == 0) {
|
||||
if (zone->len == 0) {
|
||||
pr_warn("%s: Invalid zero zone size", disk->disk_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* Non power-of-2 zone size support was added to remove the gap
|
||||
* between zone capacity and zone size. Though it is technically
|
||||
* possible to have gaps in a non power-of-2 device, Linux
|
||||
* requires the zone size to be equal to zone capacity for non
|
||||
* power-of-2 zoned devices.
|
||||
*/
|
||||
if (!is_power_of_2(zone->len) && zone->capacity < zone->len) {
|
||||
pr_err("%s: Invalid zone capacity %lld with non power-of-2 zone size %lld",
|
||||
disk->disk_name, zone->capacity, zone->len);
|
||||
return -ENODEV;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* All zones must have the same size, with the exception on an eventual
|
||||
* smaller last zone.
|
||||
@@ -1753,9 +1767,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
* Checks that the device driver indicated a valid zone size and that
|
||||
* the max zone append limit is set.
|
||||
*/
|
||||
if (!zone_sectors || !is_power_of_2(zone_sectors)) {
|
||||
pr_warn("%s: Invalid non power of two zone size (%llu)\n",
|
||||
disk->disk_name, zone_sectors);
|
||||
if (!zone_sectors) {
|
||||
pr_warn("%s: Invalid zone size\n", disk->disk_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
@@ -1770,7 +1783,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
* GFP_NOIO was specified.
|
||||
*/
|
||||
args.disk = disk;
|
||||
args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
|
||||
args.nr_zones = div64_u64(capacity + zone_sectors - 1, zone_sectors);
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = disk_revalidate_zone_resources(disk, args.nr_zones);
|
||||
if (ret) {
|
||||
|
||||
block/blk.h
@@ -729,22 +729,29 @@ void blk_integrity_verify(struct bio *bio);
|
||||
void blk_integrity_prepare(struct request *rq);
|
||||
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
|
||||
|
||||
static inline void blk_freeze_acquire_lock(struct request_queue *q, bool
|
||||
disk_dead, bool queue_dying)
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
static inline void blk_freeze_acquire_lock(struct request_queue *q)
|
||||
{
|
||||
if (!disk_dead)
|
||||
if (!q->mq_freeze_disk_dead)
|
||||
rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
|
||||
if (!queue_dying)
|
||||
if (!q->mq_freeze_queue_dying)
|
||||
rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
|
||||
}
|
||||
|
||||
static inline void blk_unfreeze_release_lock(struct request_queue *q, bool
|
||||
disk_dead, bool queue_dying)
|
||||
static inline void blk_unfreeze_release_lock(struct request_queue *q)
|
||||
{
|
||||
if (!queue_dying)
|
||||
if (!q->mq_freeze_queue_dying)
|
||||
rwsem_release(&q->q_lockdep_map, _RET_IP_);
|
||||
if (!disk_dead)
|
||||
if (!q->mq_freeze_disk_dead)
|
||||
rwsem_release(&q->io_lockdep_map, _RET_IP_);
|
||||
}
|
||||
#else
|
||||
static inline void blk_freeze_acquire_lock(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
static inline void blk_unfreeze_release_lock(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLK_INTERNAL_H */
|
||||
|
||||
@@ -601,16 +601,13 @@ void elevator_init_mq(struct request_queue *q)
|
||||
*
|
||||
* Disk isn't added yet, so verifying queue lock only manually.
|
||||
*/
|
||||
blk_freeze_queue_start_non_owner(q);
|
||||
blk_freeze_acquire_lock(q, true, false);
|
||||
blk_mq_freeze_queue_wait(q);
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
blk_mq_cancel_work_sync(q);
|
||||
|
||||
err = blk_mq_init_sched(q, e);
|
||||
|
||||
blk_unfreeze_release_lock(q, true, false);
|
||||
blk_mq_unfreeze_queue_non_owner(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
if (err) {
|
||||
pr_warn("\"%s\" elevator initialization failed, "
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hashtable.h>
|
||||
#include <linux/android_kabi.h>
|
||||
#include "blk-mq.h"
|
||||
|
||||
struct io_cq;
|
||||
@@ -48,6 +49,11 @@ struct elevator_mq_ops {
|
||||
struct request *(*next_request)(struct request_queue *, struct request *);
|
||||
void (*init_icq)(struct io_cq *);
|
||||
void (*exit_icq)(struct io_cq *);
|
||||
|
||||
ANDROID_KABI_RESERVE(1);
|
||||
ANDROID_KABI_RESERVE(2);
|
||||
ANDROID_KABI_RESERVE(3);
|
||||
ANDROID_KABI_RESERVE(4);
|
||||
};
|
||||
|
||||
#define ELV_NAME_MAX (16)
|
||||
@@ -83,6 +89,9 @@ struct elevator_type
|
||||
/* managed by elevator core */
|
||||
char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */
|
||||
struct list_head list;
|
||||
|
||||
ANDROID_KABI_RESERVE(1);
|
||||
ANDROID_KABI_RESERVE(2);
|
||||
};
|
||||
|
||||
static inline bool elevator_tryget(struct elevator_type *e)
|
||||
|
||||
@@ -641,7 +641,7 @@ void del_gendisk(struct gendisk *disk)
|
||||
struct request_queue *q = disk->queue;
|
||||
struct block_device *part;
|
||||
unsigned long idx;
|
||||
bool start_drain, queue_dying;
|
||||
bool start_drain;
|
||||
|
||||
might_sleep();
|
||||
|
||||
@@ -670,9 +670,8 @@ void del_gendisk(struct gendisk *disk)
|
||||
*/
|
||||
mutex_lock(&disk->open_mutex);
|
||||
start_drain = __blk_mark_disk_dead(disk);
|
||||
queue_dying = blk_queue_dying(q);
|
||||
if (start_drain)
|
||||
blk_freeze_acquire_lock(q, true, queue_dying);
|
||||
blk_freeze_acquire_lock(q);
|
||||
xa_for_each_start(&disk->part_tbl, idx, part, 1)
|
||||
drop_partition(part);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
@@ -728,7 +727,7 @@ void del_gendisk(struct gendisk *disk)
|
||||
blk_mq_exit_queue(q);
|
||||
|
||||
if (start_drain)
|
||||
blk_unfreeze_release_lock(q, true, queue_dying);
|
||||
blk_unfreeze_release_lock(q);
|
||||
}
|
||||
EXPORT_SYMBOL(del_gendisk);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
BRANCH=android16-6.12
|
||||
KMI_GENERATION=4
|
||||
KMI_GENERATION=5
|
||||
CLANG_VERSION=r536225
|
||||
RUSTC_VERSION=1.82.0
|
||||
AARCH64_NDK_TRIPLE=aarch64-linux-android31
|
||||
|
||||
@@ -23,6 +23,7 @@ config ACPI_APEI_GHES
|
||||
select ACPI_HED
|
||||
select IRQ_WORK
|
||||
select GENERIC_ALLOCATOR
|
||||
select ARM_SDE_INTERFACE if ARM64
|
||||
help
|
||||
Generic Hardware Error Source provides a way to report
|
||||
platform hardware errors (such as that from chipset). It
|
||||
|
||||
@@ -1612,7 +1612,7 @@ void __init acpi_ghes_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
sdei_init();
|
||||
acpi_sdei_init();
|
||||
|
||||
if (acpi_disabled)
|
||||
return;
|
||||
|
||||
@@ -37,6 +37,18 @@ config ANDROID_BINDER_DEVICES
|
||||
created. Each binder device has its own context manager, and is
|
||||
therefore logically separated from the other devices.
|
||||
|
||||
config ANDROID_BINDER_IPC_RUST
|
||||
tristate "Rust version of Android Binder IPC Driver"
|
||||
depends on RUST && ANDROID_BINDER_IPC && ANDROID_BINDERFS
|
||||
help
|
||||
This enables the Rust implementation of the Binder driver. At this
|
||||
time, the Rust implementation can only be built as an alternative to
|
||||
the C implementation. By default, the C implementation is used, but
|
||||
if the binder.impl=rust kernel command-line parameter is provided,
|
||||
then the Rust implementation is used instead.
|
||||
|
||||
To build this as a GKI module, choose m.
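For example (illustrative, based on the description above): build with
CONFIG_ANDROID_BINDER_IPC_RUST=m and boot with the binder.impl=rust
kernel command-line parameter to select the Rust driver; without that
parameter the C implementation remains in use.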
|
||||
|
||||
config ANDROID_BINDER_IPC_SELFTEST
|
||||
bool "Android Binder IPC Driver Selftest"
|
||||
depends on ANDROID_BINDER_IPC
|
||||
|
||||
@@ -6,3 +6,7 @@ obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o
|
||||
obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
|
||||
obj-$(CONFIG_ANDROID_VENDOR_HOOKS) += vendor_hooks.o
|
||||
obj-$(CONFIG_ANDROID_DEBUG_KINFO) += debug_kinfo.o
|
||||
|
||||
obj-$(CONFIG_ANDROID_BINDER_IPC_RUST) += rust_binder.o
|
||||
rust_binder-objs := binder/rust_binder.o binder/rust_binderfs.o binder/rust_binder_events.o
|
||||
rust_binder-objs += binder/rust_binder_hooks.o binder/page_range_helper.o
|
||||
|
||||
drivers/android/binder/allocation.rs (new file, 611 lines)
@@ -0,0 +1,611 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use core::mem::{size_of, size_of_val, MaybeUninit};
|
||||
use core::ops::Range;
|
||||
|
||||
use kernel::{
|
||||
bindings,
|
||||
fs::file::{File, FileDescriptorReservation},
|
||||
prelude::*,
|
||||
sync::Arc,
|
||||
types::{ARef, AsBytes, FromBytes},
|
||||
uaccess::UserSliceReader,
|
||||
uapi,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
deferred_close::DeferredFdCloser,
|
||||
defs::*,
|
||||
node::{Node, NodeRef},
|
||||
process::Process,
|
||||
DArc,
|
||||
};
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct AllocationInfo {
|
||||
/// Range within the allocation where we can find the offsets to the object descriptors.
|
||||
pub(crate) offsets: Option<Range<usize>>,
|
||||
/// The target node of the transaction this allocation is associated to.
|
||||
/// Not set for replies.
|
||||
pub(crate) target_node: Option<NodeRef>,
|
||||
/// When this allocation is dropped, call `pending_oneway_finished` on the node.
|
||||
///
|
||||
/// This is used to serialize oneway transactions on the same node. Binder guarantees that
|
||||
/// oneway transactions to the same node are delivered sequentially in the order they are sent.
|
||||
pub(crate) oneway_node: Option<DArc<Node>>,
|
||||
/// Zero the data in the buffer on free.
|
||||
pub(crate) clear_on_free: bool,
|
||||
/// List of files embedded in this transaction.
|
||||
file_list: FileList,
|
||||
}
|
||||
|
||||
/// Represents an allocation that the kernel is currently using.
|
||||
///
|
||||
/// When allocations are idle, the range allocator holds the data related to them.
|
||||
///
|
||||
/// # Invariants
|
||||
///
|
||||
/// This allocation corresponds to an allocation in the range allocator, so the relevant pages are
|
||||
/// marked in use in the page range.
|
||||
pub(crate) struct Allocation {
|
||||
pub(crate) offset: usize,
|
||||
size: usize,
|
||||
pub(crate) ptr: usize,
|
||||
pub(crate) process: Arc<Process>,
|
||||
allocation_info: Option<AllocationInfo>,
|
||||
free_on_drop: bool,
|
||||
pub(crate) oneway_spam_detected: bool,
|
||||
#[allow(dead_code)]
|
||||
pub(crate) debug_id: usize,
|
||||
}
|
||||
|
||||
impl Allocation {
|
||||
pub(crate) fn new(
|
||||
process: Arc<Process>,
|
||||
debug_id: usize,
|
||||
offset: usize,
|
||||
size: usize,
|
||||
ptr: usize,
|
||||
oneway_spam_detected: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
process,
|
||||
offset,
|
||||
size,
|
||||
ptr,
|
||||
debug_id,
|
||||
oneway_spam_detected,
|
||||
allocation_info: None,
|
||||
free_on_drop: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn size_check(&self, offset: usize, size: usize) -> Result {
|
||||
let overflow_fail = offset.checked_add(size).is_none();
|
||||
let cmp_size_fail = offset.wrapping_add(size) > self.size;
|
||||
if overflow_fail || cmp_size_fail {
|
||||
return Err(EFAULT);
|
||||
}
|
||||
Ok(())
|
||||
}
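// Illustration (derived from the two checks above), assuming `self.size == 128`:
//
//     size_check(usize::MAX, 2) -> Err(EFAULT)   // offset + size overflows
//     size_check(100, 64)       -> Err(EFAULT)   // 164 > 128, out of bounds
//     size_check(64, 64)        -> Ok(())        // ends exactly at the limit
//
// Both checks are needed: a wrapping sum could otherwise compare as "small"
// and slip past the bounds comparison.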
|
||||
|
||||
pub(crate) fn copy_into(
|
||||
&self,
|
||||
reader: &mut UserSliceReader,
|
||||
offset: usize,
|
||||
size: usize,
|
||||
) -> Result {
|
||||
self.size_check(offset, size)?;
|
||||
|
||||
// SAFETY: While this object exists, the range allocator will keep the range allocated, and
|
||||
// in turn, the pages will be marked as in use.
|
||||
unsafe {
|
||||
self.process
|
||||
.pages
|
||||
.copy_from_user_slice(reader, self.offset + offset, size)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
|
||||
self.size_check(offset, size_of::<T>())?;
|
||||
|
||||
// SAFETY: While this object exists, the range allocator will keep the range allocated, and
|
||||
// in turn, the pages will be marked as in use.
|
||||
unsafe { self.process.pages.read(self.offset + offset) }
|
||||
}
|
||||
|
||||
pub(crate) fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
|
||||
self.size_check(offset, size_of_val::<T>(obj))?;
|
||||
|
||||
// SAFETY: While this object exists, the range allocator will keep the range allocated, and
|
||||
// in turn, the pages will be marked as in use.
|
||||
unsafe { self.process.pages.write(self.offset + offset, obj) }
|
||||
}
|
||||
|
||||
pub(crate) fn fill_zero(&self) -> Result {
|
||||
// SAFETY: While this object exists, the range allocator will keep the range allocated, and
|
||||
// in turn, the pages will be marked as in use.
|
||||
unsafe { self.process.pages.fill_zero(self.offset, self.size) }
|
||||
}
|
||||
|
||||
pub(crate) fn keep_alive(mut self) {
|
||||
self.process
|
||||
.buffer_make_freeable(self.offset, self.allocation_info.take());
|
||||
self.free_on_drop = false;
|
||||
}
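// Note: after `keep_alive`, freeing the buffer becomes the responsibility of
// the owning process (typically when userspace later releases it with
// BC_FREE_BUFFER) rather than of this `Allocation`'s destructor.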
|
||||
|
||||
pub(crate) fn set_info(&mut self, info: AllocationInfo) {
|
||||
self.allocation_info = Some(info);
|
||||
}
|
||||
|
||||
pub(crate) fn get_or_init_info(&mut self) -> &mut AllocationInfo {
|
||||
self.allocation_info.get_or_insert_with(Default::default)
|
||||
}
|
||||
|
||||
pub(crate) fn set_info_offsets(&mut self, offsets: Range<usize>) {
|
||||
self.get_or_init_info().offsets = Some(offsets);
|
||||
}
|
||||
|
||||
pub(crate) fn set_info_oneway_node(&mut self, oneway_node: DArc<Node>) {
|
||||
self.get_or_init_info().oneway_node = Some(oneway_node);
|
||||
}
|
||||
|
||||
pub(crate) fn set_info_clear_on_drop(&mut self) {
|
||||
self.get_or_init_info().clear_on_free = true;
|
||||
}
|
||||
|
||||
pub(crate) fn set_info_target_node(&mut self, target_node: NodeRef) {
|
||||
self.get_or_init_info().target_node = Some(target_node);
|
||||
}
|
||||
|
||||
/// Reserve enough space to push at least `num_fds` fds.
|
||||
pub(crate) fn info_add_fd_reserve(&mut self, num_fds: usize) -> Result {
|
||||
self.get_or_init_info()
|
||||
.file_list
|
||||
.files_to_translate
|
||||
.reserve(num_fds, GFP_KERNEL)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn info_add_fd(
|
||||
&mut self,
|
||||
file: ARef<File>,
|
||||
buffer_offset: usize,
|
||||
close_on_free: bool,
|
||||
) -> Result {
|
||||
self.get_or_init_info().file_list.files_to_translate.push(
|
||||
FileEntry {
|
||||
file,
|
||||
buffer_offset,
|
||||
close_on_free,
|
||||
},
|
||||
GFP_KERNEL,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn set_info_close_on_free(&mut self, cof: FdsCloseOnFree) {
|
||||
self.get_or_init_info().file_list.close_on_free = cof.0;
|
||||
}
|
||||
|
||||
pub(crate) fn translate_fds(&mut self) -> Result<TranslatedFds> {
|
||||
let file_list = match self.allocation_info.as_mut() {
|
||||
Some(info) => &mut info.file_list,
|
||||
None => return Ok(TranslatedFds::new()),
|
||||
};
|
||||
|
||||
let files = core::mem::take(&mut file_list.files_to_translate);
|
||||
|
||||
let num_close_on_free = files.iter().filter(|entry| entry.close_on_free).count();
|
||||
let mut close_on_free = KVec::with_capacity(num_close_on_free, GFP_KERNEL)?;
|
||||
|
||||
let mut reservations = KVec::with_capacity(files.len(), GFP_KERNEL)?;
|
||||
for file_info in files {
|
||||
let res = FileDescriptorReservation::get_unused_fd_flags(bindings::O_CLOEXEC)?;
|
||||
let fd = res.reserved_fd();
|
||||
self.write::<u32>(file_info.buffer_offset, &fd)?;
|
||||
crate::trace::trace_transaction_fd_recv(self.debug_id, fd, file_info.buffer_offset);
|
||||
|
||||
reservations.push(
|
||||
Reservation {
|
||||
res,
|
||||
file: file_info.file,
|
||||
},
|
||||
GFP_KERNEL,
|
||||
)?;
|
||||
if file_info.close_on_free {
|
||||
close_on_free.push(fd, GFP_KERNEL)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(TranslatedFds {
|
||||
reservations,
|
||||
close_on_free: FdsCloseOnFree(close_on_free),
|
||||
})
|
||||
}
|
||||
|
||||
/// Should the looper return to userspace when freeing this allocation?
|
||||
pub(crate) fn looper_need_return_on_free(&self) -> bool {
|
||||
// Closing fds involves pushing task_work for execution when we return to userspace. Hence,
|
||||
// we should return to userspace asap if we are closing fds.
|
||||
match self.allocation_info {
|
||||
Some(ref info) => !info.file_list.close_on_free.is_empty(),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Allocation {
|
||||
fn drop(&mut self) {
|
||||
if !self.free_on_drop {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(mut info) = self.allocation_info.take() {
|
||||
if let Some(oneway_node) = info.oneway_node.as_ref() {
|
||||
oneway_node.pending_oneway_finished();
|
||||
}
|
||||
|
||||
info.target_node = None;
|
||||
|
||||
if let Some(offsets) = info.offsets.clone() {
|
||||
let view = AllocationView::new(self, offsets.start);
|
||||
for i in offsets.step_by(size_of::<usize>()) {
|
||||
if view.cleanup_object(i).is_err() {
|
||||
pr_warn!("Error cleaning up object at offset {}\n", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for &fd in &info.file_list.close_on_free {
|
||||
let closer = match DeferredFdCloser::new(GFP_KERNEL) {
|
||||
Ok(closer) => closer,
|
||||
Err(kernel::alloc::AllocError) => {
|
||||
// Ignore allocation failures.
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Here, we ignore errors. The operation can fail if the fd is not valid, or if the
|
||||
// method is called from a kthread. However, this is always called from a syscall,
|
||||
// so the latter case cannot happen, and we don't care about the first case.
|
||||
let _ = closer.close_fd(fd);
|
||||
}
|
||||
|
||||
if info.clear_on_free {
|
||||
if let Err(e) = self.fill_zero() {
|
||||
pr_warn!("Failed to clear data on free: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.process.buffer_raw_free(self.ptr);
|
||||
}
|
||||
}
|
||||
|
||||
/// A wrapper around `Allocation` that is being created.
|
||||
///
|
||||
/// If the allocation is destroyed while wrapped in this wrapper, then the allocation will be
|
||||
/// considered to be part of a failed transaction. Successful transactions avoid that by calling
|
||||
/// `success`, which skips the destructor.
|
||||
#[repr(transparent)]
|
||||
pub(crate) struct NewAllocation(pub(crate) Allocation);
|
||||
|
||||
impl NewAllocation {
|
||||
pub(crate) fn success(self) -> Allocation {
|
||||
// This skips the destructor.
|
||||
//
|
||||
// SAFETY: This type is `#[repr(transparent)]`, so the layout matches.
|
||||
unsafe { core::mem::transmute(self) }
|
||||
}
|
||||
}
|
||||
|
||||
impl core::ops::Deref for NewAllocation {
|
||||
type Target = Allocation;
|
||||
fn deref(&self) -> &Allocation {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl core::ops::DerefMut for NewAllocation {
|
||||
fn deref_mut(&mut self) -> &mut Allocation {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for NewAllocation {
|
||||
fn drop(&mut self) {
|
||||
crate::trace::trace_transaction_failed_buffer_release(self.debug_id);
|
||||
}
|
||||
}
|
||||
|
||||
/// A view into the beginning of an allocation.
|
||||
///
|
||||
/// All attempts to read or write outside of the view will fail. To intentionally access outside of
|
||||
/// this view, use the `alloc` field of this struct directly.
|
||||
pub(crate) struct AllocationView<'a> {
|
||||
pub(crate) alloc: &'a mut Allocation,
|
||||
limit: usize,
|
||||
}
|
||||
|
||||
impl<'a> AllocationView<'a> {
|
||||
pub(crate) fn new(alloc: &'a mut Allocation, limit: usize) -> Self {
|
||||
AllocationView { alloc, limit }
|
||||
}
|
||||
|
||||
pub(crate) fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
|
||||
if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
|
||||
return Err(EINVAL);
|
||||
}
|
||||
self.alloc.read(offset)
|
||||
}
|
||||
|
||||
pub(crate) fn write<T: AsBytes>(&self, offset: usize, obj: &T) -> Result {
|
||||
if offset.checked_add(size_of::<T>()).ok_or(EINVAL)? > self.limit {
|
||||
return Err(EINVAL);
|
||||
}
|
||||
self.alloc.write(offset, obj)
|
||||
}
|
||||
|
||||
pub(crate) fn copy_into(
|
||||
&self,
|
||||
reader: &mut UserSliceReader,
|
||||
offset: usize,
|
||||
size: usize,
|
||||
) -> Result {
|
||||
if offset.checked_add(size).ok_or(EINVAL)? > self.limit {
|
||||
return Err(EINVAL);
|
||||
}
|
||||
self.alloc.copy_into(reader, offset, size)
|
||||
}
|
||||
|
||||
pub(crate) fn transfer_binder_object(
|
||||
&self,
|
||||
offset: usize,
|
||||
obj: &uapi::flat_binder_object,
|
||||
strong: bool,
|
||||
node_ref: NodeRef,
|
||||
) -> Result {
|
||||
let mut newobj = FlatBinderObject::default();
|
||||
let node = node_ref.node.clone();
|
||||
if Arc::ptr_eq(&node_ref.node.owner, &self.alloc.process) {
|
||||
// The receiving process is the owner of the node, so send it a binder object (instead
|
||||
// of a handle).
|
||||
let (ptr, cookie) = node.get_id();
|
||||
newobj.hdr.type_ = if strong {
|
||||
BINDER_TYPE_BINDER
|
||||
} else {
|
||||
BINDER_TYPE_WEAK_BINDER
|
||||
};
|
||||
newobj.flags = obj.flags;
|
||||
newobj.__bindgen_anon_1.binder = ptr as _;
|
||||
newobj.cookie = cookie as _;
|
||||
self.write(offset, &newobj)?;
|
||||
// Increment the user ref count on the node. It will be decremented as part of the
|
||||
// destruction of the buffer, when we see a binder or weak-binder object.
|
||||
node.update_refcount(true, 1, strong);
|
||||
} else {
|
||||
// The receiving process is different from the owner, so we need to insert a handle to
|
||||
// the binder object.
|
||||
let handle = self
|
||||
.alloc
|
||||
.process
|
||||
.as_arc_borrow()
|
||||
.insert_or_update_handle(node_ref, false)?;
|
||||
newobj.hdr.type_ = if strong {
|
||||
BINDER_TYPE_HANDLE
|
||||
} else {
|
||||
BINDER_TYPE_WEAK_HANDLE
|
||||
};
|
||||
newobj.flags = obj.flags;
|
||||
newobj.__bindgen_anon_1.handle = handle;
|
||||
if self.write(offset, &newobj).is_err() {
|
||||
// Decrement ref count on the handle we just created.
|
||||
let _ = self
|
||||
.alloc
|
||||
.process
|
||||
.as_arc_borrow()
|
||||
.update_ref(handle, false, strong);
|
||||
return Err(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
crate::trace::trace_transaction_node_send(self.alloc.debug_id, &node, obj, &newobj);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cleanup_object(&self, index_offset: usize) -> Result {
|
||||
let offset = self.alloc.read(index_offset)?;
|
||||
let header = self.read::<BinderObjectHeader>(offset)?;
|
||||
match header.type_ {
|
||||
BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => {
|
||||
let obj = self.read::<FlatBinderObject>(offset)?;
|
||||
let strong = header.type_ == BINDER_TYPE_BINDER;
|
||||
// SAFETY: The type is `BINDER_TYPE_{WEAK_}BINDER`, so the `binder` field is
|
||||
// populated.
|
||||
let ptr = unsafe { obj.__bindgen_anon_1.binder };
|
||||
let cookie = obj.cookie;
|
||||
self.alloc.process.update_node(ptr, cookie, strong);
|
||||
Ok(())
|
||||
}
|
||||
BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => {
|
||||
let obj = self.read::<FlatBinderObject>(offset)?;
|
||||
let strong = header.type_ == BINDER_TYPE_HANDLE;
|
||||
// SAFETY: The type is `BINDER_TYPE_{WEAK_}HANDLE`, so the `handle` field is
|
||||
// populated.
|
||||
let handle = unsafe { obj.__bindgen_anon_1.handle };
|
||||
self.alloc
|
||||
.process
|
||||
.as_arc_borrow()
|
||||
.update_ref(handle, false, strong)
|
||||
}
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A binder object as it is serialized.
|
||||
///
|
||||
/// # Invariants
|
||||
///
|
||||
/// All bytes must be initialized, and the value of `self.hdr.type_` must be one of the allowed
|
||||
/// types.
|
||||
#[repr(C)]
|
||||
pub(crate) union BinderObject {
|
||||
hdr: uapi::binder_object_header,
|
||||
fbo: uapi::flat_binder_object,
|
||||
fdo: uapi::binder_fd_object,
|
||||
bbo: uapi::binder_buffer_object,
|
||||
fdao: uapi::binder_fd_array_object,
|
||||
}
|
||||
|
||||
/// A view into a `BinderObject` that can be used in a match statement.
|
||||
pub(crate) enum BinderObjectRef<'a> {
|
||||
Binder(&'a mut uapi::flat_binder_object),
|
||||
Handle(&'a mut uapi::flat_binder_object),
|
||||
Fd(&'a mut uapi::binder_fd_object),
|
||||
Ptr(&'a mut uapi::binder_buffer_object),
|
||||
Fda(&'a mut uapi::binder_fd_array_object),
|
||||
}
|
||||
|
||||
impl BinderObject {
|
||||
pub(crate) fn read_from(reader: &mut UserSliceReader) -> Result<BinderObject> {
|
||||
let object = Self::read_from_inner(|slice| {
|
||||
let read_len = usize::min(slice.len(), reader.len());
|
||||
reader.clone_reader().read_slice(&mut slice[..read_len])?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// If we used an object type smaller than the largest object size, then we've read more
|
||||
// bytes than we needed to. However, we used `.clone_reader()` to avoid advancing the
|
||||
// original reader. Now, we call `skip` so that the caller's reader is advanced by the
|
||||
// right amount.
|
||||
//
|
||||
// The `skip` call fails if the reader doesn't have `size` bytes available. This could
|
||||
// happen if the type header corresponds to an object type that is larger than the rest of
|
||||
// the reader.
|
||||
//
|
||||
// Any extra bytes beyond the size of the object are inaccessible after this call, so
|
||||
// reading them again from the `reader` later does not result in TOCTOU bugs.
|
||||
reader.skip(object.size())?;
|
||||
|
||||
Ok(object)
|
||||
}
|
||||
|
||||
/// Use the provided reader closure to construct a `BinderObject`.
|
||||
///
|
||||
/// The closure should write the bytes for the object into the provided slice.
|
||||
pub(crate) fn read_from_inner<R>(reader: R) -> Result<BinderObject>
|
||||
where
|
||||
R: FnOnce(&mut [u8; size_of::<BinderObject>()]) -> Result<()>,
|
||||
{
|
||||
let mut obj = MaybeUninit::<BinderObject>::zeroed();
|
||||
|
||||
// SAFETY: The lengths of `BinderObject` and `[u8; size_of::<BinderObject>()]` are equal,
|
||||
// and the byte array has an alignment requirement of one, so the pointer cast is okay.
|
||||
// Additionally, `obj` was initialized to zeros, so the byte array will not be
|
||||
// uninitialized.
|
||||
(reader)(unsafe { &mut *obj.as_mut_ptr().cast() })?;
|
||||
|
||||
// SAFETY: The entire object is initialized, so accessing this field is safe.
|
||||
let type_ = unsafe { obj.assume_init_ref().hdr.type_ };
|
||||
if Self::type_to_size(type_).is_none() {
|
||||
// The value of `obj.hdr_type_` was invalid.
|
||||
return Err(EINVAL);
|
||||
}
|
||||
|
||||
// SAFETY: All bytes are initialized (since we zeroed them at the start) and we checked
|
||||
// that `self.hdr.type_` is one of the allowed types, so the type invariants are satisfied.
|
||||
unsafe { Ok(obj.assume_init()) }
|
||||
}
|
||||
|
||||
pub(crate) fn as_ref(&mut self) -> BinderObjectRef<'_> {
|
||||
use BinderObjectRef::*;
|
||||
// SAFETY: The constructor ensures that all bytes of `self` are initialized, and all
|
||||
// variants of this union accept all initialized bit patterns.
|
||||
unsafe {
|
||||
match self.hdr.type_ {
|
||||
BINDER_TYPE_WEAK_BINDER | BINDER_TYPE_BINDER => Binder(&mut self.fbo),
|
||||
BINDER_TYPE_WEAK_HANDLE | BINDER_TYPE_HANDLE => Handle(&mut self.fbo),
|
||||
BINDER_TYPE_FD => Fd(&mut self.fdo),
|
||||
BINDER_TYPE_PTR => Ptr(&mut self.bbo),
|
||||
BINDER_TYPE_FDA => Fda(&mut self.fdao),
|
||||
// SAFETY: By the type invariant, the value of `self.hdr.type_` cannot have any
|
||||
// other value than the ones checked above.
|
||||
_ => core::hint::unreachable_unchecked(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn size(&self) -> usize {
|
||||
// SAFETY: The entire object is initialized, so accessing this field is safe.
|
||||
let type_ = unsafe { self.hdr.type_ };
|
||||
|
||||
// SAFETY: The type invariants guarantee that the type field is correct.
|
||||
unsafe { Self::type_to_size(type_).unwrap_unchecked() }
|
||||
}
|
||||
|
||||
fn type_to_size(type_: u32) -> Option<usize> {
|
||||
match type_ {
|
||||
BINDER_TYPE_WEAK_BINDER => Some(size_of::<uapi::flat_binder_object>()),
|
||||
BINDER_TYPE_BINDER => Some(size_of::<uapi::flat_binder_object>()),
|
||||
BINDER_TYPE_WEAK_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
|
||||
BINDER_TYPE_HANDLE => Some(size_of::<uapi::flat_binder_object>()),
|
||||
BINDER_TYPE_FD => Some(size_of::<uapi::binder_fd_object>()),
|
||||
BINDER_TYPE_PTR => Some(size_of::<uapi::binder_buffer_object>()),
|
||||
BINDER_TYPE_FDA => Some(size_of::<uapi::binder_fd_array_object>()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct FileList {
|
||||
files_to_translate: KVec<FileEntry>,
|
||||
close_on_free: KVec<u32>,
|
||||
}
|
||||
|
||||
struct FileEntry {
|
||||
/// The file for which a descriptor will be created in the recipient process.
|
||||
file: ARef<File>,
|
||||
/// The offset in the buffer where the file descriptor is stored.
|
||||
buffer_offset: usize,
|
||||
/// Whether this fd should be closed when the allocation is freed.
|
||||
close_on_free: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct TranslatedFds {
|
||||
reservations: KVec<Reservation>,
|
||||
/// If commit is called, then these fds should be closed. (If commit is not called, then they
|
||||
/// shouldn't be closed.)
|
||||
close_on_free: FdsCloseOnFree,
|
||||
}
|
||||
|
||||
struct Reservation {
|
||||
res: FileDescriptorReservation,
|
||||
file: ARef<File>,
|
||||
}
|
||||
|
||||
impl TranslatedFds {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
reservations: KVec::new(),
|
||||
close_on_free: FdsCloseOnFree(KVec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn commit(self) -> FdsCloseOnFree {
|
||||
for entry in self.reservations {
|
||||
entry.res.fd_install(entry.file);
|
||||
}
|
||||
|
||||
self.close_on_free
|
||||
}
|
||||
}
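// Sketch of the intended flow (the calling transaction code is outside this
// hunk): build the buffer, call `translate_fds()`, and call `commit()` only
// once the transaction has been queued successfully; dropping a
// `TranslatedFds` without committing lets each `FileDescriptorReservation`
// release its reserved fd, so nothing becomes visible to the recipient.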
|
||||
|
||||
pub(crate) struct FdsCloseOnFree(KVec<u32>);
|
||||
drivers/android/binder/context.rs (new file, 183 lines)
@@ -0,0 +1,183 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::{
|
||||
error::Error,
|
||||
list::{List, ListArc, ListLinks},
|
||||
prelude::*,
|
||||
security,
|
||||
str::{CStr, CString},
|
||||
sync::{Arc, Mutex},
|
||||
task::Kuid,
|
||||
};
|
||||
|
||||
use crate::{error::BinderError, node::NodeRef, process::Process};
|
||||
|
||||
kernel::sync::global_lock! {
|
||||
// SAFETY: We call `init` in the module initializer, so it's initialized before first use.
|
||||
pub(crate) unsafe(uninit) static CONTEXTS: Mutex<ContextList> = ContextList {
|
||||
list: List::new(),
|
||||
};
|
||||
}
|
||||
|
||||
pub(crate) struct ContextList {
|
||||
list: List<Context>,
|
||||
}
|
||||
|
||||
pub(crate) fn get_all_contexts() -> Result<KVec<Arc<Context>>> {
|
||||
let lock = CONTEXTS.lock();
|
||||
|
||||
let count = lock.list.iter().count();
|
||||
|
||||
let mut ctxs = KVec::with_capacity(count, GFP_KERNEL)?;
|
||||
for ctx in &lock.list {
|
||||
ctxs.push(Arc::from(ctx), GFP_KERNEL)?;
|
||||
}
|
||||
Ok(ctxs)
|
||||
}
|
||||
|
||||
/// This struct keeps track of the processes using this context, and which process is the context
|
||||
/// manager.
|
||||
struct Manager {
|
||||
node: Option<NodeRef>,
|
||||
uid: Option<Kuid>,
|
||||
all_procs: List<Process>,
|
||||
}
|
||||
|
||||
/// There is one context per binder file (/dev/binder, /dev/hwbinder, etc)
|
||||
#[pin_data]
|
||||
pub(crate) struct Context {
|
||||
#[pin]
|
||||
manager: Mutex<Manager>,
|
||||
pub(crate) name: CString,
|
||||
#[pin]
|
||||
links: ListLinks,
|
||||
}
|
||||
|
||||
kernel::list::impl_has_list_links! {
|
||||
impl HasListLinks<0> for Context { self.links }
|
||||
}
|
||||
kernel::list::impl_list_arc_safe! {
|
||||
impl ListArcSafe<0> for Context { untracked; }
|
||||
}
|
||||
kernel::list::impl_list_item! {
|
||||
impl ListItem<0> for Context {
|
||||
using ListLinks;
|
||||
}
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub(crate) fn new(name: &CStr) -> Result<Arc<Self>> {
|
||||
let name = CString::try_from(name)?;
|
||||
let list_ctx = ListArc::pin_init::<Error>(
|
||||
try_pin_init!(Context {
|
||||
name,
|
||||
links <- ListLinks::new(),
|
||||
manager <- kernel::new_mutex!(Manager {
|
||||
all_procs: List::new(),
|
||||
node: None,
|
||||
uid: None,
|
||||
}, "Context::manager"),
|
||||
}),
|
||||
GFP_KERNEL,
|
||||
)?;
|
||||
|
||||
let ctx = list_ctx.clone_arc();
|
||||
CONTEXTS.lock().list.push_back(list_ctx);
|
||||
|
||||
Ok(ctx)
|
||||
}
|
||||
|
||||
/// Called when the file for this context is unlinked.
|
||||
///
|
||||
/// No-op if called twice.
|
||||
pub(crate) fn deregister(&self) {
|
||||
// SAFETY: We never add the context to any other linked list than this one, so it is either
|
||||
// in this list, or not in any list.
|
||||
unsafe { CONTEXTS.lock().list.remove(self) };
|
||||
}
|
||||
|
||||
pub(crate) fn register_process(self: &Arc<Self>, proc: ListArc<Process>) {
|
||||
if !Arc::ptr_eq(self, &proc.ctx) {
|
||||
pr_err!("Context::register_process called on the wrong context.");
|
||||
return;
|
||||
}
|
||||
self.manager.lock().all_procs.push_back(proc);
|
||||
}
|
||||
|
||||
pub(crate) fn deregister_process(self: &Arc<Self>, proc: &Process) {
|
||||
if !Arc::ptr_eq(self, &proc.ctx) {
|
||||
pr_err!("Context::deregister_process called on the wrong context.");
|
||||
return;
|
||||
}
|
||||
// SAFETY: We just checked that this is the right list.
|
||||
unsafe { self.manager.lock().all_procs.remove(proc) };
|
||||
}
|
||||
|
||||
pub(crate) fn set_manager_node(&self, node_ref: NodeRef) -> Result {
|
||||
let mut manager = self.manager.lock();
|
||||
if manager.node.is_some() {
|
||||
pr_warn!("BINDER_SET_CONTEXT_MGR already set");
|
||||
return Err(EBUSY);
|
||||
}
|
||||
security::binder_set_context_mgr(&node_ref.node.owner.cred)?;
|
||||
|
||||
// If the context manager has been set before, ensure that we use the same euid.
|
||||
let caller_uid = Kuid::current_euid();
|
||||
if let Some(ref uid) = manager.uid {
|
||||
if *uid != caller_uid {
|
||||
return Err(EPERM);
|
||||
}
|
||||
}
|
||||
|
||||
manager.node = Some(node_ref);
|
||||
manager.uid = Some(caller_uid);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn unset_manager_node(&self) {
|
||||
let node_ref = self.manager.lock().node.take();
|
||||
drop(node_ref);
|
||||
}
|
||||
|
||||
pub(crate) fn get_manager_node(&self, strong: bool) -> Result<NodeRef, BinderError> {
|
||||
self.manager
|
||||
.lock()
|
||||
.node
|
||||
.as_ref()
|
||||
.ok_or_else(BinderError::new_dead)?
|
||||
.clone(strong)
|
||||
.map_err(BinderError::from)
|
||||
}
|
||||
|
||||
pub(crate) fn for_each_proc<F>(&self, mut func: F)
|
||||
where
|
||||
F: FnMut(&Process),
|
||||
{
|
||||
let lock = self.manager.lock();
|
||||
for proc in &lock.all_procs {
|
||||
func(&proc);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_all_procs(&self) -> Result<KVec<Arc<Process>>> {
|
||||
let lock = self.manager.lock();
|
||||
let count = lock.all_procs.iter().count();
|
||||
|
||||
let mut procs = KVec::with_capacity(count, GFP_KERNEL)?;
|
||||
for proc in &lock.all_procs {
|
||||
procs.push(Arc::from(proc), GFP_KERNEL)?;
|
||||
}
|
||||
Ok(procs)
|
||||
}
|
||||
|
||||
pub(crate) fn get_procs_with_pid(&self, pid: i32) -> Result<KVec<Arc<Process>>> {
|
||||
let orig = self.get_all_procs()?;
|
||||
let mut backing = KVec::with_capacity(orig.len(), GFP_KERNEL)?;
|
||||
for proc in orig.into_iter().filter(|proc| proc.task.pid() == pid) {
|
||||
backing.push(proc, GFP_KERNEL)?;
|
||||
}
|
||||
Ok(backing)
|
||||
}
|
||||
}
|
||||
drivers/android/binder/deferred_close.rs (new file, 202 lines)
@@ -0,0 +1,202 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
//! Logic for closing files in a deferred manner.
|
||||
//!
|
||||
//! This file could make sense to have in `kernel::fs`, but it was rejected for being too
|
||||
//! Binder-specific.
|
||||
|
||||
use core::mem::MaybeUninit;
|
||||
use kernel::{
|
||||
alloc::{AllocError, Flags},
|
||||
bindings,
|
||||
prelude::*,
|
||||
};
|
||||
|
||||
/// Helper used for closing file descriptors in a way that is safe even if the file is currently
|
||||
/// held using `fdget`.
|
||||
///
|
||||
/// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to
|
||||
/// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`.
|
||||
pub(crate) struct DeferredFdCloser {
|
||||
inner: KBox<DeferredFdCloserInner>,
|
||||
}
|
||||
|
||||
/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with
|
||||
/// moving it across threads.
|
||||
unsafe impl Send for DeferredFdCloser {}
|
||||
unsafe impl Sync for DeferredFdCloser {}
|
||||
|
||||
/// # Invariants
|
||||
///
|
||||
/// If the `file` pointer is non-null, then it points at a `struct file` and owns a refcount to
|
||||
/// that file.
|
||||
#[repr(C)]
|
||||
struct DeferredFdCloserInner {
|
||||
twork: MaybeUninit<bindings::callback_head>,
|
||||
file: *mut bindings::file,
|
||||
}
|
||||
|
||||
impl DeferredFdCloser {
|
||||
/// Create a new [`DeferredFdCloser`].
|
||||
pub(crate) fn new(flags: Flags) -> Result<Self, AllocError> {
|
||||
Ok(Self {
|
||||
// INVARIANT: The `file` pointer is null, so the type invariant does not apply.
|
||||
inner: KBox::new(
|
||||
DeferredFdCloserInner {
|
||||
twork: MaybeUninit::uninit(),
|
||||
file: core::ptr::null_mut(),
|
||||
},
|
||||
flags,
|
||||
)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Schedule a task work that closes the file descriptor when this task returns to userspace.
|
||||
///
|
||||
/// Fails if this is called from a context where we cannot run work when returning to
|
||||
/// userspace. (E.g., from a kthread.)
|
||||
pub(crate) fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> {
|
||||
use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME;
|
||||
|
||||
// In this method, we schedule the task work before closing the file. This is because
|
||||
// scheduling a task work is fallible, and we need to know whether it will fail before we
|
||||
// attempt to close the file.
|
||||
|
||||
// Task works are not available on kthreads.
|
||||
let current = kernel::current!();
|
||||
|
||||
// Check if this is a kthread.
|
||||
// SAFETY: Reading `flags` from a task is always okay.
|
||||
if unsafe { ((*current.as_ptr()).flags & bindings::PF_KTHREAD) != 0 } {
|
||||
return Err(DeferredFdCloseError::TaskWorkUnavailable);
|
||||
}
|
||||
|
||||
// Transfer ownership of the box's allocation to a raw pointer. This disables the
|
||||
// destructor, so we must manually convert it back to a KBox to drop it.
|
||||
//
|
||||
// Until we convert it back to a `KBox`, there are no aliasing requirements on this
|
||||
// pointer.
|
||||
let inner = KBox::into_raw(self.inner);
|
||||
|
||||
// The `callback_head` field is first in the struct, so this cast correctly gives us a
|
||||
// pointer to the field.
|
||||
let callback_head = inner.cast::<bindings::callback_head>();
|
||||
// SAFETY: This pointer offset operation does not go out-of-bounds.
|
||||
let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) };
|
||||
|
||||
let current = current.as_ptr();
|
||||
|
||||
// SAFETY: This function currently has exclusive access to the `DeferredFdCloserInner`, so
|
||||
// it is okay for us to perform unsynchronized writes to its `callback_head` field.
|
||||
unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) };
|
||||
|
||||
// SAFETY: This inserts the `DeferredFdCloserInner` into the task workqueue for the current
|
||||
// task. If this operation is successful, then this transfers exclusive ownership of the
|
||||
// `callback_head` field to the C side until it calls `do_close_fd`, and we don't touch or
|
||||
// invalidate the field during that time.
|
||||
//
|
||||
// When the C side calls `do_close_fd`, the safety requirements of that method are
|
||||
// satisfied because when a task work is executed, the callback is given ownership of the
|
||||
// pointer.
|
||||
//
|
||||
// The file pointer is currently null. If it is changed to be non-null before `do_close_fd`
|
||||
// is called, then that change happens due to the write at the end of this function, and
|
||||
// that write has a safety comment that explains why the refcount can be dropped when
|
||||
// `do_close_fd` runs.
|
||||
let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) };
|
||||
|
||||
if res != 0 {
|
||||
// SAFETY: Scheduling the task work failed, so we still have ownership of the box, so
|
||||
// we may destroy it.
|
||||
unsafe { drop(KBox::from_raw(inner)) };
|
||||
|
||||
return Err(DeferredFdCloseError::TaskWorkUnavailable);
|
||||
}
|
||||
|
||||
// This removes the fd from the fd table in `current`. The file is not fully closed until
|
||||
// `filp_close` is called. We are given ownership of one refcount to the file.
|
||||
//
|
||||
// SAFETY: This is safe no matter what `fd` is. If the `fd` is valid (that is, if the
|
||||
// pointer is non-null), then we call `filp_close` on the returned pointer as required by
|
||||
// `file_close_fd`.
|
||||
let file = unsafe { bindings::file_close_fd(fd) };
|
||||
if file.is_null() {
|
||||
// We don't clean up the task work since that might be expensive if the task work queue
|
||||
// is long. Just let it execute and let it clean up for itself.
|
||||
return Err(DeferredFdCloseError::BadFd);
|
||||
}
|
||||
|
||||
// Acquire a second refcount to the file.
|
||||
//
|
||||
// SAFETY: The `file` pointer points at a file with a non-zero refcount.
|
||||
unsafe { bindings::get_file(file) };
|
||||
|
||||
// This method closes the fd, consuming one of our two refcounts. There could be active
|
||||
// light refcounts created from that fd, so we must ensure that the file has a positive
|
||||
// refcount for the duration of those active light refcounts. We do that by holding on to
|
||||
// the second refcount until the current task returns to userspace.
|
||||
//
|
||||
// SAFETY: The `file` pointer is valid. Passing `current->files` as the file table to close
|
||||
// it in is correct, since we just got the `fd` from `file_close_fd` which also uses
|
||||
// `current->files`.
|
||||
//
|
||||
// Note: fl_owner_t is currently a void pointer.
|
||||
unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) };
|
||||
|
||||
// We update the file pointer that the task work is supposed to fput. This transfers
|
||||
// ownership of our last refcount.
|
||||
//
|
||||
// INVARIANT: This changes the `file` field of a `DeferredFdCloserInner` from null to
|
||||
// non-null. This doesn't break the type invariant for `DeferredFdCloserInner` because we
|
||||
// still own a refcount to the file, so we can pass ownership of that refcount to the
|
||||
// `DeferredFdCloserInner`.
|
||||
//
|
||||
// When `do_close_fd` runs, it must be safe for it to `fput` the refcount. However, this is
|
||||
// the case because all light refcounts that are associated with the fd we closed
|
||||
// previously must be dropped when `do_close_fd` runs, since light refcounts must be dropped
|
||||
// before returning to userspace.
|
||||
//
|
||||
// SAFETY: Task works are executed on the current thread right before we return to
|
||||
// userspace, so this write is guaranteed to happen before `do_close_fd` is called, which
|
||||
// means that a race is not possible here.
|
||||
unsafe { *file_field = file };
|
||||
|
||||
Ok(())
|
||||
}
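// Usage sketch (mirroring the caller in allocation.rs earlier in this diff):
//
//     if let Ok(closer) = DeferredFdCloser::new(GFP_KERNEL) {
//         // Failures (bad fd, kthread context) are deliberately ignored there.
//         let _ = closer.close_fd(fd);
//     }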
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// The provided pointer must point at the `twork` field of a `DeferredFdCloserInner` stored in
|
||||
/// a `KBox`, and the caller must pass exclusive ownership of that `KBox`. Furthermore, if the
|
||||
/// file pointer is non-null, then it must be okay to release the refcount by calling `fput`.
|
||||
unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) {
|
||||
// SAFETY: The caller just passed us ownership of this box.
|
||||
let inner = unsafe { KBox::from_raw(inner.cast::<DeferredFdCloserInner>()) };
|
||||
if !inner.file.is_null() {
|
||||
// SAFETY: By the type invariants, we own a refcount to this file, and the caller
|
||||
// guarantees that dropping the refcount now is okay.
|
||||
unsafe { bindings::fput(inner.file) };
|
||||
}
|
||||
// The allocation is freed when `inner` goes out of scope.
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a failure to close an fd in a deferred manner.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub(crate) enum DeferredFdCloseError {
|
||||
/// Closing the fd failed because we were unable to schedule a task work.
|
||||
TaskWorkUnavailable,
|
||||
/// Closing the fd failed because the fd does not exist.
|
||||
BadFd,
|
||||
}
|
||||
|
||||
impl From<DeferredFdCloseError> for Error {
|
||||
fn from(err: DeferredFdCloseError) -> Error {
|
||||
match err {
|
||||
DeferredFdCloseError::TaskWorkUnavailable => ESRCH,
|
||||
DeferredFdCloseError::BadFd => EBADF,
|
||||
}
|
||||
}
|
||||
}
|
||||
drivers/android/binder/defs.rs (new file, 182 lines)
@@ -0,0 +1,182 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use core::mem::MaybeUninit;
|
||||
use core::ops::{Deref, DerefMut};
|
||||
use kernel::{
|
||||
types::{AsBytes, FromBytes},
|
||||
uapi::{self, *},
|
||||
};
|
||||
|
||||
macro_rules! pub_no_prefix {
|
||||
($prefix:ident, $($newname:ident),+ $(,)?) => {
|
||||
$(pub(crate) const $newname: u32 = kernel::macros::concat_idents!($prefix, $newname);)+
|
||||
};
|
||||
}
|
||||
|
||||
pub_no_prefix!(
|
||||
binder_driver_return_protocol_,
|
||||
BR_TRANSACTION,
|
||||
BR_TRANSACTION_SEC_CTX,
|
||||
BR_REPLY,
|
||||
BR_DEAD_REPLY,
|
||||
BR_FAILED_REPLY,
|
||||
BR_FROZEN_REPLY,
|
||||
BR_NOOP,
|
||||
BR_SPAWN_LOOPER,
|
||||
BR_TRANSACTION_COMPLETE,
|
||||
BR_TRANSACTION_PENDING_FROZEN,
|
||||
BR_ONEWAY_SPAM_SUSPECT,
|
||||
BR_OK,
|
||||
BR_ERROR,
|
||||
BR_INCREFS,
|
||||
BR_ACQUIRE,
|
||||
BR_RELEASE,
|
||||
BR_DECREFS,
|
||||
BR_DEAD_BINDER,
|
||||
BR_CLEAR_DEATH_NOTIFICATION_DONE,
|
||||
);
|
||||
|
||||
pub_no_prefix!(
|
||||
binder_driver_command_protocol_,
|
||||
BC_TRANSACTION,
|
||||
BC_TRANSACTION_SG,
|
||||
BC_REPLY,
|
||||
BC_REPLY_SG,
|
||||
BC_FREE_BUFFER,
|
||||
BC_ENTER_LOOPER,
|
||||
BC_EXIT_LOOPER,
|
||||
BC_REGISTER_LOOPER,
|
||||
BC_INCREFS,
|
||||
BC_ACQUIRE,
|
||||
BC_RELEASE,
|
||||
BC_DECREFS,
|
||||
BC_INCREFS_DONE,
|
||||
BC_ACQUIRE_DONE,
|
||||
BC_REQUEST_DEATH_NOTIFICATION,
|
||||
BC_CLEAR_DEATH_NOTIFICATION,
|
||||
BC_DEAD_BINDER_DONE,
|
||||
);
|
||||
|
||||
pub_no_prefix!(
|
||||
flat_binder_object_shifts_,
|
||||
FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT
|
||||
);
|
||||
|
||||
pub_no_prefix!(
|
||||
flat_binder_object_flags_,
|
||||
FLAT_BINDER_FLAG_ACCEPTS_FDS,
|
||||
FLAT_BINDER_FLAG_INHERIT_RT,
|
||||
FLAT_BINDER_FLAG_PRIORITY_MASK,
|
||||
FLAT_BINDER_FLAG_SCHED_POLICY_MASK,
|
||||
FLAT_BINDER_FLAG_TXN_SECURITY_CTX
|
||||
);
|
||||
|
||||
pub_no_prefix!(
|
||||
transaction_flags_,
|
||||
TF_ONE_WAY,
|
||||
TF_ACCEPT_FDS,
|
||||
TF_CLEAR_BUF,
|
||||
TF_UPDATE_TXN
|
||||
);
|
||||
|
||||
pub(crate) use uapi::{
|
||||
BINDER_TYPE_BINDER, BINDER_TYPE_FD, BINDER_TYPE_FDA, BINDER_TYPE_HANDLE, BINDER_TYPE_PTR,
|
||||
BINDER_TYPE_WEAK_BINDER, BINDER_TYPE_WEAK_HANDLE,
|
||||
};
|
||||
|
||||
macro_rules! decl_wrapper {
|
||||
($newname:ident, $wrapped:ty) => {
|
||||
// Define a wrapper around the C type. Use `MaybeUninit` to enforce that the value of
|
||||
// padding bytes must be preserved.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub(crate) struct $newname(MaybeUninit<$wrapped>);
|
||||
|
||||
// SAFETY: This macro is only used with types where this is ok.
|
||||
unsafe impl FromBytes for $newname {}
|
||||
unsafe impl AsBytes for $newname {}
|
||||
|
||||
impl Deref for $newname {
|
||||
type Target = $wrapped;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
// SAFETY: We use `MaybeUninit` only to preserve padding. The value must still
|
||||
// always be valid.
|
||||
unsafe { self.0.assume_init_ref() }
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for $newname {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
// SAFETY: We use `MaybeUninit` only to preserve padding. The value must still
|
||||
// always be valid.
|
||||
unsafe { self.0.assume_init_mut() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for $newname {
|
||||
fn default() -> Self {
|
||||
// Create a new value of this type where all bytes (including padding) are zeroed.
|
||||
Self(MaybeUninit::zeroed())
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
decl_wrapper!(BinderNodeDebugInfo, uapi::binder_node_debug_info);
|
||||
decl_wrapper!(BinderNodeInfoForRef, uapi::binder_node_info_for_ref);
|
||||
decl_wrapper!(FlatBinderObject, uapi::flat_binder_object);
|
||||
decl_wrapper!(BinderFdObject, uapi::binder_fd_object);
|
||||
decl_wrapper!(BinderFdArrayObject, uapi::binder_fd_array_object);
|
||||
decl_wrapper!(BinderObjectHeader, uapi::binder_object_header);
|
||||
decl_wrapper!(BinderBufferObject, uapi::binder_buffer_object);
|
||||
decl_wrapper!(BinderTransactionData, uapi::binder_transaction_data);
|
||||
decl_wrapper!(
|
||||
BinderTransactionDataSecctx,
|
||||
uapi::binder_transaction_data_secctx
|
||||
);
|
||||
decl_wrapper!(BinderTransactionDataSg, uapi::binder_transaction_data_sg);
|
||||
decl_wrapper!(BinderWriteRead, uapi::binder_write_read);
|
||||
decl_wrapper!(BinderVersion, uapi::binder_version);
|
||||
decl_wrapper!(BinderFrozenStatusInfo, uapi::binder_frozen_status_info);
|
||||
decl_wrapper!(BinderFreezeInfo, uapi::binder_freeze_info);
|
||||
decl_wrapper!(ExtendedError, uapi::binder_extended_error);
|
||||
|
||||
impl BinderVersion {
|
||||
pub(crate) fn current() -> Self {
|
||||
Self(MaybeUninit::new(uapi::binder_version {
|
||||
protocol_version: BINDER_CURRENT_PROTOCOL_VERSION as _,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl BinderTransactionData {
|
||||
pub(crate) fn with_buffers_size(self, buffers_size: u64) -> BinderTransactionDataSg {
|
||||
BinderTransactionDataSg(MaybeUninit::new(uapi::binder_transaction_data_sg {
|
||||
transaction_data: *self,
|
||||
buffers_size,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl BinderTransactionDataSecctx {
|
||||
/// View the inner data as wrapped in `BinderTransactionData`.
|
||||
pub(crate) fn tr_data(&mut self) -> &mut BinderTransactionData {
|
||||
// SAFETY: Transparent wrapper is safe to transmute.
|
||||
unsafe {
|
||||
&mut *(&mut self.transaction_data as *mut uapi::binder_transaction_data
|
||||
as *mut BinderTransactionData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ExtendedError {
|
||||
pub(crate) fn new(id: u32, command: u32, param: i32) -> Self {
|
||||
Self(MaybeUninit::new(uapi::binder_extended_error {
|
||||
id,
|
||||
command,
|
||||
param,
|
||||
}))
|
||||
}
|
||||
}
|
||||
drivers/android/binder/error.rs (new file, 99 lines)
@@ -0,0 +1,99 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::prelude::*;
|
||||
|
||||
use crate::defs::*;
|
||||
|
||||
pub(crate) type BinderResult<T = ()> = core::result::Result<T, BinderError>;
|
||||
|
||||
/// An error that will be returned to userspace via the `BINDER_WRITE_READ` ioctl rather than via
|
||||
/// errno.
|
||||
pub(crate) struct BinderError {
|
||||
pub(crate) reply: u32,
|
||||
source: Option<Error>,
|
||||
}
|
||||
|
||||
impl BinderError {
|
||||
pub(crate) fn new_dead() -> Self {
|
||||
Self {
|
||||
reply: BR_DEAD_REPLY,
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_frozen() -> Self {
|
||||
Self {
|
||||
reply: BR_FROZEN_REPLY,
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_frozen_oneway() -> Self {
|
||||
Self {
|
||||
reply: BR_TRANSACTION_PENDING_FROZEN,
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_dead(&self) -> bool {
|
||||
self.reply == BR_DEAD_REPLY
|
||||
}
|
||||
|
||||
pub(crate) fn as_errno(&self) -> kernel::ffi::c_int {
|
||||
self.source.unwrap_or(EINVAL).to_errno()
|
||||
}
|
||||
|
||||
pub(crate) fn should_pr_warn(&self) -> bool {
|
||||
self.source.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an errno into a `BinderError` and store the errno used to construct it. The errno
|
||||
/// should be stored as the thread's extended error when given to userspace.
|
||||
impl From<Error> for BinderError {
|
||||
fn from(source: Error) -> Self {
|
||||
Self {
|
||||
reply: BR_FAILED_REPLY,
|
||||
source: Some(source),
|
||||
}
|
||||
}
|
||||
}
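// This impl is what makes `?` work on ordinary `Result`s inside functions that
// return `BinderResult`; a hypothetical sketch (not code from this series):
//
//     fn set_ctx_mgr(node_ref: &NodeRef) -> BinderResult {
//         security::binder_set_context_mgr(&node_ref.node.owner.cred)?; // Error -> BR_FAILED_REPLY
//         Ok(())
//     }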
|
||||
|
||||
impl From<kernel::fs::file::BadFdError> for BinderError {
|
||||
fn from(source: kernel::fs::file::BadFdError) -> Self {
|
||||
BinderError::from(Error::from(source))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<kernel::alloc::AllocError> for BinderError {
|
||||
fn from(_: kernel::alloc::AllocError) -> Self {
|
||||
Self {
|
||||
reply: BR_FAILED_REPLY,
|
||||
source: Some(ENOMEM),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for BinderError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self.reply {
|
||||
BR_FAILED_REPLY => match self.source.as_ref() {
|
||||
Some(source) => f
|
||||
.debug_struct("BR_FAILED_REPLY")
|
||||
.field("source", source)
|
||||
.finish(),
|
||||
None => f.pad("BR_FAILED_REPLY"),
|
||||
},
|
||||
BR_DEAD_REPLY => f.pad("BR_DEAD_REPLY"),
|
||||
BR_FROZEN_REPLY => f.pad("BR_FROZEN_REPLY"),
|
||||
BR_TRANSACTION_PENDING_FROZEN => f.pad("BR_TRANSACTION_PENDING_FROZEN"),
|
||||
BR_TRANSACTION_COMPLETE => f.pad("BR_TRANSACTION_COMPLETE"),
|
||||
_ => f
|
||||
.debug_struct("BinderError")
|
||||
.field("reply", &self.reply)
|
||||
.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
drivers/android/binder/node.rs (new file, 1102 lines; diff suppressed because it is too large)
drivers/android/binder/node/wrapper.rs (new file, 79 lines)
@@ -0,0 +1,79 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::{list::ListArc, prelude::*, seq_file::SeqFile, seq_print, sync::UniqueArc};
|
||||
|
||||
use crate::{node::Node, thread::Thread, BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead};
|
||||
|
||||
use core::mem::MaybeUninit;
|
||||
|
||||
pub(crate) struct CritIncrWrapper {
|
||||
inner: UniqueArc<MaybeUninit<DTRWrap<NodeWrapper>>>,
|
||||
}
|
||||
|
||||
impl CritIncrWrapper {
|
||||
pub(crate) fn new() -> Result<Self> {
|
||||
Ok(CritIncrWrapper {
|
||||
inner: UniqueArc::new_uninit(GFP_KERNEL)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn init(self, node: DArc<Node>) -> DLArc<dyn DeliverToRead> {
|
||||
match self.inner.pin_init_with(DTRWrap::new(NodeWrapper { node })) {
|
||||
Ok(initialized) => ListArc::from(initialized) as _,
|
||||
Err(err) => match err {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct NodeWrapper {
|
||||
node: DArc<Node>,
|
||||
}
|
||||
|
||||
kernel::list::impl_list_arc_safe! {
|
||||
impl ListArcSafe<0> for NodeWrapper {
|
||||
untracked;
|
||||
}
|
||||
}
|
||||
|
||||
impl DeliverToRead for NodeWrapper {
|
||||
fn do_work(
|
||||
self: DArc<Self>,
|
||||
_thread: &Thread,
|
||||
writer: &mut BinderReturnWriter<'_>,
|
||||
) -> Result<bool> {
|
||||
let node = &self.node;
|
||||
let mut owner_inner = node.owner.inner.lock();
|
||||
let inner = node.inner.access_mut(&mut owner_inner);
|
||||
|
||||
let ds = &mut inner.delivery_state;
|
||||
|
||||
assert!(ds.has_pushed_wrapper);
|
||||
assert!(ds.has_strong_zero2one);
|
||||
ds.has_pushed_wrapper = false;
|
||||
ds.has_strong_zero2one = false;
|
||||
|
||||
node.do_work_locked(writer, owner_inner)
|
||||
}
|
||||
|
||||
fn cancel(self: DArc<Self>) {}
|
||||
fn on_thread_selected(&self, _thread: &Thread) {}
|
||||
|
||||
fn should_sync_wakeup(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
|
||||
seq_print!(
|
||||
m,
|
||||
"{}node work {}: u{:016x} c{:016x}\n",
|
||||
prefix,
|
||||
self.node.debug_id,
|
||||
self.node.ptr,
|
||||
self.node.cookie,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
drivers/android/binder/page_range.rs (new file, 783 lines)
@@ -0,0 +1,783 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
//! This module has utilities for managing a page range where unused pages may be reclaimed by a
|
||||
//! vma shrinker.
|
||||
|
||||
// To avoid deadlocks, locks are taken in the order:
|
||||
//
|
||||
// 1. mmap lock
|
||||
// 2. spinlock
|
||||
// 3. lru spinlock
|
||||
//
|
||||
// The shrinker will use trylock methods because it locks them in a different order.
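// Illustration of the consequence: a regular caller may take the mmap lock,
// then the spinlock, then touch the lru, while the shrinker callback is
// entered with lru state already held and therefore only trylocks the
// spinlock and the mmap lock, bailing out instead of blocking so the order is
// never inverted.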
|
||||
|
||||
use core::{
|
||||
alloc::Layout,
|
||||
marker::PhantomPinned,
|
||||
mem::{size_of, size_of_val, MaybeUninit},
|
||||
ptr::{self, NonNull},
|
||||
};
|
||||
|
||||
use kernel::{
|
||||
alloc::allocator::Kmalloc,
|
||||
alloc::Allocator,
|
||||
bindings,
|
||||
error::Result,
|
||||
ffi::{c_ulong, c_void},
|
||||
mm::{virt, Mm, MmWithUser},
|
||||
new_mutex, new_spinlock,
|
||||
page::{Page, PAGE_SHIFT, PAGE_SIZE},
|
||||
prelude::*,
|
||||
str::CStr,
|
||||
sync::{Mutex, SpinLock},
|
||||
task::Pid,
|
||||
types::ARef,
|
||||
types::{FromBytes, Opaque},
|
||||
uaccess::UserSliceReader,
|
||||
};
|
||||
|
||||
/// Represents a shrinker that can be registered with the kernel.
|
||||
///
|
||||
/// Each shrinker can be used by many `ShrinkablePageRange` objects.
|
||||
#[repr(C)]
|
||||
pub(crate) struct Shrinker {
|
||||
inner: Opaque<*mut bindings::shrinker>,
|
||||
list_lru: Opaque<bindings::list_lru>,
|
||||
}
|
||||
|
||||
unsafe impl Send for Shrinker {}
|
||||
unsafe impl Sync for Shrinker {}
|
||||
|
||||
impl Shrinker {
|
||||
/// Create a new shrinker.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Before using this shrinker with a `ShrinkablePageRange`, the `register` method must have
|
||||
/// been called exactly once, and it must not have returned an error.
|
||||
pub(crate) const unsafe fn new() -> Self {
|
||||
Self {
|
||||
inner: Opaque::uninit(),
|
||||
list_lru: Opaque::uninit(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Register this shrinker with the kernel.
|
||||
pub(crate) fn register(&'static self, name: &CStr) -> Result<()> {
|
||||
// SAFETY: These fields are not yet used, so it's okay to zero them.
|
||||
unsafe {
|
||||
self.inner.get().write(ptr::null_mut());
|
||||
self.list_lru.get().write_bytes(0, 1);
|
||||
}
|
||||
|
||||
// SAFETY: The field is not yet used, so we can initialize it.
|
||||
let ret = unsafe {
|
||||
bindings::__list_lru_init(self.list_lru.get(), false, ptr::null_mut(), ptr::null_mut())
|
||||
};
|
||||
if ret != 0 {
|
||||
return Err(Error::from_errno(ret));
|
||||
}
|
||||
|
||||
// SAFETY: The `name` points at a valid c string.
|
||||
let shrinker = unsafe { bindings::shrinker_alloc(0, name.as_char_ptr()) };
|
||||
if shrinker.is_null() {
|
||||
// SAFETY: We initialized it, so it's okay to destroy it.
|
||||
unsafe { bindings::list_lru_destroy(self.list_lru.get()) };
|
||||
// `shrinker_alloc()` failed and `ret` is 0 at this point, so report ENOMEM.
return Err(ENOMEM);
|
||||
}
|
||||
|
||||
// SAFETY: We're about to register the shrinker, and these are the fields we need to
|
||||
// initialize. (All other fields are already zeroed.)
|
||||
unsafe {
|
||||
ptr::addr_of_mut!((*shrinker).count_objects).write(Some(rust_shrink_count));
|
||||
ptr::addr_of_mut!((*shrinker).scan_objects).write(Some(rust_shrink_scan));
|
||||
}
|
||||
|
||||
// SAFETY: The new shrinker has been fully initialized, so we can register it.
|
||||
unsafe { bindings::shrinker_register(shrinker) };
|
||||
|
||||
// SAFETY: This initializes the pointer to the shrinker so that we can use it.
|
||||
unsafe { self.inner.get().write(shrinker) };
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
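// Registration sketch (hypothetical static name and label; the real call site
// lives in the module init code elsewhere in this series):
//
//     static BINDER_SHRINKER: Shrinker = unsafe { Shrinker::new() };
//     // ...exactly once, during module initialization:
//     BINDER_SHRINKER.register(kernel::c_str!("android-binder"))?;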
|
||||
|
||||
/// A container that manages a page range in a vma.
|
||||
///
|
||||
/// The pages can be thought of as an array of booleans of whether the pages are usable. The
|
||||
/// methods `use_range` and `stop_using_range` set all booleans in a range to true or false
|
||||
/// respectively. Initially, no pages are allocated. When a page is not used, it is not freed
|
||||
/// immediately. Instead, it is made available to the memory shrinker to free it if the device is
|
||||
/// under memory pressure.
|
||||
///
|
||||
/// It's okay for `use_range` and `stop_using_range` to race with each other, although there's no
|
||||
/// way to know whether an index ends up with true or false if a call to `use_range` races with
|
||||
/// another call to `stop_using_range` on a given index.
|
||||
///
|
||||
/// It's also okay for the two methods to race with themselves, e.g. if two threads call
|
||||
/// `use_range` on the same index, then that's fine and neither call will return until the page is
|
||||
/// allocated and mapped.
|
||||
///
|
||||
/// The methods that read or write to a range require that the page is marked as in use. So it is
|
||||
/// _not_ okay to call `stop_using_range` on a page that is in use by the methods that read or
|
||||
/// write to the page.
|
||||
#[pin_data(PinnedDrop)]
|
||||
pub(crate) struct ShrinkablePageRange {
|
||||
/// Shrinker object registered with the kernel.
|
||||
shrinker: &'static Shrinker,
|
||||
/// Pid using this page range. Only used as debugging information.
|
||||
pid: Pid,
|
||||
/// The mm for the relevant process.
|
||||
mm: ARef<Mm>,
|
||||
/// Used to synchronize calls to `vm_insert_page` and `zap_page_range_single`.
|
||||
#[pin]
|
||||
mm_lock: Mutex<()>,
|
||||
/// Spinlock protecting changes to pages.
|
||||
#[pin]
|
||||
lock: SpinLock<Inner>,
|
||||
|
||||
/// Must not move, since page info has pointers back.
|
||||
#[pin]
|
||||
_pin: PhantomPinned,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
/// Array of pages.
|
||||
///
|
||||
/// Since this is also accessed by the shrinker, we can't use a `Box`, which asserts exclusive
|
||||
/// ownership. To deal with that, we manage it using raw pointers.
|
||||
pages: *mut PageInfo,
|
||||
/// Length of the `pages` array.
|
||||
size: usize,
|
||||
/// The address of the vma to insert the pages into.
|
||||
vma_addr: usize,
|
||||
}
|
||||
|
||||
unsafe impl Send for ShrinkablePageRange {}
|
||||
unsafe impl Sync for ShrinkablePageRange {}
|
||||
|
||||
type StableMmGuard =
|
||||
kernel::sync::lock::Guard<'static, (), kernel::sync::lock::mutex::MutexBackend>;
|
||||
|
||||
/// An array element that describes the current state of a page.
|
||||
///
|
||||
/// There are three states:
|
||||
///
|
||||
/// * Free. The page is None. The `lru` element is not queued.
|
||||
/// * Available. The page is Some. The `lru` element is queued to the shrinker's lru.
|
||||
/// * Used. The page is Some. The `lru` element is not queued.
|
||||
///
|
||||
/// When an element is available, the shrinker is able to free the page.
|
||||
#[repr(C)]
|
||||
struct PageInfo {
|
||||
lru: bindings::list_head,
|
||||
page: Option<Page>,
|
||||
range: *const ShrinkablePageRange,
|
||||
}
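// State transitions implied by the description above (no new behavior):
//
//     Free      --use_range--------------> Used       (page allocated and mapped)
//     Used      --stop_using_range-------> Available  (page kept, queued on the lru)
//     Available --use_range--------------> Used       (taken off the lru and reused)
//     Available --shrinker reclaim-------> Free       (page returned to the system)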
|
||||
|
||||
impl PageInfo {
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller ensures that reading from `me.page` is ok.
|
||||
unsafe fn has_page(me: *const PageInfo) -> bool {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let page = unsafe { ptr::addr_of!((*me).page) };
|
||||
|
||||
unsafe { (*page).is_some() }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller ensures that writing to `me.page` is ok, and that the page is not currently set.
|
||||
unsafe fn set_page(me: *mut PageInfo, page: Page) {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let ptr = unsafe { ptr::addr_of_mut!((*me).page) };
|
||||
|
||||
// SAFETY: The pointer is valid for writing, so also valid for reading.
|
||||
if unsafe { (*ptr).is_some() } {
|
||||
pr_err!("set_page called when there is already a page");
|
||||
// SAFETY: We will initialize the page again below.
|
||||
unsafe { ptr::drop_in_place(ptr) };
|
||||
}
|
||||
|
||||
// SAFETY: The pointer is valid for writing.
|
||||
unsafe { ptr::write(ptr, Some(page)) };
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller ensures that reading from `me.page` is ok for the duration of 'a.
|
||||
unsafe fn get_page<'a>(me: *const PageInfo) -> Option<&'a Page> {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let ptr = unsafe { ptr::addr_of!((*me).page) };
|
||||
|
||||
// SAFETY: The pointer is valid for reading.
|
||||
unsafe { (*ptr).as_ref() }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// The caller ensures that writing to `me.page` is ok.
|
||||
unsafe fn take_page(me: *mut PageInfo) -> Option<Page> {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let ptr = unsafe { ptr::addr_of_mut!((*me).page) };
|
||||
|
||||
// SAFETY: The pointer is valid for reading.
|
||||
unsafe { (*ptr).take() }
|
||||
}
|
||||
|
||||
/// Add this page to the lru list, if not already in the list.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The pointer must be valid, and it must be the right shrinker.
|
||||
unsafe fn list_lru_add(me: *mut PageInfo, shrinker: &'static Shrinker) {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let lru_ptr = unsafe { ptr::addr_of_mut!((*me).lru) };
|
||||
// SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
|
||||
unsafe { bindings::list_lru_add_obj(shrinker.list_lru.get(), lru_ptr) };
|
||||
}
|
||||
|
||||
/// Remove this page from the lru list, if it is in the list.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The pointer must be valid, and it must be the right shrinker.
|
||||
unsafe fn list_lru_del(me: *mut PageInfo, shrinker: &'static Shrinker) {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let lru_ptr = unsafe { ptr::addr_of_mut!((*me).lru) };
|
||||
// SAFETY: The lru pointer is valid, and we're not using it with any other lru list.
|
||||
unsafe { bindings::list_lru_del_obj(shrinker.list_lru.get(), lru_ptr) };
|
||||
}
|
||||
}
|
||||
|
||||
impl ShrinkablePageRange {
|
||||
/// Create a new `ShrinkablePageRange` using the given shrinker.
|
||||
pub(crate) fn new(shrinker: &'static Shrinker) -> impl PinInit<Self, Error> {
|
||||
try_pin_init!(Self {
|
||||
shrinker,
|
||||
pid: kernel::current!().pid(),
|
||||
mm: ARef::from(&**kernel::current!().mm().ok_or(ESRCH)?),
|
||||
mm_lock <- new_mutex!((), "ShrinkablePageRange::mm"),
|
||||
lock <- new_spinlock!(Inner {
|
||||
pages: ptr::null_mut(),
|
||||
size: 0,
|
||||
vma_addr: 0,
|
||||
}, "ShrinkablePageRange"),
|
||||
_pin: PhantomPinned,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn stable_trylock_mm(&self) -> Option<StableMmGuard> {
|
||||
// SAFETY: This extends the duration of the reference. Since this call happens before
|
||||
// `mm_lock` is taken in the destructor of `ShrinkablePageRange`, the destructor will block
|
||||
// until the returned guard is dropped. This ensures that the guard is valid until dropped.
|
||||
let mm_lock = unsafe { &*ptr::from_ref(&self.mm_lock) };
|
||||
|
||||
mm_lock.try_lock()
|
||||
}
|
||||
|
||||
/// Register a vma with this page range. Returns the size of the region.
|
||||
pub(crate) fn register_with_vma(&self, vma: &virt::VmAreaNew) -> Result<usize> {
|
||||
let num_bytes = usize::min(vma.end() - vma.start(), bindings::SZ_4M as usize);
|
||||
let num_pages = num_bytes >> PAGE_SHIFT;
|
||||
|
||||
if !ptr::eq::<Mm>(&*self.mm, &**vma.mm()) {
|
||||
pr_debug!("Failed to register with vma: invalid vma->vm_mm");
|
||||
return Err(EINVAL);
|
||||
}
|
||||
if num_pages == 0 {
|
||||
pr_debug!("Failed to register with vma: size zero");
|
||||
return Err(EINVAL);
|
||||
}
|
||||
|
||||
let layout = Layout::array::<PageInfo>(num_pages).map_err(|_| ENOMEM)?;
|
||||
// SAFETY: The layout has non-zero size.
|
||||
let pages = Kmalloc::alloc(layout, GFP_KERNEL)?.cast::<PageInfo>();
|
||||
|
||||
// SAFETY: This just initializes the pages array.
|
||||
unsafe {
|
||||
let self_ptr = self as *const ShrinkablePageRange;
|
||||
for i in 0..num_pages {
|
||||
let info = pages.add(i).as_ptr();
|
||||
ptr::addr_of_mut!((*info).range).write(self_ptr);
|
||||
ptr::addr_of_mut!((*info).page).write(None);
|
||||
let lru = ptr::addr_of_mut!((*info).lru);
|
||||
ptr::addr_of_mut!((*lru).next).write(lru);
|
||||
ptr::addr_of_mut!((*lru).prev).write(lru);
|
||||
}
|
||||
}
|
||||
|
||||
let mut inner = self.lock.lock();
|
||||
if inner.size > 0 {
|
||||
pr_debug!("Failed to register with vma: already registered");
|
||||
drop(inner);
|
||||
// SAFETY: The `pages` array was allocated with the same layout.
|
||||
unsafe { Kmalloc::free(pages.cast(), layout) };
|
||||
return Err(EBUSY);
|
||||
}
|
||||
|
||||
inner.pages = pages.as_ptr();
|
||||
inner.size = num_pages;
|
||||
inner.vma_addr = vma.start();
|
||||
|
||||
Ok(num_pages)
|
||||
}
|
||||
|
||||
/// Make sure that the given pages are allocated and mapped.
|
||||
///
|
||||
/// Must not be called from an atomic context.
|
||||
pub(crate) fn use_range(&self, start: usize, end: usize) -> Result<()> {
|
||||
crate::trace::trace_update_page_range(self.pid, true, start, end);
|
||||
|
||||
if start >= end {
|
||||
return Ok(());
|
||||
}
|
||||
let mut inner = self.lock.lock();
|
||||
assert!(end <= inner.size);
|
||||
|
||||
for i in start..end {
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let page_info = unsafe { inner.pages.add(i) };
|
||||
|
||||
// SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
|
||||
if unsafe { PageInfo::has_page(page_info) } {
|
||||
crate::trace::trace_alloc_lru_start(self.pid, i);
|
||||
|
||||
// Since we're going to use the page, we should remove it from the lru list so that
|
||||
// the shrinker will not free it.
|
||||
//
|
||||
// SAFETY: The pointer is valid, and this is the right shrinker.
|
||||
//
|
||||
// The shrinker can't free the page between the check and this call to
|
||||
// `list_lru_del` because we hold the lock.
|
||||
unsafe { PageInfo::list_lru_del(page_info, self.shrinker) };
|
||||
|
||||
crate::trace::trace_alloc_lru_end(self.pid, i);
|
||||
} else {
|
||||
// We have to allocate a new page. Use the slow path.
|
||||
drop(inner);
|
||||
crate::trace::trace_alloc_page_start(self.pid, i);
|
||||
match self.use_page_slow(i) {
|
||||
Ok(()) => {}
|
||||
Err(err) => {
|
||||
pr_warn!("Error in use_page_slow: {:?}", err);
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
crate::trace::trace_alloc_page_end(self.pid, i);
|
||||
inner = self.lock.lock();
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mark the given page as in use, slow path.
|
||||
///
|
||||
/// Must not be called from an atomic context.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Assumes that `i` is in bounds.
|
||||
#[cold]
|
||||
fn use_page_slow(&self, i: usize) -> Result<()> {
|
||||
let new_page = Page::alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)?;
|
||||
|
||||
let mm_mutex = self.mm_lock.lock();
|
||||
let inner = self.lock.lock();
|
||||
|
||||
// SAFETY: This pointer offset is in bounds.
|
||||
let page_info = unsafe { inner.pages.add(i) };
|
||||
|
||||
// SAFETY: The pointer is valid, and we hold the lock so reading from the page is okay.
|
||||
if unsafe { PageInfo::has_page(page_info) } {
|
||||
// The page was already there, or someone else added the page while we didn't hold the
|
||||
// spinlock.
|
||||
//
|
||||
// SAFETY: The pointer is valid, and this is the right shrinker.
|
||||
//
|
||||
// The shrinker can't free the page between the check and this call to
|
||||
// `list_lru_del` because we hold the lock.
|
||||
unsafe { PageInfo::list_lru_del(page_info, self.shrinker) };
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let vma_addr = inner.vma_addr;
|
||||
// Release the spinlock while we insert the page into the vma.
|
||||
drop(inner);
|
||||
|
||||
// No overflow since we stay in bounds of the vma.
|
||||
let user_page_addr = vma_addr + (i << PAGE_SHIFT);
|
||||
|
||||
// We use `mmput_async` when dropping the `mm` because `use_page_slow` is usually used from
|
||||
// a remote process. If the call to `mmput` races with the process shutting down, then the
|
||||
// caller of `use_page_slow` becomes responsible for cleaning up the `mm`, which doesn't
|
||||
// happen until it returns to userspace. However, the caller might instead go to sleep and
|
||||
// wait for the owner of the `mm` to wake it up, which doesn't happen because it's in the
|
||||
// middle of a shutdown process that won't complete until the `mm` is dropped. This can
|
||||
// amount to a deadlock.
|
||||
//
|
||||
// Using `mmput_async` avoids this, because then the `mm` cleanup is instead queued to a
|
||||
// workqueue.
|
||||
MmWithUser::into_mmput_async(self.mm.mmget_not_zero().ok_or(ESRCH)?)
|
||||
.mmap_read_lock()
|
||||
.vma_lookup(vma_addr)
|
||||
.ok_or(ESRCH)?
|
||||
.as_mixedmap_vma()
|
||||
.ok_or(ESRCH)?
|
||||
.vm_insert_page(user_page_addr, &new_page)
|
||||
.inspect_err(|err| {
|
||||
pr_warn!(
|
||||
"Failed to vm_insert_page({}): vma_addr:{} i:{} err:{:?}",
|
||||
user_page_addr,
|
||||
vma_addr,
|
||||
i,
|
||||
err
|
||||
)
|
||||
})?;
|
||||
|
||||
let inner = self.lock.lock();
|
||||
|
||||
// SAFETY: The `page_info` pointer is valid and currently does not have a page. The page
|
||||
// can be written to since we hold the lock.
|
||||
//
|
||||
// We released and reacquired the spinlock since we checked that the page is null, but we
|
||||
// always hold the mm_lock mutex when setting the page to a non-null value, so it's not
|
||||
// possible for someone else to have changed it since our check.
|
||||
unsafe { PageInfo::set_page(page_info, new_page) };
|
||||
|
||||
drop(inner);
|
||||
drop(mm_mutex);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// If the given pages are in use, then mark them as available so that the shrinker can free them.
|
||||
///
|
||||
/// May be called from an atomic context.
|
||||
pub(crate) fn stop_using_range(&self, start: usize, end: usize) {
|
||||
crate::trace::trace_update_page_range(self.pid, false, start, end);
|
||||
|
||||
if start >= end {
|
||||
return;
|
||||
}
|
||||
let inner = self.lock.lock();
|
||||
assert!(end <= inner.size);
|
||||
|
||||
for i in (start..end).rev() {
|
||||
// SAFETY: The pointer is in bounds.
|
||||
let page_info = unsafe { inner.pages.add(i) };
|
||||
|
||||
// SAFETY: Okay for reading since we have the lock.
|
||||
if unsafe { PageInfo::has_page(page_info) } {
|
||||
crate::trace::trace_free_lru_start(self.pid, i);
|
||||
|
||||
// SAFETY: The pointer is valid, and it's the right shrinker.
|
||||
unsafe { PageInfo::list_lru_add(page_info, self.shrinker) };
|
||||
|
||||
crate::trace::trace_free_lru_end(self.pid, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for reading or writing to a range of bytes that may overlap with several pages.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// All pages touched by this operation must be in use for the duration of this call.
|
||||
unsafe fn iterate<T>(&self, mut offset: usize, mut size: usize, mut cb: T) -> Result
|
||||
where
|
||||
T: FnMut(&Page, usize, usize) -> Result,
|
||||
{
|
||||
if size == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// SAFETY: The caller promises that the pages touched by this call are in use. It's only
|
||||
// possible for a page to be in use if we have already been registered with a vma, and we
|
||||
// only change the `pages` and `size` fields during registration with a vma, so there is no
|
||||
// race when we read them here without taking the lock.
|
||||
let (pages, num_pages) = {
|
||||
let inner = self.lock.lock();
|
||||
(inner.pages, inner.size)
|
||||
};
|
||||
let num_bytes = num_pages << PAGE_SHIFT;
|
||||
|
||||
// Check that the request is within the buffer.
|
||||
if offset.checked_add(size).ok_or(EFAULT)? > num_bytes {
|
||||
return Err(EFAULT);
|
||||
}
|
||||
|
||||
let mut page_index = offset >> PAGE_SHIFT;
|
||||
offset &= PAGE_SIZE - 1;
|
||||
while size > 0 {
|
||||
let available = usize::min(size, PAGE_SIZE - offset);
|
||||
// SAFETY: The pointer is in bounds.
|
||||
let page_info = unsafe { pages.add(page_index) };
|
||||
// SAFETY: The caller guarantees that this page is in the "in use" state for the
|
||||
// duration of this call to `iterate`, so nobody will change the page.
|
||||
let page = unsafe { PageInfo::get_page(page_info) };
|
||||
if page.is_none() {
|
||||
pr_warn!("Page is null!");
|
||||
}
|
||||
let page = page.ok_or(EFAULT)?;
|
||||
cb(page, offset, available)?;
|
||||
size -= available;
|
||||
page_index += 1;
|
||||
offset = 0;
|
||||
}
|
||||
Ok(())
|
||||
}
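The per-page chunking performed by `iterate` can be shown with a standalone sketch of the same arithmetic (a 4 KiB page size is assumed here purely for the example; the kernel code uses `PAGE_SIZE`/`PAGE_SHIFT`):

// Standalone model of the offset/size walk in `iterate`: split a byte range
// into (page_index, offset_in_page, length) chunks that never cross a page.
const PAGE_SHIFT: usize = 12; // assumed 4 KiB pages for this example
const PAGE_SIZE: usize = 1 << PAGE_SHIFT;

fn chunks(mut offset: usize, mut size: usize) -> Vec<(usize, usize, usize)> {
    let mut out = Vec::new();
    let mut page_index = offset >> PAGE_SHIFT;
    offset &= PAGE_SIZE - 1;
    while size > 0 {
        // Never cross a page boundary in a single chunk.
        let available = usize::min(size, PAGE_SIZE - offset);
        out.push((page_index, offset, available));
        size -= available;
        page_index += 1;
        offset = 0;
    }
    out
}

fn main() {
    // A 6000-byte access starting 100 bytes into page 1 touches pages 1 and 2.
    assert_eq!(
        chunks(PAGE_SIZE + 100, 6000),
        vec![(1, 100, PAGE_SIZE - 100), (2, 0, 6000 - (PAGE_SIZE - 100))]
    );
}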
|
||||
|
||||
/// Copy from userspace into this page range.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// All pages touched by this operation must be in use for the duration of this call.
|
||||
pub(crate) unsafe fn copy_from_user_slice(
|
||||
&self,
|
||||
reader: &mut UserSliceReader,
|
||||
offset: usize,
|
||||
size: usize,
|
||||
) -> Result {
|
||||
// SAFETY: `self.iterate` has the same safety requirements as `copy_from_user_slice`.
|
||||
unsafe {
|
||||
self.iterate(offset, size, |page, offset, to_copy| {
|
||||
page.copy_from_user_slice_raw(reader, offset, to_copy)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy from this page range into kernel space.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// All pages touched by this operation must be in use for the duration of this call.
|
||||
pub(crate) unsafe fn read<T: FromBytes>(&self, offset: usize) -> Result<T> {
|
||||
let mut out = MaybeUninit::<T>::uninit();
|
||||
let mut out_offset = 0;
|
||||
// SAFETY: `self.iterate` has the same safety requirements as `read`.
|
||||
unsafe {
|
||||
self.iterate(offset, size_of::<T>(), |page, offset, to_copy| {
|
||||
// SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
|
||||
let obj_ptr = (out.as_mut_ptr() as *mut u8).add(out_offset);
|
||||
// SAFETY: The pointer is in bounds of the `out` variable, so it is valid.
|
||||
page.read_raw(obj_ptr, offset, to_copy)?;
|
||||
out_offset += to_copy;
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
// SAFETY: We just initialised the data.
|
||||
Ok(unsafe { out.assume_init() })
|
||||
}
|
||||
|
||||
/// Copy from kernel space into this page range.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// All pages touched by this operation must be in use for the duration of this call.
|
||||
pub(crate) unsafe fn write<T: ?Sized>(&self, offset: usize, obj: &T) -> Result {
|
||||
let mut obj_offset = 0;
|
||||
// SAFETY: `self.iterate` has the same safety requirements as `write`.
|
||||
unsafe {
|
||||
self.iterate(offset, size_of_val(obj), |page, offset, to_copy| {
|
||||
// SAFETY: The sum of `offset` and `to_copy` is bounded by the size of T.
|
||||
let obj_ptr = (obj as *const T as *const u8).add(obj_offset);
|
||||
// SAFETY: We have a reference to the object, so the pointer is valid.
|
||||
page.write_raw(obj_ptr, offset, to_copy)?;
|
||||
obj_offset += to_copy;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Write zeroes to the given range.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// All pages touched by this operation must be in use for the duration of this call.
|
||||
pub(crate) unsafe fn fill_zero(&self, offset: usize, size: usize) -> Result {
|
||||
// SAFETY: `self.iterate` has the same safety requirements as `fill_zero`.
|
||||
unsafe {
|
||||
self.iterate(offset, size, |page, offset, len| {
|
||||
page.fill_zero_raw(offset, len)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pinned_drop]
|
||||
impl PinnedDrop for ShrinkablePageRange {
|
||||
fn drop(self: Pin<&mut Self>) {
|
||||
let (pages, size) = {
|
||||
let lock = self.lock.lock();
|
||||
(lock.pages, lock.size)
|
||||
};
|
||||
|
||||
if size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// This is the destructor, so unlike the other methods, we only need to worry about races
|
||||
// with the shrinker here.
|
||||
for i in 0..size {
|
||||
// SAFETY: The pointer is valid and it's the right shrinker.
|
||||
unsafe { PageInfo::list_lru_del(pages.add(i), self.shrinker) };
|
||||
// SAFETY: If the shrinker was going to free this page, then it would have taken it
|
||||
// from the PageInfo before releasing the lru lock. Thus, the call to `list_lru_del`
|
||||
// will either remove it before the shrinker can access it, or the shrinker will
|
||||
// already have taken the page at this point.
|
||||
unsafe { drop(PageInfo::take_page(pages.add(i))) };
|
||||
}
|
||||
|
||||
// Wait for users of the mutex to go away. This call is necessary for the safety of
|
||||
// `stable_trylock_mm`.
|
||||
drop(self.mm_lock.lock());
|
||||
|
||||
let Some(pages) = NonNull::new(pages) else {
|
||||
return;
|
||||
};
|
||||
|
||||
// SAFETY: This computation did not overflow when allocating the pages array, so it will
|
||||
// not overflow this time.
|
||||
let layout = unsafe { Layout::array::<PageInfo>(size).unwrap_unchecked() };
|
||||
|
||||
// SAFETY: The `pages` array was allocated with the same layout.
|
||||
unsafe { Kmalloc::free(pages.cast(), layout) };
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_shrink_count(
|
||||
shrink: *mut bindings::shrinker,
|
||||
_sc: *mut bindings::shrink_control,
|
||||
) -> c_ulong {
|
||||
// SAFETY: This method is only used with the `Shrinker` type, and the cast is valid since
|
||||
// `shrinker` is the first field of a #[repr(C)] struct.
|
||||
let shrinker = unsafe { &*shrink.cast::<Shrinker>() };
|
||||
// SAFETY: Accessing the lru list is okay. Just an FFI call.
|
||||
unsafe { bindings::list_lru_count(shrinker.list_lru.get()) }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_shrink_scan(
|
||||
shrink: *mut bindings::shrinker,
|
||||
sc: *mut bindings::shrink_control,
|
||||
) -> c_ulong {
|
||||
// SAFETY: This method is only used with the `Shrinker` type, and the cast is valid since
|
||||
// `shrinker` is the first field of a #[repr(C)] struct.
|
||||
let shrinker = unsafe { &*shrink.cast::<Shrinker>() };
|
||||
// SAFETY: Caller guarantees that it is safe to read this field.
|
||||
let nr_to_scan = unsafe { (*sc).nr_to_scan };
|
||||
// SAFETY: Accessing the lru list is okay. Just an FFI call.
|
||||
unsafe {
|
||||
extern "C" {
|
||||
fn rust_shrink_free_page_wrap(
|
||||
item: *mut bindings::list_head,
|
||||
list: *mut bindings::list_lru_one,
|
||||
lock: *mut bindings::spinlock_t,
|
||||
cb_arg: *mut kernel::ffi::c_void,
|
||||
) -> bindings::lru_status;
|
||||
}
|
||||
|
||||
bindings::list_lru_walk(
|
||||
shrinker.list_lru.get(),
|
||||
Some(rust_shrink_free_page_wrap),
|
||||
ptr::null_mut(),
|
||||
nr_to_scan,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const LRU_SKIP: bindings::lru_status = bindings::lru_status_LRU_SKIP;
|
||||
const LRU_REMOVED_ENTRY: bindings::lru_status = bindings::lru_status_LRU_REMOVED_RETRY;
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_shrink_free_page(
|
||||
item: *mut bindings::list_head,
|
||||
lru: *mut bindings::list_lru_one,
|
||||
lru_lock: *mut bindings::spinlock_t,
|
||||
_cb_arg: *mut c_void,
|
||||
) -> bindings::lru_status {
|
||||
// Fields that should survive after unlocking the lru lock.
|
||||
let pid;
|
||||
let page;
|
||||
let page_index;
|
||||
let mm;
|
||||
let mmap_read;
|
||||
let mm_mutex;
|
||||
let vma_addr;
|
||||
|
||||
{
|
||||
// SAFETY: The `list_head` field is first in `PageInfo`.
|
||||
let info = item as *mut PageInfo;
|
||||
let range = unsafe { &*((*info).range) };
|
||||
|
||||
mm = match range.mm.mmget_not_zero() {
|
||||
Some(mm) => MmWithUser::into_mmput_async(mm),
|
||||
None => return LRU_SKIP,
|
||||
};
|
||||
|
||||
mm_mutex = match range.stable_trylock_mm() {
|
||||
Some(guard) => guard,
|
||||
None => return LRU_SKIP,
|
||||
};
|
||||
|
||||
mmap_read = match mm.mmap_read_trylock() {
|
||||
Some(guard) => guard,
|
||||
None => return LRU_SKIP,
|
||||
};
|
||||
|
||||
// We can't lock it normally here, since we hold the lru lock.
|
||||
let inner = match range.lock.try_lock() {
|
||||
Some(inner) => inner,
|
||||
None => return LRU_SKIP,
|
||||
};
|
||||
|
||||
// SAFETY: The item is in this lru list, so it's okay to remove it.
|
||||
unsafe { bindings::list_lru_isolate(lru, item) };
|
||||
|
||||
// SAFETY: Both pointers are in bounds of the same allocation.
|
||||
page_index = unsafe { info.offset_from(inner.pages) } as usize;
|
||||
pid = range.pid;
|
||||
|
||||
crate::trace::trace_unmap_kernel_start(pid, page_index);
|
||||
|
||||
// SAFETY: We hold the spinlock, so we can take the page.
|
||||
//
|
||||
// This sets the page pointer to zero before we unmap it from the vma. However, we call
|
||||
// `zap_page_range` before we release the mmap lock, so `use_page_slow` will not be able to
|
||||
// insert a new page until after our call to `zap_page_range`.
|
||||
page = unsafe { PageInfo::take_page(info) };
|
||||
vma_addr = inner.vma_addr;
|
||||
|
||||
crate::trace::trace_unmap_kernel_end(pid, page_index);
|
||||
|
||||
// From this point on, we don't access this PageInfo or ShrinkablePageRange again, because
|
||||
// they can be freed at any point after we unlock `lru_lock`. This is with the exception of
|
||||
// `mm_mutex` which is kept alive by holding the lock.
|
||||
}
|
||||
|
||||
// SAFETY: The lru lock is locked when this method is called.
|
||||
unsafe { bindings::spin_unlock(lru_lock) };
|
||||
|
||||
if let Some(vma) = mmap_read.vma_lookup(vma_addr) {
|
||||
let user_page_addr = vma_addr + (page_index << PAGE_SHIFT);
|
||||
crate::trace::trace_unmap_user_start(pid, page_index);
|
||||
vma.zap_page_range_single(user_page_addr, PAGE_SIZE);
|
||||
crate::trace::trace_unmap_user_end(pid, page_index);
|
||||
}
|
||||
|
||||
drop(mmap_read);
|
||||
drop(mm_mutex);
|
||||
drop(mm);
|
||||
drop(page);
|
||||
|
||||
// SAFETY: We just unlocked the lru lock, but it should be locked when we return.
|
||||
unsafe { bindings::spin_lock(lru_lock) };
|
||||
|
||||
LRU_REMOVED_ENTRY
|
||||
}
|
||||
25
drivers/android/binder/page_range_helper.c
Normal file
@@ -0,0 +1,25 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/* C helper for page_range.rs to work around a CFI violation.
|
||||
*
|
||||
* Bindgen currently pretends that `enum lru_status` is the same as an integer.
|
||||
* This assumption is fine ABI-wise, but once you add CFI to the mix, it
|
||||
* triggers a CFI violation because `enum lru_status` gets a different CFI tag.
|
||||
*
|
||||
* This file contains a workaround until bindgen can be fixed.
|
||||
*
|
||||
* Copyright (C) 2024 Google LLC.
|
||||
*/
|
||||
#include <linux/list_lru.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
unsigned int rust_shrink_free_page(struct list_head *item,
|
||||
struct list_lru_one *list, spinlock_t *lock,
|
||||
void *cb_arg);
|
||||
|
||||
enum lru_status
|
||||
rust_shrink_free_page_wrap(struct list_head *item, struct list_lru_one *list,
|
||||
spinlock_t *lock, void *cb_arg)
|
||||
{
|
||||
return rust_shrink_free_page(item, list, lock, cb_arg);
|
||||
}
|
||||
80
drivers/android/binder/prio.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
//! This module defines the types and methods relevant to priority inheritance.
|
||||
|
||||
use kernel::bindings;
|
||||
|
||||
pub(crate) type Policy = kernel::ffi::c_uint;
|
||||
pub(crate) type Priority = kernel::ffi::c_int;
|
||||
pub(crate) type Nice = kernel::ffi::c_int;
|
||||
|
||||
pub(crate) const SCHED_NORMAL: Policy = bindings::SCHED_NORMAL;
|
||||
pub(crate) const SCHED_FIFO: Policy = bindings::SCHED_FIFO;
|
||||
pub(crate) const MIN_NICE: Nice = bindings::MIN_NICE as _;
|
||||
pub(crate) const MAX_NICE: Nice = bindings::MAX_NICE as _;
|
||||
pub(crate) const DEFAULT_PRIO: Priority = bindings::DEFAULT_PRIO as _;
|
||||
pub(crate) const MAX_RT_PRIO: Priority = bindings::MAX_RT_PRIO as _;
|
||||
|
||||
/// Scheduler policy and priority.
|
||||
///
|
||||
/// The binder driver supports inheriting the following scheduler policies:
|
||||
/// * SCHED_NORMAL
|
||||
/// * SCHED_BATCH
|
||||
/// * SCHED_FIFO
|
||||
/// * SCHED_RR
|
||||
#[derive(Copy, Clone, Default)]
|
||||
pub(crate) struct BinderPriority {
|
||||
pub(crate) sched_policy: Policy,
|
||||
pub(crate) prio: Priority,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
pub(crate) enum PriorityState {
|
||||
Set,
|
||||
Pending,
|
||||
Abort,
|
||||
}
|
||||
|
||||
pub(crate) fn get_default_prio_from_task(task: &kernel::task::Task) -> BinderPriority {
|
||||
if is_supported_policy(task.policy()) {
|
||||
BinderPriority {
|
||||
sched_policy: task.policy(),
|
||||
prio: task.normal_prio(),
|
||||
}
|
||||
} else {
|
||||
BinderPriority {
|
||||
sched_policy: SCHED_NORMAL,
|
||||
prio: DEFAULT_PRIO,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_rt_policy(policy: Policy) -> bool {
|
||||
policy == bindings::SCHED_FIFO || policy == bindings::SCHED_RR
|
||||
}
|
||||
|
||||
pub(crate) fn is_fair_policy(policy: Policy) -> bool {
|
||||
policy == bindings::SCHED_NORMAL || policy == bindings::SCHED_BATCH
|
||||
}
|
||||
|
||||
pub(crate) fn is_supported_policy(policy: Policy) -> bool {
|
||||
is_fair_policy(policy) || is_rt_policy(policy)
|
||||
}
|
||||
|
||||
pub(crate) fn to_userspace_prio(policy: Policy, prio: Priority) -> Nice {
|
||||
if is_fair_policy(policy) {
|
||||
prio - DEFAULT_PRIO
|
||||
} else {
|
||||
MAX_RT_PRIO - 1 - prio
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_kernel_prio(policy: Policy, prio: Nice) -> Priority {
|
||||
if is_fair_policy(policy) {
|
||||
prio + DEFAULT_PRIO
|
||||
} else {
|
||||
MAX_RT_PRIO - 1 - prio
|
||||
}
|
||||
}
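For a fixed policy, the two mappings above are inverses of each other. The sketch below models the arithmetic with the conventional Linux values; DEFAULT_PRIO = 120 and MAX_RT_PRIO = 100 are assumed here, whereas the driver reads them from bindings:

// Userspace model of to_userspace_prio/to_kernel_prio; the constants are
// assumptions in this sketch, not read from the kernel headers.
const DEFAULT_PRIO: i32 = 120;
const MAX_RT_PRIO: i32 = 100;

fn to_userspace(fair: bool, prio: i32) -> i32 {
    if fair {
        prio - DEFAULT_PRIO // kernel prio -> nice
    } else {
        MAX_RT_PRIO - 1 - prio // kernel prio -> RT priority
    }
}

fn to_kernel(fair: bool, prio: i32) -> i32 {
    if fair {
        prio + DEFAULT_PRIO // nice -> kernel prio
    } else {
        MAX_RT_PRIO - 1 - prio // RT priority -> kernel prio
    }
}

fn main() {
    // nice 0 corresponds to kernel priority 120; RT priority 99 to kernel priority 0.
    assert_eq!(to_kernel(true, 0), 120);
    assert_eq!(to_userspace(false, 0), 99);
    // Round-trips for both policies.
    assert_eq!(to_userspace(true, to_kernel(true, -5)), -5);
    assert_eq!(to_kernel(false, to_userspace(false, 10)), 10);
}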
|
||||
1639
drivers/android/binder/process.rs
Normal file
File diff suppressed because it is too large
247
drivers/android/binder/range_alloc/array.rs
Normal file
@@ -0,0 +1,247 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::{
|
||||
page::{PAGE_MASK, PAGE_SIZE},
|
||||
prelude::*,
|
||||
seq_file::SeqFile,
|
||||
seq_print,
|
||||
task::Pid,
|
||||
};
|
||||
|
||||
use crate::range_alloc::{DescriptorState, FreedRange, Range};
|
||||
|
||||
/// Keeps track of allocations in a process' mmap.
|
||||
///
|
||||
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
|
||||
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
|
||||
/// has metadata related to the allocation. We also keep track of available free space.
|
||||
pub(super) struct ArrayRangeAllocator<T> {
|
||||
/// This stores all ranges that are allocated. Unlike the tree based allocator, we do *not*
|
||||
/// store the free ranges.
|
||||
///
|
||||
/// Sorted by offset.
|
||||
pub(super) ranges: KVec<Range<T>>,
|
||||
size: usize,
|
||||
free_oneway_space: usize,
|
||||
}
|
||||
|
||||
struct FindEmptyRes {
|
||||
/// Which index in `ranges` should we insert the new range at?
|
||||
///
|
||||
/// Inserting the new range at this index keeps `ranges` sorted.
|
||||
insert_at_idx: usize,
|
||||
/// Which offset should we insert the new range at?
|
||||
insert_at_offset: usize,
|
||||
}
|
||||
|
||||
impl<T> ArrayRangeAllocator<T> {
|
||||
pub(crate) fn new(size: usize, alloc: EmptyArrayAlloc<T>) -> Self {
|
||||
Self {
|
||||
ranges: alloc.ranges,
|
||||
size,
|
||||
free_oneway_space: size / 2,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn free_oneway_space(&self) -> usize {
|
||||
self.free_oneway_space
|
||||
}
|
||||
|
||||
pub(crate) fn count_buffers(&self) -> usize {
|
||||
self.ranges.len()
|
||||
}
|
||||
|
||||
pub(crate) fn total_size(&self) -> usize {
|
||||
self.size
|
||||
}
|
||||
|
||||
pub(crate) fn is_full(&self) -> bool {
|
||||
self.ranges.len() == self.ranges.capacity()
|
||||
}
|
||||
|
||||
pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
|
||||
for range in &self.ranges {
|
||||
seq_print!(
|
||||
m,
|
||||
" buffer {}: {} size {} pid {} oneway {}",
|
||||
0,
|
||||
range.offset,
|
||||
range.size,
|
||||
range.state.pid(),
|
||||
range.state.is_oneway(),
|
||||
);
|
||||
if let DescriptorState::Reserved(_) = range.state {
|
||||
seq_print!(m, " reserved\n");
|
||||
} else {
|
||||
seq_print!(m, " allocated\n");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find somewhere to put a new range.
|
||||
///
|
||||
/// Unlike the tree implementation, we do not bother to find the smallest gap. The idea is that
|
||||
/// fragmentation isn't a big issue when we don't have many ranges.
|
||||
///
|
||||
/// Returns the index that the new range should have in `self.ranges` after insertion.
|
||||
fn find_empty_range(&self, size: usize) -> Option<FindEmptyRes> {
|
||||
let after_last_range = self.ranges.last().map(Range::endpoint).unwrap_or(0);
|
||||
|
||||
if size <= self.total_size() - after_last_range {
|
||||
// We can put the range at the end, so just do that.
|
||||
Some(FindEmptyRes {
|
||||
insert_at_idx: self.ranges.len(),
|
||||
insert_at_offset: after_last_range,
|
||||
})
|
||||
} else {
|
||||
let mut end_of_prev = 0;
|
||||
for (i, range) in self.ranges.iter().enumerate() {
|
||||
// Does it fit before the i'th range?
|
||||
if size <= range.offset - end_of_prev {
|
||||
return Some(FindEmptyRes {
|
||||
insert_at_idx: i,
|
||||
insert_at_offset: end_of_prev,
|
||||
});
|
||||
}
|
||||
end_of_prev = range.endpoint();
|
||||
}
|
||||
None
|
||||
}
|
||||
}
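The first-fit placement used by `find_empty_range` can be modeled on plain (offset, size) pairs; this sketch assumes the ranges are sorted by offset, as the allocator maintains:

// First-fit over a sorted list of allocated (offset, size) ranges inside a
// buffer of `total` bytes; returns (insert_index, offset) for a new range.
fn find_empty(ranges: &[(usize, usize)], total: usize, size: usize) -> Option<(usize, usize)> {
    let after_last = ranges.last().map(|&(off, sz)| off + sz).unwrap_or(0);
    if size <= total - after_last {
        // Fast path: append at the end of the buffer.
        return Some((ranges.len(), after_last));
    }
    let mut end_of_prev = 0;
    for (i, &(off, sz)) in ranges.iter().enumerate() {
        // Does the new range fit in the gap before the i'th range?
        if size <= off - end_of_prev {
            return Some((i, end_of_prev));
        }
        end_of_prev = off + sz;
    }
    None
}

fn main() {
    let ranges: [(usize, usize); 2] = [(0, 100), (300, 700)]; // 200-byte gap at offset 100
    assert_eq!(find_empty(&ranges, 1000, 150), Some((1, 100)));
    assert_eq!(find_empty(&ranges, 1000, 250), None);
}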
|
||||
|
||||
pub(crate) fn reserve_new(
|
||||
&mut self,
|
||||
debug_id: usize,
|
||||
size: usize,
|
||||
is_oneway: bool,
|
||||
pid: Pid,
|
||||
) -> Result<usize> {
|
||||
// Compute new value of free_oneway_space, which is set only on success.
|
||||
let new_oneway_space = if is_oneway {
|
||||
match self.free_oneway_space.checked_sub(size) {
|
||||
Some(new_oneway_space) => new_oneway_space,
|
||||
None => return Err(ENOSPC),
|
||||
}
|
||||
} else {
|
||||
self.free_oneway_space
|
||||
};
|
||||
|
||||
let FindEmptyRes {
|
||||
insert_at_idx,
|
||||
insert_at_offset,
|
||||
} = self.find_empty_range(size).ok_or(ENOSPC)?;
|
||||
self.free_oneway_space = new_oneway_space;
|
||||
|
||||
let new_range = Range {
|
||||
offset: insert_at_offset,
|
||||
size,
|
||||
state: DescriptorState::new(is_oneway, debug_id, pid),
|
||||
};
|
||||
// Insert the value at the given index to keep the array sorted.
|
||||
self.ranges.insert_within_capacity(insert_at_idx, new_range).ok().unwrap();
|
||||
|
||||
Ok(insert_at_offset)
|
||||
}
|
||||
|
||||
pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
|
||||
// This could use a binary search, but linear scans are usually faster for small arrays.
|
||||
let i = self
|
||||
.ranges
|
||||
.iter()
|
||||
.position(|range| range.offset == offset)
|
||||
.ok_or(EINVAL)?;
|
||||
let range = &self.ranges[i];
|
||||
|
||||
if let DescriptorState::Allocated(_) = range.state {
|
||||
return Err(EPERM);
|
||||
}
|
||||
|
||||
let size = range.size;
|
||||
let offset = range.offset;
|
||||
|
||||
if range.state.is_oneway() {
|
||||
self.free_oneway_space += size;
|
||||
}
|
||||
|
||||
// This computes the range of pages that are no longer used by *any* allocated range. The
|
||||
// caller will mark them as unused, which means that they can be freed if the system comes
|
||||
// under memory pressure.
|
||||
let mut freed_range = FreedRange::interior_pages(offset, size);
|
||||
if offset % PAGE_SIZE != 0 {
|
||||
if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) {
|
||||
freed_range.start_page_idx -= 1;
|
||||
}
|
||||
}
|
||||
if range.endpoint() % PAGE_SIZE != 0 {
|
||||
let page_after = (range.endpoint() & PAGE_MASK) + PAGE_SIZE;
|
||||
if i + 1 == self.ranges.len() || page_after <= self.ranges[i + 1].offset {
|
||||
freed_range.end_page_idx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
self.ranges.remove(i)?;
|
||||
Ok(freed_range)
|
||||
}
|
||||
|
||||
pub(crate) fn reservation_commit(&mut self, offset: usize, data: Option<T>) -> Result {
|
||||
// This could use a binary search, but linear scans are usually faster for small arrays.
|
||||
let range = self
|
||||
.ranges
|
||||
.iter_mut()
|
||||
.find(|range| range.offset == offset)
|
||||
.ok_or(ENOENT)?;
|
||||
|
||||
let DescriptorState::Reserved(reservation) = &range.state else {
|
||||
return Err(ENOENT);
|
||||
};
|
||||
|
||||
range.state = DescriptorState::Allocated(reservation.clone().allocate(data));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
|
||||
// This could use a binary search, but linear scans are usually faster for small arrays.
|
||||
let range = self
|
||||
.ranges
|
||||
.iter_mut()
|
||||
.find(|range| range.offset == offset)
|
||||
.ok_or(ENOENT)?;
|
||||
|
||||
let DescriptorState::Allocated(allocation) = &mut range.state else {
|
||||
return Err(ENOENT);
|
||||
};
|
||||
|
||||
let data = allocation.take();
|
||||
let debug_id = allocation.reservation.debug_id;
|
||||
range.state = DescriptorState::Reserved(allocation.reservation.clone());
|
||||
Ok((range.size, debug_id, data))
|
||||
}
|
||||
|
||||
pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
|
||||
for range in self.ranges.iter_mut() {
|
||||
if let DescriptorState::Allocated(allocation) = &mut range.state {
|
||||
callback(
|
||||
range.offset,
|
||||
range.size,
|
||||
allocation.reservation.debug_id,
|
||||
allocation.data.take(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct EmptyArrayAlloc<T> {
|
||||
ranges: KVec<Range<T>>,
|
||||
}
|
||||
|
||||
impl<T> EmptyArrayAlloc<T> {
|
||||
pub(crate) fn try_new(capacity: usize) -> Result<Self> {
|
||||
Ok(Self {
|
||||
ranges: KVec::with_capacity(capacity, GFP_KERNEL)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
326
drivers/android/binder/range_alloc/mod.rs
Normal file
@@ -0,0 +1,326 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::{page::PAGE_SIZE, prelude::*, seq_file::SeqFile, task::Pid};
|
||||
|
||||
mod tree;
|
||||
use self::tree::{FromArrayAllocs, ReserveNewTreeAlloc, TreeRangeAllocator};
|
||||
|
||||
mod array;
|
||||
use self::array::{ArrayRangeAllocator, EmptyArrayAlloc};
|
||||
|
||||
enum DescriptorState<T> {
|
||||
Reserved(Reservation),
|
||||
Allocated(Allocation<T>),
|
||||
}
|
||||
|
||||
impl<T> DescriptorState<T> {
|
||||
fn new(is_oneway: bool, debug_id: usize, pid: Pid) -> Self {
|
||||
DescriptorState::Reserved(Reservation {
|
||||
debug_id,
|
||||
is_oneway,
|
||||
pid,
|
||||
})
|
||||
}
|
||||
|
||||
fn pid(&self) -> Pid {
|
||||
match self {
|
||||
DescriptorState::Reserved(inner) => inner.pid,
|
||||
DescriptorState::Allocated(inner) => inner.reservation.pid,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_oneway(&self) -> bool {
|
||||
match self {
|
||||
DescriptorState::Reserved(inner) => inner.is_oneway,
|
||||
DescriptorState::Allocated(inner) => inner.reservation.is_oneway,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Reservation {
|
||||
debug_id: usize,
|
||||
is_oneway: bool,
|
||||
pid: Pid,
|
||||
}
|
||||
|
||||
impl Reservation {
|
||||
fn allocate<T>(self, data: Option<T>) -> Allocation<T> {
|
||||
Allocation {
|
||||
data,
|
||||
reservation: self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Allocation<T> {
|
||||
reservation: Reservation,
|
||||
data: Option<T>,
|
||||
}
|
||||
|
||||
impl<T> Allocation<T> {
|
||||
fn deallocate(self) -> (Reservation, Option<T>) {
|
||||
(self.reservation, self.data)
|
||||
}
|
||||
|
||||
fn debug_id(&self) -> usize {
|
||||
self.reservation.debug_id
|
||||
}
|
||||
|
||||
fn take(&mut self) -> Option<T> {
|
||||
self.data.take()
|
||||
}
|
||||
}
|
||||
|
||||
/// The array implementation must switch to the tree if it wants to go beyond this number of
|
||||
/// ranges.
|
||||
const TREE_THRESHOLD: usize = 8;
|
||||
|
||||
/// Represents a range of pages that have just become completely free.
|
||||
#[derive(Copy, Clone)]
|
||||
pub(crate) struct FreedRange {
|
||||
pub(crate) start_page_idx: usize,
|
||||
pub(crate) end_page_idx: usize,
|
||||
}
|
||||
|
||||
impl FreedRange {
|
||||
fn interior_pages(offset: usize, size: usize) -> FreedRange {
|
||||
FreedRange {
|
||||
// Divide round up
|
||||
start_page_idx: (offset + (PAGE_SIZE - 1)) / PAGE_SIZE,
|
||||
// Divide round down
|
||||
end_page_idx: (offset + size) / PAGE_SIZE,
|
||||
}
|
||||
}
|
||||
}
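The round-up/round-down above means that only pages lying entirely inside [offset, offset + size) are counted as freed. A worked example, assuming 4 KiB pages:

// Model of FreedRange::interior_pages with an assumed 4 KiB page size.
const PAGE_SIZE: usize = 4096;

fn interior_pages(offset: usize, size: usize) -> (usize, usize) {
    (
        (offset + PAGE_SIZE - 1) / PAGE_SIZE, // first page fully inside the range
        (offset + size) / PAGE_SIZE,          // one past the last page fully inside
    )
}

fn main() {
    // A range from byte 100 to byte 12388 fully covers only pages 1 and 2,
    // so the freed interior is [1, 3).
    assert_eq!(interior_pages(100, 12_288), (1, 3));
    // A page-aligned range covers exactly its own pages.
    assert_eq!(interior_pages(4096, 8192), (1, 3));
}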
|
||||
|
||||
struct Range<T> {
|
||||
offset: usize,
|
||||
size: usize,
|
||||
state: DescriptorState<T>,
|
||||
}
|
||||
|
||||
impl<T> Range<T> {
|
||||
fn endpoint(&self) -> usize {
|
||||
self.offset + self.size
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct RangeAllocator<T> {
|
||||
inner: Impl<T>,
|
||||
}
|
||||
|
||||
enum Impl<T> {
|
||||
Empty(usize),
|
||||
Array(ArrayRangeAllocator<T>),
|
||||
Tree(TreeRangeAllocator<T>),
|
||||
}
|
||||
|
||||
impl<T> RangeAllocator<T> {
|
||||
pub(crate) fn new(size: usize) -> Self {
|
||||
Self {
|
||||
inner: Impl::Empty(size),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn free_oneway_space(&self) -> usize {
|
||||
match &self.inner {
|
||||
Impl::Empty(size) => size / 2,
|
||||
Impl::Array(array) => array.free_oneway_space(),
|
||||
Impl::Tree(tree) => tree.free_oneway_space(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn count_buffers(&self) -> usize {
|
||||
match &self.inner {
|
||||
Impl::Empty(_size) => 0,
|
||||
Impl::Array(array) => array.count_buffers(),
|
||||
Impl::Tree(tree) => tree.count_buffers(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
|
||||
match &self.inner {
|
||||
Impl::Empty(_size) => Ok(()),
|
||||
Impl::Array(array) => array.debug_print(m),
|
||||
Impl::Tree(tree) => tree.debug_print(m),
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to reserve a new buffer, using the provided allocation if necessary.
|
||||
pub(crate) fn reserve_new(&mut self, mut args: ReserveNewArgs<T>) -> Result<ReserveNew<T>> {
|
||||
match &mut self.inner {
|
||||
Impl::Empty(size) => {
|
||||
let empty_array = match args.empty_array_alloc.take() {
|
||||
Some(empty_array) => ArrayRangeAllocator::new(*size, empty_array),
|
||||
None => {
|
||||
return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
|
||||
args,
|
||||
need_empty_array_alloc: true,
|
||||
need_new_tree_alloc: false,
|
||||
need_tree_alloc: false,
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
self.inner = Impl::Array(empty_array);
|
||||
self.reserve_new(args)
|
||||
}
|
||||
Impl::Array(array) if array.is_full() => {
|
||||
let allocs = match args.new_tree_alloc {
|
||||
Some(ref mut allocs) => allocs,
|
||||
None => {
|
||||
return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
|
||||
args,
|
||||
need_empty_array_alloc: false,
|
||||
need_new_tree_alloc: true,
|
||||
need_tree_alloc: true,
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
let new_tree =
|
||||
TreeRangeAllocator::from_array(array.total_size(), &mut array.ranges, allocs);
|
||||
|
||||
self.inner = Impl::Tree(new_tree);
|
||||
self.reserve_new(args)
|
||||
}
|
||||
Impl::Array(array) => {
|
||||
let offset =
|
||||
array.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid)?;
|
||||
Ok(ReserveNew::Success(ReserveNewSuccess {
|
||||
offset,
|
||||
oneway_spam_detected: false,
|
||||
_empty_array_alloc: args.empty_array_alloc,
|
||||
_new_tree_alloc: args.new_tree_alloc,
|
||||
_tree_alloc: args.tree_alloc,
|
||||
}))
|
||||
}
|
||||
Impl::Tree(tree) => {
|
||||
let alloc = match args.tree_alloc {
|
||||
Some(alloc) => alloc,
|
||||
None => {
|
||||
return Ok(ReserveNew::NeedAlloc(ReserveNewNeedAlloc {
|
||||
args,
|
||||
need_empty_array_alloc: false,
|
||||
need_new_tree_alloc: false,
|
||||
need_tree_alloc: true,
|
||||
}));
|
||||
}
|
||||
};
|
||||
let (offset, oneway_spam_detected) =
|
||||
tree.reserve_new(args.debug_id, args.size, args.is_oneway, args.pid, alloc)?;
|
||||
Ok(ReserveNew::Success(ReserveNewSuccess {
|
||||
offset,
|
||||
oneway_spam_detected,
|
||||
_empty_array_alloc: args.empty_array_alloc,
|
||||
_new_tree_alloc: args.new_tree_alloc,
|
||||
_tree_alloc: None,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deletes the allocations at `offset`.
|
||||
pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
|
||||
match &mut self.inner {
|
||||
Impl::Empty(_size) => Err(EINVAL),
|
||||
Impl::Array(array) => array.reservation_abort(offset),
|
||||
Impl::Tree(tree) => {
|
||||
let freed_range = tree.reservation_abort(offset)?;
|
||||
if tree.is_empty() {
|
||||
self.inner = Impl::Empty(tree.total_size());
|
||||
}
|
||||
Ok(freed_range)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Called when an allocation is no longer in use by the kernel.
|
||||
pub(crate) fn reservation_commit(&mut self, offset: usize, data: Option<T>) -> Result {
|
||||
match &mut self.inner {
|
||||
Impl::Empty(_size) => Err(EINVAL),
|
||||
Impl::Array(array) => array.reservation_commit(offset, data),
|
||||
Impl::Tree(tree) => tree.reservation_commit(offset, data),
|
||||
}
|
||||
}
|
||||
|
||||
/// Called when the kernel starts using an allocation.
|
||||
///
|
||||
/// Returns the size of the existing entry and the data associated with it.
|
||||
pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
|
||||
match &mut self.inner {
|
||||
Impl::Empty(_size) => Err(EINVAL),
|
||||
Impl::Array(array) => array.reserve_existing(offset),
|
||||
Impl::Tree(tree) => tree.reserve_existing(offset),
|
||||
}
|
||||
}
|
||||
|
||||
/// Call the provided callback at every allocated region.
|
||||
///
|
||||
/// This destroys the range allocator. Used only during shutdown.
|
||||
pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
|
||||
match &mut self.inner {
|
||||
Impl::Empty(_size) => {}
|
||||
Impl::Array(array) => array.take_for_each(callback),
|
||||
Impl::Tree(tree) => tree.take_for_each(callback),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The arguments for `reserve_new`.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ReserveNewArgs<T> {
|
||||
pub(crate) size: usize,
|
||||
pub(crate) is_oneway: bool,
|
||||
pub(crate) debug_id: usize,
|
||||
pub(crate) pid: Pid,
|
||||
pub(crate) empty_array_alloc: Option<EmptyArrayAlloc<T>>,
|
||||
pub(crate) new_tree_alloc: Option<FromArrayAllocs<T>>,
|
||||
pub(crate) tree_alloc: Option<ReserveNewTreeAlloc<T>>,
|
||||
}
|
||||
|
||||
/// The return type of `ReserveNew`.
|
||||
pub(crate) enum ReserveNew<T> {
|
||||
Success(ReserveNewSuccess<T>),
|
||||
NeedAlloc(ReserveNewNeedAlloc<T>),
|
||||
}
|
||||
|
||||
/// Returned by `reserve_new` when the reservation was successful.
|
||||
pub(crate) struct ReserveNewSuccess<T> {
|
||||
pub(crate) offset: usize,
|
||||
pub(crate) oneway_spam_detected: bool,
|
||||
|
||||
// If the user supplied an allocation that we did not end up using, then we return it here.
|
||||
// The caller will kfree it outside of the lock.
|
||||
_empty_array_alloc: Option<EmptyArrayAlloc<T>>,
|
||||
_new_tree_alloc: Option<FromArrayAllocs<T>>,
|
||||
_tree_alloc: Option<ReserveNewTreeAlloc<T>>,
|
||||
}
|
||||
|
||||
/// Returned by `reserve_new` to request the caller to make an allocation before calling the method
|
||||
/// again.
|
||||
pub(crate) struct ReserveNewNeedAlloc<T> {
|
||||
args: ReserveNewArgs<T>,
|
||||
need_empty_array_alloc: bool,
|
||||
need_new_tree_alloc: bool,
|
||||
need_tree_alloc: bool,
|
||||
}
|
||||
|
||||
impl<T> ReserveNewNeedAlloc<T> {
|
||||
/// Make the necessary allocations for another call to `reserve_new`.
|
||||
pub(crate) fn make_alloc(mut self) -> Result<ReserveNewArgs<T>> {
|
||||
if self.need_empty_array_alloc && self.args.empty_array_alloc.is_none() {
|
||||
self.args.empty_array_alloc = Some(EmptyArrayAlloc::try_new(TREE_THRESHOLD)?);
|
||||
}
|
||||
if self.need_new_tree_alloc && self.args.new_tree_alloc.is_none() {
|
||||
self.args.new_tree_alloc = Some(FromArrayAllocs::try_new(TREE_THRESHOLD)?);
|
||||
}
|
||||
if self.need_tree_alloc && self.args.tree_alloc.is_none() {
|
||||
self.args.tree_alloc = Some(ReserveNewTreeAlloc::try_new()?);
|
||||
}
|
||||
Ok(self.args)
|
||||
}
|
||||
}
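The `NeedAlloc` variant implements a retry protocol: the allocator is typically used under a lock that must not sleep, so when it needs memory it bails out, the caller drops the lock, performs the allocation via `make_alloc`, and then calls `reserve_new` again with the filled-in args. A hypothetical caller loop might look like the sketch below; the spinlock wrapper and surrounding error handling are illustrative only:

// Hypothetical driver-side loop showing the intended call pattern. The
// `ReserveNew`/`ReserveNewArgs` names mirror this module, but the lock and
// the surrounding context are only a sketch.
fn reserve_with_retry<T>(
    alloc: &kernel::sync::SpinLock<RangeAllocator<T>>,
    mut args: ReserveNewArgs<T>,
) -> Result<usize> {
    loop {
        let need_alloc = match alloc.lock().reserve_new(args)? {
            // Done: return the offset of the new buffer.
            ReserveNew::Success(success) => return Ok(success.offset),
            // The allocator needs memory; allocate it outside the spinlock.
            ReserveNew::NeedAlloc(need) => need,
        };
        args = need_alloc.make_alloc()?; // may sleep, the lock is not held here
    }
}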
|
||||
500
drivers/android/binder/range_alloc/tree.rs
Normal file
@@ -0,0 +1,500 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use kernel::{
|
||||
page::PAGE_SIZE,
|
||||
prelude::*,
|
||||
rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
|
||||
seq_file::SeqFile,
|
||||
seq_print,
|
||||
task::Pid,
|
||||
};
|
||||
|
||||
use crate::range_alloc::{DescriptorState, FreedRange, Range};
|
||||
|
||||
/// Keeps track of allocations in a process' mmap.
|
||||
///
|
||||
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
|
||||
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
|
||||
/// has metadata related to the allocation. We also keep track of available free space.
|
||||
pub(super) struct TreeRangeAllocator<T> {
|
||||
/// This collection contains descriptors for *both* ranges containing an allocation, *and* free
|
||||
/// ranges between allocations. The free ranges get merged, so there are never two free ranges
|
||||
/// next to each other.
|
||||
tree: RBTree<usize, Descriptor<T>>,
|
||||
/// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
|
||||
/// letting us look up the smallest range whose size is at least some lower bound.
|
||||
free_tree: RBTree<FreeKey, ()>,
|
||||
size: usize,
|
||||
free_oneway_space: usize,
|
||||
}
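Keying `free_tree` by `(size, offset)` gives a cheap best-fit lookup: the smallest free range of at least the requested size is the lower bound of `(size, 0)`. A userspace model of the same idea using `BTreeMap`:

use std::collections::BTreeMap;

// Model of the free_tree: keys are (size, offset) so that a range lookup by
// size finds the smallest free range that still fits the request.
fn best_fit(free: &BTreeMap<(usize, usize), ()>, size: usize) -> Option<(usize, usize)> {
    // Lower bound of (size, 0): the first free range whose size is >= `size`.
    free.range((size, 0)..).next().map(|(&k, _)| k)
}

fn main() {
    let mut free = BTreeMap::new();
    free.insert((128, 4096), ());
    free.insert((512, 0), ());
    free.insert((4096, 8192), ());
    // A 200-byte request picks the 512-byte range, not the 4096-byte one.
    assert_eq!(best_fit(&free, 200), Some((512, 0)));
    assert_eq!(best_fit(&free, 100), Some((128, 4096)));
    assert_eq!(best_fit(&free, 8192), None);
}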
|
||||
|
||||
impl<T> TreeRangeAllocator<T> {
|
||||
pub(crate) fn from_array(
|
||||
size: usize,
|
||||
ranges: &mut KVec<Range<T>>,
|
||||
alloc: &mut FromArrayAllocs<T>,
|
||||
) -> Self {
|
||||
let mut tree = TreeRangeAllocator {
|
||||
tree: RBTree::new(),
|
||||
free_tree: RBTree::new(),
|
||||
size,
|
||||
free_oneway_space: size / 2,
|
||||
};
|
||||
|
||||
let mut free_offset = 0;
|
||||
for range in ranges.drain_all() {
|
||||
let free_size = range.offset - free_offset;
|
||||
if free_size > 0 {
|
||||
let free_node = alloc.free_tree.pop().unwrap();
|
||||
tree.free_tree
|
||||
.insert(free_node.into_node((free_size, free_offset), ()));
|
||||
let tree_node = alloc.tree.pop().unwrap();
|
||||
tree.tree.insert(
|
||||
tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
|
||||
);
|
||||
}
|
||||
free_offset = range.endpoint();
|
||||
|
||||
if range.state.is_oneway() {
|
||||
tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
|
||||
}
|
||||
|
||||
let free_res = alloc.free_tree.pop().unwrap();
|
||||
let tree_node = alloc.tree.pop().unwrap();
|
||||
let mut desc = Descriptor::new(range.offset, range.size);
|
||||
desc.state = Some((range.state, free_res));
|
||||
tree.tree.insert(tree_node.into_node(range.offset, desc));
|
||||
}
|
||||
|
||||
// After the last range, we may need a free range.
|
||||
if free_offset < size {
|
||||
let free_size = size - free_offset;
|
||||
let free_node = alloc.free_tree.pop().unwrap();
|
||||
tree.free_tree
|
||||
.insert(free_node.into_node((free_size, free_offset), ()));
|
||||
let tree_node = alloc.tree.pop().unwrap();
|
||||
tree.tree
|
||||
.insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
|
||||
}
|
||||
|
||||
tree
|
||||
}
|
||||
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
let mut tree_iter = self.tree.values();
|
||||
// There's always at least one range, because index zero is either the start of a free or
|
||||
// allocated range.
|
||||
let first_value = tree_iter.next().unwrap();
|
||||
if tree_iter.next().is_some() {
|
||||
// There are never two free ranges next to each other, so if there is more than one
|
||||
// descriptor, then at least one of them must hold an allocated range.
|
||||
return false;
|
||||
}
|
||||
// There is only one descriptor. Return true if it is for a free range.
|
||||
first_value.state.is_none()
|
||||
}
|
||||
|
||||
pub(crate) fn total_size(&self) -> usize {
|
||||
self.size
|
||||
}
|
||||
|
||||
pub(crate) fn free_oneway_space(&self) -> usize {
|
||||
self.free_oneway_space
|
||||
}
|
||||
|
||||
pub(crate) fn count_buffers(&self) -> usize {
|
||||
self.tree
|
||||
.values()
|
||||
.filter(|desc| desc.state.is_some())
|
||||
.count()
|
||||
}
|
||||
|
||||
pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
|
||||
for desc in self.tree.values() {
|
||||
let state = match &desc.state {
|
||||
Some(state) => &state.0,
|
||||
None => continue,
|
||||
};
|
||||
seq_print!(
|
||||
m,
|
||||
" buffer: {} size {} pid {}",
|
||||
desc.offset,
|
||||
desc.size,
|
||||
state.pid(),
|
||||
);
|
||||
if state.is_oneway() {
|
||||
seq_print!(m, " oneway");
|
||||
}
|
||||
match state {
|
||||
DescriptorState::Reserved(_res) => {
|
||||
seq_print!(m, " reserved\n");
|
||||
}
|
||||
DescriptorState::Allocated(_alloc) => {
|
||||
seq_print!(m, " allocated\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
|
||||
let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
|
||||
let ((_, offset), _) = free_cursor.current();
|
||||
self.tree.get_mut(offset)
|
||||
}
|
||||
|
||||
/// Try to reserve a new buffer, using the provided allocation if necessary.
|
||||
pub(crate) fn reserve_new(
|
||||
&mut self,
|
||||
debug_id: usize,
|
||||
size: usize,
|
||||
is_oneway: bool,
|
||||
pid: Pid,
|
||||
alloc: ReserveNewTreeAlloc<T>,
|
||||
) -> Result<(usize, bool)> {
|
||||
// Compute new value of free_oneway_space, which is set only on success.
|
||||
let new_oneway_space = if is_oneway {
|
||||
match self.free_oneway_space.checked_sub(size) {
|
||||
Some(new_oneway_space) => new_oneway_space,
|
||||
None => return Err(ENOSPC),
|
||||
}
|
||||
} else {
|
||||
self.free_oneway_space
|
||||
};
|
||||
|
||||
// Start detecting spammers once we have less than 20%
|
||||
// of async space left (which is less than 10% of total
|
||||
// buffer size).
|
||||
//
|
||||
// (This will short-circuit, so `low_oneway_space` is
|
||||
// only called when necessary.)
|
||||
let oneway_spam_detected =
|
||||
is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);
|
||||
|
||||
let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
|
||||
None => {
|
||||
pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
|
||||
return Err(ENOSPC);
|
||||
}
|
||||
Some(desc) => {
|
||||
let found_size = desc.size;
|
||||
let found_offset = desc.offset;
|
||||
|
||||
// In case we need to break up the descriptor
|
||||
let new_desc = Descriptor::new(found_offset + size, found_size - size);
|
||||
let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);
|
||||
|
||||
desc.state = Some((
|
||||
DescriptorState::new(is_oneway, debug_id, pid),
|
||||
desc_node_res,
|
||||
));
|
||||
desc.size = size;
|
||||
|
||||
(found_size, found_offset, tree_node, free_tree_node)
|
||||
}
|
||||
};
|
||||
self.free_oneway_space = new_oneway_space;
|
||||
self.free_tree.remove(&(found_size, found_off));
|
||||
|
||||
if found_size != size {
|
||||
self.tree.insert(tree_node);
|
||||
self.free_tree.insert(free_tree_node);
|
||||
}
|
||||
|
||||
Ok((found_off, oneway_spam_detected))
|
||||
}
|
||||
|
||||
pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
|
||||
let mut cursor = self.tree.cursor_lower_bound(&offset).ok_or_else(|| {
|
||||
pr_warn!(
|
||||
"EINVAL from range_alloc.reservation_abort - offset: {}",
|
||||
offset
|
||||
);
|
||||
EINVAL
|
||||
})?;
|
||||
|
||||
let (_, desc) = cursor.current_mut();
|
||||
|
||||
if desc.offset != offset {
|
||||
pr_warn!(
|
||||
"EINVAL from range_alloc.reservation_abort - offset: {}",
|
||||
offset
|
||||
);
|
||||
return Err(EINVAL);
|
||||
}
|
||||
|
||||
        let (reservation, free_node_res) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => {
                (None, Ok((reservation, free_node_res)))
            }
            None => {
                pr_warn!(
                    "EINVAL from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (None, Err(EINVAL))
            }
            allocated => {
                pr_warn!(
                    "EPERM from range_alloc.reservation_abort - offset: {}",
                    offset
                );
                (allocated, Err(EPERM))
            }
        })?;

        let mut size = desc.size;
        let mut offset = desc.offset;
        let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };

        self.free_oneway_space += free_oneway_space_add;

        let mut freed_range = FreedRange::interior_pages(offset, size);
        // Compute how large the next free region needs to be to include one more page in
        // the newly freed range.
        let add_next_page_needed = match (offset + size) % PAGE_SIZE {
            0 => usize::MAX,
            unalign => PAGE_SIZE - unalign,
        };
        // Compute how large the previous free region needs to be to include one more page
        // in the newly freed range.
        let add_prev_page_needed = match offset % PAGE_SIZE {
            0 => usize::MAX,
            unalign => unalign,
        };

        // Merge next into current if next is free
        let remove_next = match cursor.peek_next() {
            Some((_, next)) if next.state.is_none() => {
                if next.size >= add_next_page_needed {
                    freed_range.end_page_idx += 1;
                }
                self.free_tree.remove(&(next.size, next.offset));
                size += next.size;
                true
            }
            _ => false,
        };

        if remove_next {
            let (_, desc) = cursor.current_mut();
            desc.size = size;
            cursor.remove_next();
        }

        // Merge current into prev if prev is free
        match cursor.peek_prev_mut() {
            Some((_, prev)) if prev.state.is_none() => {
                if prev.size >= add_prev_page_needed {
                    freed_range.start_page_idx -= 1;
                }
                // merge previous with current, remove current
                self.free_tree.remove(&(prev.size, prev.offset));
                offset = prev.offset;
                size += prev.size;
                prev.size = size;
                cursor.remove_current();
            }
            _ => {}
        };

        self.free_tree
            .insert(free_node_res.into_node((size, offset), ()));

        Ok(freed_range)
    }

    pub(crate) fn reservation_commit(&mut self, offset: usize, data: Option<T>) -> Result {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reservation_commit - offset: {}",
                offset
            );
            ENOENT
        })?;

        desc.try_change_state(|state| match state {
            Some((DescriptorState::Reserved(reservation), free_node_res)) => (
                Some((
                    DescriptorState::Allocated(reservation.allocate(data)),
                    free_node_res,
                )),
                Ok(()),
            ),
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reservation_commit - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })
    }

    /// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
    /// [`DescriptorState::Reserved`].
    ///
    /// Returns the size of the existing entry and the data associated with it.
    pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
        let desc = self.tree.get_mut(&offset).ok_or_else(|| {
            pr_warn!(
                "ENOENT from range_alloc.reserve_existing - offset: {}",
                offset
            );
            ENOENT
        })?;

        let (debug_id, data) = desc.try_change_state(|state| match state {
            Some((DescriptorState::Allocated(allocation), free_node_res)) => {
                let (reservation, data) = allocation.deallocate();
                let debug_id = reservation.debug_id;
                (
                    Some((DescriptorState::Reserved(reservation), free_node_res)),
                    Ok((debug_id, data)),
                )
            }
            other => {
                pr_warn!(
                    "ENOENT from range_alloc.reserve_existing - offset: {}",
                    offset
                );
                (other, Err(ENOENT))
            }
        })?;

        Ok((desc.size, debug_id, data))
    }

    /// Call the provided callback at every allocated region.
    ///
    /// This destroys the range allocator. Used only during shutdown.
    pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
        for (_, desc) in self.tree.iter_mut() {
            if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
                callback(
                    desc.offset,
                    desc.size,
                    allocation.debug_id(),
                    allocation.take(),
                );
            }
        }
    }

    /// Find the amount and size of buffers allocated by the current caller.
    ///
    /// The idea is that once we cross the threshold, whoever is responsible
    /// for the low async space is likely to try to send another async transaction,
    /// and at some point we'll catch them in the act. This is more efficient
    /// than keeping a map per pid.
    fn low_oneway_space(&self, calling_pid: Pid) -> bool {
        let mut total_alloc_size = 0;
        let mut num_buffers = 0;
        for (_, desc) in self.tree.iter() {
            if let Some((state, _)) = &desc.state {
                if state.is_oneway() && state.pid() == calling_pid {
                    total_alloc_size += desc.size;
                    num_buffers += 1;
                }
            }
        }

        // Warn if this pid has more than 50 transactions, or more than 50% of
        // async space (which is 25% of total buffer size). Oneway spam is only
        // detected when the threshold is exceeded.
        num_buffers > 50 || total_alloc_size > self.size / 4
    }
}

type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
    size: usize,
    offset: usize,
    state: Option<TreeDescriptorState<T>>,
}

impl<T> Descriptor<T> {
    fn new(offset: usize, size: usize) -> Self {
        Self {
            size,
            offset,
            state: None,
        }
    }

    fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
    where
        F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
    {
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

// (Descriptor.size, Descriptor.offset)
type FreeKey = (usize, usize);
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;

/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
    tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
    free_tree_node_res: FreeNodeRes,
    desc_node_res: FreeNodeRes,
}

impl<T> ReserveNewTreeAlloc<T> {
    pub(crate) fn try_new() -> Result<Self> {
        let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
        Ok(Self {
            tree_node_res,
            free_tree_node_res,
            desc_node_res,
        })
    }

    fn initialize(
        self,
        desc: Descriptor<T>,
    ) -> (
        RBTreeNode<usize, Descriptor<T>>,
        RBTreeNode<FreeKey, ()>,
        FreeNodeRes,
    ) {
        let size = desc.size;
        let offset = desc.offset;
        (
            self.tree_node_res.into_node(offset, desc),
            self.free_tree_node_res.into_node((size, offset), ()),
            self.desc_node_res,
        )
    }
}

/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
    tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
    free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}

impl<T> FromArrayAllocs<T> {
    pub(crate) fn try_new(len: usize) -> Result<Self> {
        let num_descriptors = 2 * len + 1;

        let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
        for _ in 0..num_descriptors {
            free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
        }

        Ok(Self { tree, free_tree })
    }
}
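The `Descriptor::try_change_state` helper above, and its callers `reservation_abort`, `reservation_commit`, and `reserve_existing`, all use the same take-modify-restore pattern: the current state is moved out of the `Option`, the closure chooses both the next state and the return value, and whatever state the closure hands back is stored before the result is propagated, so the descriptor is never left empty even on the error path. A minimal standalone sketch of that pattern, using hypothetical `Slot` and `State` types in plain Rust rather than the kernel code:

// Illustrative sketch only (hypothetical types, plain Rust, not the kernel code).
enum State {
    Reserved,
    Allocated,
}

struct Slot {
    state: Option<State>,
}

impl Slot {
    fn try_change_state<F, R>(&mut self, f: F) -> Result<R, &'static str>
    where
        F: FnOnce(Option<State>) -> (Option<State>, Result<R, &'static str>),
    {
        // Take the state out, let the closure decide the transition, then
        // unconditionally store whatever state the closure handed back.
        let (new_state, result) = f(self.state.take());
        self.state = new_state;
        result
    }
}

fn main() {
    let mut slot = Slot { state: Some(State::Reserved) };
    // Commit: Reserved -> Allocated; anything else is an error and is kept as-is.
    let r = slot.try_change_state(|state| match state {
        Some(State::Reserved) => (Some(State::Allocated), Ok(())),
        other => (other, Err("not reserved")),
    });
    assert!(r.is_ok());
    assert!(matches!(slot.state, Some(State::Allocated)));
}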
drivers/android/binder/rust_binder.h (new file, 142 lines)
@@ -0,0 +1,142 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_RUST_BINDER_H
|
||||
#define _LINUX_RUST_BINDER_H
|
||||
|
||||
#include <uapi/linux/android/binder.h>
|
||||
#include <uapi/linux/android/binderfs.h>
|
||||
|
||||
/*
|
||||
* These symbols are exposed by `rust_binderfs.c` and exist here so that Rust
|
||||
* Binder can call them.
|
||||
*/
|
||||
int init_rust_binderfs(void);
|
||||
|
||||
struct dentry;
|
||||
struct inode;
|
||||
struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid);
|
||||
void rust_binderfs_remove_file(struct dentry *dentry);
|
||||
|
||||
typedef void *rust_binder_context;
|
||||
/**
|
||||
* struct binder_device - information about a binder device node
|
||||
* @minor: the minor number used by this device
|
||||
* @ctx: the Rust Context used by this device, or null for binder-control
|
||||
*
|
||||
* This is used as the private data for files directly in binderfs, but not
|
||||
* files in the binder_logs subdirectory. This struct owns a refcount on `ctx`
|
||||
* and the entry for `minor` in `binderfs_minors`. For binder-control `ctx` is
|
||||
* null.
|
||||
*/
|
||||
struct binder_device {
|
||||
int minor;
|
||||
rust_binder_context ctx;
|
||||
};
|
||||
|
||||
/*
|
||||
* The internal data types in the Rust Binder driver are opaque to C, so we use
|
||||
* void pointer typedefs for these types.
|
||||
*/
|
||||
typedef void *rust_binder_transaction;
|
||||
typedef void *rust_binder_thread;
|
||||
typedef void *rust_binder_process;
|
||||
typedef void *rust_binder_node;
|
||||
typedef void *rust_binder_ref_data;
|
||||
|
||||
struct rb_transaction_layout {
|
||||
size_t debug_id;
|
||||
size_t code;
|
||||
size_t flags;
|
||||
size_t from_thread;
|
||||
size_t to_proc;
|
||||
size_t target_node;
|
||||
};
|
||||
|
||||
struct rb_thread_layout {
|
||||
size_t arc_offset;
|
||||
size_t process;
|
||||
size_t id;
|
||||
};
|
||||
|
||||
struct rb_process_layout {
|
||||
size_t arc_offset;
|
||||
size_t task;
|
||||
};
|
||||
|
||||
struct rb_node_layout {
|
||||
size_t arc_offset;
|
||||
size_t debug_id;
|
||||
size_t ptr;
|
||||
};
|
||||
|
||||
struct rust_binder_layout {
|
||||
struct rb_transaction_layout t;
|
||||
struct rb_thread_layout th;
|
||||
struct rb_process_layout p;
|
||||
struct rb_node_layout n;
|
||||
};
|
||||
|
||||
extern const struct rust_binder_layout RUST_BINDER_LAYOUT;
|
||||
|
||||
static inline size_t rust_binder_transaction_debug_id(rust_binder_transaction t)
|
||||
{
|
||||
return * (size_t *) (t + RUST_BINDER_LAYOUT.t.debug_id);
|
||||
}
|
||||
|
||||
static inline u32 rust_binder_transaction_code(rust_binder_transaction t)
|
||||
{
|
||||
return * (u32 *) (t + RUST_BINDER_LAYOUT.t.code);
|
||||
}
|
||||
|
||||
static inline u32 rust_binder_transaction_flags(rust_binder_transaction t)
|
||||
{
|
||||
return * (u32 *) (t + RUST_BINDER_LAYOUT.t.flags);
|
||||
}
|
||||
|
||||
// Nullable!
|
||||
static inline rust_binder_node rust_binder_transaction_target_node(rust_binder_transaction t)
|
||||
{
|
||||
void *p = * (void **) (t + RUST_BINDER_LAYOUT.t.target_node);
|
||||
if (p)
|
||||
p = p + RUST_BINDER_LAYOUT.n.arc_offset;
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline rust_binder_thread rust_binder_transaction_from_thread(rust_binder_transaction t)
|
||||
{
|
||||
void *p = * (void **) (t + RUST_BINDER_LAYOUT.t.from_thread);
|
||||
return p + RUST_BINDER_LAYOUT.th.arc_offset;
|
||||
}
|
||||
|
||||
static inline rust_binder_process rust_binder_transaction_to_proc(rust_binder_transaction t)
|
||||
{
|
||||
void *p = * (void **) (t + RUST_BINDER_LAYOUT.t.to_proc);
|
||||
return p + RUST_BINDER_LAYOUT.p.arc_offset;
|
||||
}
|
||||
|
||||
static inline rust_binder_process rust_binder_thread_proc(rust_binder_thread t)
|
||||
{
|
||||
void *p = * (void **) (t + RUST_BINDER_LAYOUT.th.process);
|
||||
return p + RUST_BINDER_LAYOUT.p.arc_offset;
|
||||
}
|
||||
|
||||
static inline s32 rust_binder_thread_id(rust_binder_thread t)
|
||||
{
|
||||
return * (s32 *) (t + RUST_BINDER_LAYOUT.th.id);
|
||||
}
|
||||
|
||||
static inline struct task_struct *rust_binder_process_task(rust_binder_process t)
|
||||
{
|
||||
return * (struct task_struct **) (t + RUST_BINDER_LAYOUT.p.task);
|
||||
}
|
||||
|
||||
static inline size_t rust_binder_node_debug_id(rust_binder_node t)
|
||||
{
|
||||
return * (size_t *) (t + RUST_BINDER_LAYOUT.n.debug_id);
|
||||
}
|
||||
|
||||
static inline binder_uintptr_t rust_binder_node_ptr(rust_binder_node t)
|
||||
{
|
||||
return * (binder_uintptr_t *) (t + RUST_BINDER_LAYOUT.n.ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
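The `rust_binder.h` header above lets C code read a few fields out of otherwise-opaque Rust objects through an exported offset table: Rust publishes the byte offset of each exposed field in `RUST_BINDER_LAYOUT`, and the inline accessors such as `rust_binder_transaction_debug_id()` add that offset to the opaque pointer and dereference it. A minimal standalone sketch of the producer side of that idea, with hypothetical `ExampleTransaction`/`ExampleLayout` types in plain Rust using `core::mem::offset_of!` (not the kernel's own machinery):

// Illustrative sketch only (hypothetical types, plain Rust, not the kernel code).
use core::mem::offset_of;

#[repr(C)]
struct ExampleTransaction {
    refcount: usize,
    debug_id: usize,
    code: u32,
}

#[repr(C)]
struct ExampleLayout {
    debug_id: usize,
    code: usize,
}

// Exported once; a consumer never needs the full struct definition.
static EXAMPLE_LAYOUT: ExampleLayout = ExampleLayout {
    debug_id: offset_of!(ExampleTransaction, debug_id),
    code: offset_of!(ExampleTransaction, code),
};

// What a consumer holding only an opaque pointer would effectively do.
unsafe fn read_debug_id(obj: *const u8) -> usize {
    unsafe { *(obj.add(EXAMPLE_LAYOUT.debug_id) as *const usize) }
}

fn main() {
    let t = ExampleTransaction { refcount: 1, debug_id: 42, code: 7 };
    let p = &t as *const ExampleTransaction as *const u8;
    // SAFETY: `p` points at a live ExampleTransaction with the layout above.
    assert_eq!(unsafe { read_debug_id(p) }, 42);
}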
drivers/android/binder/rust_binder.rs (new file, 629 lines)
@@ -0,0 +1,629 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
//! Binder -- the Android IPC mechanism.
|
||||
#![recursion_limit = "256"]
|
||||
|
||||
use kernel::{
|
||||
bindings::{self, seq_file},
|
||||
fs::File,
|
||||
list::{HasListLinks, ListArc, ListArcSafe, ListLinksSelfPtr, TryNewListArc},
|
||||
prelude::*,
|
||||
seq_file::SeqFile,
|
||||
seq_print,
|
||||
sync::poll::PollTable,
|
||||
sync::Arc,
|
||||
task::Pid,
|
||||
types::{AsBytes, ForeignOwnable},
|
||||
uaccess::UserSliceWriter,
|
||||
};
|
||||
|
||||
use crate::{context::Context, page_range::Shrinker, process::Process, thread::Thread};
|
||||
|
||||
use core::{
|
||||
ptr::NonNull,
|
||||
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
|
||||
};
|
||||
|
||||
mod allocation;
|
||||
mod context;
|
||||
mod deferred_close;
|
||||
mod defs;
|
||||
mod error;
|
||||
mod node;
|
||||
mod page_range;
|
||||
mod prio;
|
||||
mod process;
|
||||
mod range_alloc;
|
||||
mod stats;
|
||||
mod thread;
|
||||
mod trace;
|
||||
mod transaction;
|
||||
|
||||
#[allow(warnings)] // generated bindgen code
|
||||
mod binderfs {
|
||||
use kernel::bindings::{dentry, inode};
|
||||
|
||||
extern "C" {
|
||||
pub fn init_rust_binderfs() -> kernel::ffi::c_int;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rust_binderfs_create_proc_file(
|
||||
nodp: *mut inode,
|
||||
pid: kernel::ffi::c_int,
|
||||
) -> *mut dentry;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rust_binderfs_remove_file(dentry: *mut dentry);
|
||||
}
|
||||
pub type rust_binder_context = *mut kernel::ffi::c_void;
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct binder_device {
|
||||
pub minor: kernel::ffi::c_int,
|
||||
pub ctx: rust_binder_context,
|
||||
}
|
||||
impl Default for binder_device {
|
||||
fn default() -> Self {
|
||||
let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
|
||||
unsafe {
|
||||
::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
|
||||
s.assume_init()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module! {
|
||||
type: BinderModule,
|
||||
name: "rust_binder",
|
||||
author: "Wedson Almeida Filho, Alice Ryhl",
|
||||
description: "Android Binder",
|
||||
license: "GPL",
|
||||
}
|
||||
|
||||
use kernel::bindings::rust_binder_layout;
|
||||
#[no_mangle]
|
||||
static RUST_BINDER_LAYOUT: rust_binder_layout = rust_binder_layout {
|
||||
t: transaction::TRANSACTION_LAYOUT,
|
||||
th: thread::THREAD_LAYOUT,
|
||||
p: process::PROCESS_LAYOUT,
|
||||
n: node::NODE_LAYOUT,
|
||||
};
|
||||
|
||||
fn next_debug_id() -> usize {
|
||||
static NEXT_DEBUG_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
NEXT_DEBUG_ID.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Provides a single place to write Binder return values via the
|
||||
/// supplied `UserSliceWriter`.
|
||||
pub(crate) struct BinderReturnWriter<'a> {
|
||||
writer: UserSliceWriter,
|
||||
thread: &'a Thread,
|
||||
}
|
||||
|
||||
impl<'a> BinderReturnWriter<'a> {
|
||||
fn new(writer: UserSliceWriter, thread: &'a Thread) -> Self {
|
||||
BinderReturnWriter { writer, thread }
|
||||
}
|
||||
|
||||
/// Write a return code back to user space.
|
||||
/// Should be a `BR_` constant from [`defs`] e.g. [`defs::BR_TRANSACTION_COMPLETE`].
|
||||
fn write_code(&mut self, code: u32) -> Result {
|
||||
crate::trace::trace_return(code);
|
||||
stats::GLOBAL_STATS.inc_br(code);
|
||||
self.thread.process.stats.inc_br(code);
|
||||
self.writer.write(&code)
|
||||
}
|
||||
|
||||
/// Write something *other than* a return code to user space.
|
||||
fn write_payload<T: AsBytes>(&mut self, payload: &T) -> Result {
|
||||
self.writer.write(payload)
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.writer.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Specifies how a type should be delivered to the read part of a BINDER_WRITE_READ ioctl.
|
||||
///
|
||||
/// When a value is pushed to the todo list for a process or thread, it is stored as a trait object
|
||||
/// with the type `Arc<dyn DeliverToRead>`. Trait objects are a Rust feature that lets you
|
||||
/// implement dynamic dispatch over many different types. This lets us store many different types
|
||||
/// in the todo list.
|
||||
trait DeliverToRead: ListArcSafe + Send + Sync {
|
||||
/// Performs work. Returns true if remaining work items in the queue should be processed
|
||||
/// immediately, or false if it should return to caller before processing additional work
|
||||
/// items.
|
||||
fn do_work(
|
||||
self: DArc<Self>,
|
||||
thread: &Thread,
|
||||
writer: &mut BinderReturnWriter<'_>,
|
||||
) -> Result<bool>;
|
||||
|
||||
/// Cancels the given work item. This is called instead of [`DeliverToRead::do_work`] when work
|
||||
/// won't be delivered.
|
||||
fn cancel(self: DArc<Self>);
|
||||
|
||||
/// Called when a work item is delivered directly to a specific thread, rather than to the
|
||||
/// process work list.
|
||||
fn on_thread_selected(&self, _thread: &thread::Thread);
|
||||
|
||||
/// Should we use `wake_up_interruptible_sync` or `wake_up_interruptible` when scheduling this
|
||||
/// work item?
|
||||
///
|
||||
/// Generally only set to true for non-oneway transactions.
|
||||
fn should_sync_wakeup(&self) -> bool;
|
||||
|
||||
fn debug_print(&self, m: &SeqFile, prefix: &str, transaction_prefix: &str) -> Result<()>;
|
||||
}
|
||||
|
||||
// Wrapper around a `DeliverToRead` with linked list links.
|
||||
#[pin_data]
|
||||
struct DTRWrap<T: ?Sized> {
|
||||
#[pin]
|
||||
links: ListLinksSelfPtr<DTRWrap<dyn DeliverToRead>>,
|
||||
#[pin]
|
||||
wrapped: T,
|
||||
}
|
||||
kernel::list::impl_has_list_links_self_ptr! {
|
||||
impl HasSelfPtr<DTRWrap<dyn DeliverToRead>> for DTRWrap<dyn DeliverToRead> { self.links }
|
||||
}
|
||||
kernel::list::impl_list_arc_safe! {
|
||||
impl{T: ListArcSafe + ?Sized} ListArcSafe<0> for DTRWrap<T> {
|
||||
tracked_by wrapped: T;
|
||||
}
|
||||
}
|
||||
kernel::list::impl_list_item! {
|
||||
impl ListItem<0> for DTRWrap<dyn DeliverToRead> {
|
||||
using ListLinksSelfPtr;
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ?Sized> core::ops::Deref for DTRWrap<T> {
|
||||
type Target = T;
|
||||
fn deref(&self) -> &T {
|
||||
&self.wrapped
|
||||
}
|
||||
}
|
||||
|
||||
type DArc<T> = kernel::sync::Arc<DTRWrap<T>>;
|
||||
type DLArc<T> = kernel::list::ListArc<DTRWrap<T>>;
|
||||
|
||||
impl<T: ListArcSafe> DTRWrap<T> {
|
||||
fn new(val: impl PinInit<T>) -> impl PinInit<Self> {
|
||||
pin_init!(Self {
|
||||
links <- ListLinksSelfPtr::new(),
|
||||
wrapped <- val,
|
||||
})
|
||||
}
|
||||
|
||||
fn arc_try_new(val: T) -> Result<DLArc<T>, kernel::alloc::AllocError> {
|
||||
ListArc::pin_init(
|
||||
try_pin_init!(Self {
|
||||
links <- ListLinksSelfPtr::new(),
|
||||
wrapped: val,
|
||||
}),
|
||||
GFP_KERNEL,
|
||||
)
|
||||
.map_err(|_| kernel::alloc::AllocError)
|
||||
}
|
||||
|
||||
fn arc_pin_init(init: impl PinInit<T>) -> Result<DLArc<T>, kernel::error::Error> {
|
||||
ListArc::pin_init(
|
||||
try_pin_init!(Self {
|
||||
links <- ListLinksSelfPtr::new(),
|
||||
wrapped <- init,
|
||||
}),
|
||||
GFP_KERNEL,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
struct DeliverCode {
|
||||
code: u32,
|
||||
skip: AtomicBool,
|
||||
}
|
||||
|
||||
kernel::list::impl_list_arc_safe! {
|
||||
impl ListArcSafe<0> for DeliverCode { untracked; }
|
||||
}
|
||||
|
||||
impl DeliverCode {
|
||||
fn new(code: u32) -> Self {
|
||||
Self {
|
||||
code,
|
||||
skip: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Disable this DeliverCode and make it do nothing.
|
||||
///
|
||||
/// This is used instead of removing it from the work list, since `LinkedList::remove` is
|
||||
/// unsafe, whereas this method is not.
|
||||
fn skip(&self) {
|
||||
self.skip.store(true, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
impl DeliverToRead for DeliverCode {
|
||||
fn do_work(
|
||||
self: DArc<Self>,
|
||||
_thread: &Thread,
|
||||
writer: &mut BinderReturnWriter<'_>,
|
||||
) -> Result<bool> {
|
||||
if !self.skip.load(Ordering::Relaxed) {
|
||||
writer.write_code(self.code)?;
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn cancel(self: DArc<Self>) {}
|
||||
fn on_thread_selected(&self, _thread: &Thread) {}
|
||||
|
||||
fn should_sync_wakeup(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn debug_print(&self, m: &SeqFile, prefix: &str, _tprefix: &str) -> Result<()> {
|
||||
seq_print!(m, "{}", prefix);
|
||||
if self.skip.load(Ordering::Relaxed) {
|
||||
seq_print!(m, "(skipped) ");
|
||||
}
|
||||
if self.code == defs::BR_TRANSACTION_COMPLETE {
|
||||
seq_print!(m, "transaction complete\n");
|
||||
} else {
|
||||
seq_print!(m, "transaction error: {}\n", self.code);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
const fn ptr_align(value: usize) -> usize {
|
||||
let size = core::mem::size_of::<usize>() - 1;
|
||||
(value + size) & !size
|
||||
}
|
||||
|
||||
// SAFETY: We call register in `init`.
|
||||
static BINDER_SHRINKER: Shrinker = unsafe { Shrinker::new() };
|
||||
|
||||
struct BinderModule {}
|
||||
|
||||
impl kernel::Module for BinderModule {
|
||||
fn init(_module: &'static kernel::ThisModule) -> Result<Self> {
|
||||
// SAFETY: The module initializer never runs twice, so we only call this once.
|
||||
unsafe { crate::context::CONTEXTS.init() };
|
||||
|
||||
// SAFETY: This just accesses global booleans.
|
||||
unsafe {
|
||||
extern "C" {
|
||||
static mut binder_use_rust: i32;
|
||||
fn unload_binder() -> i32;
|
||||
}
|
||||
|
||||
if binder_use_rust == 0 {
|
||||
return Ok(Self {});
|
||||
}
|
||||
if unload_binder() != 0 {
|
||||
pr_err!("Failed to unload C Binder.");
|
||||
return Ok(Self {});
|
||||
}
|
||||
}
|
||||
|
||||
pr_warn!("Loaded Rust Binder.");
|
||||
|
||||
BINDER_SHRINKER.register(kernel::c_str!("android-binder"))?;
|
||||
|
||||
// SAFETY: The module is being loaded, so we can initialize binderfs.
|
||||
unsafe { kernel::error::to_result(binderfs::init_rust_binderfs())? };
|
||||
|
||||
Ok(Self {})
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes the inner type Sync.
|
||||
#[repr(transparent)]
|
||||
pub struct AssertSync<T>(T);
|
||||
// SAFETY: Used only to insert `file_operations` into a global, which is safe.
|
||||
unsafe impl<T> Sync for AssertSync<T> {}
|
||||
|
||||
/// File operations that rust_binderfs.c can use.
|
||||
#[no_mangle]
|
||||
#[used]
|
||||
pub static rust_binder_fops: AssertSync<kernel::bindings::file_operations> = {
|
||||
// SAFETY: All zeroes is safe for the `file_operations` type.
|
||||
let zeroed_ops = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
|
||||
|
||||
let ops = kernel::bindings::file_operations {
|
||||
owner: THIS_MODULE.as_ptr(),
|
||||
poll: Some(rust_binder_poll),
|
||||
unlocked_ioctl: Some(rust_binder_unlocked_ioctl),
|
||||
compat_ioctl: Some(rust_binder_compat_ioctl),
|
||||
mmap: Some(rust_binder_mmap),
|
||||
open: Some(rust_binder_open),
|
||||
release: Some(rust_binder_release),
|
||||
flush: Some(rust_binder_flush),
|
||||
..zeroed_ops
|
||||
};
|
||||
AssertSync(ops)
|
||||
};
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_new_context(
|
||||
name: *const kernel::ffi::c_char,
|
||||
) -> *mut kernel::ffi::c_void {
|
||||
// SAFETY: The caller will always provide a valid c string here.
|
||||
let name = unsafe { kernel::str::CStr::from_char_ptr(name) };
|
||||
match Context::new(name) {
|
||||
Ok(ctx) => Arc::into_foreign(ctx).cast_mut(),
|
||||
Err(_err) => core::ptr::null_mut(),
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_remove_context(device: *mut kernel::ffi::c_void) {
|
||||
if !device.is_null() {
|
||||
// SAFETY: The caller ensures that the `device` pointer came from a previous call to
|
||||
// `rust_binder_new_device`.
|
||||
let ctx = unsafe { Arc::<Context>::from_foreign(device) };
|
||||
ctx.deregister();
|
||||
drop(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_open(
|
||||
inode: *mut bindings::inode,
|
||||
file_ptr: *mut bindings::file,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: The `rust_binderfs.c` file ensures that `i_private` is set to a
|
||||
// `struct binder_device`.
|
||||
let device = unsafe { (*inode).i_private } as *const binderfs::binder_device;
|
||||
|
||||
assert!(!device.is_null());
|
||||
|
||||
// SAFETY: The `rust_binderfs.c` file ensures that `device->ctx` holds a binder context when
|
||||
// using the rust binder fops.
|
||||
let ctx = unsafe { Arc::<Context>::borrow((*device).ctx) };
|
||||
|
||||
// SAFETY: The caller provides a valid file pointer to a new `struct file`.
|
||||
let file = unsafe { File::from_raw_file(file_ptr) };
|
||||
let process = match Process::open(ctx, file) {
|
||||
Ok(process) => process,
|
||||
Err(err) => return err.to_errno(),
|
||||
};
|
||||
|
||||
// SAFETY: This is an `inode` for a newly created binder file.
|
||||
match unsafe { BinderfsProcFile::new(inode, process.task.pid()) } {
|
||||
Ok(Some(file)) => process.inner.lock().binderfs_file = Some(file),
|
||||
Ok(None) => { /* pid already exists */ }
|
||||
Err(err) => return err.to_errno(),
|
||||
}
|
||||
|
||||
// SAFETY: This file is associated with Rust binder, so we own the `private_data` field.
|
||||
unsafe { (*file_ptr).private_data = process.into_foreign().cast_mut() };
|
||||
0
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_release(
|
||||
_inode: *mut bindings::inode,
|
||||
file: *mut bindings::file,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let process = unsafe { Arc::<Process>::from_foreign((*file).private_data) };
|
||||
// SAFETY: The caller ensures that the file is valid.
|
||||
let file = unsafe { File::from_raw_file(file) };
|
||||
Process::release(process, file);
|
||||
0
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_compat_ioctl(
|
||||
file: *mut bindings::file,
|
||||
cmd: kernel::ffi::c_uint,
|
||||
arg: kernel::ffi::c_ulong,
|
||||
) -> kernel::ffi::c_long {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
|
||||
// SAFETY: The caller ensures that the file is valid.
|
||||
match Process::compat_ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) {
|
||||
Ok(()) => 0,
|
||||
Err(err) => err.to_errno() as isize,
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_unlocked_ioctl(
|
||||
file: *mut bindings::file,
|
||||
cmd: kernel::ffi::c_uint,
|
||||
arg: kernel::ffi::c_ulong,
|
||||
) -> kernel::ffi::c_long {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
|
||||
// SAFETY: The caller ensures that the file is valid.
|
||||
match Process::ioctl(f, unsafe { File::from_raw_file(file) }, cmd as _, arg as _) {
|
||||
Ok(()) => 0,
|
||||
Err(err) => err.to_errno() as isize,
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_mmap(
|
||||
file: *mut bindings::file,
|
||||
vma: *mut bindings::vm_area_struct,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
|
||||
// SAFETY: The caller ensures that the vma is valid.
|
||||
let area = unsafe { kernel::mm::virt::VmAreaNew::from_raw(vma) };
|
||||
// SAFETY: The caller ensures that the file is valid.
|
||||
match Process::mmap(f, unsafe { File::from_raw_file(file) }, area) {
|
||||
Ok(()) => 0,
|
||||
Err(err) => err.to_errno(),
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_poll(
|
||||
file: *mut bindings::file,
|
||||
wait: *mut bindings::poll_table_struct,
|
||||
) -> bindings::__poll_t {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
|
||||
// SAFETY: The caller ensures that the file is valid.
|
||||
let fileref = unsafe { File::from_raw_file(file) };
|
||||
// SAFETY: The caller ensures that the `PollTable` is valid.
|
||||
match Process::poll(f, fileref, unsafe { PollTable::from_ptr(wait) }) {
|
||||
Ok(v) => v,
|
||||
Err(_) => bindings::POLLERR,
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_binder_flush(
|
||||
file: *mut bindings::file,
|
||||
_id: bindings::fl_owner_t,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: We previously set `private_data` in `rust_binder_open`.
|
||||
let f = unsafe { Arc::<Process>::borrow((*file).private_data) };
|
||||
match Process::flush(f) {
|
||||
Ok(()) => 0,
|
||||
Err(err) => err.to_errno(),
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_stats_show(
|
||||
ptr: *mut seq_file,
|
||||
_: *mut kernel::ffi::c_void,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
|
||||
// this method is called.
|
||||
let m = unsafe { SeqFile::from_raw(ptr) };
|
||||
if let Err(err) = rust_binder_stats_show_impl(m) {
|
||||
seq_print!(m, "failed to generate state: {:?}\n", err);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_state_show(
|
||||
ptr: *mut seq_file,
|
||||
_: *mut kernel::ffi::c_void,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
|
||||
// this method is called.
|
||||
let m = unsafe { SeqFile::from_raw(ptr) };
|
||||
if let Err(err) = rust_binder_state_show_impl(m) {
|
||||
seq_print!(m, "failed to generate state: {:?}\n", err);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_proc_show(
|
||||
ptr: *mut seq_file,
|
||||
_: *mut kernel::ffi::c_void,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: Accessing the private field of `seq_file` is okay.
|
||||
let pid = (unsafe { (*ptr).private }) as usize as Pid;
|
||||
// SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
|
||||
// this method is called.
|
||||
let m = unsafe { SeqFile::from_raw(ptr) };
|
||||
if let Err(err) = rust_binder_proc_show_impl(m, pid) {
|
||||
seq_print!(m, "failed to generate state: {:?}\n", err);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn rust_binder_transactions_show(
|
||||
ptr: *mut seq_file,
|
||||
_: *mut kernel::ffi::c_void,
|
||||
) -> kernel::ffi::c_int {
|
||||
// SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
|
||||
// this method is called.
|
||||
let m = unsafe { SeqFile::from_raw(ptr) };
|
||||
if let Err(err) = rust_binder_transactions_show_impl(m) {
|
||||
seq_print!(m, "failed to generate state: {:?}\n", err);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn rust_binder_transactions_show_impl(m: &SeqFile) -> Result<()> {
|
||||
seq_print!(m, "binder transactions:\n");
|
||||
let contexts = context::get_all_contexts()?;
|
||||
for ctx in contexts {
|
||||
let procs = ctx.get_all_procs()?;
|
||||
for proc in procs {
|
||||
proc.debug_print(m, &ctx, false)?;
|
||||
seq_print!(m, "\n");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rust_binder_stats_show_impl(m: &SeqFile) -> Result<()> {
|
||||
seq_print!(m, "binder stats:\n");
|
||||
stats::GLOBAL_STATS.debug_print("", m);
|
||||
let contexts = context::get_all_contexts()?;
|
||||
for ctx in contexts {
|
||||
let procs = ctx.get_all_procs()?;
|
||||
for proc in procs {
|
||||
proc.debug_print_stats(m, &ctx)?;
|
||||
seq_print!(m, "\n");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rust_binder_state_show_impl(m: &SeqFile) -> Result<()> {
|
||||
seq_print!(m, "binder state:\n");
|
||||
let contexts = context::get_all_contexts()?;
|
||||
for ctx in contexts {
|
||||
let procs = ctx.get_all_procs()?;
|
||||
for proc in procs {
|
||||
proc.debug_print(m, &ctx, true)?;
|
||||
seq_print!(m, "\n");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn rust_binder_proc_show_impl(m: &SeqFile, pid: Pid) -> Result<()> {
|
||||
seq_print!(m, "binder proc state:\n");
|
||||
let contexts = context::get_all_contexts()?;
|
||||
for ctx in contexts {
|
||||
let procs = ctx.get_procs_with_pid(pid)?;
|
||||
for proc in procs {
|
||||
proc.debug_print(m, &ctx, true)?;
|
||||
seq_print!(m, "\n");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct BinderfsProcFile(NonNull<bindings::dentry>);
|
||||
|
||||
// SAFETY: Safe to drop from any thread.
|
||||
unsafe impl Send for BinderfsProcFile {}
|
||||
|
||||
impl BinderfsProcFile {
|
||||
/// # Safety
|
||||
///
|
||||
/// Takes an inode from a newly created binder file.
|
||||
unsafe fn new(nodp: *mut bindings::inode, pid: i32) -> Result<Option<Self>> {
|
||||
// SAFETY: The caller passes an `inode` for a newly created binder file.
|
||||
let dentry = unsafe { binderfs::rust_binderfs_create_proc_file(nodp, pid) };
|
||||
match kernel::error::from_err_ptr(dentry) {
|
||||
Ok(dentry) => Ok(NonNull::new(dentry).map(Self)),
|
||||
Err(err) if err == EEXIST => Ok(None),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for BinderfsProcFile {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: This is a dentry obtained from `rust_binderfs_create_proc_file` that has not been removed yet.
|
||||
unsafe { binderfs::rust_binderfs_remove_file(self.0.as_ptr()) };
|
||||
}
|
||||
}
|
||||
drivers/android/binder/rust_binder_events.c (new file, 59 lines)
@@ -0,0 +1,59 @@
// SPDX-License-Identifier: GPL-2.0-only
/* rust_binder_events.c
 *
 * Rust Binder tracepoints.
 *
 * Copyright 2024 Google LLC
 */

#include "rust_binder.h"

const char * const binder_command_strings[] = {
	"BC_TRANSACTION",
	"BC_REPLY",
	"BC_ACQUIRE_RESULT",
	"BC_FREE_BUFFER",
	"BC_INCREFS",
	"BC_ACQUIRE",
	"BC_RELEASE",
	"BC_DECREFS",
	"BC_INCREFS_DONE",
	"BC_ACQUIRE_DONE",
	"BC_ATTEMPT_ACQUIRE",
	"BC_REGISTER_LOOPER",
	"BC_ENTER_LOOPER",
	"BC_EXIT_LOOPER",
	"BC_REQUEST_DEATH_NOTIFICATION",
	"BC_CLEAR_DEATH_NOTIFICATION",
	"BC_DEAD_BINDER_DONE",
	"BC_TRANSACTION_SG",
	"BC_REPLY_SG",
};

const char * const binder_return_strings[] = {
	"BR_ERROR",
	"BR_OK",
	"BR_TRANSACTION",
	"BR_REPLY",
	"BR_ACQUIRE_RESULT",
	"BR_DEAD_REPLY",
	"BR_TRANSACTION_COMPLETE",
	"BR_INCREFS",
	"BR_ACQUIRE",
	"BR_RELEASE",
	"BR_DECREFS",
	"BR_ATTEMPT_ACQUIRE",
	"BR_NOOP",
	"BR_SPAWN_LOOPER",
	"BR_FINISHED",
	"BR_DEAD_BINDER",
	"BR_CLEAR_DEATH_NOTIFICATION_DONE",
	"BR_FAILED_REPLY",
	"BR_FROZEN_REPLY",
	"BR_ONEWAY_SPAM_SUSPECT",
	"BR_TRANSACTION_PENDING_FROZEN"
};

#define CREATE_TRACE_POINTS
#define CREATE_RUST_TRACE_POINTS
#include "rust_binder_events.h"
drivers/android/binder/rust_binder_events.h (new file, 387 lines)
@@ -0,0 +1,387 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2024 Google, Inc.
|
||||
*/
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_SYSTEM rust_binder
|
||||
#define TRACE_INCLUDE_FILE rust_binder_events
|
||||
#define TRACE_INCLUDE_PATH ../drivers/android/binder
|
||||
|
||||
#if !defined(_RUST_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _RUST_BINDER_TRACE_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
TRACE_EVENT(rust_binder_ioctl,
|
||||
TP_PROTO(unsigned int cmd, unsigned long arg),
|
||||
TP_ARGS(cmd, arg),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned int, cmd)
|
||||
__field(unsigned long, arg)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->cmd = cmd;
|
||||
__entry->arg = arg;
|
||||
),
|
||||
TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(rust_binder_function_return_class,
|
||||
TP_PROTO(int ret),
|
||||
TP_ARGS(ret),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, ret)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ret = ret;
|
||||
),
|
||||
TP_printk("ret=%d", __entry->ret)
|
||||
);
|
||||
|
||||
#define DEFINE_RBINDER_FUNCTION_RETURN_EVENT(name) \
|
||||
DEFINE_EVENT(rust_binder_function_return_class, name, \
|
||||
TP_PROTO(int ret), \
|
||||
TP_ARGS(ret))
|
||||
|
||||
DEFINE_RBINDER_FUNCTION_RETURN_EVENT(rust_binder_ioctl_done);
|
||||
DEFINE_RBINDER_FUNCTION_RETURN_EVENT(rust_binder_read_done);
|
||||
DEFINE_RBINDER_FUNCTION_RETURN_EVENT(rust_binder_write_done);
|
||||
|
||||
TRACE_EVENT(rust_binder_set_priority,
|
||||
TP_PROTO(struct task_struct *thread, int desired_prio, int new_prio),
|
||||
TP_ARGS(thread, desired_prio, new_prio),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, proc)
|
||||
__field(int, thread)
|
||||
__field(unsigned int, old_prio)
|
||||
__field(unsigned int, new_prio)
|
||||
__field(unsigned int, desired_prio)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->proc = thread->tgid;
|
||||
__entry->thread = thread->pid;
|
||||
__entry->old_prio = thread->normal_prio;
|
||||
__entry->new_prio = new_prio;
|
||||
__entry->desired_prio = desired_prio;
|
||||
),
|
||||
TP_printk("proc=%d thread=%d old=%d => new=%d desired=%d",
|
||||
__entry->proc, __entry->thread, __entry->old_prio,
|
||||
__entry->new_prio, __entry->desired_prio)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_wait_for_work,
|
||||
TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo),
|
||||
TP_ARGS(proc_work, transaction_stack, thread_todo),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(bool, proc_work)
|
||||
__field(bool, transaction_stack)
|
||||
__field(bool, thread_todo)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->proc_work = proc_work;
|
||||
__entry->transaction_stack = transaction_stack;
|
||||
__entry->thread_todo = thread_todo;
|
||||
),
|
||||
TP_printk("proc_work=%d transaction_stack=%d thread_todo=%d",
|
||||
__entry->proc_work, __entry->transaction_stack,
|
||||
__entry->thread_todo)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction,
|
||||
TP_PROTO(bool reply, rust_binder_transaction t),
|
||||
TP_ARGS(reply, t),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(int, target_node)
|
||||
__field(int, from_proc)
|
||||
__field(int, to_proc)
|
||||
__field(int, reply)
|
||||
__field(unsigned int, code)
|
||||
__field(unsigned int, flags)
|
||||
),
|
||||
TP_fast_assign(
|
||||
rust_binder_thread from_thread = rust_binder_transaction_from_thread(t);
|
||||
rust_binder_process from = rust_binder_thread_proc(from_thread);
|
||||
rust_binder_process to = rust_binder_transaction_to_proc(t);
|
||||
rust_binder_node target_node = rust_binder_transaction_target_node(t);
|
||||
|
||||
__entry->debug_id = rust_binder_transaction_debug_id(t);
|
||||
__entry->target_node = target_node ? rust_binder_node_debug_id(target_node) : 0;
|
||||
__entry->from_proc = rust_binder_process_task(from)->pid;
|
||||
__entry->to_proc = rust_binder_process_task(to)->pid;
|
||||
__entry->reply = reply;
|
||||
__entry->code = rust_binder_transaction_code(t);
|
||||
__entry->flags = rust_binder_transaction_flags(t);
|
||||
),
|
||||
TP_printk("transaction=%d target_node=%d dest_proc=%d from_proc=%d reply=%d flags=0x%x code=0x%x",
|
||||
__entry->debug_id, __entry->target_node, __entry->to_proc,
|
||||
__entry->from_proc, __entry->reply, __entry->flags,
|
||||
__entry->code)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_thread_selected,
|
||||
TP_PROTO(rust_binder_transaction t, rust_binder_thread thread),
|
||||
TP_ARGS(t, thread),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(int, to_thread)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = rust_binder_transaction_debug_id(t);
|
||||
__entry->to_thread = rust_binder_thread_id(thread);
|
||||
),
|
||||
TP_printk("transaction=%d thread=%d", __entry->debug_id, __entry->to_thread)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_received,
|
||||
TP_PROTO(rust_binder_transaction t),
|
||||
TP_ARGS(t),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = rust_binder_transaction_debug_id(t);
|
||||
),
|
||||
TP_printk("transaction=%d", __entry->debug_id)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_node_send,
|
||||
TP_PROTO(int t_debug_id, rust_binder_node node,
|
||||
const struct flat_binder_object *original,
|
||||
const struct flat_binder_object *translated),
|
||||
TP_ARGS(t_debug_id, node, original, translated),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(int, node_debug_id)
|
||||
__field(binder_uintptr_t, node_ptr)
|
||||
__field(int, types)
|
||||
__field(int, original_handle)
|
||||
__field(int, translated_handle)
|
||||
),
|
||||
TP_fast_assign(
|
||||
int orig_is_handle = original->hdr.type == BINDER_TYPE_HANDLE || original->hdr.type == BINDER_TYPE_WEAK_HANDLE;
|
||||
int orig_is_strong = original->hdr.type == BINDER_TYPE_HANDLE || original->hdr.type == BINDER_TYPE_BINDER;
|
||||
int tran_is_handle = translated->hdr.type == BINDER_TYPE_HANDLE || translated->hdr.type == BINDER_TYPE_WEAK_HANDLE;
|
||||
int tran_is_strong = translated->hdr.type == BINDER_TYPE_HANDLE || translated->hdr.type == BINDER_TYPE_BINDER;
|
||||
|
||||
__entry->debug_id = t_debug_id;
|
||||
__entry->node_debug_id = rust_binder_node_debug_id(node);
|
||||
__entry->node_ptr = rust_binder_node_ptr(node);
|
||||
__entry->types =
|
||||
(orig_is_handle << 0) |
|
||||
(tran_is_handle << 1) |
|
||||
(orig_is_strong << 2) |
|
||||
(tran_is_strong << 3);
|
||||
__entry->original_handle = orig_is_handle ? original->handle : 0;
|
||||
__entry->translated_handle = tran_is_handle ? translated->handle : 0;
|
||||
),
|
||||
TP_printk("transaction=%d node=%d ptr=0x%016llx: %s%s [%d] ==> %s%s [%d]",
|
||||
__entry->debug_id, __entry->node_debug_id,
|
||||
(u64)__entry->node_ptr,
|
||||
(__entry->types & (1<<2)) ? "" : "weak ",
|
||||
(__entry->types & (1<<0)) ? "handle" : "binder",
|
||||
__entry->original_handle,
|
||||
(__entry->types & (1<<3)) ? "" : "weak ",
|
||||
(__entry->types & (1<<1)) ? "handle" : "binder",
|
||||
__entry->translated_handle)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_fd_send,
|
||||
TP_PROTO(int t_debug_id, int fd, size_t offset),
|
||||
TP_ARGS(t_debug_id, fd, offset),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(int, fd)
|
||||
__field(size_t, offset)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = t_debug_id;
|
||||
__entry->fd = fd;
|
||||
__entry->offset = offset;
|
||||
),
|
||||
TP_printk("transaction=%d src_fd=%d offset=%zu",
|
||||
__entry->debug_id, __entry->fd, __entry->offset)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_fd_recv,
|
||||
TP_PROTO(int t_debug_id, int fd, size_t offset),
|
||||
TP_ARGS(t_debug_id, fd, offset),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(int, fd)
|
||||
__field(size_t, offset)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = t_debug_id;
|
||||
__entry->fd = fd;
|
||||
__entry->offset = offset;
|
||||
),
|
||||
TP_printk("transaction=%d dest_fd=%d offset=%zu",
|
||||
__entry->debug_id, __entry->fd, __entry->offset)
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_transaction_alloc_buf,
|
||||
TP_PROTO(int debug_id, const struct binder_transaction_data_sg *data),
|
||||
TP_ARGS(debug_id, data),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
__field(size_t, data_size)
|
||||
__field(size_t, offsets_size)
|
||||
__field(size_t, extra_buffers_size)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = debug_id;
|
||||
__entry->data_size = data->transaction_data.data_size;
|
||||
__entry->offsets_size = data->transaction_data.offsets_size;
|
||||
__entry->extra_buffers_size = data->buffers_size;
|
||||
),
|
||||
TP_printk("transaction=%d data_size=%zd offsets_size=%zd extra_buffers_size=%zd",
|
||||
__entry->debug_id, __entry->data_size, __entry->offsets_size,
|
||||
__entry->extra_buffers_size)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(rust_binder_buffer_release_class,
|
||||
TP_PROTO(int debug_id),
|
||||
TP_ARGS(debug_id),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, debug_id)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->debug_id = debug_id;
|
||||
),
|
||||
TP_printk("transaction=%d", __entry->debug_id)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(rust_binder_buffer_release_class, rust_binder_transaction_buffer_release,
|
||||
TP_PROTO(int debug_id),
|
||||
TP_ARGS(debug_id));
|
||||
|
||||
DEFINE_EVENT(rust_binder_buffer_release_class, rust_binder_transaction_failed_buffer_release,
|
||||
TP_PROTO(int debug_id),
|
||||
TP_ARGS(debug_id));
|
||||
|
||||
DEFINE_EVENT(rust_binder_buffer_release_class, rust_binder_transaction_update_buffer_release,
|
||||
TP_PROTO(int debug_id),
|
||||
TP_ARGS(debug_id));
|
||||
|
||||
TRACE_EVENT(rust_binder_update_page_range,
|
||||
TP_PROTO(int pid, bool allocate, size_t start, size_t end),
|
||||
TP_ARGS(pid, allocate, start, end),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, proc)
|
||||
__field(bool, allocate)
|
||||
__field(size_t, offset)
|
||||
__field(size_t, size)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->proc = pid;
|
||||
__entry->allocate = allocate;
|
||||
__entry->offset = start;
|
||||
__entry->size = end - start;
|
||||
),
|
||||
TP_printk("proc=%d allocate=%d offset=%zu size=%zu",
|
||||
__entry->proc, __entry->allocate,
|
||||
__entry->offset, __entry->size)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(rust_binder_lru_page_class,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, proc)
|
||||
__field(size_t, page_index)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->proc = pid;
|
||||
__entry->page_index = page_index;
|
||||
),
|
||||
TP_printk("proc=%d page_index=%zu",
|
||||
__entry->proc, __entry->page_index)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_alloc_lru_start,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_alloc_lru_end,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_free_lru_start,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_free_lru_end,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_alloc_page_start,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_alloc_page_end,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_unmap_user_start,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_unmap_user_end,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_unmap_kernel_start,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
DEFINE_EVENT(rust_binder_lru_page_class, rust_binder_unmap_kernel_end,
|
||||
TP_PROTO(int pid, size_t page_index),
|
||||
TP_ARGS(pid, page_index));
|
||||
|
||||
TRACE_EVENT(rust_binder_command,
|
||||
TP_PROTO(uint32_t cmd),
|
||||
TP_ARGS(cmd),
|
||||
TP_STRUCT__entry(
|
||||
__field(uint32_t, cmd)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->cmd = cmd;
|
||||
),
|
||||
TP_printk("cmd=0x%x %s",
|
||||
__entry->cmd,
|
||||
_IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_command_strings) ?
|
||||
binder_command_strings[_IOC_NR(__entry->cmd)] :
|
||||
"unknown")
|
||||
);
|
||||
|
||||
TRACE_EVENT(rust_binder_return,
|
||||
TP_PROTO(uint32_t ret),
|
||||
TP_ARGS(ret),
|
||||
TP_STRUCT__entry(
|
||||
__field(uint32_t, ret)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ret = ret;
|
||||
),
|
||||
TP_printk("ret=0x%x %s",
|
||||
__entry->ret,
|
||||
_IOC_NR(__entry->ret) < ARRAY_SIZE(binder_return_strings) ?
|
||||
binder_return_strings[_IOC_NR(__entry->ret)] :
|
||||
"unknown")
|
||||
);
|
||||
|
||||
#endif /* _RUST_BINDER_TRACE_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
||||
drivers/android/binder/rust_binder_hooks.c (new file, 23 lines)
@@ -0,0 +1,23 @@
// SPDX-License-Identifier: GPL-2.0-only
/* rust_binder_hooks.c
 *
 * Rust Binder vendor hooks.
 *
 * Copyright 2024 Google LLC
 */

#include "rust_binder.h"

#define CREATE_TRACE_POINTS
#define CREATE_RUST_TRACE_POINTS
#include <trace/hooks/vendor_hooks.h>
#include <linux/tracepoint.h>

#include "rust_binder_hooks.h"

/*
 * Export tracepoints that act as a bare tracehook (ie: have no trace event
 * associated with them) to allow external modules to probe them.
 */
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rust_binder_set_priority);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rust_binder_restore_priority);
drivers/android/binder/rust_binder_hooks.h (new file, 33 lines)
@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2024 Google, Inc.
 */

#undef TRACE_SYSTEM
#undef TRACE_INCLUDE_FILE
#undef TRACE_INCLUDE_PATH
#define TRACE_SYSTEM rust_binder
#define TRACE_INCLUDE_FILE rust_binder_hooks
#define TRACE_INCLUDE_PATH ../drivers/android/binder

#if !defined(_RUST_BINDER_HOOK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _RUST_BINDER_HOOK_H

#include <trace/hooks/vendor_hooks.h>

/*
 * Following tracepoints are not exported in tracefs and provide a
 * mechanism for vendor modules to hook and extend functionality
 */

DECLARE_HOOK(android_vh_rust_binder_set_priority,
	TP_PROTO(rust_binder_transaction t, struct task_struct *task),
	TP_ARGS(t, task));
DECLARE_HOOK(android_vh_rust_binder_restore_priority,
	TP_PROTO(struct task_struct *task),
	TP_ARGS(task));

#endif /* _RUST_BINDER_HOOK_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
drivers/android/binder/rust_binder_internal.h (new file, 87 lines)
@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* rust_binder_internal.h
 *
 * This file contains internal data structures used by Rust Binder. Mostly,
 * these are type definitions used only by binderfs or things that Rust Binder
 * defines and exports to binderfs.
 *
 * It does not include things exported by binderfs to Rust Binder since this
 * file is not included as input to bindgen.
 *
 * Copyright (C) 2024 Google LLC.
 */

#ifndef _LINUX_RUST_BINDER_INTERNAL_H
#define _LINUX_RUST_BINDER_INTERNAL_H

#define RUST_BINDERFS_SUPER_MAGIC 0x6c6f6f71

#include <linux/seq_file.h>
#include <uapi/linux/android/binder.h>
#include <uapi/linux/android/binderfs.h>

/*
 * The internal data types in the Rust Binder driver are opaque to C, so we use
 * void pointer typedefs for these types.
 */
typedef void *rust_binder_context;

/**
 * struct binder_device - information about a binder device node
 * @minor: the minor number used by this device
 * @ctx: the Rust Context used by this device, or null for binder-control
 *
 * This is used as the private data for files directly in binderfs, but not
 * files in the binder_logs subdirectory. This struct owns a refcount on `ctx`
 * and the entry for `minor` in `binderfs_minors`. For binder-control `ctx` is
 * null.
 */
struct binder_device {
	int minor;
	rust_binder_context ctx;
};

int rust_binder_stats_show(struct seq_file *m, void *unused);
int rust_binder_state_show(struct seq_file *m, void *unused);
int rust_binder_transactions_show(struct seq_file *m, void *unused);
int rust_binder_proc_show(struct seq_file *m, void *pid);

extern const struct file_operations rust_binder_fops;
rust_binder_context rust_binder_new_context(char *name);
void rust_binder_remove_context(rust_binder_context device);

/**
 * binderfs_mount_opts - mount options for binderfs
 * @max: maximum number of allocatable binderfs binder devices
 * @stats_mode: enable binder stats in binderfs.
 */
struct binderfs_mount_opts {
	int max;
	int stats_mode;
};

/**
 * binderfs_info - information about a binderfs mount
 * @ipc_ns: The ipc namespace the binderfs mount belongs to.
 * @control_dentry: This records the dentry of this binderfs mount
 *                  binder-control device.
 * @root_uid: uid that needs to be used when a new binder device is
 *            created.
 * @root_gid: gid that needs to be used when a new binder device is
 *            created.
 * @mount_opts: The mount options in use.
 * @device_count: The current number of allocated binder devices.
 * @proc_log_dir: Pointer to the directory dentry containing process-specific
 *                logs.
 */
struct binderfs_info {
	struct ipc_namespace *ipc_ns;
	struct dentry *control_dentry;
	kuid_t root_uid;
	kgid_t root_gid;
	struct binderfs_mount_opts mount_opts;
	int device_count;
	struct dentry *proc_log_dir;
};

#endif /* _LINUX_RUST_BINDER_INTERNAL_H */
drivers/android/binder/rust_binderfs.c (new file, 849 lines)
@@ -0,0 +1,849 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/compiler_types.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/fsnotify.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/ipc_namespace.h>
|
||||
#include <linux/kdev_t.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/fs_parser.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock_types.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/asm-generic/errno-base.h>
|
||||
#include <uapi/linux/android/binder.h>
|
||||
#include <uapi/linux/android/binderfs.h>
|
||||
|
||||
#include "rust_binder_internal.h"
|
||||
|
||||
#define FIRST_INODE 1
|
||||
#define SECOND_INODE 2
|
||||
#define INODE_OFFSET 3
|
||||
#define BINDERFS_MAX_MINOR (1U << MINORBITS)
|
||||
/* Ensure that the initial ipc namespace always has devices available. */
|
||||
#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4)
|
||||
|
||||
DEFINE_SHOW_ATTRIBUTE(rust_binder_stats);
|
||||
DEFINE_SHOW_ATTRIBUTE(rust_binder_state);
|
||||
DEFINE_SHOW_ATTRIBUTE(rust_binder_transactions);
|
||||
DEFINE_SHOW_ATTRIBUTE(rust_binder_proc);
|
||||
|
||||
char *rust_binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
|
||||
module_param_named(rust_devices, rust_binder_devices_param, charp, 0444);
|
||||
|
||||
static dev_t binderfs_dev;
|
||||
static DEFINE_MUTEX(binderfs_minors_mutex);
|
||||
static DEFINE_IDA(binderfs_minors);
|
||||
|
||||
enum binderfs_param {
|
||||
Opt_max,
|
||||
Opt_stats_mode,
|
||||
};
|
||||
|
||||
enum binderfs_stats_mode {
|
||||
binderfs_stats_mode_unset,
|
||||
binderfs_stats_mode_global,
|
||||
};
|
||||
|
||||
struct binder_features {
|
||||
bool oneway_spam_detection;
|
||||
bool extended_error;
|
||||
};
|
||||
|
||||
static const struct constant_table binderfs_param_stats[] = {
|
||||
{ "global", binderfs_stats_mode_global },
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct fs_parameter_spec binderfs_fs_parameters[] = {
|
||||
fsparam_u32("max", Opt_max),
|
||||
fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats),
|
||||
{}
|
||||
};
|
||||
|
||||
static struct binder_features binder_features = {
|
||||
.oneway_spam_detection = true,
|
||||
.extended_error = true,
|
||||
};
|
||||
|
||||
static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb)
|
||||
{
|
||||
return sb->s_fs_info;
|
||||
}
|
||||
|
||||
bool is_rust_binderfs_device(const struct inode *inode)
|
||||
{
|
||||
if (inode->i_sb->s_magic == RUST_BINDERFS_SUPER_MAGIC)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* binderfs_binder_device_create - allocate inode from super block of a
|
||||
* binderfs mount
|
||||
* @ref_inode: inode from which the super block will be taken
|
||||
* @userp: buffer to copy information about new device for userspace to
|
||||
* @req: struct binderfs_device as copied from userspace
|
||||
*
|
||||
* This function allocates a new binder_device and reserves a new minor
|
||||
* number for it.
|
||||
* Minor numbers are limited and tracked globally in binderfs_minors. The
|
||||
* function will stash a struct binder_device for the specific binder
|
||||
* device in i_private of the inode.
|
||||
* It will go on to allocate a new inode from the super block of the
|
||||
* filesystem mount, stash a struct binder_device in its i_private field
|
||||
* and attach a dentry to that inode.
|
||||
*
|
||||
* Return: 0 on success, negative errno on failure
|
||||
*/
|
||||
static int binderfs_binder_device_create(struct inode *ref_inode,
|
||||
struct binderfs_device __user *userp,
|
||||
struct binderfs_device *req)
|
||||
{
|
||||
int minor, ret;
|
||||
struct dentry *dentry, *root;
|
||||
struct binder_device *device = NULL;
|
||||
rust_binder_context ctx = NULL;
|
||||
struct inode *inode = NULL;
|
||||
struct super_block *sb = ref_inode->i_sb;
|
||||
struct binderfs_info *info = sb->s_fs_info;
|
||||
#if defined(CONFIG_IPC_NS)
|
||||
bool use_reserve = (info->ipc_ns == &init_ipc_ns);
|
||||
#else
|
||||
bool use_reserve = true;
|
||||
#endif
|
||||
|
||||
/* Reserve new minor number for the new device. */
|
||||
mutex_lock(&binderfs_minors_mutex);
|
||||
if (++info->device_count <= info->mount_opts.max)
|
||||
minor = ida_alloc_max(&binderfs_minors,
|
||||
use_reserve ? BINDERFS_MAX_MINOR :
|
||||
BINDERFS_MAX_MINOR_CAPPED,
|
||||
GFP_KERNEL);
|
||||
else
|
||||
minor = -ENOSPC;
|
||||
if (minor < 0) {
|
||||
--info->device_count;
|
||||
mutex_unlock(&binderfs_minors_mutex);
|
||||
return minor;
|
||||
}
|
||||
mutex_unlock(&binderfs_minors_mutex);
|
||||
|
||||
ret = -ENOMEM;
|
||||
device = kzalloc(sizeof(*device), GFP_KERNEL);
|
||||
if (!device)
|
||||
goto err;
|
||||
|
||||
req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */
|
||||
|
||||
ctx = rust_binder_new_context(req->name);
|
||||
if (!ctx)
|
||||
goto err;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
goto err;
|
||||
|
||||
inode->i_ino = minor + INODE_OFFSET;
|
||||
simple_inode_init_ts(inode);
|
||||
init_special_inode(inode, S_IFCHR | 0600,
|
||||
MKDEV(MAJOR(binderfs_dev), minor));
|
||||
inode->i_fop = &rust_binder_fops;
|
||||
inode->i_uid = info->root_uid;
|
||||
inode->i_gid = info->root_gid;
|
||||
|
||||
req->major = MAJOR(binderfs_dev);
|
||||
req->minor = minor;
|
||||
device->ctx = ctx;
|
||||
device->minor = minor;
|
||||
|
||||
if (userp && copy_to_user(userp, req, sizeof(*req))) {
|
||||
ret = -EFAULT;
|
||||
goto err;
|
||||
}
|
||||
|
||||
root = sb->s_root;
|
||||
inode_lock(d_inode(root));
|
||||
|
||||
/* look it up */
|
||||
dentry = lookup_one_len(req->name, root, strlen(req->name));
|
||||
if (IS_ERR(dentry)) {
|
||||
inode_unlock(d_inode(root));
|
||||
ret = PTR_ERR(dentry);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (d_really_is_positive(dentry)) {
|
||||
/* already exists */
|
||||
dput(dentry);
|
||||
inode_unlock(d_inode(root));
|
||||
ret = -EEXIST;
|
||||
goto err;
|
||||
}
|
||||
|
||||
inode->i_private = device;
|
||||
d_instantiate(dentry, inode);
|
||||
fsnotify_create(root->d_inode, dentry);
|
||||
inode_unlock(d_inode(root));
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kfree(device);
|
||||
rust_binder_remove_context(ctx);
|
||||
mutex_lock(&binderfs_minors_mutex);
|
||||
--info->device_count;
|
||||
ida_free(&binderfs_minors, minor);
|
||||
mutex_unlock(&binderfs_minors_mutex);
|
||||
iput(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* binder_ctl_ioctl - handle binder device node allocation requests
|
||||
*
|
||||
* The request handler for the binder-control device. All requests operate on
|
||||
* the binderfs mount the binder-control device resides in:
|
||||
* - BINDER_CTL_ADD
|
||||
* Allocate a new binder device.
|
||||
*
|
||||
* Return: %0 on success, negative errno on failure.
|
||||
*/
|
||||
static long binder_ctl_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct binderfs_device __user *device = (struct binderfs_device __user *)arg;
|
||||
struct binderfs_device device_req;
|
||||
|
||||
switch (cmd) {
|
||||
case BINDER_CTL_ADD:
|
||||
ret = copy_from_user(&device_req, device, sizeof(device_req));
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = binderfs_binder_device_create(inode, device, &device_req);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
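
/*
 * Illustrative userspace sketch, not part of this patch: the usual way to
 * exercise BINDER_CTL_ADD is through the binder-control node of a binderfs
 * mount. The /dev/binderfs mount point and the helper name below are
 * assumptions for illustration only; the ioctl and struct binderfs_device
 * come from <linux/android/binderfs.h>.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/android/binderfs.h>

static int add_binder_device(const char *name)
{
	struct binderfs_device req = { 0 };
	int fd, ret;

	strncpy(req.name, name, BINDERFS_MAX_NAME);

	fd = open("/dev/binderfs/binder-control", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return -errno;

	ret = ioctl(fd, BINDER_CTL_ADD, &req);
	if (ret < 0)
		ret = -errno;
	close(fd);

	if (!ret)
		printf("allocated %s as device %u:%u\n",
		       req.name, req.major, req.minor);
	return ret;
}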
|
||||
|
||||
static void binderfs_evict_inode(struct inode *inode)
|
||||
{
|
||||
struct binder_device *device = inode->i_private;
|
||||
struct binderfs_info *info = BINDERFS_SB(inode->i_sb);
|
||||
|
||||
clear_inode(inode);
|
||||
|
||||
if (!S_ISCHR(inode->i_mode) || !device)
|
||||
return;
|
||||
|
||||
mutex_lock(&binderfs_minors_mutex);
|
||||
--info->device_count;
|
||||
ida_free(&binderfs_minors, device->minor);
|
||||
mutex_unlock(&binderfs_minors_mutex);
|
||||
|
||||
/* ctx is null for binder-control, but this function ignores null pointers */
|
||||
rust_binder_remove_context(device->ctx);
|
||||
|
||||
kfree(device);
|
||||
}
|
||||
|
||||
static int binderfs_fs_context_parse_param(struct fs_context *fc,
|
||||
struct fs_parameter *param)
|
||||
{
|
||||
int opt;
|
||||
struct binderfs_mount_opts *ctx = fc->fs_private;
|
||||
struct fs_parse_result result;
|
||||
|
||||
opt = fs_parse(fc, binderfs_fs_parameters, param, &result);
|
||||
if (opt < 0)
|
||||
return opt;
|
||||
|
||||
switch (opt) {
|
||||
case Opt_max:
|
||||
if (result.uint_32 > BINDERFS_MAX_MINOR)
|
||||
return invalfc(fc, "Bad value for '%s'", param->key);
|
||||
|
||||
ctx->max = result.uint_32;
|
||||
break;
|
||||
case Opt_stats_mode:
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
ctx->stats_mode = result.uint_32;
|
||||
break;
|
||||
default:
|
||||
return invalfc(fc, "Unsupported parameter '%s'", param->key);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
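
As context for the two options parsed above: "max" caps how many binder devices may be allocated in this binderfs instance, and "stats=global" (restricted to CAP_SYS_ADMIN) exposes the global binder_logs directory in the mount. A typical invocation would look something like "mount -t binder binder /dev/binderfs -o max=4096,stats=global"; the mount point and values shown here are illustrative, not mandated by this code.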
|
||||
|
||||
static int binderfs_fs_context_reconfigure(struct fs_context *fc)
|
||||
{
|
||||
struct binderfs_mount_opts *ctx = fc->fs_private;
|
||||
struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb);
|
||||
|
||||
if (info->mount_opts.stats_mode != ctx->stats_mode)
|
||||
return invalfc(fc, "Binderfs stats mode cannot be changed during a remount");
|
||||
|
||||
info->mount_opts.stats_mode = ctx->stats_mode;
|
||||
info->mount_opts.max = ctx->max;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int binderfs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
{
|
||||
struct binderfs_info *info = BINDERFS_SB(root->d_sb);
|
||||
|
||||
if (info->mount_opts.max <= BINDERFS_MAX_MINOR)
|
||||
seq_printf(seq, ",max=%d", info->mount_opts.max);
|
||||
|
||||
switch (info->mount_opts.stats_mode) {
|
||||
case binderfs_stats_mode_unset:
|
||||
break;
|
||||
case binderfs_stats_mode_global:
|
||||
seq_printf(seq, ",stats=global");
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct super_operations binderfs_super_ops = {
|
||||
.evict_inode = binderfs_evict_inode,
|
||||
.show_options = binderfs_show_options,
|
||||
.statfs = simple_statfs,
|
||||
};
|
||||
|
||||
static inline bool is_binderfs_control_device(const struct dentry *dentry)
|
||||
{
|
||||
struct binderfs_info *info = dentry->d_sb->s_fs_info;
|
||||
|
||||
return info->control_dentry == dentry;
|
||||
}
|
||||
|
||||
static int binderfs_rename(struct mnt_idmap *idmap,
|
||||
struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
if (is_binderfs_control_device(old_dentry) ||
|
||||
is_binderfs_control_device(new_dentry))
|
||||
return -EPERM;
|
||||
|
||||
return simple_rename(idmap, old_dir, old_dentry, new_dir,
|
||||
new_dentry, flags);
|
||||
}
|
||||
|
||||
static int binderfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
if (is_binderfs_control_device(dentry))
|
||||
return -EPERM;
|
||||
|
||||
return simple_unlink(dir, dentry);
|
||||
}
|
||||
|
||||
static const struct file_operations binder_ctl_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = nonseekable_open,
|
||||
.unlocked_ioctl = binder_ctl_ioctl,
|
||||
.compat_ioctl = binder_ctl_ioctl,
|
||||
.llseek = noop_llseek,
|
||||
};
|
||||
|
||||
/**
|
||||
* binderfs_binder_ctl_create - create a new binder-control device
|
||||
* @sb: super block of the binderfs mount
|
||||
*
|
||||
* This function creates a new binder-control device node in the binderfs mount
|
||||
* referred to by @sb.
|
||||
*
|
||||
* Return: 0 on success, negative errno on failure
|
||||
*/
|
||||
static int binderfs_binder_ctl_create(struct super_block *sb)
|
||||
{
|
||||
int minor, ret;
|
||||
struct dentry *dentry;
|
||||
struct binder_device *device;
|
||||
struct inode *inode = NULL;
|
||||
struct dentry *root = sb->s_root;
|
||||
struct binderfs_info *info = sb->s_fs_info;
|
||||
#if defined(CONFIG_IPC_NS)
|
||||
bool use_reserve = (info->ipc_ns == &init_ipc_ns);
|
||||
#else
|
||||
bool use_reserve = true;
|
||||
#endif
|
||||
|
||||
device = kzalloc(sizeof(*device), GFP_KERNEL);
|
||||
if (!device)
|
||||
return -ENOMEM;
|
||||
|
||||
/* If we have already created a binder-control node, return. */
|
||||
if (info->control_dentry) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
goto out;
|
||||
|
||||
/* Reserve a new minor number for the new device. */
|
||||
mutex_lock(&binderfs_minors_mutex);
|
||||
minor = ida_alloc_max(&binderfs_minors,
|
||||
use_reserve ? BINDERFS_MAX_MINOR :
|
||||
BINDERFS_MAX_MINOR_CAPPED,
|
||||
GFP_KERNEL);
|
||||
mutex_unlock(&binderfs_minors_mutex);
|
||||
if (minor < 0) {
|
||||
ret = minor;
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode->i_ino = SECOND_INODE;
|
||||
simple_inode_init_ts(inode);
|
||||
init_special_inode(inode, S_IFCHR | 0600,
|
||||
MKDEV(MAJOR(binderfs_dev), minor));
|
||||
inode->i_fop = &binder_ctl_fops;
|
||||
inode->i_uid = info->root_uid;
|
||||
inode->i_gid = info->root_gid;
|
||||
|
||||
device->minor = minor;
|
||||
device->ctx = NULL;
|
||||
|
||||
dentry = d_alloc_name(root, "binder-control");
|
||||
if (!dentry)
|
||||
goto out;
|
||||
|
||||
inode->i_private = device;
|
||||
info->control_dentry = dentry;
|
||||
d_add(dentry, inode);
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
kfree(device);
|
||||
iput(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct inode_operations binderfs_dir_inode_operations = {
|
||||
.lookup = simple_lookup,
|
||||
.rename = binderfs_rename,
|
||||
.unlink = binderfs_unlink,
|
||||
};
|
||||
|
||||
static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
|
||||
{
|
||||
struct inode *ret;
|
||||
|
||||
ret = new_inode(sb);
|
||||
if (ret) {
|
||||
ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
|
||||
ret->i_mode = mode;
|
||||
simple_inode_init_ts(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct dentry *binderfs_create_dentry(struct dentry *parent,
|
||||
const char *name)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
|
||||
dentry = lookup_one_len(name, parent, strlen(name));
|
||||
if (IS_ERR(dentry))
|
||||
return dentry;
|
||||
|
||||
/* Return error if the file/dir already exists. */
|
||||
if (d_really_is_positive(dentry)) {
|
||||
dput(dentry);
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
|
||||
return dentry;
|
||||
}
|
||||
|
||||
void rust_binderfs_remove_file(struct dentry *dentry)
|
||||
{
|
||||
struct inode *parent_inode;
|
||||
|
||||
parent_inode = d_inode(dentry->d_parent);
|
||||
inode_lock(parent_inode);
|
||||
if (simple_positive(dentry)) {
|
||||
dget(dentry);
|
||||
simple_unlink(parent_inode, dentry);
|
||||
d_delete(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
inode_unlock(parent_inode);
|
||||
}
|
||||
|
||||
struct dentry *rust_binderfs_create_file(struct dentry *parent, const char *name,
|
||||
const struct file_operations *fops,
|
||||
void *data)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
struct inode *new_inode, *parent_inode;
|
||||
struct super_block *sb;
|
||||
|
||||
parent_inode = d_inode(parent);
|
||||
inode_lock(parent_inode);
|
||||
|
||||
dentry = binderfs_create_dentry(parent, name);
|
||||
if (IS_ERR(dentry))
|
||||
goto out;
|
||||
|
||||
sb = parent_inode->i_sb;
|
||||
new_inode = binderfs_make_inode(sb, S_IFREG | 0444);
|
||||
if (!new_inode) {
|
||||
dput(dentry);
|
||||
dentry = ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
|
||||
new_inode->i_fop = fops;
|
||||
new_inode->i_private = data;
|
||||
d_instantiate(dentry, new_inode);
|
||||
fsnotify_create(parent_inode, dentry);
|
||||
|
||||
out:
|
||||
inode_unlock(parent_inode);
|
||||
return dentry;
|
||||
}
|
||||
|
||||
struct dentry *rust_binderfs_create_proc_file(struct inode *nodp, int pid)
|
||||
{
|
||||
struct binderfs_info *info = nodp->i_sb->s_fs_info;
|
||||
struct dentry *dir = info->proc_log_dir;
|
||||
char strbuf[20 + 1];
|
||||
void *data = (void *)(unsigned long) pid;
|
||||
|
||||
if (!dir)
|
||||
return NULL;
|
||||
|
||||
snprintf(strbuf, sizeof(strbuf), "%u", pid);
|
||||
return rust_binderfs_create_file(dir, strbuf, &rust_binder_proc_fops, data);
|
||||
}
|
||||
|
||||
static struct dentry *binderfs_create_dir(struct dentry *parent,
|
||||
const char *name)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
struct inode *new_inode, *parent_inode;
|
||||
struct super_block *sb;
|
||||
|
||||
parent_inode = d_inode(parent);
|
||||
inode_lock(parent_inode);
|
||||
|
||||
dentry = binderfs_create_dentry(parent, name);
|
||||
if (IS_ERR(dentry))
|
||||
goto out;
|
||||
|
||||
sb = parent_inode->i_sb;
|
||||
new_inode = binderfs_make_inode(sb, S_IFDIR | 0755);
|
||||
if (!new_inode) {
|
||||
dput(dentry);
|
||||
dentry = ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
|
||||
new_inode->i_fop = &simple_dir_operations;
|
||||
new_inode->i_op = &simple_dir_inode_operations;
|
||||
|
||||
set_nlink(new_inode, 2);
|
||||
d_instantiate(dentry, new_inode);
|
||||
inc_nlink(parent_inode);
|
||||
fsnotify_mkdir(parent_inode, dentry);
|
||||
|
||||
out:
|
||||
inode_unlock(parent_inode);
|
||||
return dentry;
|
||||
}
|
||||
|
||||
static int binder_features_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
bool *feature = m->private;
|
||||
|
||||
seq_printf(m, "%d\n", *feature);
|
||||
|
||||
return 0;
|
||||
}
|
||||
DEFINE_SHOW_ATTRIBUTE(binder_features);
|
||||
|
||||
static int init_binder_features(struct super_block *sb)
|
||||
{
|
||||
struct dentry *dentry, *dir;
|
||||
|
||||
dir = binderfs_create_dir(sb->s_root, "features");
|
||||
if (IS_ERR(dir))
|
||||
return PTR_ERR(dir);
|
||||
|
||||
dentry = rust_binderfs_create_file(dir, "oneway_spam_detection",
|
||||
&binder_features_fops,
|
||||
&binder_features.oneway_spam_detection);
|
||||
if (IS_ERR(dentry))
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
dentry = rust_binderfs_create_file(dir, "extended_error",
|
||||
&binder_features_fops,
|
||||
&binder_features.extended_error);
|
||||
if (IS_ERR(dentry))
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
return 0;
|
||||
}
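
/*
 * Illustrative sketch only, not part of this patch: userspace can probe one
 * of the feature flags exposed above by reading its file, since
 * binder_features_show() prints "1\n" when a feature is enabled. The
 * /dev/binderfs mount point and the helper name are assumptions for
 * illustration.
 */
#include <stdbool.h>
#include <stdio.h>

static bool binder_feature_enabled(const char *feature)
{
	char path[256];
	FILE *f;
	int val = 0;

	snprintf(path, sizeof(path), "/dev/binderfs/features/%s", feature);
	f = fopen(path, "r");
	if (!f)
		return false;
	if (fscanf(f, "%d", &val) != 1)
		val = 0;
	fclose(f);
	return val != 0;
}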
|
||||
|
||||
static int init_binder_logs(struct super_block *sb)
|
||||
{
|
||||
struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir;
|
||||
struct binderfs_info *info;
|
||||
int ret = 0;
|
||||
|
||||
binder_logs_root_dir = binderfs_create_dir(sb->s_root,
|
||||
"binder_logs");
|
||||
if (IS_ERR(binder_logs_root_dir)) {
|
||||
ret = PTR_ERR(binder_logs_root_dir);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dentry = rust_binderfs_create_file(binder_logs_root_dir, "stats",
|
||||
&rust_binder_stats_fops, NULL);
|
||||
if (IS_ERR(dentry)) {
|
||||
ret = PTR_ERR(dentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dentry = rust_binderfs_create_file(binder_logs_root_dir, "state",
|
||||
&rust_binder_state_fops, NULL);
|
||||
if (IS_ERR(dentry)) {
|
||||
ret = PTR_ERR(dentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dentry = rust_binderfs_create_file(binder_logs_root_dir, "transactions",
|
||||
&rust_binder_transactions_fops, NULL);
|
||||
if (IS_ERR(dentry)) {
|
||||
ret = PTR_ERR(dentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc");
|
||||
if (IS_ERR(proc_log_dir)) {
|
||||
ret = PTR_ERR(proc_log_dir);
|
||||
goto out;
|
||||
}
|
||||
info = sb->s_fs_info;
|
||||
info->proc_log_dir = proc_log_dir;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
|
||||
{
|
||||
int ret;
|
||||
struct binderfs_info *info;
|
||||
struct binderfs_mount_opts *ctx = fc->fs_private;
|
||||
struct inode *inode = NULL;
|
||||
struct binderfs_device device_info = {};
|
||||
const char *name;
|
||||
size_t len;
|
||||
|
||||
sb->s_blocksize = PAGE_SIZE;
|
||||
sb->s_blocksize_bits = PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* The binderfs filesystem can be mounted by userns root in a
|
||||
* non-initial userns. By default such mounts have the SB_I_NODEV flag
|
||||
* set in s_iflags to prevent security issues where userns root can
|
||||
* just create random device nodes via mknod() since it owns the
|
||||
* filesystem mount. But binderfs does not allow creating any files,
|
||||
* including device nodes. The only way to create binder device nodes
|
||||
* is through the binder-control device, which userns root is explicitly
|
||||
* allowed to use. So removing the SB_I_NODEV flag from s_iflags is both
|
||||
* necessary and safe.
|
||||
*/
|
||||
sb->s_iflags &= ~SB_I_NODEV;
|
||||
sb->s_iflags |= SB_I_NOEXEC;
|
||||
sb->s_magic = RUST_BINDERFS_SUPER_MAGIC;
|
||||
sb->s_op = &binderfs_super_ops;
|
||||
sb->s_time_gran = 1;
|
||||
|
||||
sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL);
|
||||
if (!sb->s_fs_info)
|
||||
return -ENOMEM;
|
||||
info = sb->s_fs_info;
|
||||
|
||||
info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
|
||||
|
||||
info->root_gid = make_kgid(sb->s_user_ns, 0);
|
||||
if (!gid_valid(info->root_gid))
|
||||
info->root_gid = GLOBAL_ROOT_GID;
|
||||
info->root_uid = make_kuid(sb->s_user_ns, 0);
|
||||
if (!uid_valid(info->root_uid))
|
||||
info->root_uid = GLOBAL_ROOT_UID;
|
||||
info->mount_opts.max = ctx->max;
|
||||
info->mount_opts.stats_mode = ctx->stats_mode;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return -ENOMEM;
|
||||
|
||||
inode->i_ino = FIRST_INODE;
|
||||
inode->i_fop = &simple_dir_operations;
|
||||
inode->i_mode = S_IFDIR | 0755;
|
||||
simple_inode_init_ts(inode);
|
||||
inode->i_op = &binderfs_dir_inode_operations;
|
||||
set_nlink(inode, 2);
|
||||
|
||||
sb->s_root = d_make_root(inode);
|
||||
if (!sb->s_root)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = binderfs_binder_ctl_create(sb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
name = rust_binder_devices_param;
|
||||
for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
|
||||
strscpy(device_info.name, name, len + 1);
|
||||
ret = binderfs_binder_device_create(inode, NULL, &device_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
name += len;
|
||||
if (*name == ',')
|
||||
name++;
|
||||
}
|
||||
|
||||
ret = init_binder_features(sb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (info->mount_opts.stats_mode == binderfs_stats_mode_global)
|
||||
return init_binder_logs(sb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int binderfs_fs_context_get_tree(struct fs_context *fc)
|
||||
{
|
||||
return get_tree_nodev(fc, binderfs_fill_super);
|
||||
}
|
||||
|
||||
static void binderfs_fs_context_free(struct fs_context *fc)
|
||||
{
|
||||
struct binderfs_mount_opts *ctx = fc->fs_private;
|
||||
|
||||
kfree(ctx);
|
||||
}
|
||||
|
||||
static const struct fs_context_operations binderfs_fs_context_ops = {
|
||||
.free = binderfs_fs_context_free,
|
||||
.get_tree = binderfs_fs_context_get_tree,
|
||||
.parse_param = binderfs_fs_context_parse_param,
|
||||
.reconfigure = binderfs_fs_context_reconfigure,
|
||||
};
|
||||
|
||||
static int binderfs_init_fs_context(struct fs_context *fc)
|
||||
{
|
||||
struct binderfs_mount_opts *ctx;
|
||||
|
||||
ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->max = BINDERFS_MAX_MINOR;
|
||||
ctx->stats_mode = binderfs_stats_mode_unset;
|
||||
|
||||
fc->fs_private = ctx;
|
||||
fc->ops = &binderfs_fs_context_ops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void binderfs_kill_super(struct super_block *sb)
|
||||
{
|
||||
struct binderfs_info *info = sb->s_fs_info;
|
||||
|
||||
/*
|
||||
* During inode eviction struct binderfs_info is needed.
|
||||
* So first wipe the super_block, then free struct binderfs_info.
|
||||
*/
|
||||
kill_litter_super(sb);
|
||||
|
||||
if (info && info->ipc_ns)
|
||||
put_ipc_ns(info->ipc_ns);
|
||||
|
||||
kfree(info);
|
||||
}
|
||||
|
||||
static struct file_system_type binder_fs_type = {
|
||||
.name = "binder",
|
||||
.init_fs_context = binderfs_init_fs_context,
|
||||
.parameters = binderfs_fs_parameters,
|
||||
.kill_sb = binderfs_kill_super,
|
||||
.fs_flags = FS_USERNS_MOUNT,
|
||||
};
|
||||
|
||||
int init_rust_binderfs(void)
|
||||
{
|
||||
int ret;
|
||||
const char *name;
|
||||
size_t len;
|
||||
|
||||
/* Verify that the default binderfs device names are valid. */
|
||||
name = rust_binder_devices_param;
|
||||
for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) {
|
||||
if (len > BINDERFS_MAX_NAME)
|
||||
return -E2BIG;
|
||||
name += len;
|
||||
if (*name == ',')
|
||||
name++;
|
||||
}
|
||||
|
||||
/* Allocate new major number for binderfs. */
|
||||
ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR,
|
||||
"rust_binder");
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = register_filesystem(&binder_fs_type);
|
||||
if (ret) {
|
||||
unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
drivers/android/binder/stats.rs (new file, 88 lines)
@@ -0,0 +1,88 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
//! Keep track of statistics for binder_logs.
|
||||
|
||||
use crate::defs::*;
|
||||
use core::sync::atomic::{AtomicU32, Ordering::Relaxed};
|
||||
use kernel::{ioctl::_IOC_NR, seq_file::SeqFile, seq_print};
|
||||
|
||||
const BC_COUNT: usize = _IOC_NR(BC_REPLY_SG) as usize + 1;
|
||||
const BR_COUNT: usize = _IOC_NR(BR_TRANSACTION_PENDING_FROZEN) as usize + 1;
|
||||
|
||||
pub(crate) static GLOBAL_STATS: BinderStats = BinderStats::new();
|
||||
|
||||
pub(crate) struct BinderStats {
|
||||
bc: [AtomicU32; BC_COUNT],
|
||||
br: [AtomicU32; BR_COUNT],
|
||||
}
|
||||
|
||||
impl BinderStats {
|
||||
pub(crate) const fn new() -> Self {
|
||||
const ZERO: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
Self {
|
||||
bc: [ZERO; BC_COUNT],
|
||||
br: [ZERO; BR_COUNT],
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn inc_bc(&self, bc: u32) {
|
||||
let idx = _IOC_NR(bc) as usize;
|
||||
if let Some(bc_ref) = self.bc.get(idx) {
|
||||
bc_ref.fetch_add(1, Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn inc_br(&self, br: u32) {
|
||||
let idx = _IOC_NR(br) as usize;
|
||||
if let Some(br_ref) = self.br.get(idx) {
|
||||
br_ref.fetch_add(1, Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn debug_print(&self, prefix: &str, m: &SeqFile) {
|
||||
for (i, cnt) in self.bc.iter().enumerate() {
|
||||
let cnt = cnt.load(Relaxed);
|
||||
if cnt > 0 {
|
||||
seq_print!(m, "{}{}: {}\n", prefix, command_string(i), cnt);
|
||||
}
|
||||
}
|
||||
for (i, cnt) in self.br.iter().enumerate() {
|
||||
let cnt = cnt.load(Relaxed);
|
||||
if cnt > 0 {
|
||||
seq_print!(m, "{}{}: {}\n", prefix, return_string(i), cnt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod strings {
|
||||
use core::str::from_utf8_unchecked;
|
||||
use kernel::str::CStr;
|
||||
|
||||
extern "C" {
|
||||
static binder_command_strings: [*const u8; super::BC_COUNT];
|
||||
static binder_return_strings: [*const u8; super::BR_COUNT];
|
||||
}
|
||||
|
||||
pub(super) fn command_string(i: usize) -> &'static str {
|
||||
// SAFETY: Accessing `binder_command_strings` is always safe.
|
||||
let c_str_ptr = unsafe { binder_command_strings[i] };
|
||||
// SAFETY: The `binder_command_strings` array only contains nul-terminated strings.
|
||||
let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.as_bytes();
|
||||
// SAFETY: The `binder_command_strings` array only contains strings with ascii-chars.
|
||||
unsafe { from_utf8_unchecked(bytes) }
|
||||
}
|
||||
|
||||
pub(super) fn return_string(i: usize) -> &'static str {
|
||||
// SAFETY: Accessing `binder_return_strings` is always safe.
|
||||
let c_str_ptr = unsafe { binder_return_strings[i] };
|
||||
// SAFETY: The `binder_return_strings` array only contains nul-terminated strings.
|
||||
let bytes = unsafe { CStr::from_char_ptr(c_str_ptr) }.as_bytes();
|
||||
// SAFETY: The `binder_return_strings` array only contains strings with ascii-chars.
|
||||
unsafe { from_utf8_unchecked(bytes) }
|
||||
}
|
||||
}
|
||||
use strings::{command_string, return_string};
|
||||
drivers/android/binder/thread.rs (new file, 1722 lines; diff suppressed because it is too large)
drivers/android/binder/trace.rs (new file, 236 lines)
@@ -0,0 +1,236 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use crate::{defs::BinderTransactionDataSg, node::Node, thread::Thread, transaction::Transaction};
|
||||
|
||||
use kernel::bindings::{
|
||||
binder_transaction_data_sg, flat_binder_object, rust_binder_node, rust_binder_thread,
|
||||
rust_binder_transaction, task_struct,
|
||||
};
|
||||
use kernel::error::Result;
|
||||
use kernel::ffi::{c_int, c_uint, c_ulong};
|
||||
use kernel::task::{Pid, Task};
|
||||
use kernel::tracepoint::declare_trace;
|
||||
use kernel::uapi;
|
||||
|
||||
declare_trace! {
|
||||
unsafe fn rust_binder_ioctl(cmd: c_uint, arg: c_ulong);
|
||||
unsafe fn rust_binder_ioctl_done(ret: c_int);
|
||||
unsafe fn rust_binder_read_done(ret: c_int);
|
||||
unsafe fn rust_binder_write_done(ret: c_int);
|
||||
unsafe fn rust_binder_set_priority(thread: *mut task_struct, desired_prio: c_int, new_prio: c_int);
|
||||
unsafe fn android_vh_rust_binder_set_priority(t: rust_binder_transaction, task: *mut task_struct);
|
||||
unsafe fn android_vh_rust_binder_restore_priority(task: *mut task_struct);
|
||||
unsafe fn rust_binder_wait_for_work(proc_work: bool, transaction_stack: bool, thread_todo: bool);
|
||||
unsafe fn rust_binder_transaction(reply: bool, t: rust_binder_transaction);
|
||||
unsafe fn rust_binder_transaction_received(t: rust_binder_transaction);
|
||||
unsafe fn rust_binder_transaction_thread_selected(t: rust_binder_transaction, thread: rust_binder_thread);
|
||||
unsafe fn rust_binder_transaction_node_send(t_debug_id: c_int, n: rust_binder_node,
|
||||
orig: *const flat_binder_object,
|
||||
trans: *const flat_binder_object);
|
||||
unsafe fn rust_binder_transaction_fd_send(t_debug_id: c_int, fd: c_int, offset: usize);
|
||||
unsafe fn rust_binder_transaction_fd_recv(t_debug_id: c_int, fd: c_int, offset: usize);
|
||||
unsafe fn rust_binder_transaction_alloc_buf(debug_id: c_int, data: *const binder_transaction_data_sg);
|
||||
unsafe fn rust_binder_transaction_buffer_release(debug_id: c_int);
|
||||
unsafe fn rust_binder_transaction_failed_buffer_release(debug_id: c_int);
|
||||
unsafe fn rust_binder_transaction_update_buffer_release(debug_id: c_int);
|
||||
unsafe fn rust_binder_update_page_range(pid: c_int, allocate: bool, start: usize, end: usize);
|
||||
unsafe fn rust_binder_alloc_lru_start(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_alloc_lru_end(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_free_lru_start(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_free_lru_end(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_alloc_page_start(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_alloc_page_end(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_unmap_user_start(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_unmap_user_end(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_unmap_kernel_start(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_unmap_kernel_end(pid: c_int, page_index: usize);
|
||||
unsafe fn rust_binder_command(cmd: u32);
|
||||
unsafe fn rust_binder_return(ret: u32);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn raw_transaction(t: &Transaction) -> rust_binder_transaction {
|
||||
t as *const Transaction as rust_binder_transaction
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn raw_thread(t: &Thread) -> rust_binder_thread {
|
||||
t as *const Thread as rust_binder_thread
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn raw_node(n: &Node) -> rust_binder_node {
|
||||
n as *const Node as rust_binder_node
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_errno(ret: Result) -> i32 {
|
||||
match ret {
|
||||
Ok(()) => 0,
|
||||
Err(err) => err.to_errno(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_ioctl(cmd: u32, arg: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_ioctl(cmd, arg as c_ulong) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_ioctl_done(ret: Result) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_ioctl_done(to_errno(ret)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_read_done(ret: Result) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_read_done(to_errno(ret)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_write_done(ret: Result) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_write_done(to_errno(ret)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_set_priority(thread: &Task, desired_prio: c_int, new_prio: c_int) {
|
||||
// SAFETY: The pointer to the task is valid for the duration of this call.
|
||||
unsafe { rust_binder_set_priority(thread.as_ptr(), desired_prio, new_prio) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn vh_set_priority(t: &Transaction, task: &Task) {
|
||||
// SAFETY: The pointers to `t` and `task` are valid.
|
||||
unsafe { android_vh_rust_binder_set_priority(raw_transaction(t), task.as_ptr()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn vh_restore_priority(task: &Task) {
|
||||
// SAFETY: The pointer to `task` is valid.
|
||||
unsafe { android_vh_rust_binder_restore_priority(task.as_ptr()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_wait_for_work(proc_work: bool, transaction_stack: bool, thread_todo: bool) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_wait_for_work(proc_work, transaction_stack, thread_todo) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction(reply: bool, t: &Transaction) {
|
||||
// SAFETY: The raw transaction is valid for the duration of this call.
|
||||
unsafe { rust_binder_transaction(reply, raw_transaction(t)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_received(t: &Transaction) {
|
||||
// SAFETY: The raw transaction is valid for the duration of this call.
|
||||
unsafe { rust_binder_transaction_received(raw_transaction(t)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_thread_selected(t: &Transaction, th: &Thread) {
|
||||
// SAFETY: The raw transaction is valid for the duration of this call.
|
||||
unsafe { rust_binder_transaction_thread_selected(raw_transaction(t), raw_thread(th)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_node_send(
|
||||
t_debug_id: usize,
|
||||
n: &Node,
|
||||
orig: &uapi::flat_binder_object,
|
||||
trans: &uapi::flat_binder_object,
|
||||
) {
|
||||
// CAST: Types are identical.
|
||||
let orig = orig as *const uapi::flat_binder_object as *const flat_binder_object;
|
||||
// CAST: Types are identical.
|
||||
let trans = trans as *const uapi::flat_binder_object as *const flat_binder_object;
|
||||
|
||||
// SAFETY: The pointers are valid for the duration of this call.
|
||||
unsafe { rust_binder_transaction_node_send(t_debug_id as c_int, raw_node(n), orig, trans) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_fd_send(t_debug_id: usize, fd: u32, offset: usize) {
|
||||
// SAFETY: This function is always safe to call.
|
||||
unsafe { rust_binder_transaction_fd_send(t_debug_id as c_int, fd as c_int, offset) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_fd_recv(t_debug_id: usize, fd: u32, offset: usize) {
|
||||
// SAFETY: This function is always safe to call.
|
||||
unsafe { rust_binder_transaction_fd_recv(t_debug_id as c_int, fd as c_int, offset) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_alloc_buf(debug_id: usize, data: &BinderTransactionDataSg) {
|
||||
let data = data as *const BinderTransactionDataSg;
|
||||
// SAFETY: The `data` pointer is valid.
|
||||
unsafe { rust_binder_transaction_alloc_buf(debug_id as c_int, data.cast()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_buffer_release(debug_id: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_transaction_buffer_release(debug_id as c_int) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_failed_buffer_release(debug_id: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_transaction_failed_buffer_release(debug_id as c_int) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_transaction_update_buffer_release(debug_id: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_transaction_update_buffer_release(debug_id as c_int) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_update_page_range(pid: Pid, allocate: bool, start: usize, end: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { rust_binder_update_page_range(pid as c_int, allocate, start, end) }
|
||||
}
|
||||
|
||||
macro_rules! define_wrapper_lru_page_class {
|
||||
($(fn $name:ident;)*) => {$(
|
||||
kernel::macros::paste! {
|
||||
#[inline]
|
||||
pub(crate) fn [< trace_ $name >](pid: Pid, page_index: usize) {
|
||||
// SAFETY: Always safe to call.
|
||||
unsafe { [< rust_binder_ $name >](pid as c_int, page_index) }
|
||||
}
|
||||
}
|
||||
)*}
|
||||
}
|
||||
|
||||
define_wrapper_lru_page_class! {
|
||||
fn alloc_lru_start;
|
||||
fn alloc_lru_end;
|
||||
fn free_lru_start;
|
||||
fn free_lru_end;
|
||||
fn alloc_page_start;
|
||||
fn alloc_page_end;
|
||||
fn unmap_user_start;
|
||||
fn unmap_user_end;
|
||||
fn unmap_kernel_start;
|
||||
fn unmap_kernel_end;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_command(cmd: u32) {
|
||||
// SAFETY: Trivially safe to call with primitive u32.
|
||||
unsafe { rust_binder_command(cmd) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn trace_return(ret: u32) {
|
||||
// SAFETY: Trivially safe to call with primitive u32.
|
||||
unsafe { rust_binder_return(ret) }
|
||||
}
|
||||
drivers/android/binder/transaction.rs (new file, 557 lines)
@@ -0,0 +1,557 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
// Copyright (C) 2024 Google LLC.
|
||||
|
||||
use core::sync::atomic::{AtomicBool, Ordering};
|
||||
use kernel::{
|
||||
prelude::*,
|
||||
seq_file::SeqFile,
|
||||
seq_print,
|
||||
sync::{Arc, SpinLock},
|
||||
task::Kuid,
|
||||
time::{ktime_ms_delta, Ktime},
|
||||
types::ScopeGuard,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
allocation::{Allocation, TranslatedFds},
|
||||
defs::*,
|
||||
error::{BinderError, BinderResult},
|
||||
node::{Node, NodeRef},
|
||||
prio::{self, BinderPriority, PriorityState},
|
||||
process::{Process, ProcessInner},
|
||||
ptr_align,
|
||||
thread::{PushWorkRes, Thread},
|
||||
BinderReturnWriter, DArc, DLArc, DTRWrap, DeliverToRead,
|
||||
};
|
||||
|
||||
use core::mem::offset_of;
|
||||
use kernel::bindings::rb_transaction_layout;
|
||||
pub(crate) const TRANSACTION_LAYOUT: rb_transaction_layout = rb_transaction_layout {
|
||||
debug_id: offset_of!(Transaction, debug_id),
|
||||
code: offset_of!(Transaction, code),
|
||||
flags: offset_of!(Transaction, flags),
|
||||
from_thread: offset_of!(Transaction, from),
|
||||
to_proc: offset_of!(Transaction, to),
|
||||
target_node: offset_of!(Transaction, target_node),
|
||||
};
|
||||
|
||||
#[pin_data(PinnedDrop)]
|
||||
pub(crate) struct Transaction {
|
||||
pub(crate) debug_id: usize,
|
||||
target_node: Option<DArc<Node>>,
|
||||
pub(crate) from_parent: Option<DArc<Transaction>>,
|
||||
pub(crate) from: Arc<Thread>,
|
||||
pub(crate) to: Arc<Process>,
|
||||
#[pin]
|
||||
allocation: SpinLock<Option<Allocation>>,
|
||||
is_outstanding: AtomicBool,
|
||||
set_priority_called: AtomicBool,
|
||||
priority: BinderPriority,
|
||||
#[pin]
|
||||
saved_priority: SpinLock<BinderPriority>,
|
||||
code: u32,
|
||||
pub(crate) flags: u32,
|
||||
data_size: usize,
|
||||
offsets_size: usize,
|
||||
data_address: usize,
|
||||
sender_euid: Kuid,
|
||||
txn_security_ctx_off: Option<usize>,
|
||||
pub(crate) oneway_spam_detected: bool,
|
||||
start_time: Ktime,
|
||||
}
|
||||
|
||||
kernel::list::impl_list_arc_safe! {
|
||||
impl ListArcSafe<0> for Transaction { untracked; }
|
||||
}
|
||||
|
||||
impl Transaction {
|
||||
pub(crate) fn new(
|
||||
node_ref: NodeRef,
|
||||
from_parent: Option<DArc<Transaction>>,
|
||||
from: &Arc<Thread>,
|
||||
tr: &BinderTransactionDataSg,
|
||||
) -> BinderResult<DLArc<Self>> {
|
||||
let debug_id = super::next_debug_id();
|
||||
let trd = &tr.transaction_data;
|
||||
let allow_fds = node_ref.node.flags & FLAT_BINDER_FLAG_ACCEPTS_FDS != 0;
|
||||
let txn_security_ctx = node_ref.node.flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX != 0;
|
||||
let mut txn_security_ctx_off = if txn_security_ctx { Some(0) } else { None };
|
||||
let to = node_ref.node.owner.clone();
|
||||
let mut alloc = match from.copy_transaction_data(
|
||||
to.clone(),
|
||||
tr,
|
||||
debug_id,
|
||||
allow_fds,
|
||||
txn_security_ctx_off.as_mut(),
|
||||
) {
|
||||
Ok(alloc) => alloc,
|
||||
Err(err) => {
|
||||
if !err.is_dead() {
|
||||
pr_warn!("Failure in copy_transaction_data: {:?}", err);
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
let oneway_spam_detected = alloc.oneway_spam_detected;
|
||||
if trd.flags & TF_ONE_WAY != 0 {
|
||||
if from_parent.is_some() {
|
||||
pr_warn!("Oneway transaction should not be in a transaction stack.");
|
||||
return Err(EINVAL.into());
|
||||
}
|
||||
alloc.set_info_oneway_node(node_ref.node.clone());
|
||||
}
|
||||
if trd.flags & TF_CLEAR_BUF != 0 {
|
||||
alloc.set_info_clear_on_drop();
|
||||
}
|
||||
let target_node = node_ref.node.clone();
|
||||
alloc.set_info_target_node(node_ref);
|
||||
let data_address = alloc.ptr;
|
||||
|
||||
let priority =
|
||||
if (trd.flags & TF_ONE_WAY == 0) && prio::is_supported_policy(from.task.policy()) {
|
||||
BinderPriority {
|
||||
sched_policy: from.task.policy(),
|
||||
prio: from.task.normal_prio(),
|
||||
}
|
||||
} else {
|
||||
from.process.default_priority
|
||||
};
|
||||
|
||||
Ok(DTRWrap::arc_pin_init(pin_init!(Transaction {
|
||||
debug_id,
|
||||
target_node: Some(target_node),
|
||||
from_parent,
|
||||
sender_euid: from.process.cred.euid(),
|
||||
from: from.clone(),
|
||||
to,
|
||||
code: trd.code,
|
||||
flags: trd.flags,
|
||||
data_size: trd.data_size as _,
|
||||
offsets_size: trd.offsets_size as _,
|
||||
data_address,
|
||||
allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"),
|
||||
is_outstanding: AtomicBool::new(false),
|
||||
priority,
|
||||
saved_priority <- kernel::new_spinlock!(BinderPriority::default(), "Transaction::saved_priority"),
|
||||
set_priority_called: AtomicBool::new(false),
|
||||
txn_security_ctx_off,
|
||||
oneway_spam_detected,
|
||||
start_time: Ktime::ktime_get(),
|
||||
}))?)
|
||||
}
|
||||
|
||||
pub(crate) fn new_reply(
|
||||
from: &Arc<Thread>,
|
||||
to: Arc<Process>,
|
||||
tr: &BinderTransactionDataSg,
|
||||
allow_fds: bool,
|
||||
) -> BinderResult<DLArc<Self>> {
|
||||
let debug_id = super::next_debug_id();
|
||||
let trd = &tr.transaction_data;
|
||||
let mut alloc = match from.copy_transaction_data(to.clone(), tr, debug_id, allow_fds, None)
|
||||
{
|
||||
Ok(alloc) => alloc,
|
||||
Err(err) => {
|
||||
pr_warn!("Failure in copy_transaction_data: {:?}", err);
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
let oneway_spam_detected = alloc.oneway_spam_detected;
|
||||
if trd.flags & TF_CLEAR_BUF != 0 {
|
||||
alloc.set_info_clear_on_drop();
|
||||
}
|
||||
Ok(DTRWrap::arc_pin_init(pin_init!(Transaction {
|
||||
debug_id,
|
||||
target_node: None,
|
||||
from_parent: None,
|
||||
sender_euid: from.process.task.euid(),
|
||||
from: from.clone(),
|
||||
to,
|
||||
code: trd.code,
|
||||
flags: trd.flags,
|
||||
data_size: trd.data_size as _,
|
||||
offsets_size: trd.offsets_size as _,
|
||||
data_address: alloc.ptr,
|
||||
allocation <- kernel::new_spinlock!(Some(alloc.success()), "Transaction::new"),
|
||||
is_outstanding: AtomicBool::new(false),
|
||||
priority: BinderPriority::default(),
|
||||
saved_priority <- kernel::new_spinlock!(BinderPriority::default(), "Transaction::saved_priority"),
|
||||
set_priority_called: AtomicBool::new(false),
|
||||
txn_security_ctx_off: None,
|
||||
oneway_spam_detected,
|
||||
start_time: Ktime::ktime_get(),
|
||||
}))?)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub(crate) fn debug_print_inner(&self, m: &SeqFile, prefix: &str) {
|
||||
seq_print!(
|
||||
m,
|
||||
"{}{}: from {}:{} to {} code {:x} flags {:x} pri {}:{} elapsed {}ms",
|
||||
prefix,
|
||||
self.debug_id,
|
||||
self.from.process.task.pid(),
|
||||
self.from.id,
|
||||
self.to.task.pid(),
|
||||
self.code,
|
||||
self.flags,
|
||||
self.priority.sched_policy,
|
||||
self.priority.prio,
|
||||
ktime_ms_delta(Ktime::ktime_get(), self.start_time),
|
||||
);
|
||||
if let Some(target_node) = &self.target_node {
|
||||
seq_print!(m, " node {}", target_node.debug_id);
|
||||
}
|
||||
seq_print!(m, " size {}:{}\n", self.data_size, self.offsets_size);
|
||||
}
|
||||
|
||||
pub(crate) fn saved_priority(&self) -> BinderPriority {
|
||||
*self.saved_priority.lock()
|
||||
}
|
||||
|
||||
/// Determines if the transaction is stacked on top of the given transaction.
|
||||
pub(crate) fn is_stacked_on(&self, onext: &Option<DArc<Self>>) -> bool {
|
||||
match (&self.from_parent, onext) {
|
||||
(None, None) => true,
|
||||
(Some(from_parent), Some(next)) => Arc::ptr_eq(from_parent, next),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a pointer to the next transaction on the transaction stack, if there is one.
|
||||
pub(crate) fn clone_next(&self) -> Option<DArc<Self>> {
|
||||
Some(self.from_parent.as_ref()?.clone())
|
||||
}
|
||||
|
||||
/// Searches in the transaction stack for a thread that belongs to the target process. This is
|
||||
/// useful when finding a target for a new transaction: if the node belongs to a process that
|
||||
/// is already part of the transaction stack, we reuse the thread.
|
||||
fn find_target_thread(&self) -> Option<Arc<Thread>> {
|
||||
let mut it = &self.from_parent;
|
||||
while let Some(transaction) = it {
|
||||
if Arc::ptr_eq(&transaction.from.process, &self.to) {
|
||||
return Some(transaction.from.clone());
|
||||
}
|
||||
it = &transaction.from_parent;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Searches in the transaction stack for a transaction originating at the given thread.
|
||||
pub(crate) fn find_from(&self, thread: &Thread) -> Option<DArc<Transaction>> {
|
||||
let mut it = &self.from_parent;
|
||||
while let Some(transaction) = it {
|
||||
if core::ptr::eq(thread, transaction.from.as_ref()) {
|
||||
return Some(transaction.clone());
|
||||
}
|
||||
|
||||
it = &transaction.from_parent;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) fn set_outstanding(&self, to_process: &mut ProcessInner) {
|
||||
// No race because this method is only called once.
|
||||
if !self.is_outstanding.load(Ordering::Relaxed) {
|
||||
self.is_outstanding.store(true, Ordering::Relaxed);
|
||||
to_process.add_outstanding_txn();
|
||||
}
|
||||
}
|
||||
|
||||
/// Decrement `outstanding_txns` in `to` if it hasn't already been decremented.
|
||||
fn drop_outstanding_txn(&self) {
|
||||
// No race because this is called at most twice, and one of the calls is in the
|
||||
// destructor, which is guaranteed to not race with any other operations on the
|
||||
// transaction. It also cannot race with `set_outstanding`, since submission happens
|
||||
// before delivery.
|
||||
if self.is_outstanding.load(Ordering::Relaxed) {
|
||||
self.is_outstanding.store(false, Ordering::Relaxed);
|
||||
self.to.drop_outstanding_txn();
|
||||
}
|
||||
}
|
||||
|
||||
/// Submits the transaction to a work queue. Uses a thread if there is one in the transaction
|
||||
/// stack, otherwise uses the destination process.
|
||||
///
|
||||
/// Not used for replies.
|
||||
pub(crate) fn submit(self: DLArc<Self>) -> BinderResult {
|
||||
crate::trace::trace_transaction(false, &self);
|
||||
|
||||
// Defined before `process_inner` so that the destructor runs after releasing the lock.
|
||||
let mut _t_outdated;
|
||||
|
||||
let oneway = self.flags & TF_ONE_WAY != 0;
|
||||
let process = self.to.clone();
|
||||
let mut process_inner = process.inner.lock();
|
||||
|
||||
self.set_outstanding(&mut process_inner);
|
||||
|
||||
if oneway {
|
||||
if let Some(target_node) = self.target_node.clone() {
|
||||
if process_inner.is_frozen {
|
||||
process_inner.async_recv = true;
|
||||
if self.flags & TF_UPDATE_TXN != 0 {
|
||||
if let Some(t_outdated) =
|
||||
target_node.take_outdated_transaction(&self, &mut process_inner)
|
||||
{
|
||||
crate::trace::trace_transaction_update_buffer_release(
|
||||
t_outdated.debug_id,
|
||||
);
|
||||
// Save the transaction to be dropped after locks are released.
|
||||
_t_outdated = t_outdated;
|
||||
}
|
||||
}
|
||||
}
|
||||
match target_node.submit_oneway(self, &mut process_inner) {
|
||||
Ok(()) => {}
|
||||
Err((err, work)) => {
|
||||
drop(process_inner);
|
||||
// Drop work after releasing process lock.
|
||||
drop(work);
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
|
||||
if process_inner.is_frozen {
|
||||
return Err(BinderError::new_frozen_oneway());
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
} else {
|
||||
pr_err!("Failed to submit oneway transaction to node.");
|
||||
}
|
||||
}
|
||||
|
||||
if process_inner.is_frozen {
|
||||
process_inner.sync_recv = true;
|
||||
return Err(BinderError::new_frozen());
|
||||
}
|
||||
|
||||
let res = if let Some(thread) = self.find_target_thread() {
|
||||
match thread.push_work(self) {
|
||||
PushWorkRes::Ok => Ok(()),
|
||||
PushWorkRes::FailedDead(me) => Err((BinderError::new_dead(), me)),
|
||||
}
|
||||
} else {
|
||||
process_inner.push_work(self)
|
||||
};
|
||||
drop(process_inner);
|
||||
|
||||
match res {
|
||||
Ok(()) => Ok(()),
|
||||
Err((err, work)) => {
|
||||
// Drop work after releasing process lock.
|
||||
drop(work);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether one oneway transaction can supersede another.
|
||||
pub(crate) fn can_replace(&self, old: &Transaction) -> bool {
|
||||
if self.from.process.task.pid() != old.from.process.task.pid() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.flags & old.flags & (TF_ONE_WAY | TF_UPDATE_TXN) != (TF_ONE_WAY | TF_UPDATE_TXN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let target_node_match = match (self.target_node.as_ref(), old.target_node.as_ref()) {
|
||||
(None, None) => true,
|
||||
(Some(tn1), Some(tn2)) => Arc::ptr_eq(tn1, tn2),
|
||||
_ => false,
|
||||
};
|
||||
|
||||
self.code == old.code && self.flags == old.flags && target_node_match
|
||||
}
|
||||
|
||||
fn prepare_file_list(&self) -> Result<TranslatedFds> {
|
||||
let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?;
|
||||
|
||||
match alloc.translate_fds() {
|
||||
Ok(translated) => {
|
||||
*self.allocation.lock() = Some(alloc);
|
||||
Ok(translated)
|
||||
}
|
||||
Err(err) => {
|
||||
// Free the allocation eagerly.
|
||||
drop(alloc);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DeliverToRead for Transaction {
|
||||
fn do_work(
|
||||
self: DArc<Self>,
|
||||
thread: &Thread,
|
||||
writer: &mut BinderReturnWriter<'_>,
|
||||
) -> Result<bool> {
|
||||
let send_failed_reply = ScopeGuard::new(|| {
|
||||
if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
|
||||
let reply = Err(BR_FAILED_REPLY);
|
||||
self.from.deliver_reply(reply, &self);
|
||||
}
|
||||
self.drop_outstanding_txn();
|
||||
});
|
||||
|
||||
// Update thread priority. This only has an effect if the transaction is delivered via the
|
||||
// process work list, since the priority has otherwise already been updated.
|
||||
self.on_thread_selected(thread);
|
||||
|
||||
let files = if let Ok(list) = self.prepare_file_list() {
|
||||
list
|
||||
} else {
|
||||
// On failure to process the list, we send a reply back to the sender and ignore the
|
||||
// transaction on the recipient.
|
||||
return Ok(true);
|
||||
};
|
||||
|
||||
let mut tr_sec = BinderTransactionDataSecctx::default();
|
||||
let tr = tr_sec.tr_data();
|
||||
if let Some(target_node) = &self.target_node {
|
||||
let (ptr, cookie) = target_node.get_id();
|
||||
tr.target.ptr = ptr as _;
|
||||
tr.cookie = cookie as _;
|
||||
};
|
||||
tr.code = self.code;
|
||||
tr.flags = self.flags;
|
||||
tr.data_size = self.data_size as _;
|
||||
tr.data.ptr.buffer = self.data_address as _;
|
||||
tr.offsets_size = self.offsets_size as _;
|
||||
if tr.offsets_size > 0 {
|
||||
tr.data.ptr.offsets = (self.data_address + ptr_align(self.data_size)) as _;
|
||||
}
|
||||
tr.sender_euid = self.sender_euid.into_uid_in_current_ns();
|
||||
tr.sender_pid = 0;
|
||||
if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
|
||||
// Not a reply and not one-way.
|
||||
tr.sender_pid = self.from.process.task.pid_in_current_ns();
|
||||
}
|
||||
let code = if self.target_node.is_none() {
|
||||
BR_REPLY
|
||||
} else if self.txn_security_ctx_off.is_some() {
|
||||
BR_TRANSACTION_SEC_CTX
|
||||
} else {
|
||||
BR_TRANSACTION
|
||||
};
|
||||
|
||||
// Write the transaction code and data to the user buffer.
|
||||
writer.write_code(code)?;
|
||||
if let Some(off) = self.txn_security_ctx_off {
|
||||
tr_sec.secctx = (self.data_address + off) as u64;
|
||||
writer.write_payload(&tr_sec)?;
|
||||
} else {
|
||||
writer.write_payload(&*tr)?;
|
||||
}
|
||||
|
||||
let mut alloc = self.allocation.lock().take().ok_or(ESRCH)?;
|
||||
|
||||
// Dismiss the guard that would complete the transaction with a failure. No failure paths are allowed from
|
||||
// here on out.
|
||||
send_failed_reply.dismiss();
|
||||
|
||||
// Commit files, and set FDs in FDA to be closed on buffer free.
|
||||
let close_on_free = files.commit();
|
||||
alloc.set_info_close_on_free(close_on_free);
|
||||
|
||||
// It is now the user's responsibility to clear the allocation.
|
||||
alloc.keep_alive();
|
||||
|
||||
self.drop_outstanding_txn();
|
||||
|
||||
crate::trace::trace_transaction_received(&self);
|
||||
|
||||
// When this is not a reply and not a oneway transaction, update `current_transaction`. If
|
||||
// it's a reply, `current_transaction` has already been updated appropriately.
|
||||
if self.target_node.is_some() && tr_sec.transaction_data.flags & TF_ONE_WAY == 0 {
|
||||
thread.set_current_transaction(self);
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn cancel(self: DArc<Self>) {
|
||||
let allocation = self.allocation.lock().take();
|
||||
drop(allocation);
|
||||
|
||||
// If this is not a reply or oneway transaction, then send a dead reply.
|
||||
if self.target_node.is_some() && self.flags & TF_ONE_WAY == 0 {
|
||||
let reply = Err(BR_DEAD_REPLY);
|
||||
self.from.deliver_reply(reply, &self);
|
||||
}
|
||||
|
||||
self.drop_outstanding_txn();
|
||||
}
|
||||
|
||||
fn on_thread_selected(&self, to_thread: &Thread) {
|
||||
// Return immediately if reply.
|
||||
let target_node = match self.target_node.as_ref() {
|
||||
Some(target_node) => target_node,
|
||||
None => return,
|
||||
};
|
||||
|
||||
// We only need to do this once.
|
||||
if self.set_priority_called.swap(true, Ordering::Relaxed) {
|
||||
return;
|
||||
}
|
||||
|
||||
crate::trace::trace_transaction_thread_selected(self, to_thread);
|
||||
|
||||
let node_prio = target_node.node_prio();
|
||||
let mut desired = self.priority;
|
||||
|
||||
if !target_node.inherit_rt() && prio::is_rt_policy(desired.sched_policy) {
|
||||
desired.prio = prio::DEFAULT_PRIO;
|
||||
desired.sched_policy = prio::SCHED_NORMAL;
|
||||
}
|
||||
|
||||
if node_prio.prio < self.priority.prio
|
||||
|| (node_prio.prio == self.priority.prio && node_prio.sched_policy == prio::SCHED_FIFO)
|
||||
{
|
||||
// In case the minimum priority on the node is
|
||||
// higher (lower value), use that priority. If
|
||||
// the priority is the same, but the node uses
|
||||
// SCHED_FIFO, prefer SCHED_FIFO, since it can
|
||||
// run unbounded, unlike SCHED_RR.
|
||||
desired = node_prio;
|
||||
}
|
||||
|
||||
let mut prio_state = to_thread.prio_lock.lock();
|
||||
if prio_state.state == PriorityState::Pending {
|
||||
// Task is in the process of changing priorities;
|
||||
// saving its current values would be incorrect.
|
||||
// Instead, save the pending priority and signal
|
||||
// the task to abort the priority restore.
|
||||
prio_state.state = PriorityState::Abort;
|
||||
*self.saved_priority.lock() = prio_state.next;
|
||||
} else {
|
||||
let task = &*self.to.task;
|
||||
let mut saved_priority = self.saved_priority.lock();
|
||||
saved_priority.sched_policy = task.policy();
|
||||
saved_priority.prio = task.normal_prio();
|
||||
}
|
||||
drop(prio_state);
|
||||
|
||||
to_thread.set_priority(&desired, self);
|
||||
}
|
||||
|
||||
fn should_sync_wakeup(&self) -> bool {
|
||||
self.flags & TF_ONE_WAY == 0
|
||||
}
|
||||
|
||||
fn debug_print(&self, m: &SeqFile, _prefix: &str, tprefix: &str) -> Result<()> {
|
||||
self.debug_print_inner(m, tprefix);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[pinned_drop]
|
||||
impl PinnedDrop for Transaction {
|
||||
fn drop(self: Pin<&mut Self>) {
|
||||
self.drop_outstanding_txn();
|
||||
}
|
||||
}
|
||||
@@ -109,6 +109,7 @@ struct binder_alloc {
|
||||
int pid;
|
||||
size_t pages_high;
|
||||
bool oneway_spam_detected;
|
||||
ANDROID_OEM_DATA(1);
|
||||
};
|
||||
|
||||
#ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
|
||||
|
||||
@@ -105,6 +105,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpu_idle_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mpam_set);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_wq_lockup_pool);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_alloc_and_link_pwqs);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_alloc_workqueue);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_create_worker);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ipi_stop);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sysrq_crash);
@@ -130,6 +131,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_si_mem_available_adjust);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_si_meminfo_adjust);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_si_meminfo_adjust_shmem);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_fill_prdt);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ufs_complete_init);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ufs_reprogram_all_keys);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_prepare_command);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_update_sysfs);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_send_command);
@@ -151,6 +154,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_f2fs_restore_priority);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_f2fs_printk);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_f2fs_create);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_io_statistics);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_dpm_prepare);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ogki_check_vip_status);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ogki_task_util);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ogki_uclamp_task_util);
@@ -399,6 +403,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_count_workingset_refault);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sk_clone_lock);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_unref_folios_to_pcp_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cma_alloc_fail);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cma_alloc_start);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cma_alloc_finish);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cma_alloc_busy_info);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_vmalloc_node_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_vfree_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_modify_scan_control);
@@ -527,3 +534,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mm_direct_reclaim_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mm_may_oom_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_calculate_totalreserve_pages);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_madvise_cold_pageout_skip);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rmqueue_pcplist_override_batch);

@@ -1869,7 +1869,9 @@ int dpm_prepare(pm_message_t state)
 	 * disable probing of devices. This sync point is important at least
 	 * at boot time + hibernation restore.
 	 */
+	trace_android_rvh_dpm_prepare(0);
 	wait_for_device_probe();
+	trace_android_rvh_dpm_prepare(1);
 	/*
 	 * It is unsafe if probing of devices will happen during suspend or
 	 * hibernation and system behavior will be unpredictable in this case.

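The two trace_android_rvh_dpm_prepare() calls bracket wait_for_device_probe(), giving vendor modules a restricted hook before and after the device-probe sync point. A rough sketch of how an OEM module might attach to it; the probe signature is inferred from the 0/1 arguments at the call sites and the header path is a guess, so neither should be read as part of this diff:

#include <linux/module.h>
#include <trace/hooks/power.h>	/* assumed header; the hook's real home may differ */

/* Assumed probe signature: registration cookie first, then the int that
 * mirrors the 0 (before) / 1 (after) passed at the call sites above. */
static void oem_dpm_prepare_probe(void *unused, int after_probe_sync)
{
	pr_info("dpm_prepare: %s wait_for_device_probe()\n",
		after_probe_sync ? "after" : "before");
}

static int __init oem_dpm_hook_init(void)
{
	/* register_trace_<hook>() is generated by the vendor-hook macros. */
	return register_trace_android_rvh_dpm_prepare(oem_dpm_prepare_probe, NULL);
}
module_init(oem_dpm_hook_init);
MODULE_LICENSE("GPL");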
@@ -311,6 +311,13 @@ static void loop_clear_limits(struct loop_device *lo, int mode)
 		lim.discard_granularity = 0;
 	}
 
+	/*
+	 * XXX: this updates the queue limits without freezing the queue, which
+	 * is against the locking protocol and dangerous. But we can't just
+	 * freeze the queue as we're inside the ->queue_rq method here. So this
+	 * should move out into a workqueue unless we get the file operations to
+	 * advertise if they support specific fallocate operations.
+	 */
 	queue_limits_commit_update(lo->lo_queue, &lim);
 }
 
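The XXX comment exists because loop_clear_limits() runs from the ->queue_rq path: blk_mq_freeze_queue() waits for in-flight requests to drain, so freezing from inside request processing would wait on itself. One direction the comment suggests is deferring the update; a rough, hypothetical sketch of that shape, with struct and function names that are illustrative rather than taken from the loop driver:

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical deferral of a limits update out of ->queue_rq context. */
struct loop_limits_work {
	struct work_struct work;
	struct request_queue *q;
	struct queue_limits lim;
};

static void loop_limits_workfn(struct work_struct *work)
{
	struct loop_limits_work *w =
		container_of(work, struct loop_limits_work, work);

	/* Safe to freeze here: no longer inside request processing. */
	queue_limits_commit_update_frozen(w->q, &w->lim);
	kfree(w);
}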
@@ -770,12 +777,11 @@ static void loop_sysfs_exit(struct loop_device *lo)
 			   &loop_attribute_group);
 }
 
-static void loop_config_discard(struct loop_device *lo,
-		struct queue_limits *lim)
+static void loop_get_discard_config(struct loop_device *lo,
+		u32 *granularity, u32 *max_discard_sectors)
 {
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
-	u32 granularity = 0, max_discard_sectors = 0;
 	struct kstatfs sbuf;
 
 	/*
@@ -786,27 +792,19 @@ static void loop_config_discard(struct loop_device *lo,
 	 * file-backed loop devices: discarded regions read back as zero.
 	 */
 	if (S_ISBLK(inode->i_mode)) {
-		struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
+		struct block_device *bdev = I_BDEV(inode);
 
-		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
-		granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
-			queue_physical_block_size(backingq);
+		*max_discard_sectors = bdev_write_zeroes_sectors(bdev);
+		*granularity = bdev_discard_granularity(bdev);
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
 	 * image a.k.a. discard.
 	 */
 	} else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) {
-		max_discard_sectors = UINT_MAX >> 9;
-		granularity = sbuf.f_bsize;
+		*max_discard_sectors = UINT_MAX >> 9;
+		*granularity = sbuf.f_bsize;
 	}
-
-	lim->max_hw_discard_sectors = max_discard_sectors;
-	lim->max_write_zeroes_sectors = max_discard_sectors;
-	if (max_discard_sectors)
-		lim->discard_granularity = granularity;
-	else
-		lim->discard_granularity = 0;
 }
 
 struct loop_worker {
@@ -986,12 +984,13 @@ static unsigned int loop_default_blocksize(struct loop_device *lo,
 	return SECTOR_SIZE;
 }
 
-static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
+static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
+		unsigned int bsize)
 {
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct block_device *backing_bdev = NULL;
-	struct queue_limits lim;
+	u32 granularity = 0, max_discard_sectors = 0;
 
 	if (S_ISBLK(inode->i_mode))
 		backing_bdev = I_BDEV(inode);
@@ -1001,17 +1000,22 @@ static int loop_reconfigure_limits(struct loop_device *lo, unsigned int bsize)
 	if (!bsize)
 		bsize = loop_default_blocksize(lo, backing_bdev);
 
-	lim = queue_limits_start_update(lo->lo_queue);
-	lim.logical_block_size = bsize;
-	lim.physical_block_size = bsize;
-	lim.io_min = bsize;
-	lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
+	loop_get_discard_config(lo, &granularity, &max_discard_sectors);
+
+	lim->logical_block_size = bsize;
+	lim->physical_block_size = bsize;
+	lim->io_min = bsize;
+	lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
 	if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY))
-		lim.features |= BLK_FEAT_WRITE_CACHE;
+		lim->features |= BLK_FEAT_WRITE_CACHE;
 	if (backing_bdev && !bdev_nonrot(backing_bdev))
-		lim.features |= BLK_FEAT_ROTATIONAL;
-	loop_config_discard(lo, &lim);
-	return queue_limits_commit_update(lo->lo_queue, &lim);
+		lim->features |= BLK_FEAT_ROTATIONAL;
+	lim->max_hw_discard_sectors = max_discard_sectors;
+	lim->max_write_zeroes_sectors = max_discard_sectors;
+	if (max_discard_sectors)
+		lim->discard_granularity = granularity;
+	else
+		lim->discard_granularity = 0;
 }
 
 static int loop_configure(struct loop_device *lo, blk_mode_t mode,
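With this refactor, loop_update_limits() only fills in a caller-provided queue_limits; each caller decides how to start and commit the update. A minimal sketch of the resulting pattern, assuming a request_queue named q and a block size bsize (placeholder names, not code from the series):

#include <linux/blkdev.h>

/* Sketch of the start/fill/commit split the series converges on. */
static int example_update_block_size(struct request_queue *q, unsigned int bsize)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* takes the limits lock */
	lim.logical_block_size = bsize;
	lim.physical_block_size = bsize;
	lim.io_min = bsize;
	/* Commit with the queue frozen; the helper freezes only after the
	 * limits lock is held, keeping the lock order consistent. */
	return queue_limits_commit_update_frozen(q, &lim);
}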
@@ -1020,6 +1024,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
 {
 	struct file *file = fget(config->fd);
 	struct address_space *mapping;
+	struct queue_limits lim;
 	int error;
 	loff_t size;
 	bool partscan;
@@ -1091,7 +1096,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-	error = loop_reconfigure_limits(lo, config->block_size);
+	lim = queue_limits_start_update(lo->lo_queue);
+	loop_update_limits(lo, &lim, config->block_size);
+	/* No need to freeze the queue as the device isn't bound yet. */
+	error = queue_limits_commit_update(lo->lo_queue, &lim);
 	if (error)
 		goto out_unlock;
 
@@ -1151,7 +1159,12 @@ static void __loop_clr_fd(struct loop_device *lo)
 	lo->lo_sizelimit = 0;
 	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
 
-	/* reset the block size to the default */
+	/*
+	 * Reset the block size to the default.
+	 *
+	 * No queue freezing needed because this is called from the final
+	 * ->release call only, so there can't be any outstanding I/O.
+	 */
 	lim = queue_limits_start_update(lo->lo_queue);
 	lim.logical_block_size = SECTOR_SIZE;
 	lim.physical_block_size = SECTOR_SIZE;
@@ -1459,6 +1472,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 
 static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 {
+	struct queue_limits lim;
 	int err = 0;
 
 	if (lo->lo_state != Lo_bound)
@@ -1470,8 +1484,11 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 	sync_blockdev(lo->lo_device);
 	invalidate_bdev(lo->lo_device);
 
+	lim = queue_limits_start_update(lo->lo_queue);
+	loop_update_limits(lo, &lim, arg);
+
 	blk_mq_freeze_queue(lo->lo_queue);
-	err = loop_reconfigure_limits(lo, arg);
+	err = queue_limits_commit_update(lo->lo_queue, &lim);
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
@@ -327,8 +327,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
 		nsock->sent = 0;
 }
 
-static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
-		loff_t blksize)
+static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize)
 {
 	struct queue_limits lim;
 	int error;
@@ -368,7 +367,7 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
 
 	lim.logical_block_size = blksize;
 	lim.physical_block_size = blksize;
-	error = queue_limits_commit_update(nbd->disk->queue, &lim);
+	error = queue_limits_commit_update_frozen(nbd->disk->queue, &lim);
 	if (error)
 		return error;
 
@@ -379,18 +378,6 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
 	return 0;
 }
 
-static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
-		loff_t blksize)
-{
-	int error;
-
-	blk_mq_freeze_queue(nbd->disk->queue);
-	error = __nbd_set_size(nbd, bytesize, blksize);
-	blk_mq_unfreeze_queue(nbd->disk->queue);
-
-	return error;
-}
-
 static void nbd_complete_rq(struct request *req)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
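The nbd change folds the freeze/unfreeze pair into queue_limits_commit_update_frozen(), which is why the __nbd_set_size() wrapper can be dropped. The helper comes from the "queue_limits_commit_update_frozen" backport listed above and amounts to roughly the following; this is a paraphrase for reference, not the exact tree contents:

#include <linux/blk-mq.h>
#include <linux/blkdev.h>

/* Approximate shape of the helper: commit a prepared limits update with
 * the queue frozen around the commit only. */
int queue_limits_commit_update_frozen(struct request_queue *q,
		struct queue_limits *lim)
{
	int ret;

	blk_mq_freeze_queue(q);
	ret = queue_limits_commit_update(q, lim);
	blk_mq_unfreeze_queue(q);

	return ret;
}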
Some files were not shown because too many files have changed in this diff.