Merge android16-6.12 into android16-6.12-lts

This merges the android16-6.12 branch into the -lts branch, bringing
it up to date with the latest changes in that branch.

It contains the following commits:

* 5da94eacba ANDROID: rust_binder: print and avoid underflow with invalid refcnt
* 3744083cdb ANDROID: rust: miscdevice: fix formatting
* 4f7b72e026 FROMLIST: gendwarfksyms: order -T symtypes output by name
* 1493f0937f ANDROID: rust_binder: allow PollTable to be null
* 2a0e6416e5 FROMLIST: poll: rust: allow poll_table ptrs to be null
* 68f4f0b069 ANDROID: virt: gunyah: Fix ADDRSPACE_VMMIO_CONFIGURE hypercall handling
* 153a0a38e9 ANDROID: GKI: Update xiaomi symbol list.
* 111ee63e22 FROMGIT: dm: Check for forbidden splitting of zone write operations
* 177d6611a9 FROMGIT: dm: dm-crypt: Do not partially accept write BIOs with zoned targets
* 61e56b7483 FROMGIT: dm: Always split write BIOs to zoned device limits
* 5d1966b61d FROMGIT: block: Introduce bio_needs_zone_write_plugging()
* 5aca882326 BACKPORT: block: Make REQ_OP_ZONE_FINISH a write operation
* 1895d73589 ANDROID: "Revert: dm: Split zoned writes at the top of the stack"
* 8c8d462c9d ANDROID: GKI: add vendor_hook android_vh_health_report
* 1ccb219f8c ANDROID: GKI: update the pixel ABI symbol list
* 273b788415 FROMLIST: arm64: perf: Make exporting of pmu events configurable
* 22d634eb24 FROMLIST: KVM: arm64: Fix error path in init_hyp_mode()
* cd7cc3a247 ANDROID: rust_binder: do not take refcount during GET_NODE_INFO_FOR_REF
* d47caa0b28 ANDROID: rust_binder: reduce counts in NodeRef::absorb
* bc31899f7c ANDROID: KVM: arm64: Always unmap the pvmfw region at stage-2
* 453cb67e8f ANDROID: gki: Enable pkvm pviommu driver

Change-Id: If3192f301916efd2250924270d67b8ed0f82b0ef
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
Greg Kroah-Hartman
2025-06-30 12:13:27 +00:00
26 changed files with 325 additions and 106 deletions

View File

@@ -1541,6 +1541,10 @@
Format: { "fix" }
Permit 'security.evm' to be updated regardless of
current integrity status.
export_pmu_events
[KNL,ARM64] Sets the PMU export bit (PMCR_EL0.X), which enables
the exporting of events over an IMPLEMENTATION DEFINED PMU event
export bus to another device.
early_page_ext [KNL,EARLY] Enforces page_ext initialization to earlier
stages so cover more early boot allocations.

View File

@@ -286,6 +286,17 @@ domain names are in general different. For a detailed discussion
see the ``hostname(1)`` man page.
export_pmu_events (arm64 only)
==============================
Controls the PMU export bit (PMCR_EL0.X), which enables the exporting of
events over an IMPLEMENTATION DEFINED PMU event export bus to another device.
0: disables exporting of events (default).
1: enables exporting of events.
firmware_config
===============

View File

@@ -626,6 +626,7 @@ CONFIG_HWSPINLOCK=y
# CONFIG_SUN50I_ERRATUM_UNKNOWN1 is not set
CONFIG_MAILBOX=y
CONFIG_IOMMU_IO_PGTABLE_ARMV7S=y
CONFIG_PKVM_PVIOMMU=y
CONFIG_REMOTEPROC=y
CONFIG_REMOTEPROC_CDEV=y
CONFIG_RPMSG_CHAR=y

View File

@@ -2391,7 +2391,9 @@ static void __init teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu])
continue;
if (free_sve) {
struct cpu_sve_state *sve_state;
@@ -2399,6 +2401,9 @@ static void __init teardown_hyp_mode(void)
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
}
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
}
}

View File

@@ -85,6 +85,7 @@ static void __init sort_memblock_regions(void)
static int __init register_memblock_regions(void)
{
struct memblock_region *reg;
bool pvmfw_in_mem = false;
for_each_mem_region(reg) {
if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
@@ -92,6 +93,26 @@ static int __init register_memblock_regions(void)
hyp_memory[*hyp_memblock_nr_ptr] = *reg;
(*hyp_memblock_nr_ptr)++;
if (!pvmfw_size || pvmfw_in_mem ||
!memblock_addrs_overlap(reg->base, reg->size, pvmfw_base, pvmfw_size))
continue;
/* If the pvmfw region overlaps a memblock, it must be a subset */
if (pvmfw_base < reg->base || (pvmfw_base + pvmfw_size) > (reg->base + reg->size))
return -EINVAL;
pvmfw_in_mem = true;
}
if (pvmfw_size && !pvmfw_in_mem) {
if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
return -ENOMEM;
hyp_memory[*hyp_memblock_nr_ptr] = (struct memblock_region) {
.base = pvmfw_base,
.size = pvmfw_size,
.flags = MEMBLOCK_NOMAP,
};
(*hyp_memblock_nr_ptr)++;
}
sort_memblock_regions();

View File

@@ -3139,8 +3139,10 @@ void blk_mq_submit_bio(struct bio *bio)
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
goto queue_exit;
if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
goto queue_exit;
if (bio_needs_zone_write_plugging(bio)) {
if (blk_zone_plug_bio(bio, nr_segs))
goto queue_exit;
}
new_request:
if (!rq) {

View File

@@ -1131,25 +1131,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
{
struct block_device *bdev = bio->bi_bdev;
if (!bdev->bd_disk->zone_wplugs_hash)
return false;
/*
* If the BIO already has the plugging flag set, then it was already
* handled through this path and this is a submission from the zone
* plug bio submit work.
*/
if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
return false;
/*
* We do not need to do anything special for empty flush BIOs, e.g
* BIOs such as issued by blkdev_issue_flush(). The is because it is
* the responsibility of the user to first wait for the completion of
* write operations for flush to have any effect on the persistence of
* the written data.
*/
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
if (WARN_ON_ONCE(!bdev->bd_disk->zone_wplugs_hash))
return false;
/*

View File

@@ -827,6 +827,22 @@ impl NodeRef {
other.weak_count = 0;
other.strong_node_count = 0;
other.weak_node_count = 0;
if self.strong_node_count >= 2 || self.weak_node_count >= 2 {
let mut guard = self.node.owner.inner.lock();
let inner = self.node.inner.access_mut(&mut guard);
if self.strong_node_count >= 2 {
inner.strong.count -= self.strong_node_count - 1;
self.strong_node_count = 1;
assert_ne!(inner.strong.count, 0);
}
if self.weak_node_count >= 2 {
inner.weak.count -= self.weak_node_count - 1;
self.weak_node_count = 1;
assert_ne!(inner.weak.count, 0);
}
}
}
pub(crate) fn get_count(&self) -> (usize, usize) {
@@ -874,6 +890,13 @@ impl NodeRef {
}
*count += 1;
} else {
if *count == 0 {
pr_warn!(
"pid {} performed invalid decrement on ref\n",
kernel::current!().pid()
);
return false;
}
*count -= 1;
if *count == 0 {
self.node.update_refcount(false, *node_count, strong);

View File

@@ -926,6 +926,8 @@ impl Process {
refs.by_handle.remove(&handle);
refs.by_node.remove(&id);
}
} else {
pr_warn!("{}: no such ref {handle}\n", kernel::current!().pid());
}
Ok(())
}
@@ -1141,11 +1143,10 @@ impl Process {
return Err(EPERM);
}
let node_ref = self
.get_node_from_handle(out.handle, true)
.or(Err(EINVAL))?;
// Get the counts from the node.
{
let mut node_refs = self.node_refs.lock();
let node_info = node_refs.by_handle.get_mut(&out.handle).ok_or(ENOENT)?;
let node_ref = node_info.node_ref();
let owner_inner = node_ref.node.owner.inner.lock();
node_ref.node.populate_counts(&mut out, &owner_inner);
}
@@ -1634,7 +1635,7 @@ impl Process {
pub(crate) fn poll(
this: ArcBorrow<'_, Process>,
file: &File,
table: &mut PollTable,
table: PollTable<'_>,
) -> Result<u32> {
let thread = this.get_current_thread()?;
let (from_proc, mut mask) = thread.poll(file, table);

View File

@@ -472,7 +472,7 @@ unsafe extern "C" fn rust_binder_poll(
// SAFETY: The caller ensures that the file is valid.
let fileref = unsafe { File::from_raw_file(file) };
// SAFETY: The caller ensures that the `PollTable` is valid.
match Process::poll(f, fileref, unsafe { PollTable::from_ptr(wait) }) {
match Process::poll(f, fileref, unsafe { PollTable::from_raw(wait) }) {
Ok(v) => v,
Err(_) => bindings::POLLERR,
}

View File

@@ -1614,7 +1614,7 @@ impl Thread {
ret
}
pub(crate) fn poll(&self, file: &File, table: &mut PollTable) -> (bool, u32) {
pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) {
table.register_wait(file, &self.work_condvar);
let mut inner = self.inner.lock();
(inner.should_use_process_work_queue(), inner.poll())

View File

@@ -33,6 +33,7 @@
#include <trace/hooks/pm_domain.h>
#include <trace/hooks/cpuidle_psci.h>
#include <trace/hooks/vmscan.h>
#include <trace/hooks/health_report.h>
#include <trace/hooks/avc.h>
#include <trace/hooks/creds.h>
#include <trace/hooks/selinux.h>
@@ -203,6 +204,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_tune_mmap_readaround);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_hw_protection_shutdown);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_slab_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_do_shrink_slab_ex);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_health_report);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_drain_all_pages_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_security_audit_log_setid);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_selinux_avc_insert);

View File

@@ -253,17 +253,35 @@ MODULE_PARM_DESC(max_read_size, "Maximum size of a read request");
static unsigned int max_write_size = 0;
module_param(max_write_size, uint, 0644);
MODULE_PARM_DESC(max_write_size, "Maximum size of a write request");
static unsigned get_max_request_size(struct crypt_config *cc, bool wrt)
static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio)
{
struct crypt_config *cc = ti->private;
unsigned val, sector_align;
val = !wrt ? READ_ONCE(max_read_size) : READ_ONCE(max_write_size);
if (likely(!val))
val = !wrt ? DM_CRYPT_DEFAULT_MAX_READ_SIZE : DM_CRYPT_DEFAULT_MAX_WRITE_SIZE;
if (wrt || cc->used_tag_size) {
if (unlikely(val > BIO_MAX_VECS << PAGE_SHIFT))
val = BIO_MAX_VECS << PAGE_SHIFT;
bool wrt = op_is_write(bio_op(bio));
if (wrt) {
/*
* For zoned devices, splitting write operations creates the
* risk of deadlocking queue freeze operations with zone write
* plugging BIO work when the reminder of a split BIO is
* issued. So always allow the entire BIO to proceed.
*/
if (ti->emulate_zone_append)
return bio_sectors(bio);
val = min_not_zero(READ_ONCE(max_write_size),
DM_CRYPT_DEFAULT_MAX_WRITE_SIZE);
} else {
val = min_not_zero(READ_ONCE(max_read_size),
DM_CRYPT_DEFAULT_MAX_READ_SIZE);
}
sector_align = max(bdev_logical_block_size(cc->dev->bdev), (unsigned)cc->sector_size);
if (wrt || cc->used_tag_size)
val = min(val, BIO_MAX_VECS << PAGE_SHIFT);
sector_align = max(bdev_logical_block_size(cc->dev->bdev),
(unsigned)cc->sector_size);
val = round_down(val, sector_align);
if (unlikely(!val))
val = sector_align;
@@ -3517,7 +3535,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/*
* Check if bio is too large, split as needed.
*/
max_sectors = get_max_request_size(cc, bio_data_dir(bio) == WRITE);
max_sectors = get_max_request_sectors(ti, bio);
if (unlikely(bio_sectors(bio) > max_sectors))
dm_accept_partial_bio(bio, max_sectors);
@@ -3754,6 +3772,17 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
max_t(unsigned int, limits->physical_block_size, cc->sector_size);
limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
limits->dma_alignment = limits->logical_block_size - 1;
/*
* For zoned dm-crypt targets, there will be no internal splitting of
* write BIOs to avoid exceeding BIO_MAX_VECS vectors per BIO. But
* without respecting this limit, crypt_alloc_buffer() will trigger a
* BUG(). Avoid this by forcing DM core to split write BIOs to this
* limit.
*/
if (ti->emulate_zone_append)
limits->max_hw_sectors = min(limits->max_hw_sectors,
BIO_MAX_VECS << PAGE_SECTORS_SHIFT);
}
static struct target_type crypt_target = {

View File

@@ -1307,8 +1307,9 @@ out:
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
* operations, REQ_OP_ZONE_APPEND (zone append writes) and any bio serviced by
* __send_duplicate_bios().
* operations, zone append writes (native with REQ_OP_ZONE_APPEND or emulated
* with write BIOs flagged with BIO_EMULATES_ZONE_APPEND) and any bio serviced
* by __send_duplicate_bios().
*
* dm_accept_partial_bio informs the dm that the target only wants to process
* additional n_sectors sectors of the bio and the rest of the data should be
@@ -1341,11 +1342,19 @@ void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors)
unsigned int bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
BUG_ON(op_is_zone_mgmt(bio_op(bio)));
BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
BUG_ON(bio_sectors > *tio->len_ptr);
BUG_ON(n_sectors > bio_sectors);
if (static_branch_unlikely(&zoned_enabled) &&
unlikely(bdev_is_zoned(bio->bi_bdev))) {
enum req_op op = bio_op(bio);
BUG_ON(op_is_zone_mgmt(op));
BUG_ON(op == REQ_OP_WRITE);
BUG_ON(op == REQ_OP_WRITE_ZEROES);
BUG_ON(op == REQ_OP_ZONE_APPEND);
}
*tio->len_ptr -= bio_sectors - n_sectors;
bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
@@ -1788,18 +1797,34 @@ static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
struct bio *bio)
{
/*
* For a mapped device that needs zone append emulation, we must
* split any large BIO that straddles zone boundaries. Additionally,
* split sequential zoned writes to prevent that splitting lower in the
* stack causes bio reordering.
* Special case the zone operations that cannot or should not be split.
*/
return ((dm_emulate_zone_append(md) && bio_straddles_zones(bio)) ||
(bio_op(bio) == REQ_OP_WRITE && bdev_is_zoned(bio->bi_bdev))) &&
!bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
switch (bio_op(bio)) {
case REQ_OP_ZONE_APPEND:
case REQ_OP_ZONE_FINISH:
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
return false;
default:
break;
}
/*
* Mapped devices that require zone append emulation will use the block
* layer zone write plugging. In such case, we must split any large BIO
* to the mapped device limits to avoid potential deadlocks with queue
* freeze operations.
*/
if (!dm_emulate_zone_append(md))
return false;
return bio_needs_zone_write_plugging(bio) || bio_straddles_zones(bio);
}
static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
if (!bio_needs_zone_write_plugging(bio))
return false;
return blk_zone_plug_bio(bio, 0);
}
static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
@@ -1943,9 +1968,7 @@ static void dm_split_and_process_bio(struct mapped_device *md,
is_abnormal = is_abnormal_io(bio);
if (static_branch_unlikely(&zoned_enabled)) {
/* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
(is_abnormal || dm_zone_bio_needs_split(md, bio));
need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
} else {
need_split = is_abnormal;
}

View File

@@ -325,6 +325,7 @@ GEN_PMU_FORMAT_ATTR(threshold_compare);
GEN_PMU_FORMAT_ATTR(threshold);
static int sysctl_perf_user_access __read_mostly;
static int sysctl_export_pmu_events __read_mostly;
static bool armv8pmu_event_is_64bit(struct perf_event *event)
{
@@ -1053,6 +1054,17 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
/*
 * Early boot parameter handler for "export_pmu_events".
 *
 * The mere presence of the parameter on the kernel command line enables
 * exporting of PMU events (@str is ignored).  This sets the same flag as
 * the "export_pmu_events" sysctl; the flag is consumed in armv8pmu_reset(),
 * where it causes PMCR_EL0.X to be set.
 */
static int __init export_pmu_events(char *str)
{
	/* Enable exporting of pmu events at early bootup with kernel
	 * arguments.
	 */
	sysctl_export_pmu_events = 1;
	return 0;	/* 0 = parameter handled successfully */
}
early_param("export_pmu_events", export_pmu_events);
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1077,6 +1089,9 @@ static void armv8pmu_reset(void *info)
if (armv8pmu_has_long_event(cpu_pmu))
pmcr |= ARMV8_PMU_PMCR_LP;
if (sysctl_export_pmu_events)
pmcr |= ARMV8_PMU_PMCR_X;
armv8pmu_pmcr_write(pmcr);
}
@@ -1280,6 +1295,15 @@ static struct ctl_table armv8_pmu_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "export_pmu_events",
.data = &sysctl_export_pmu_events,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
};
static void armv8_pmu_register_sysctl_table(void)

View File

@@ -33,7 +33,8 @@ static int gunyah_mmio_guard_ioremap_hook(phys_addr_t phys, size_t size, pgprot_
ret = gunyah_hypercall_addrspc_configure_vmmio_range(our_addrspace_capid,
phys, size, GUNYAH_ADDRSPACE_VMMIO_CONFIGURE_OP_ADD_RANGE);
if (ret == GUNYAH_ERROR_UNIMPLEMENTED || ret == GUNYAH_ERROR_BUSY)
if (ret == GUNYAH_ERROR_UNIMPLEMENTED || ret == GUNYAH_ERROR_BUSY
|| ret == GUNYAH_ERROR_CSPACE_INSUF_RIGHTS)
/* Gunyah would have configured VMMIO via DT */
ret = GUNYAH_ERROR_OK;

View File

@@ -1432,6 +1432,7 @@
kthread_stop_put
kthread_unpark
kthread_worker_fn
ktime_add_safe
ktime_get
ktime_get_coarse_ts64
ktime_get_coarse_with_offset

View File

@@ -89,6 +89,7 @@
#required by stability
sock_from_file
get_dmabuf_debugfs_data
__traceiter_android_vh_cma_alloc_fail
__tracepoint_android_vh_cma_alloc_fail
__traceiter_android_vh_warn_alloc_show_mem_bypass

View File

@@ -443,7 +443,7 @@ static inline enum req_op bio_op(const struct bio *bio)
static inline bool op_is_write(blk_opf_t op)
{
return !!(op & (__force blk_opf_t)1);
return (op & (__force blk_opf_t)1) || op == REQ_OP_ZONE_FINISH;
}
/*

View File

@@ -840,6 +840,61 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
(sb->s_blocksize_bits - SECTOR_SHIFT);
}
#ifdef CONFIG_BLK_DEV_ZONED
/**
 * bio_needs_zone_write_plugging - Check if a BIO needs to be handled with zone
 * write plugging
 * @bio: The BIO being submitted
 *
 * Return true whenever @bio execution needs to be handled through zone
 * write plugging (using blk_zone_plug_bio()). Return false otherwise.
 */
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
{
	enum req_op op = bio_op(bio);

	/*
	 * Only zoned block devices have a zone write plug hash table. But not
	 * all of them have one (e.g. DM devices may not need one).
	 */
	if (!bio->bi_bdev->bd_disk->zone_wplugs_hash)
		return false;

	/* Only write operations need zone write plugging. */
	if (!op_is_write(op))
		return false;

	/* Ignore empty flush */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;

	/* Ignore BIOs that already have been handled by zone write plugging. */
	if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
		return false;

	/*
	 * All zone write operations must be handled through zone write plugging
	 * using blk_zone_plug_bio().
	 */
	switch (op) {
	case REQ_OP_ZONE_APPEND:
	case REQ_OP_WRITE:
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_ZONE_FINISH:
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_RESET_ALL:
		return true;
	default:
		return false;
	}
}

#else /* CONFIG_BLK_DEV_ZONED */

/* Without zoned block device support, no BIO ever needs zone write plugging. */
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
{
	return false;
}

#endif
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void put_disk(struct gendisk *disk);

View File

@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM health_report
#define TRACE_INCLUDE_PATH trace/hooks

#if !defined(_TRACE_HOOK_HEALTH_REPORT_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HOOK_HEALTH_REPORT_H

#include <trace/hooks/vendor_hooks.h>

/*
 * Android vendor hook for reporting health/error events to vendor modules:
 * an error code plus the reporting function name and line number.
 * NOTE(review): @entries/@nr_entries presumably carry a captured stack
 * trace (kernel stack-trace convention) -- confirm against the callers.
 */
DECLARE_HOOK(android_vh_health_report,
	TP_PROTO(unsigned int err_code, const char *func, unsigned int line,
		unsigned long *entries, unsigned int nr_entries),
	TP_ARGS(err_code, func, line, entries, nr_entries));

#endif /* _TRACE_HOOK_HEALTH_REPORT_H */

/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@@ -21,6 +21,7 @@
#include "mman.c"
#include "mutex.c"
#include "page.c"
#include "poll.c"
#include "rbtree.c"
#include "refcount.c"
#include "security.c"

10
rust/helpers/poll.c Normal file
View File

@@ -0,0 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/poll.h>
/*
 * C shim exposing poll_wait() to Rust.
 *
 * NOTE(review): following the usual rust_helper_* pattern, this exists
 * because poll_wait() appears not to be directly callable from Rust
 * bindings (e.g. it is a static inline with no linkable symbol) --
 * confirm against rust/bindings usage.
 */
void rust_helper_poll_wait(struct file *filp, wait_queue_head_t *wait_address,
			   poll_table *p)
{
	poll_wait(filp, wait_address, p);
}

View File

@@ -20,12 +20,7 @@ use crate::{
str::CStr,
types::{AsBytes, ForeignOwnable, Opaque},
};
use core::{
marker::PhantomData,
mem::MaybeUninit,
pin::Pin,
ptr::NonNull,
};
use core::{marker::PhantomData, mem::MaybeUninit, pin::Pin, ptr::NonNull};
/// The kernel `loff_t` type.
#[allow(non_camel_case_types)]

View File

@@ -9,9 +9,8 @@ use crate::{
fs::File,
prelude::*,
sync::{CondVar, LockClassKey},
types::Opaque,
};
use core::ops::Deref;
use core::{marker::PhantomData, ops::Deref};
/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
#[macro_export]
@@ -23,58 +22,43 @@ macro_rules! new_poll_condvar {
};
}
/// Wraps the kernel's `struct poll_table`.
/// Wraps the kernel's `poll_table`.
///
/// # Invariants
///
/// This struct contains a valid `struct poll_table`.
///
/// For a `struct poll_table` to be valid, its `_qproc` function must follow the safety
/// requirements of `_qproc` functions:
///
/// * The `_qproc` function is given permission to enqueue a waiter to the provided `poll_table`
/// during the call. Once the waiter is removed and an rcu grace period has passed, it must no
/// longer access the `wait_queue_head`.
/// The pointer must be null or reference a valid `poll_table`.
#[repr(transparent)]
pub struct PollTable(Opaque<bindings::poll_table>);
pub struct PollTable<'a> {
table: *mut bindings::poll_table,
_lifetime: PhantomData<&'a bindings::poll_table>,
}
impl PollTable {
/// Creates a reference to a [`PollTable`] from a valid pointer.
impl<'a> PollTable<'a> {
/// Creates a [`PollTable`] from a valid pointer.
///
/// # Safety
///
/// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll
/// table (as defined in the type invariants).
///
/// The caller must also ensure that the `poll_table` is only accessed via the returned
/// reference for the duration of 'a.
pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable {
// SAFETY: The safety requirements guarantee the validity of the dereference, while the
// `PollTable` type being transparent makes the cast ok.
unsafe { &mut *ptr.cast() }
}
fn get_qproc(&self) -> bindings::poll_queue_proc {
let ptr = self.0.get();
// SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc`
// field is not modified concurrently with this call since we have an immutable reference.
unsafe { (*ptr)._qproc }
/// The pointer must be null or reference a valid `poll_table` for the duration of `'a`.
pub unsafe fn from_raw(table: *mut bindings::poll_table) -> Self {
// INVARIANTS: The safety requirements are the same as the struct invariants.
PollTable {
table,
_lifetime: PhantomData,
}
}
/// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified
/// using the condition variable.
pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) {
if let Some(qproc) = self.get_qproc() {
// SAFETY: The pointers to `file` and `self` need to be valid for the duration of this
// call to `qproc`, which they are because they are references.
//
// The `cv.wait_queue_head` pointer must be valid until an rcu grace period after the
// waiter is removed. The `PollCondVar` is pinned, so before `cv.wait_queue_head` can
// be destroyed, the destructor must run. That destructor first removes all waiters,
// and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for
// long enough.
unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) };
}
pub fn register_wait(&self, file: &File, cv: &PollCondVar) {
// SAFETY:
// * `file.as_ptr()` references a valid file for the duration of this call.
// * `self.table` is null or references a valid poll_table for the duration of this call.
// * Since `PollCondVar` is pinned, its destructor is guaranteed to run before the memory
// containing `cv.wait_queue_head` is invalidated. Since the destructor clears all
// waiters and then waits for an rcu grace period, it's guaranteed that
// `cv.wait_queue_head` remains valid for at least an rcu grace period after the removal
// of the last waiter.
unsafe { bindings::poll_wait(file.as_ptr(), cv.wait_queue_head.get(), self.table) }
}
}

View File

@@ -6,6 +6,8 @@
#define _GNU_SOURCE
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "gendwarfksyms.h"
@@ -179,20 +181,41 @@ static int type_map_get(const char *name, struct type_expansion **res)
return -1;
}
/*
 * qsort() comparator for an array of struct type_expansion pointers:
 * orders entries lexicographically by their name.
 */
static int cmp_expansion_name(const void *p1, const void *p2)
{
	struct type_expansion *const *lhs = p1;
	struct type_expansion *const *rhs = p2;

	return strcmp((*lhs)->name, (*rhs)->name);
}
static void type_map_write(FILE *file)
{
struct type_expansion *e;
struct hlist_node *tmp;
struct type_expansion **es;
size_t count = 0;
size_t i = 0;
if (!file)
return;
hash_for_each_safe(type_map, e, tmp, hash) {
checkp(fputs(e->name, file));
hash_for_each_safe(type_map, e, tmp, hash)
++count;
es = xmalloc(count * sizeof(struct type_expansion *));
hash_for_each_safe(type_map, e, tmp, hash)
es[i++] = e;
qsort(es, count, sizeof(struct type_expansion *), cmp_expansion_name);
for (i = 0; i < count; ++i) {
checkp(fputs(es[i]->name, file));
checkp(fputs(" ", file));
type_list_write(&e->expanded, file);
type_list_write(&es[i]->expanded, file);
checkp(fputs("\n", file));
}
free(es);
}
static void type_map_free(void)