Merge branch kvm-arm64/selftest/s2-faults into kvmarm-master/next
* kvm-arm64/selftest/s2-faults: : . : New KVM/arm64 selftests exercising various sorts of S2 faults, courtesy : of Ricardo Koller. From the cover letter: : : "This series adds a new aarch64 selftest for testing stage 2 fault handling : for various combinations of guest accesses (e.g., write, S1PTW), backing : sources (e.g., anon), and types of faults (e.g., read on hugetlbfs with a : hole, write on a readonly memslot). Each test tries a different combination : and then checks that the access results in the right behavior (e.g., uffd : faults with the right address and write/read flag). [...]" : . KVM: selftests: aarch64: Add mix of tests into page_fault_test KVM: selftests: aarch64: Add readonly memslot tests into page_fault_test KVM: selftests: aarch64: Add dirty logging tests into page_fault_test KVM: selftests: aarch64: Add userfaultfd tests into page_fault_test KVM: selftests: aarch64: Add aarch64/page_fault_test KVM: selftests: Use the right memslot for code, page-tables, and data allocations KVM: selftests: Fix alignment in virt_arch_pgd_alloc() and vm_vaddr_alloc() KVM: selftests: Add vm->memslots[] and enum kvm_mem_region_type KVM: selftests: Stash backing_src_type in struct userspace_mem_region tools: Copy bitfield.h from the kernel sources KVM: selftests: aarch64: Construct DEFAULT_MAIR_EL1 using sysreg.h macros KVM: selftests: Add missing close and munmap in __vm_mem_region_delete() KVM: selftests: aarch64: Add virt_get_pte_hva() library function KVM: selftests: Add a userfaultfd library Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2014 Felix Fietkau <nbd@nbd.name>
|
||||
* Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_BITFIELD_H
|
||||
#define _LINUX_BITFIELD_H
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
/*
|
||||
* Bitfield access macros
|
||||
*
|
||||
* FIELD_{GET,PREP} macros take as first parameter shifted mask
|
||||
* from which they extract the base mask and shift amount.
|
||||
* Mask must be a compilation time constant.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* #define REG_FIELD_A GENMASK(6, 0)
|
||||
* #define REG_FIELD_B BIT(7)
|
||||
* #define REG_FIELD_C GENMASK(15, 8)
|
||||
* #define REG_FIELD_D GENMASK(31, 16)
|
||||
*
|
||||
* Get:
|
||||
* a = FIELD_GET(REG_FIELD_A, reg);
|
||||
* b = FIELD_GET(REG_FIELD_B, reg);
|
||||
*
|
||||
* Set:
|
||||
* reg = FIELD_PREP(REG_FIELD_A, 1) |
|
||||
* FIELD_PREP(REG_FIELD_B, 0) |
|
||||
* FIELD_PREP(REG_FIELD_C, c) |
|
||||
* FIELD_PREP(REG_FIELD_D, 0x40);
|
||||
*
|
||||
* Modify:
|
||||
* reg &= ~REG_FIELD_C;
|
||||
* reg |= FIELD_PREP(REG_FIELD_C, c);
|
||||
*/
|
||||
|
||||
#define __bf_shf(x) (__builtin_ffsll(x) - 1)
|
||||
|
||||
#define __scalar_type_to_unsigned_cases(type) \
|
||||
unsigned type: (unsigned type)0, \
|
||||
signed type: (unsigned type)0
|
||||
|
||||
#define __unsigned_scalar_typeof(x) typeof( \
|
||||
_Generic((x), \
|
||||
char: (unsigned char)0, \
|
||||
__scalar_type_to_unsigned_cases(char), \
|
||||
__scalar_type_to_unsigned_cases(short), \
|
||||
__scalar_type_to_unsigned_cases(int), \
|
||||
__scalar_type_to_unsigned_cases(long), \
|
||||
__scalar_type_to_unsigned_cases(long long), \
|
||||
default: (x)))
|
||||
|
||||
#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x))
|
||||
|
||||
#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \
|
||||
({ \
|
||||
BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \
|
||||
_pfx "mask is not constant"); \
|
||||
BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
|
||||
~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
|
||||
_pfx "value too large for the field"); \
|
||||
BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
|
||||
__bf_cast_unsigned(_reg, ~0ull), \
|
||||
_pfx "type of reg too small for mask"); \
|
||||
__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \
|
||||
(1ULL << __bf_shf(_mask))); \
|
||||
})
|
||||
|
||||
/**
|
||||
* FIELD_MAX() - produce the maximum value representable by a field
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
*
|
||||
* FIELD_MAX() returns the maximum value that can be held in the field
|
||||
* specified by @_mask.
|
||||
*/
|
||||
#define FIELD_MAX(_mask) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \
|
||||
(typeof(_mask))((_mask) >> __bf_shf(_mask)); \
|
||||
})
|
||||
|
||||
/**
 * FIELD_FIT() - check if value fits in the field
 * @_mask: shifted mask defining the field's length and position
 * @_val:  value to test against the field
 *
 * @_val is parenthesized before it is cast to the type of @_mask, so an
 * expression argument (e.g. "x >> 30") is evaluated in full and only the
 * result is converted.
 *
 * Return: true if @_val can fit inside @_mask, false if @_val is too big.
 */
#define FIELD_FIT(_mask, _val)						\
	({								\
		/* Only the mask is validated; the value is a run-time input. */ \
		__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: ");	\
		!((((typeof(_mask))(_val)) << __bf_shf(_mask)) & ~(_mask)); \
	})
|
||||
|
||||
/**
|
||||
* FIELD_PREP() - prepare a bitfield element
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
* @_val: value to put in the field
|
||||
*
|
||||
* FIELD_PREP() masks and shifts up the value. The result should
|
||||
* be combined with other fields of the bitfield using logical OR.
|
||||
*/
|
||||
#define FIELD_PREP(_mask, _val) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \
|
||||
((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
|
||||
})
|
||||
|
||||
/**
|
||||
* FIELD_GET() - extract a bitfield element
|
||||
* @_mask: shifted mask defining the field's length and position
|
||||
* @_reg: value of entire bitfield
|
||||
*
|
||||
* FIELD_GET() extracts the field specified by @_mask from the
|
||||
* bitfield passed in as @_reg by masking and shifting it down.
|
||||
*/
|
||||
#define FIELD_GET(_mask, _reg) \
|
||||
({ \
|
||||
__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
|
||||
(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
|
||||
})
|
||||
|
||||
extern void __compiletime_error("value doesn't fit into mask")
|
||||
__field_overflow(void);
|
||||
extern void __compiletime_error("bad bitfield mask")
|
||||
__bad_mask(void);
|
||||
static __always_inline u64 field_multiplier(u64 field)
|
||||
{
|
||||
if ((field | (field - 1)) & ((field | (field - 1)) + 1))
|
||||
__bad_mask();
|
||||
return field & -field;
|
||||
}
|
||||
static __always_inline u64 field_mask(u64 field)
|
||||
{
|
||||
return field / field_multiplier(field);
|
||||
}
|
||||
#define field_max(field) ((typeof(field))field_mask(field))
|
||||
#define ____MAKE_OP(type,base,to,from) \
|
||||
static __always_inline __##type type##_encode_bits(base v, base field) \
|
||||
{ \
|
||||
if (__builtin_constant_p(v) && (v & ~field_mask(field))) \
|
||||
__field_overflow(); \
|
||||
return to((v & field_mask(field)) * field_multiplier(field)); \
|
||||
} \
|
||||
static __always_inline __##type type##_replace_bits(__##type old, \
|
||||
base val, base field) \
|
||||
{ \
|
||||
return (old & ~to(field)) | type##_encode_bits(val, field); \
|
||||
} \
|
||||
static __always_inline void type##p_replace_bits(__##type *p, \
|
||||
base val, base field) \
|
||||
{ \
|
||||
*p = (*p & ~to(field)) | type##_encode_bits(val, field); \
|
||||
} \
|
||||
static __always_inline base type##_get_bits(__##type v, base field) \
|
||||
{ \
|
||||
return (from(v) & field)/field_multiplier(field); \
|
||||
}
|
||||
#define __MAKE_OP(size) \
|
||||
____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \
|
||||
____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \
|
||||
____MAKE_OP(u##size,u##size,,)
|
||||
____MAKE_OP(u8,u8,,)
|
||||
__MAKE_OP(16)
|
||||
__MAKE_OP(32)
|
||||
__MAKE_OP(64)
|
||||
#undef __MAKE_OP
|
||||
#undef ____MAKE_OP
|
||||
|
||||
#endif
|
||||
@@ -4,6 +4,7 @@
|
||||
/aarch64/debug-exceptions
|
||||
/aarch64/get-reg-list
|
||||
/aarch64/hypercalls
|
||||
/aarch64/page_fault_test
|
||||
/aarch64/psci_test
|
||||
/aarch64/vcpu_width_config
|
||||
/aarch64/vgic_init
|
||||
|
||||
@@ -47,6 +47,7 @@ LIBKVM += lib/perf_test_util.c
|
||||
LIBKVM += lib/rbtree.c
|
||||
LIBKVM += lib/sparsebit.c
|
||||
LIBKVM += lib/test_util.c
|
||||
LIBKVM += lib/userfaultfd_util.c
|
||||
|
||||
LIBKVM_STRING += lib/string_override.c
|
||||
|
||||
@@ -152,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
|
||||
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -22,23 +22,13 @@
|
||||
#include "test_util.h"
|
||||
#include "perf_test_util.h"
|
||||
#include "guest_modes.h"
|
||||
#include "userfaultfd_util.h"
|
||||
|
||||
#ifdef __NR_userfaultfd
|
||||
|
||||
#ifdef PRINT_PER_PAGE_UPDATES
|
||||
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_PER_VCPU_UPDATES
|
||||
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
static int nr_vcpus = 1;
|
||||
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
|
||||
|
||||
static size_t demand_paging_size;
|
||||
static char *guest_data_prototype;
|
||||
|
||||
@@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
|
||||
ts_diff.tv_sec, ts_diff.tv_nsec);
|
||||
}
|
||||
|
||||
static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
|
||||
static int handle_uffd_page_request(int uffd_mode, int uffd,
|
||||
struct uffd_msg *msg)
|
||||
{
|
||||
pid_t tid = syscall(__NR_gettid);
|
||||
uint64_t addr = msg->arg.pagefault.address;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
int r;
|
||||
@@ -116,157 +108,6 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool quit_uffd_thread;
|
||||
|
||||
struct uffd_handler_args {
|
||||
int uffd_mode;
|
||||
int uffd;
|
||||
int pipefd;
|
||||
useconds_t delay;
|
||||
};
|
||||
|
||||
static void *uffd_handler_thread_fn(void *arg)
|
||||
{
|
||||
struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
|
||||
int uffd = uffd_args->uffd;
|
||||
int pipefd = uffd_args->pipefd;
|
||||
useconds_t delay = uffd_args->delay;
|
||||
int64_t pages = 0;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
while (!quit_uffd_thread) {
|
||||
struct uffd_msg msg;
|
||||
struct pollfd pollfd[2];
|
||||
char tmp_chr;
|
||||
int r;
|
||||
uint64_t addr;
|
||||
|
||||
pollfd[0].fd = uffd;
|
||||
pollfd[0].events = POLLIN;
|
||||
pollfd[1].fd = pipefd;
|
||||
pollfd[1].events = POLLIN;
|
||||
|
||||
r = poll(pollfd, 2, -1);
|
||||
switch (r) {
|
||||
case -1:
|
||||
pr_info("poll err");
|
||||
continue;
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
break;
|
||||
default:
|
||||
pr_info("Polling uffd returned %d", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[0].revents & POLLERR) {
|
||||
pr_info("uffd revents has POLLERR");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[1].revents & POLLIN) {
|
||||
r = read(pollfd[1].fd, &tmp_chr, 1);
|
||||
TEST_ASSERT(r == 1,
|
||||
"Error reading pipefd in UFFD thread\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(pollfd[0].revents & POLLIN))
|
||||
continue;
|
||||
|
||||
r = read(uffd, &msg, sizeof(msg));
|
||||
if (r == -1) {
|
||||
if (errno == EAGAIN)
|
||||
continue;
|
||||
pr_info("Read of uffd got errno %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (r != sizeof(msg)) {
|
||||
pr_info("Read on uffd returned unexpected size: %d bytes", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
|
||||
continue;
|
||||
|
||||
if (delay)
|
||||
usleep(delay);
|
||||
addr = msg.arg.pagefault.address;
|
||||
r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
pages++;
|
||||
}
|
||||
|
||||
ts_diff = timespec_elapsed(start);
|
||||
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
|
||||
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
|
||||
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Register [@hva, @hva + @len) with a new userfaultfd and start a handler
 * thread for it.
 *
 * @vm:                  not referenced by this function.
 * @uffd_handler_thread: receives the id of the created handler thread.
 * @pipefd:              pipe end handed to the handler thread, which polls it
 *                       alongside the uffd.
 * @uffd_mode:           UFFDIO_REGISTER mode; UFFDIO_REGISTER_MODE_MINOR
 *                       selects minor-fault handling.
 * @uffd_delay:          delay handed to the handler thread.
 * @uffd_args:           caller-provided storage filled in here and passed to
 *                       the handler thread as its argument.
 * @hva, @len:           range to register.
 * @alias:               second mapping of the range; required (asserted) for
 *                       minor mode, where it is used to prefault the pages.
 *
 * Any syscall, ioctl or thread-creation failure trips a TEST_ASSERT.
 */
static void setup_demand_paging(struct kvm_vm *vm,
				pthread_t *uffd_handler_thread, int pipefd,
				int uffd_mode, useconds_t uffd_delay,
				struct uffd_handler_args *uffd_args,
				void *hva, void *alias, uint64_t len)
{
	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
	int uffd;
	struct uffdio_api uffdio_api;
	struct uffdio_register uffdio_register;
	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
	int ret;

	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
		       is_minor ? "MINOR" : "MISSING",
		       is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");

	/* In order to get minor faults, prefault via the alias. */
	if (is_minor) {
		size_t p;

		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;

		TEST_ASSERT(alias != NULL, "Alias required for minor faults");
		for (p = 0; p < (len / demand_paging_size); ++p) {
			memcpy(alias + (p * demand_paging_size),
			       guest_data_prototype, demand_paging_size);
		}
	}

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));

	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));

	uffdio_register.range.start = (uint64_t)hva;
	uffdio_register.range.len = len;
	uffdio_register.mode = uffd_mode;
	ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
	/* The resolving ioctl for the chosen mode must be offered for the range. */
	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
		    expected_ioctls, "missing userfaultfd ioctls");

	uffd_args->uffd_mode = uffd_mode;
	uffd_args->uffd = uffd;
	uffd_args->pipefd = pipefd;
	uffd_args->delay = uffd_delay;
	/* pthread_create() returns an error number, not -1/errno. */
	ret = pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
			     uffd_args);
	TEST_ASSERT(!ret, "pthread_create() failed, error %d", ret);

	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
		       hva, hva + len);
}
|
||||
|
||||
struct test_params {
|
||||
int uffd_mode;
|
||||
useconds_t uffd_delay;
|
||||
@@ -274,16 +115,25 @@ struct test_params {
|
||||
bool partition_vcpu_memory_access;
|
||||
};
|
||||
|
||||
static void prefault_mem(void *alias, uint64_t len)
|
||||
{
|
||||
size_t p;
|
||||
|
||||
TEST_ASSERT(alias != NULL, "Alias required for minor faults");
|
||||
for (p = 0; p < (len / demand_paging_size); ++p) {
|
||||
memcpy(alias + (p * demand_paging_size),
|
||||
guest_data_prototype, demand_paging_size);
|
||||
}
|
||||
}
|
||||
|
||||
static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
{
|
||||
struct test_params *p = arg;
|
||||
pthread_t *uffd_handler_threads = NULL;
|
||||
struct uffd_handler_args *uffd_args = NULL;
|
||||
struct uffd_desc **uffd_descs = NULL;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
int *pipefds = NULL;
|
||||
struct kvm_vm *vm;
|
||||
int r, i;
|
||||
int i;
|
||||
|
||||
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
|
||||
p->src_type, p->partition_vcpu_memory_access);
|
||||
@@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
memset(guest_data_prototype, 0xAB, demand_paging_size);
|
||||
|
||||
if (p->uffd_mode) {
|
||||
uffd_handler_threads =
|
||||
malloc(nr_vcpus * sizeof(*uffd_handler_threads));
|
||||
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
|
||||
|
||||
uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
|
||||
TEST_ASSERT(uffd_args, "Memory allocation failed");
|
||||
|
||||
pipefds = malloc(sizeof(int) * nr_vcpus * 2);
|
||||
TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
|
||||
uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
|
||||
TEST_ASSERT(uffd_descs, "Memory allocation failed");
|
||||
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
struct perf_test_vcpu_args *vcpu_args;
|
||||
@@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
|
||||
vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
|
||||
|
||||
prefault_mem(vcpu_alias,
|
||||
vcpu_args->pages * perf_test_args.guest_page_size);
|
||||
|
||||
/*
|
||||
* Set up user fault fd to handle demand paging
|
||||
* requests.
|
||||
*/
|
||||
r = pipe2(&pipefds[i * 2],
|
||||
O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(!r, "Failed to set up pipefd");
|
||||
|
||||
setup_demand_paging(vm, &uffd_handler_threads[i],
|
||||
pipefds[i * 2], p->uffd_mode,
|
||||
p->uffd_delay, &uffd_args[i],
|
||||
vcpu_hva, vcpu_alias,
|
||||
vcpu_args->pages * perf_test_args.guest_page_size);
|
||||
uffd_descs[i] = uffd_setup_demand_paging(
|
||||
p->uffd_mode, p->uffd_delay, vcpu_hva,
|
||||
vcpu_args->pages * perf_test_args.guest_page_size,
|
||||
&handle_uffd_page_request);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
pr_info("All vCPU threads joined\n");
|
||||
|
||||
if (p->uffd_mode) {
|
||||
char c;
|
||||
|
||||
/* Tell the user fault fd handler threads to quit */
|
||||
for (i = 0; i < nr_vcpus; i++) {
|
||||
r = write(pipefds[i * 2 + 1], &c, 1);
|
||||
TEST_ASSERT(r == 1, "Unable to write to pipefd");
|
||||
|
||||
pthread_join(uffd_handler_threads[i], NULL);
|
||||
}
|
||||
for (i = 0; i < nr_vcpus; i++)
|
||||
uffd_stop_demand_paging(uffd_descs[i]);
|
||||
}
|
||||
|
||||
pr_info("Total guest execution time: %ld.%.9lds\n",
|
||||
@@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
|
||||
perf_test_destroy_vm(vm);
|
||||
|
||||
free(guest_data_prototype);
|
||||
if (p->uffd_mode) {
|
||||
free(uffd_handler_threads);
|
||||
free(uffd_args);
|
||||
free(pipefds);
|
||||
}
|
||||
if (p->uffd_mode)
|
||||
free(uffd_descs);
|
||||
}
|
||||
|
||||
static void help(char *name)
|
||||
|
||||
@@ -38,12 +38,25 @@
|
||||
* NORMAL 4 1111:1111
|
||||
* NORMAL_WT 5 1011:1011
|
||||
*/
|
||||
#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
|
||||
(0x04ul << (1 * 8)) | \
|
||||
(0x0cul << (2 * 8)) | \
|
||||
(0x44ul << (3 * 8)) | \
|
||||
(0xfful << (4 * 8)) | \
|
||||
(0xbbul << (5 * 8)))
|
||||
|
||||
/* Linux doesn't use these memory types, so let's define them. */
|
||||
#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
|
||||
#define MAIR_ATTR_NORMAL_WT UL(0xbb)
|
||||
|
||||
#define MT_DEVICE_nGnRnE 0
|
||||
#define MT_DEVICE_nGnRE 1
|
||||
#define MT_DEVICE_GRE 2
|
||||
#define MT_NORMAL_NC 3
|
||||
#define MT_NORMAL 4
|
||||
#define MT_NORMAL_WT 5
|
||||
|
||||
#define DEFAULT_MAIR_EL1 \
|
||||
(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
|
||||
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
|
||||
|
||||
#define MPIDR_HWID_BITMASK (0xff00fffffful)
|
||||
|
||||
@@ -92,11 +105,19 @@ enum {
|
||||
#define ESR_EC_MASK (ESR_EC_NUM - 1)
|
||||
|
||||
#define ESR_EC_SVC64 0x15
|
||||
#define ESR_EC_IABT 0x21
|
||||
#define ESR_EC_DABT 0x25
|
||||
#define ESR_EC_HW_BP_CURRENT 0x31
|
||||
#define ESR_EC_SSTEP_CURRENT 0x33
|
||||
#define ESR_EC_WP_CURRENT 0x35
|
||||
#define ESR_EC_BRK_INS 0x3c
|
||||
|
||||
/* Access flag */
|
||||
#define PTE_AF (1ULL << 10)
|
||||
|
||||
/* Access flag update enable/disable */
|
||||
#define TCR_EL1_HA (1ULL << 39)
|
||||
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
bool *ps4k, bool *ps16k, bool *ps64k);
|
||||
|
||||
@@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm,
|
||||
void vm_install_sync_handler(struct kvm_vm *vm,
|
||||
int vector, int ec, handler_fn handler);
|
||||
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
|
||||
|
||||
static inline void cpu_relax(void)
|
||||
{
|
||||
asm volatile("yield" ::: "memory");
|
||||
|
||||
@@ -34,6 +34,7 @@ struct userspace_mem_region {
|
||||
struct sparsebit *unused_phy_pages;
|
||||
int fd;
|
||||
off_t offset;
|
||||
enum vm_mem_backing_src_type backing_src_type;
|
||||
void *host_mem;
|
||||
void *host_alias;
|
||||
void *mmap_start;
|
||||
@@ -64,6 +65,14 @@ struct userspace_mem_regions {
|
||||
DECLARE_HASHTABLE(slot_hash, 9);
|
||||
};
|
||||
|
||||
enum kvm_mem_region_type {
|
||||
MEM_REGION_CODE,
|
||||
MEM_REGION_DATA,
|
||||
MEM_REGION_PT,
|
||||
MEM_REGION_TEST_DATA,
|
||||
NR_MEM_REGIONS,
|
||||
};
|
||||
|
||||
struct kvm_vm {
|
||||
int mode;
|
||||
unsigned long type;
|
||||
@@ -92,6 +101,13 @@ struct kvm_vm {
|
||||
int stats_fd;
|
||||
struct kvm_stats_header stats_header;
|
||||
struct kvm_stats_desc *stats_desc;
|
||||
|
||||
/*
|
||||
* KVM region slots. These are the default memslots used by page
|
||||
* allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
|
||||
* memslot.
|
||||
*/
|
||||
uint32_t memslots[NR_MEM_REGIONS];
|
||||
};
|
||||
|
||||
|
||||
@@ -104,6 +120,13 @@ struct kvm_vm {
|
||||
struct userspace_mem_region *
|
||||
memslot2region(struct kvm_vm *vm, uint32_t memslot);
|
||||
|
||||
static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
|
||||
enum kvm_mem_region_type type)
|
||||
{
|
||||
assert(type < NR_MEM_REGIONS);
|
||||
return memslot2region(vm, vm->memslots[type]);
|
||||
}
|
||||
|
||||
/* Minimum allocated guest virtual and physical addresses */
|
||||
#define KVM_UTIL_MIN_VADDR 0x2000
|
||||
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
|
||||
@@ -384,7 +407,11 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
|
||||
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
|
||||
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
|
||||
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
enum kvm_mem_region_type type);
|
||||
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
|
||||
vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
|
||||
enum kvm_mem_region_type type);
|
||||
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
|
||||
|
||||
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
@@ -646,13 +673,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
|
||||
* __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
|
||||
* calculate the amount of memory needed for per-vCPU data, e.g. stacks.
|
||||
*/
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages);
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode);
|
||||
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
uint64_t nr_extra_pages);
|
||||
|
||||
static inline struct kvm_vm *vm_create_barebones(void)
|
||||
{
|
||||
return ____vm_create(VM_MODE_DEFAULT, 0);
|
||||
return ____vm_create(VM_MODE_DEFAULT);
|
||||
}
|
||||
|
||||
static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM userfaultfd util
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
* Copyright (C) 2019-2022 Google LLC
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for pipe2 */
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/userfaultfd.h>
|
||||
|
||||
#include "test_util.h"
|
||||
|
||||
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
|
||||
|
||||
struct uffd_desc {
|
||||
int uffd_mode;
|
||||
int uffd;
|
||||
int pipefds[2];
|
||||
useconds_t delay;
|
||||
uffd_handler_t handler;
|
||||
pthread_t thread;
|
||||
};
|
||||
|
||||
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
|
||||
void *hva, uint64_t len,
|
||||
uffd_handler_t handler);
|
||||
|
||||
void uffd_stop_demand_paging(struct uffd_desc *uffd);
|
||||
|
||||
#ifdef PRINT_PER_PAGE_UPDATES
|
||||
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_PER_VCPU_UPDATES
|
||||
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
|
||||
#endif
|
||||
@@ -77,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
|
||||
|
||||
void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
{
|
||||
if (!vm->pgd_created) {
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
|
||||
page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
vm->pgd = paddr;
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
|
||||
|
||||
if (vm->pgd_created)
|
||||
return;
|
||||
|
||||
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
|
||||
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
@@ -134,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
|
||||
|
||||
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
{
|
||||
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
|
||||
uint64_t attr_idx = MT_NORMAL;
|
||||
|
||||
_virt_pg_map(vm, vaddr, paddr, attr_idx);
|
||||
}
|
||||
|
||||
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
{
|
||||
uint64_t *ptep;
|
||||
|
||||
@@ -170,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
TEST_FAIL("Page table levels must be 2, 3, or 4");
|
||||
}
|
||||
|
||||
return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
|
||||
return ptep;
|
||||
|
||||
unmapped_gva:
|
||||
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
|
||||
exit(1);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/*
 * Translate guest virtual address @gva to a guest physical address: look up
 * the PTE via virt_get_pte_hva(), take the address pte_addr() extracts from
 * it, and add the offset of @gva within its page.
 */
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint64_t *pte = virt_get_pte_hva(vm, gva);
	vm_vaddr_t in_page_offset = gva & (vm->page_size - 1);

	return pte_addr(vm, *pte) + in_page_offset;
}
|
||||
|
||||
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
|
||||
@@ -319,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
|
||||
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
struct kvm_vcpu_init *init, void *guest_code)
|
||||
{
|
||||
size_t stack_size = vm->page_size == 4096 ?
|
||||
DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
|
||||
size_t stack_size;
|
||||
uint64_t stack_vaddr;
|
||||
struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
|
||||
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
aarch64_vcpu_setup(vcpu, init);
|
||||
|
||||
vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
|
||||
@@ -429,8 +441,8 @@ unexpected_exception:
|
||||
|
||||
void vm_init_descriptor_tables(struct kvm_vm *vm)
|
||||
{
|
||||
vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
|
||||
vm->page_size);
|
||||
vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
|
||||
vm->page_size, MEM_REGION_DATA);
|
||||
|
||||
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
|
||||
}
|
||||
|
||||
@@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
|
||||
seg_vend |= vm->page_size - 1;
|
||||
size_t seg_size = seg_vend - seg_vstart + 1;
|
||||
|
||||
vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
|
||||
vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
|
||||
MEM_REGION_CODE);
|
||||
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
|
||||
"virtual memory for segment at requested min addr,\n"
|
||||
" segment idx: %u\n"
|
||||
|
||||
@@ -185,13 +185,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
|
||||
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
|
||||
"Missing new mode params?");
|
||||
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
|
||||
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
|
||||
pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(mode), nr_pages);
|
||||
|
||||
vm = calloc(1, sizeof(*vm));
|
||||
TEST_ASSERT(vm != NULL, "Insufficient Memory");
|
||||
|
||||
@@ -287,9 +284,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
|
||||
|
||||
/* Allocate and setup memory for guest. */
|
||||
vm->vpages_mapped = sparsebit_alloc();
|
||||
if (nr_pages != 0)
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
0, 0, nr_pages, 0);
|
||||
|
||||
return vm;
|
||||
}
|
||||
@@ -335,8 +329,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
|
||||
uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
|
||||
nr_extra_pages);
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
|
||||
vm = ____vm_create(mode, nr_pages);
|
||||
pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
|
||||
vm_guest_mode_string(mode), nr_pages);
|
||||
|
||||
vm = ____vm_create(mode);
|
||||
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
|
||||
for (i = 0; i < NR_MEM_REGIONS; i++)
|
||||
vm->memslots[i] = 0;
|
||||
|
||||
kvm_vm_elf_load(vm, program_invocation_name);
|
||||
|
||||
@@ -586,6 +588,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
|
||||
sparsebit_free(&region->unused_phy_pages);
|
||||
ret = munmap(region->mmap_start, region->mmap_size);
|
||||
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
|
||||
if (region->fd >= 0) {
|
||||
/* There's an extra map when using shared memory. */
|
||||
ret = munmap(region->mmap_alias, region->mmap_size);
|
||||
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
|
||||
close(region->fd);
|
||||
}
|
||||
|
||||
free(region);
|
||||
}
|
||||
@@ -923,6 +931,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
|
||||
vm_mem_backing_src_alias(src_type)->name);
|
||||
}
|
||||
|
||||
region->backing_src_type = src_type;
|
||||
region->unused_phy_pages = sparsebit_alloc();
|
||||
sparsebit_set_num(region->unused_phy_pages,
|
||||
guest_paddr >> vm->page_shift, npages);
|
||||
@@ -1217,32 +1226,15 @@ va_found:
|
||||
return pgidx_start * vm->page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* sz - Size in bytes
|
||||
* vaddr_min - Minimum starting virtual address
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Starting guest virtual address
|
||||
*
|
||||
* Allocates at least sz bytes within the virtual address space of the vm
|
||||
* given by vm. The allocated bytes are mapped to a virtual address >=
|
||||
* the address given by vaddr_min. Note that each allocation uses a
|
||||
* a unique set of pages, with the minimum real allocation being at least
|
||||
* a page.
|
||||
*/
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
|
||||
enum kvm_mem_region_type type)
|
||||
{
|
||||
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
|
||||
|
||||
virt_pgd_alloc(vm);
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
|
||||
KVM_UTIL_MIN_PFN * vm->page_size, 0);
|
||||
KVM_UTIL_MIN_PFN * vm->page_size,
|
||||
vm->memslots[type]);
|
||||
|
||||
/*
|
||||
* Find an unused range of virtual page addresses of at least
|
||||
@@ -1263,6 +1255,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
return vaddr_start;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* sz - Size in bytes
|
||||
* vaddr_min - Minimum starting virtual address
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Starting guest virtual address
|
||||
*
|
||||
* Allocates at least sz bytes within the virtual address space of the vm
|
||||
* given by vm. The allocated bytes are mapped to a virtual address >=
|
||||
* the address given by vaddr_min. Note that each allocation uses a
|
||||
* a unique set of pages, with the minimum real allocation being at least
|
||||
* a page. The allocated physical space comes from the TEST_DATA memory region.
|
||||
*/
|
||||
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
|
||||
{
|
||||
return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate Pages
|
||||
*
|
||||
@@ -1282,6 +1298,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
|
||||
return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
|
||||
}
|
||||
|
||||
vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
|
||||
{
|
||||
return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Virtual Address Allocate Page
|
||||
*
|
||||
@@ -1847,7 +1868,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
|
||||
|
||||
vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
|
||||
{
|
||||
return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
|
||||
|
||||
void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
{
|
||||
if (!vm->pgd_created) {
|
||||
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
|
||||
page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
vm->pgd = paddr;
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
|
||||
|
||||
if (vm->pgd_created)
|
||||
return;
|
||||
|
||||
vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
vm->pgd_created = true;
|
||||
}
|
||||
|
||||
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
|
||||
@@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
void *guest_code)
|
||||
{
|
||||
int r;
|
||||
size_t stack_size = vm->page_size == 4096 ?
|
||||
DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
|
||||
size_t stack_size;
|
||||
unsigned long stack_vaddr;
|
||||
unsigned long current_gp = 0;
|
||||
struct kvm_mp_state mps;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
|
||||
vm->page_size;
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
riscv_vcpu_mmu_setup(vcpu);
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
|
||||
return;
|
||||
|
||||
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
|
||||
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
|
||||
vm->memslots[MEM_REGION_PT]);
|
||||
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
|
||||
|
||||
vm->pgd = paddr;
|
||||
@@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
|
||||
vm->page_size);
|
||||
|
||||
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN);
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* KVM userfaultfd util
|
||||
* Adapted from demand_paging_test.c
|
||||
*
|
||||
* Copyright (C) 2018, Red Hat, Inc.
|
||||
* Copyright (C) 2019-2022 Google LLC
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE /* for pipe2 */
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <poll.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/userfaultfd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "kvm_util.h"
|
||||
#include "test_util.h"
|
||||
#include "perf_test_util.h"
|
||||
#include "userfaultfd_util.h"
|
||||
|
||||
#ifdef __NR_userfaultfd
|
||||
|
||||
static void *uffd_handler_thread_fn(void *arg)
|
||||
{
|
||||
struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
|
||||
int uffd = uffd_desc->uffd;
|
||||
int pipefd = uffd_desc->pipefds[0];
|
||||
useconds_t delay = uffd_desc->delay;
|
||||
int64_t pages = 0;
|
||||
struct timespec start;
|
||||
struct timespec ts_diff;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
||||
while (1) {
|
||||
struct uffd_msg msg;
|
||||
struct pollfd pollfd[2];
|
||||
char tmp_chr;
|
||||
int r;
|
||||
|
||||
pollfd[0].fd = uffd;
|
||||
pollfd[0].events = POLLIN;
|
||||
pollfd[1].fd = pipefd;
|
||||
pollfd[1].events = POLLIN;
|
||||
|
||||
r = poll(pollfd, 2, -1);
|
||||
switch (r) {
|
||||
case -1:
|
||||
pr_info("poll err");
|
||||
continue;
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
break;
|
||||
default:
|
||||
pr_info("Polling uffd returned %d", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[0].revents & POLLERR) {
|
||||
pr_info("uffd revents has POLLERR");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pollfd[1].revents & POLLIN) {
|
||||
r = read(pollfd[1].fd, &tmp_chr, 1);
|
||||
TEST_ASSERT(r == 1,
|
||||
"Error reading pipefd in UFFD thread\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(pollfd[0].revents & POLLIN))
|
||||
continue;
|
||||
|
||||
r = read(uffd, &msg, sizeof(msg));
|
||||
if (r == -1) {
|
||||
if (errno == EAGAIN)
|
||||
continue;
|
||||
pr_info("Read of uffd got errno %d\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (r != sizeof(msg)) {
|
||||
pr_info("Read on uffd returned unexpected size: %d bytes", r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
|
||||
continue;
|
||||
|
||||
if (delay)
|
||||
usleep(delay);
|
||||
r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
pages++;
|
||||
}
|
||||
|
||||
ts_diff = timespec_elapsed(start);
|
||||
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
|
||||
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
|
||||
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
|
||||
void *hva, uint64_t len,
|
||||
uffd_handler_t handler)
|
||||
{
|
||||
struct uffd_desc *uffd_desc;
|
||||
bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
|
||||
int uffd;
|
||||
struct uffdio_api uffdio_api;
|
||||
struct uffdio_register uffdio_register;
|
||||
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
|
||||
int ret;
|
||||
|
||||
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
|
||||
is_minor ? "MINOR" : "MISSING",
|
||||
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
|
||||
|
||||
uffd_desc = malloc(sizeof(struct uffd_desc));
|
||||
TEST_ASSERT(uffd_desc, "malloc failed");
|
||||
|
||||
/* In order to get minor faults, prefault via the alias. */
|
||||
if (is_minor)
|
||||
expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
|
||||
|
||||
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
|
||||
|
||||
uffdio_api.api = UFFD_API;
|
||||
uffdio_api.features = 0;
|
||||
TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
|
||||
"ioctl UFFDIO_API failed: %" PRIu64,
|
||||
(uint64_t)uffdio_api.api);
|
||||
|
||||
uffdio_register.range.start = (uint64_t)hva;
|
||||
uffdio_register.range.len = len;
|
||||
uffdio_register.mode = uffd_mode;
|
||||
TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
|
||||
"ioctl UFFDIO_REGISTER failed");
|
||||
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
|
||||
expected_ioctls, "missing userfaultfd ioctls");
|
||||
|
||||
ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
|
||||
TEST_ASSERT(!ret, "Failed to set up pipefd");
|
||||
|
||||
uffd_desc->uffd_mode = uffd_mode;
|
||||
uffd_desc->uffd = uffd;
|
||||
uffd_desc->delay = delay;
|
||||
uffd_desc->handler = handler;
|
||||
pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
|
||||
uffd_desc);
|
||||
|
||||
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
|
||||
hva, hva + len);
|
||||
|
||||
return uffd_desc;
|
||||
}
|
||||
|
||||
void uffd_stop_demand_paging(struct uffd_desc *uffd)
|
||||
{
|
||||
char c = 0;
|
||||
int ret;
|
||||
|
||||
ret = write(uffd->pipefds[1], &c, 1);
|
||||
TEST_ASSERT(ret == 1, "Unable to write to pipefd");
|
||||
|
||||
ret = pthread_join(uffd->thread, NULL);
|
||||
TEST_ASSERT(ret == 0, "Pthread_join failed.");
|
||||
|
||||
close(uffd->uffd);
|
||||
|
||||
close(uffd->pipefds[1]);
|
||||
close(uffd->pipefds[0]);
|
||||
|
||||
free(uffd);
|
||||
}
|
||||
|
||||
#endif /* __NR_userfaultfd */
|
||||
@@ -552,7 +552,7 @@ unmapped_gva:
|
||||
static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
|
||||
{
|
||||
if (!vm->gdt)
|
||||
vm->gdt = vm_vaddr_alloc_page(vm);
|
||||
vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
|
||||
dt->base = vm->gdt;
|
||||
dt->limit = getpagesize();
|
||||
@@ -562,7 +562,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
|
||||
int selector)
|
||||
{
|
||||
if (!vm->tss)
|
||||
vm->tss = vm_vaddr_alloc_page(vm);
|
||||
vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
|
||||
memset(segp, 0, sizeof(*segp));
|
||||
segp->base = vm->tss;
|
||||
@@ -647,8 +647,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
vm_vaddr_t stack_vaddr;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN);
|
||||
stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
|
||||
DEFAULT_GUEST_STACK_VADDR_MIN,
|
||||
MEM_REGION_DATA);
|
||||
|
||||
vcpu = __vm_vcpu_add(vm, vcpu_id);
|
||||
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
|
||||
@@ -1145,8 +1146,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
|
||||
extern void *idt_handlers;
|
||||
int i;
|
||||
|
||||
vm->idt = vm_vaddr_alloc_page(vm);
|
||||
vm->handlers = vm_vaddr_alloc_page(vm);
|
||||
vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
|
||||
/* Handlers have the same address in both address spaces.*/
|
||||
for (i = 0; i < NUM_INTERRUPTS; i++)
|
||||
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
|
||||
|
||||
Reference in New Issue
Block a user