Merge tag 'powerpc-4.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "This was delayed a day or two by some build-breakage on old toolchains
  which we've now fixed.

  There are two PCI commits, both acked by Bjorn.

  There's one commit to mm/hugepage.c which is (co)authored by Kirill.

  Highlights:
   - Restructure Linux PTE on Book3S/64 to Radix format from Paul
     Mackerras
   - Book3s 64 MMU cleanup in preparation for Radix MMU from Aneesh
     Kumar K.V
   - Add POWER9 cputable entry from Michael Neuling
   - FPU/Altivec/VSX save/restore optimisations from Cyril Bur
   - Add support for new ftrace ABI on ppc64le from Torsten Duwe

  Various cleanups & minor fixes from:
   - Adam Buchbinder, Andrew Donnellan, Balbir Singh, Christophe Leroy,
     Cyril Bur, Luis Henriques, Madhavan Srinivasan, Pan Xinhui, Russell
     Currey, Sukadev Bhattiprolu, Suraj Jitindar Singh.

  General:
   - atomics: Allow architectures to define their own __atomic_op_*
     helpers from Boqun Feng
   - Implement atomic{, 64}_*_return_* variants and acquire/release/
     relaxed variants for (cmp)xchg from Boqun Feng
   - Add powernv_defconfig from Jeremy Kerr
   - Fix BUG_ON() reporting in real mode from Balbir Singh
   - Add xmon command to dump OPAL msglog from Andrew Donnellan
   - Add xmon command to dump process/task similar to ps(1) from Douglas
     Miller
   - Clean up memory hotplug failure paths from David Gibson

  pci/eeh:
   - Redesign SR-IOV on PowerNV to give absolute isolation between VFs
     from Wei Yang.
   - EEH Support for SRIOV VFs from Wei Yang and Gavin Shan.
   - PCI/IOV: Rename and export virtfn_{add, remove} from Wei Yang
   - PCI: Add pcibios_bus_add_device() weak function from Wei Yang
   - MAINTAINERS: Update EEH details and maintainership from Russell
     Currey

  cxl:
   - Support added to the CXL driver for running on both bare-metal and
     hypervisor systems, from Christophe Lombard and Frederic Barrat.
   - Ignore probes for virtual afu pci devices from Vaibhav Jain

  perf:
   - Export Power8 generic and cache events to sysfs from Sukadev
     Bhattiprolu
   - hv-24x7: Fix usage with chip events, display change in counter
     values, display domain indices in sysfs, eliminate domain suffix in
     event names, from Sukadev Bhattiprolu

  Freescale:
   - Updates from Scott: "Highlights include 8xx optimizations, 32-bit
     checksum optimizations, 86xx consolidation, e5500/e6500 cpu
     hotplug, more fman and other dt bits, and minor fixes/cleanup"

* tag 'powerpc-4.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (179 commits)
  powerpc: Fix unrecoverable SLB miss during restore_math()
  powerpc/8xx: Fix do_mtspr_cpu6() build on older compilers
  powerpc/rcpm: Fix build break when SMP=n
  powerpc/book3e-64: Use hardcoded mttmr opcode
  powerpc/fsl/dts: Add "jedec,spi-nor" flash compatible
  powerpc/T104xRDB: add tdm riser card node to device tree
  powerpc32: PAGE_EXEC required for inittext
  powerpc/mpc85xx: Add pcsphy nodes to FManV3 device tree
  powerpc/mpc85xx: Add MDIO bus muxing support to the board device tree(s)
  powerpc/86xx: Introduce and use common dtsi
  powerpc/86xx: Update device tree
  powerpc/86xx: Move dts files to fsl directory
  powerpc/86xx: Switch to kconfig fragments approach
  powerpc/86xx: Update defconfigs
  powerpc/86xx: Consolidate common platform code
  powerpc32: Remove one insn in mulhdu
  powerpc32: small optimisation in flush_icache_range()
  powerpc: Simplify test in __dma_sync()
  powerpc32: move xxxxx_dcache_range() functions inline
  powerpc32: Remove clear_pages() and define clear_page() inline
  ...
This commit is contained in:
Linus Torvalds
2016-03-19 15:38:41 -07:00
324 changed files with 14771 additions and 7166 deletions

View File

@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
export CFLAGS
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
switch_endian \
syscalls \
tm \
vphn
vphn \
math
endif

View File

@@ -0,0 +1,70 @@
#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
#define _SELFTESTS_POWERPC_BASIC_ASM_H
#include <ppc-asm.h>
#include <asm/unistd.h>
/*
 * Load the 64-bit value of expr into reg sixteen bits at a time: the two
 * upper halfwords (@highest, @higher) are built first and moved into the top
 * 32 bits by rldicr, then the two lower halfwords (@high, @l) are OR-ed in.
 */
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher; \
rldicr reg,reg,32,31; \
oris reg,reg,(expr)@high; \
ori reg,reg,(expr)@l;
/*
 * Stack frame layout, chosen on _CALL_ELF: one set of offsets is used when
 * _CALL_ELF == 2 and another otherwise. Each set provides the minimum frame
 * size, the TOC save slot and the offsets of parameter/local doublewords.
 *
 * Note: These macros assume that variables being stored on the stack are
 * doublewords, while this is usually the case it may not always be the
 * case for each use case.
 */
#if defined(_CALL_ELF) && _CALL_ELF == 2
#define STACK_FRAME_MIN_SIZE 32
#define STACK_FRAME_TOC_POS 24
#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
/* Locals are placed directly after the _num_params parameter slots */
#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
#else
#define STACK_FRAME_MIN_SIZE 112
#define STACK_FRAME_TOC_POS 40
#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
/*
 * Locals start at the fixed offset 112 here; _num_params is ignored.
 * Caveat: if a function passed more than 8 doublewords, the caller will have
 * made more space... which would render the 112 incorrect.
 */
#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8))
#endif
/* Parameter x saved to the stack */
#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
/* Local variable x saved to the stack after x parameters */
#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var)
/* LR and CR save slots use the same offsets in both layouts */
#define STACK_FRAME_LR_POS 16
#define STACK_FRAME_CR_POS 8
/*
 * Function prologue: save LR at STACK_FRAME_LR_POS relative to the incoming
 * r1, allocate a frame of (_extra + STACK_FRAME_MIN_SIZE) bytes with stdu,
 * then store CR and r2 into the new frame. r0 is used as scratch.
 *
 * It is very important to note here that _extra is the extra amount of
 * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
 * or STACK_FRAME_LOCAL() macros.
 *
 * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
 * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
 * %r1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
 * preprocessed incorrectly, hence r0.
 */
#define PUSH_BASIC_STACK(_extra) \
mflr r0; \
std r0,STACK_FRAME_LR_POS(%r1); \
stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
mfcr r0; \
stw r0,STACK_FRAME_CR_POS(%r1); \
std %r2,STACK_FRAME_TOC_POS(%r1);
/*
 * Function epilogue matching PUSH_BASIC_STACK(): reload r2 and CR from the
 * current frame, release (_extra + STACK_FRAME_MIN_SIZE) bytes, then reload
 * LR from STACK_FRAME_LR_POS. _extra must be the value given to the matching
 * PUSH_BASIC_STACK(). r0 is used as scratch.
 */
#define POP_BASIC_STACK(_extra) \
ld %r2,STACK_FRAME_TOC_POS(%r1); \
lwz r0,STACK_FRAME_CR_POS(%r1); \
mtcr r0; \
addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \
ld r0,STACK_FRAME_LR_POS(%r1); \
mtlr r0;
#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */

View File

@@ -0,0 +1,6 @@
fpu_syscall
vmx_syscall
fpu_preempt
vmx_preempt
fpu_signal
vmx_signal

View File

@@ -0,0 +1,19 @@
# FPU and VMX variants of the syscall, preempt and signal tests
TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal
all: $(TEST_PROGS)
# Every test depends on the shared selftest harness source
$(TEST_PROGS): ../harness.c
$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
# Each test also depends on the assembly file for its register set
fpu_syscall: fpu_asm.S
fpu_preempt: fpu_asm.S
fpu_signal: fpu_asm.S
vmx_syscall: vmx_asm.S
vmx_preempt: vmx_asm.S
vmx_signal: vmx_asm.S
include ../../lib.mk
# Remove the built test programs and any object files
clean:
rm -f $(TEST_PROGS) *.o

View File

@@ -0,0 +1,198 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include "../basic_asm.h"
/*
 * Store f14-f31 (18 doublewords, 144 bytes) to the stack, starting at
 * offset pos from sp.
 */
#define PUSH_FPU(pos) \
stfd f14,pos(sp); \
stfd f15,pos+8(sp); \
stfd f16,pos+16(sp); \
stfd f17,pos+24(sp); \
stfd f18,pos+32(sp); \
stfd f19,pos+40(sp); \
stfd f20,pos+48(sp); \
stfd f21,pos+56(sp); \
stfd f22,pos+64(sp); \
stfd f23,pos+72(sp); \
stfd f24,pos+80(sp); \
stfd f25,pos+88(sp); \
stfd f26,pos+96(sp); \
stfd f27,pos+104(sp); \
stfd f28,pos+112(sp); \
stfd f29,pos+120(sp); \
stfd f30,pos+128(sp); \
stfd f31,pos+136(sp);
/*
 * Reload f14-f31 from the 18 doublewords on the stack starting at offset
 * pos from sp; the counterpart of PUSH_FPU(pos).
 */
#define POP_FPU(pos) \
lfd f14,pos(sp); \
lfd f15,pos+8(sp); \
lfd f16,pos+16(sp); \
lfd f17,pos+24(sp); \
lfd f18,pos+32(sp); \
lfd f19,pos+40(sp); \
lfd f20,pos+48(sp); \
lfd f21,pos+56(sp); \
lfd f22,pos+64(sp); \
lfd f23,pos+72(sp); \
lfd f24,pos+80(sp); \
lfd f25,pos+88(sp); \
lfd f26,pos+96(sp); \
lfd f27,pos+104(sp); \
lfd f28,pos+112(sp); \
lfd f29,pos+120(sp); \
lfd f30,pos+128(sp); \
lfd f31,pos+136(sp);
/*
 * Load f14-f31 from the 18 consecutive doubles at the address in r3.
 * The previous contents of f14-f31 are not saved here.
 */
# Careful calling this, it will 'clobber' fpu (by design)
# Don't call this from C
FUNC_START(load_fpu)
lfd f14,0(r3)
lfd f15,8(r3)
lfd f16,16(r3)
lfd f17,24(r3)
lfd f18,32(r3)
lfd f19,40(r3)
lfd f20,48(r3)
lfd f21,56(r3)
lfd f22,64(r3)
lfd f23,72(r3)
lfd f24,80(r3)
lfd f25,88(r3)
lfd f26,96(r3)
lfd f27,104(r3)
lfd f28,112(r3)
lfd f29,120(r3)
lfd f30,128(r3)
lfd f31,136(r3)
blr
FUNC_END(load_fpu)
/*
 * Compare f14-f31 against the 18 doubles at the address in r3.
 * Returns 0 in r3 when every register matches, 1 at the first mismatch.
 * Uses r4, f0 and cr1 as scratch. The compare is fcmpu, i.e. a
 * floating-point compare rather than a bitwise one.
 */
FUNC_START(check_fpu)
	mr	r4,r3
	li	r3,1 # assume a bad result
	lfd	f0,0(r4)
	fcmpu	cr1,f0,f14
	bne	cr1,1f
	lfd	f0,8(r4)
	fcmpu	cr1,f0,f15
	bne	cr1,1f
	lfd	f0,16(r4)
	fcmpu	cr1,f0,f16
	bne	cr1,1f
	lfd	f0,24(r4)
	fcmpu	cr1,f0,f17
	bne	cr1,1f
	lfd	f0,32(r4)
	fcmpu	cr1,f0,f18
	bne	cr1,1f
	lfd	f0,40(r4)
	fcmpu	cr1,f0,f19
	bne	cr1,1f
	lfd	f0,48(r4)
	fcmpu	cr1,f0,f20
	bne	cr1,1f
	lfd	f0,56(r4)
	fcmpu	cr1,f0,f21
	bne	cr1,1f
	lfd	f0,64(r4)
	fcmpu	cr1,f0,f22
	bne	cr1,1f
	lfd	f0,72(r4)
	fcmpu	cr1,f0,f23
	bne	cr1,1f
	lfd	f0,80(r4)
	fcmpu	cr1,f0,f24
	bne	cr1,1f
	lfd	f0,88(r4)
	fcmpu	cr1,f0,f25
	bne	cr1,1f
	lfd	f0,96(r4)
	fcmpu	cr1,f0,f26
	bne	cr1,1f
	lfd	f0,104(r4)
	fcmpu	cr1,f0,f27
	bne	cr1,1f
	lfd	f0,112(r4)
	fcmpu	cr1,f0,f28
	bne	cr1,1f
	lfd	f0,120(r4)
	fcmpu	cr1,f0,f29
	bne	cr1,1f
	lfd	f0,128(r4)
	fcmpu	cr1,f0,f30
	bne	cr1,1f
	lfd	f0,136(r4)
	fcmpu	cr1,f0,f31
	bne	cr1,1f
	li	r3,0 # Success!!!
1:	blr
FUNC_END(check_fpu)
/*
 * int test_fpu(double *darray, pid_t *pid)
 * r3 holds the address of darray (the values loaded into f14-f31)
 * r4 holds the address where the result of fork is stored
 * f14-f31 are non volatiles, so they are saved on entry and restored on exit
 * Returns the result of check_fpu() run after the fork.
 */
FUNC_START(test_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
	std	r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
	PUSH_FPU(STACK_FRAME_LOCAL(2,0))

	bl	load_fpu
	nop
	li	r0,__NR_fork
	sc
	/*
	 * NOTE(review): a failing sc is presumably reported through CR0.SO
	 * with a positive errno in r3, which the C callers' "== -1" test
	 * would not recognise -- confirm against the syscall ABI.
	 */

	/*
	 * Pass the result of the fork to the caller. The destination is a
	 * pid_t (a 32-bit int), so store a word: a doubleword store would
	 * write past the object.
	 */
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	stw	r3,0(r9)

	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop

	POP_FPU(STACK_FRAME_LOCAL(2,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(test_fpu)
/*
 * long preempt_fpu(double *darray, int *threads_starting, int *running)
 * On starting will (atomically) decrement threads_starting as a signal that
 * the FPU has been loaded with darray. Will proceed to check the validity of
 * the FPU registers while running is not zero.
 * Returns the last check_fpu() result: 0 if stopped via running, non-zero if
 * a register check failed.
 */
FUNC_START(preempt_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp) # double *darray
	std	r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp) # int *running
	PUSH_FPU(STACK_FRAME_LOCAL(3,0))

	bl	load_fpu
	nop

	sync
	# Atomic DEC
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b

2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop
	cmpdi	r3,0
	bne	3f
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	/* running is an int: load a word so only that variable is tested */
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_FPU(STACK_FRAME_LOCAL(3,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(preempt_fpu)

View File

@@ -0,0 +1,113 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the FPU registers change across preemption.
* Two things should be noted here a) The check_fpu function in asm only checks
* the non volatile registers as it is reused from the syscall test b) There is
* no way to be sure preemption happened so this test just uses many threads
* and a long wait. As such, a successful test doesn't mean much but a failure
* is bad.
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <pthread.h>
#include "utils.h"
/* Time to wait for workers to get preempted (seconds) */
#define PREEMPT_TIME 20
/*
 * Factor by which to multiply number of online CPUs for total number of
 * worker threads
 */
#define THREAD_FACTOR 8
/*
 * Per-thread values to load into the FPU registers. Each worker overwrites
 * all 21 entries with rand() output before use.
 */
__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
2.1};
/* Set to the worker count before the threads are created; the main thread waits for it to reach zero */
int threads_starting;
/* Non-zero while the workers should keep checking their registers */
int running;
/* Assembly worker loop; hands back non-zero if a register check failed */
extern long preempt_fpu(double *darray, int *threads_starting, int *running);

/*
 * Worker thread body: fill this thread's darray with random values and run
 * the assembly check loop on it.
 *
 * Returns the result of preempt_fpu() as the thread's exit value, so that the
 * joining thread sees a non-zero value when a register check failed.
 */
void *preempt_fpu_c(void *p)
{
	int i;
	long rc;

	srand(pthread_self());
	for (i = 0; i < 21; i++)
		darray[i] = rand();

	/* Only returns once running is cleared or a register check fails */
	rc = preempt_fpu(darray, &threads_starting, &running);

	return (void *) rc;
}
/*
 * Start THREAD_FACTOR workers per online CPU, give them PREEMPT_TIME seconds
 * in which to be preempted, then stop them and fail if any worker handed back
 * a non-zero exit value.
 */
int test_preempt_fpu(void)
{
	int n, rc, threads;
	pthread_t *tids;

	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
	tids = malloc((threads) * sizeof(*tids));
	FAIL_IF(!tids);

	running = true;
	threads_starting = threads;
	n = 0;
	while (n < threads) {
		rc = pthread_create(tids + n, NULL, preempt_fpu_c, NULL);
		FAIL_IF(rc);
		n++;
	}

	setbuf(stdout, NULL);
	/* Not really necessary but nice to wait for every thread to start */
	printf("\tWaiting for all workers to start...");
	for (; threads_starting; )
		asm volatile("": : :"memory");
	printf("done\n");

	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
	sleep(PREEMPT_TIME);
	printf("done\n");

	printf("\tStopping workers...");
	/*
	 * Workers are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'.
	 * r5 will have loaded the value of running.
	 */
	running = 0;
	n = 0;
	while (n < threads) {
		void *rc_p;

		pthread_join(tids[n], &rc_p);

		/*
		 * Harness will say the fail was here, look at why preempt_fpu
		 * returned
		 */
		if ((long) rc_p)
			printf("oops\n");
		FAIL_IF((long) rc_p);
		n++;
	}
	printf("done\n");

	free(tids);
	return 0;
}
/* Run test_preempt_fpu() under the selftest harness as "fpu_preempt". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_preempt_fpu, "fpu_preempt");

	return status;
}

View File

@@ -0,0 +1,135 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the FPU registers are correctly reported in a
* signal context. Each worker just spins checking its FPU registers, at some
* point a signal will interrupt it and C code will check the signal context
* ensuring it is also the same.
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <pthread.h>
#include "utils.h"
/* Number of times each thread should receive the signal */
#define ITERATIONS 10
/*
 * Factor by which to multiply number of online CPUs for total number of
 * worker threads
 */
#define THREAD_FACTOR 8
/*
 * Per-thread values to load into the FPU registers. Each worker overwrites
 * all 21 entries with rand() output before use.
 */
__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
2.1};
/* Set by the signal handler when a saved register differs from darray */
bool bad_context;
/* Set to the worker count before the threads are created; the main thread waits for it to reach zero */
int threads_starting;
/* Non-zero while the workers should keep checking their registers */
int running;
/* Assembly worker loop; its result is used as the worker's exit value */
extern long preempt_fpu(double *darray, int *threads_starting, int *running);
/*
 * SIGUSR1 handler: compare f14-f31 as saved in the signal context with the
 * handling thread's darray and set bad_context at the first difference.
 */
void signal_fpu_sig(int sig, siginfo_t *info, void *context)
{
	ucontext_t *uc = context;
	mcontext_t *mc = &uc->uc_mcontext;
	int reg = 14;

	/* Only the non volatiles were loaded up */
	while (reg < 32) {
		if (mc->fp_regs[reg] != darray[reg - 14]) {
			bad_context = true;
			return;
		}
		reg++;
	}
}
/*
 * Worker thread body: install the SIGUSR1 handler, fill this thread's darray
 * with random values and run the assembly check loop on it.
 *
 * Returns the result of preempt_fpu() as the thread's exit value, or a
 * non-zero value if the handler could not be installed.
 */
void *signal_fpu_c(void *p)
{
	int i;
	long rc;
	struct sigaction act;

	/* sa_mask must be defined too, not just the handler and the flags */
	sigemptyset(&act.sa_mask);
	act.sa_sigaction = signal_fpu_sig;
	act.sa_flags = SA_SIGINFO;
	rc = sigaction(SIGUSR1, &act, NULL);
	if (rc) {
		/*
		 * This worker never reaches preempt_fpu(), so do its start-up
		 * decrement here; otherwise the main thread would wait on
		 * threads_starting forever. Report the failure to the joiner.
		 */
		__sync_fetch_and_sub(&threads_starting, 1);
		return (void *) 1L;
	}

	srand(pthread_self());
	for (i = 0; i < 21; i++)
		darray[i] = rand();

	rc = preempt_fpu(darray, &threads_starting, &running);

	return (void *) rc;
}
/*
 * Start THREAD_FACTOR workers per online CPU, send SIGUSR1 to every worker
 * ITERATIONS times (one second apart), then stop the workers and fail if any
 * worker exited non-zero or the handler flagged a bad context.
 */
int test_signal_fpu(void)
{
	int round, t, rc, threads;
	void *rc_p;
	pthread_t *tids;

	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
	tids = malloc(threads * sizeof(*tids));
	FAIL_IF(!tids);

	running = true;
	threads_starting = threads;
	t = 0;
	while (t < threads) {
		rc = pthread_create(tids + t, NULL, signal_fpu_c, NULL);
		FAIL_IF(rc);
		t++;
	}

	setbuf(stdout, NULL);
	printf("\tWaiting for all workers to start...");
	for (; threads_starting; )
		asm volatile("": : :"memory");
	printf("done\n");

	printf("\tSending signals to all threads %d times...", ITERATIONS);
	round = 0;
	while (round < ITERATIONS) {
		for (t = 0; t < threads; t++)
			pthread_kill(tids[t], SIGUSR1);
		sleep(1);
		round++;
	}
	printf("done\n");

	printf("\tStopping workers...");
	running = 0;
	t = 0;
	while (t < threads) {
		pthread_join(tids[t], &rc_p);

		/*
		 * Harness will say the fail was here, look at why signal_fpu
		 * returned
		 */
		if ((long) rc_p || bad_context)
			printf("oops\n");
		if (bad_context)
			fprintf(stderr, "\t!! bad_context is true\n");
		FAIL_IF((long) rc_p || bad_context);
		t++;
	}
	printf("done\n");

	free(tids);
	return 0;
}
/* Run test_signal_fpu() under the selftest harness as "fpu_signal". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_signal_fpu, "fpu_signal");

	return status;
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the FPU registers change across a syscall (fork).
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include "utils.h"
/* Assembly routine handed darray and the address that receives the fork result */
extern int test_fpu(double *darray, pid_t *pid);
/* Values passed to test_fpu() on every iteration */
double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
2.1};
/*
 * Call test_fpu() 1000 times. Each call forks: the child exits with its own
 * check result, the parent waits for it.
 *
 * Returns 0 if every round passed, 1 if the parent or a child reported a
 * failure, -1 if the fork result was -1.
 */
int syscall_fpu(void)
{
	pid_t fork_pid;
	int round;
	int parent_rc;
	int child_ret;

	round = 0;
	while (round < 1000) {
		/* test_fpu will fork() */
		parent_rc = test_fpu(darray, &fork_pid);
		switch (fork_pid) {
		case -1:
			return -1;
		case 0:
			exit(parent_rc);
		default:
			break;
		}
		waitpid(fork_pid, &child_ret, 0);
		if (parent_rc != 0 || child_ret != 0)
			return 1;
		round++;
	}

	return 0;
}
/*
 * Fork twice so that up to four processes run syscall_fpu() concurrently,
 * then fold the children's results into the original process and fail if
 * any of them reported a problem.
 */
int test_syscall_fpu(void)
{
	/*
	 * Setup an environment with much context switching
	 */
	pid_t second;
	pid_t first = fork();
	int ret;
	int child_ret;

	FAIL_IF(first == -1);

	second = fork();
	/* Can't FAIL_IF(second == -1); because already forked once */
	if (second != -1) {
		ret = syscall_fpu();
		if (second == 0)
			exit(ret);
		waitpid(second, &child_ret, 0);
	} else {
		/*
		 * Couldn't fork, ensure test is a fail
		 */
		child_ret = ret = 1;
	}

	ret |= child_ret;

	if (first == 0)
		exit(ret);
	waitpid(first, &child_ret, 0);

	FAIL_IF(ret || child_ret);
	return 0;
}
/* Run test_syscall_fpu() under the selftest harness as "syscall_fpu". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_syscall_fpu, "syscall_fpu");

	return status;
}

View File

@@ -0,0 +1,235 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include "../basic_asm.h"
/*
 * Store v20-v31 (12 vectors of 16 bytes) to the stack, starting at offset
 * pos from sp. reg is a scratch GPR used as the running offset; it is left
 * holding pos + 176.
 */
# POS MUST BE 16 ALIGNED!
#define PUSH_VMX(pos,reg) \
li reg,pos; \
stvx v20,reg,sp; \
addi reg,reg,16; \
stvx v21,reg,sp; \
addi reg,reg,16; \
stvx v22,reg,sp; \
addi reg,reg,16; \
stvx v23,reg,sp; \
addi reg,reg,16; \
stvx v24,reg,sp; \
addi reg,reg,16; \
stvx v25,reg,sp; \
addi reg,reg,16; \
stvx v26,reg,sp; \
addi reg,reg,16; \
stvx v27,reg,sp; \
addi reg,reg,16; \
stvx v28,reg,sp; \
addi reg,reg,16; \
stvx v29,reg,sp; \
addi reg,reg,16; \
stvx v30,reg,sp; \
addi reg,reg,16; \
stvx v31,reg,sp;
/*
 * Reload v20-v31 from the 12 vectors on the stack starting at offset pos
 * from sp; the counterpart of PUSH_VMX(pos,reg). reg is a scratch GPR used
 * as the running offset; it is left holding pos + 176.
 */
# POS MUST BE 16 ALIGNED!
#define POP_VMX(pos,reg) \
li reg,pos; \
lvx v20,reg,sp; \
addi reg,reg,16; \
lvx v21,reg,sp; \
addi reg,reg,16; \
lvx v22,reg,sp; \
addi reg,reg,16; \
lvx v23,reg,sp; \
addi reg,reg,16; \
lvx v24,reg,sp; \
addi reg,reg,16; \
lvx v25,reg,sp; \
addi reg,reg,16; \
lvx v26,reg,sp; \
addi reg,reg,16; \
lvx v27,reg,sp; \
addi reg,reg,16; \
lvx v28,reg,sp; \
addi reg,reg,16; \
lvx v29,reg,sp; \
addi reg,reg,16; \
lvx v30,reg,sp; \
addi reg,reg,16; \
lvx v31,reg,sp;
/*
 * Load v20-v31 from the 12 consecutive 16-byte vectors at the address in r3.
 * r5 is used as the running offset and is left holding 176. The previous
 * contents of v20-v31 are not saved here.
 */
# Careful this will 'clobber' vmx (by design)
# Don't call this from C
FUNC_START(load_vmx)
li r5,0
lvx v20,r5,r3
addi r5,r5,16
lvx v21,r5,r3
addi r5,r5,16
lvx v22,r5,r3
addi r5,r5,16
lvx v23,r5,r3
addi r5,r5,16
lvx v24,r5,r3
addi r5,r5,16
lvx v25,r5,r3
addi r5,r5,16
lvx v26,r5,r3
addi r5,r5,16
lvx v27,r5,r3
addi r5,r5,16
lvx v28,r5,r3
addi r5,r5,16
lvx v29,r5,r3
addi r5,r5,16
lvx v30,r5,r3
addi r5,r5,16
lvx v31,r5,r3
blr
FUNC_END(load_vmx)
/*
 * Compare v20-v31 against the 12 vectors at the address in r3.
 * Each vcmpequd. result is AND-ed into v2, so v2 ends up all ones only if
 * every doubleword of every register matched. v2 is then spilled to the
 * stack and both of its doublewords are tested.
 * Returns 0 in r3 on a full match, 1 otherwise.
 */
# Should be safe from C, only touches r4, r5 and v0,v1,v2
FUNC_START(check_vmx)
	PUSH_BASIC_STACK(32)
	mr	r4,r3
	li	r3,1 # assume a bad result
	li	r5,0
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v20
	vmr	v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v21
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v22
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v23
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v24
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v25
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v26
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v27
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v28
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v29
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v30
	vand	v2,v2,v1

	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v31
	vand	v2,v2,v1

	li	r5,STACK_FRAME_LOCAL(0,0)
	stvx	v2,r5,sp
	/*
	 * v2 is 16 bytes wide: load both doublewords and AND them so that a
	 * mismatch in either half is caught. r4 is free again at this point.
	 */
	ldx	r0,r5,sp
	addi	r5,r5,8
	ldx	r4,r5,sp
	and	r0,r0,r4
	cmpdi	r0,0xffffffffffffffff
	bne	1f
	li	r3,0
1:	POP_BASIC_STACK(32)
	blr
FUNC_END(check_vmx)
/*
 * int test_vmx(vector int *varray, pid_t *pid)
 * r3 holds the address of varray (the values loaded into v20-v31)
 * r4 holds the address where the result of fork is stored
 * v20-v31 are non-volatile, so they are saved on entry and restored on exit
 * Returns the result of check_vmx() run after the fork.
 */
# Safe from C
FUNC_START(test_vmx)
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
	std	r4,STACK_FRAME_PARAM(1)(sp) # address of pid
	PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)

	bl	load_vmx
	nop

	li	r0,__NR_fork
	sc
	/*
	 * NOTE(review): a failing sc is presumably reported through CR0.SO
	 * with a positive errno in r3, which the C callers' "== -1" test
	 * would not recognise -- confirm against the syscall ABI.
	 */

	/*
	 * Pass the result of fork back to the caller. The destination is a
	 * pid_t (a 32-bit int), so store a word: a doubleword store would
	 * write past the object.
	 */
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	stw	r3,0(r9)

	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_vmx
	nop

	POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(test_vmx)
/*
 * int preempt_vmx(vector int *varray, int *threads_starting, int *running)
 * On starting will (atomically) decrement threads_starting as a signal that
 * the VMX have been loaded with varray. Will proceed to check the validity of
 * the VMX registers while running is not zero.
 * Returns the last check_vmx() result: 0 if stopped via running, non-zero if
 * a register check failed.
 */
FUNC_START(preempt_vmx)
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
	std	r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp) # int *running
	# VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0)
	PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)

	bl	load_vmx
	nop

	sync
	# Atomic DEC
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b

2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_vmx
	nop
	cmpdi	r3,0
	bne	3f
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	/* running is an int: load a word so only that variable is tested */
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(preempt_vmx)

View File

@@ -0,0 +1,112 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the VMX registers change across preemption.
* Two things should be noted here a) The check_vmx function in asm only checks
* the non volatile registers as it is reused from the syscall test b) There is
* no way to be sure preemption happened so this test just uses many threads
* and a long wait. As such, a successful test doesn't mean much but a failure
* is bad.
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <pthread.h>
#include "utils.h"
/* Time to wait for workers to get preempted (seconds) */
#define PREEMPT_TIME 20
/*
 * Factor by which to multiply number of online CPUs for total number of
 * worker threads
 */
#define THREAD_FACTOR 8
/*
 * Per-thread values to load into the VMX registers. Each worker overwrites
 * all 12 vectors with rand() output before use.
 */
__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
{13,14,15,16},{17,18,19,20},{21,22,23,24},
{25,26,27,28},{29,30,31,32},{33,34,35,36},
{37,38,39,40},{41,42,43,44},{45,46,47,48}};
/* Set to the worker count before the threads are created; the main thread waits for it to reach zero */
int threads_starting;
/* Non-zero while the workers should keep checking their registers */
int running;
/* Assembly worker loop; hands back non-zero if a register check failed */
extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);

/*
 * Worker thread body: fill this thread's varray with random values and run
 * the assembly check loop on it.
 *
 * Returns the result of preempt_vmx() as the thread's exit value, so that the
 * joining thread sees a non-zero value when a register check failed.
 */
void *preempt_vmx_c(void *p)
{
	int i, j;
	long rc;

	srand(pthread_self());
	for (i = 0; i < 12; i++)
		for (j = 0; j < 4; j++)
			varray[i][j] = rand();

	/* Only returns once running is cleared or a register check fails */
	rc = preempt_vmx(varray, &threads_starting, &running);

	return (void *) rc;
}
/*
 * Start THREAD_FACTOR workers per online CPU, give them PREEMPT_TIME seconds
 * in which to be preempted, then stop them and fail if any worker handed back
 * a non-zero exit value.
 */
int test_preempt_vmx(void)
{
	int i, rc, threads;
	pthread_t *tids;

	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
	tids = malloc(threads * sizeof(pthread_t));
	FAIL_IF(!tids);

	running = true;
	threads_starting = threads;
	for (i = 0; i < threads; i++) {
		rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
		FAIL_IF(rc);
	}

	setbuf(stdout, NULL);
	/* Not really necessary but nice to wait for every thread to start */
	printf("\tWaiting for all workers to start...");
	while(threads_starting)
		asm volatile("": : :"memory");
	printf("done\n");

	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
	sleep(PREEMPT_TIME);
	printf("done\n");

	printf("\tStopping workers...");
	/*
	 * Workers are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'.
	 * r5 will have loaded the value of running.
	 */
	running = 0;
	for (i = 0; i < threads; i++) {
		void *rc_p;
		pthread_join(tids[i], &rc_p);

		/*
		 * Harness will say the fail was here, look at why preempt_vmx
		 * returned
		 */
		if ((long) rc_p)
			printf("oops\n");
		FAIL_IF((long) rc_p);
	}
	printf("done\n");

	/* Release the thread id array */
	free(tids);
	return 0;
}
/* Run test_preempt_vmx() under the selftest harness as "vmx_preempt". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_preempt_vmx, "vmx_preempt");

	return status;
}

View File

@@ -0,0 +1,156 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the VMX registers are correctly reported in a
* signal context. Each worker just spins checking its VMX registers, at some
* point a signal will interrupt it and C code will check the signal context
* ensuring it is also the same.
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <altivec.h>
#include "utils.h"
/* Number of times each thread should receive the signal */
#define ITERATIONS 10
/*
 * Factor by which to multiply number of online CPUs for total number of
 * worker threads
 */
#define THREAD_FACTOR 8
/*
 * Per-thread values to load into the VMX registers. Each worker overwrites
 * all 12 vectors with rand() output before use.
 */
__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
{13,14,15,16},{17,18,19,20},{21,22,23,24},
{25,26,27,28},{29,30,31,32},{33,34,35,36},
{37,38,39,40},{41,42,43,44},{45,46,47,48}};
/* Set by the signal handler when a saved register differs from varray */
bool bad_context;
/* Non-zero while the workers should keep checking their registers */
int running;
/* Set to the worker count before the threads are created; the main thread waits for it to reach zero */
int threads_starting;
/* Assembly worker loop; its result is used as the worker's exit value */
extern int preempt_vmx(vector int *varray, int *threads_starting, int *sentinal);
/*
 * SIGUSR1 handler: compare v20-v31 as saved in the signal context with the
 * handling thread's varray. On the first difference, dump all twelve
 * registers next to their expected values and set bad_context.
 */
void signal_vmx_sig(int sig, siginfo_t *info, void *context)
{
	int i;
	ucontext_t *uc = context;
	mcontext_t *mc = &uc->uc_mcontext;

	/* Only the non volatiles were loaded up */
	for (i = 20; i < 32; i++) {
		if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
			int j;
			/*
			 * Shouldn't printf() in a signal handler, however, this is a
			 * test and we've detected failure. Understanding what failed
			 * is paramount. All that happens after this is tests exit with
			 * failure.
			 */
			printf("VMX mismatch at reg %d!\n", i);
			printf("Reg | Actual | Expected\n");
			for (j = 20; j < 32; j++) {
				/* Each element is a 32-bit word: print all 8 hex digits so the words line up */
				printf("%d | 0x%08x%08x%08x%08x | 0x%08x%08x%08x%08x\n", j, mc->v_regs->vrregs[j][0],
				       mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
				       (unsigned int) varray[j - 20][0], (unsigned int) varray[j - 20][1],
				       (unsigned int) varray[j - 20][2], (unsigned int) varray[j - 20][3]);
			}
			bad_context = true;
			break;
		}
	}
}
/*
 * Worker thread body: install the SIGUSR1 handler, fill this thread's varray
 * with random values and run the assembly check loop on it.
 *
 * Returns the result of preempt_vmx() as the thread's exit value, or a
 * non-zero value if the handler could not be installed.
 */
void *signal_vmx_c(void *p)
{
	int i, j;
	long rc;
	struct sigaction act;

	/* sa_mask must be defined too, not just the handler and the flags */
	sigemptyset(&act.sa_mask);
	act.sa_sigaction = signal_vmx_sig;
	act.sa_flags = SA_SIGINFO;
	rc = sigaction(SIGUSR1, &act, NULL);
	if (rc) {
		/*
		 * This worker never reaches preempt_vmx(), so do its start-up
		 * decrement here; otherwise the main thread would wait on
		 * threads_starting forever. Report the failure to the joiner.
		 */
		__sync_fetch_and_sub(&threads_starting, 1);
		return (void *) 1L;
	}

	srand(pthread_self());
	for (i = 0; i < 12; i++)
		for (j = 0; j < 4; j++)
			varray[i][j] = rand();

	rc = preempt_vmx(varray, &threads_starting, &running);

	return (void *) rc;
}
/*
 * Start THREAD_FACTOR workers per online CPU, send SIGUSR1 to every worker
 * ITERATIONS times (one second apart), then stop the workers and fail if any
 * worker exited non-zero or the handler flagged a bad context.
 */
int test_signal_vmx(void)
{
	int round, t, rc, threads;
	void *rc_p;
	pthread_t *tids;

	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
	tids = malloc(threads * sizeof(*tids));
	FAIL_IF(!tids);

	running = true;
	threads_starting = threads;
	t = 0;
	while (t < threads) {
		rc = pthread_create(tids + t, NULL, signal_vmx_c, NULL);
		FAIL_IF(rc);
		t++;
	}

	setbuf(stdout, NULL);
	printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
	for (; threads_starting; ) {
		asm volatile("": : :"memory");
		usleep(1000);
		printf(", %d", threads_starting);
	}
	printf(" ...done\n");

	printf("\tSending signals to all threads %d times...", ITERATIONS);
	round = 0;
	while (round < ITERATIONS) {
		for (t = 0; t < threads; t++)
			pthread_kill(tids[t], SIGUSR1);
		sleep(1);
		round++;
	}
	printf("done\n");

	printf("\tKilling workers...");
	running = 0;
	t = 0;
	while (t < threads) {
		pthread_join(tids[t], &rc_p);

		/*
		 * Harness will say the fail was here, look at why signal_vmx
		 * returned
		 */
		if ((long) rc_p || bad_context)
			printf("oops\n");
		if (bad_context)
			fprintf(stderr, "\t!! bad_context is true\n");
		FAIL_IF((long) rc_p || bad_context);
		t++;
	}
	printf("done\n");

	free(tids);
	return 0;
}
/* Run test_signal_vmx() under the selftest harness as "vmx_signal". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_signal_vmx, "vmx_signal");

	return status;
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This test attempts to see if the VMX registers change across a syscall (fork).
*/
#include <altivec.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "utils.h"
/* Values passed to test_vmx() on every iteration */
vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
{13,14,15,16},{17,18,19,20},{21,22,23,24},
{25,26,27,28},{29,30,31,32},{33,34,35,36},
{37,38,39,40},{41,42,43,44},{45,46,47,48}};
/* Assembly routine handed varray and the address that receives the fork result */
extern int test_vmx(vector int *varray, pid_t *pid);
/*
 * Call test_vmx() 1000 times. Each call forks: the child exits with its own
 * check result, the parent waits for it.
 *
 * Returns 0 if every round passed, 1 if the parent or a child reported a
 * failure, -1 if the fork result was -1.
 */
int vmx_syscall(void)
{
	pid_t fork_pid;
	int round;
	int parent_rc;
	int child_ret;

	round = 0;
	while (round < 1000) {
		/* test_vmx will fork() */
		parent_rc = test_vmx(varray, &fork_pid);
		switch (fork_pid) {
		case -1:
			return -1;
		case 0:
			exit(parent_rc);
		default:
			break;
		}
		waitpid(fork_pid, &child_ret, 0);
		if (parent_rc != 0 || child_ret != 0)
			return 1;
		round++;
	}

	return 0;
}
/*
 * Fork twice so that up to four processes run vmx_syscall() concurrently,
 * then fold the children's results into the original process and fail if
 * any of them reported a problem.
 */
int test_vmx_syscall(void)
{
	/*
	 * Setup an environment with much context switching
	 */
	pid_t pid2;
	pid_t pid = fork();
	int ret;
	int child_ret;

	FAIL_IF(pid == -1);

	pid2 = fork();
	/* Can't FAIL_IF(pid2 == -1); because we've already forked */
	if (pid2 == -1) {
		/*
		 * Couldn't fork, ensure child_ret is set and is a fail
		 */
		ret = child_ret = 1;
	} else {
		/* Only run the workload once the second fork is known to have worked */
		ret = vmx_syscall();
		if (pid2)
			waitpid(pid2, &child_ret, 0);
		else
			exit(ret);
	}

	ret |= child_ret;

	if (pid)
		waitpid(pid, &child_ret, 0);
	else
		exit(ret);

	FAIL_IF(ret || child_ret);
	return 0;
}
/* Run test_vmx_syscall() under the selftest harness as "vmx_syscall". */
int main(int argc, char *argv[])
{
	int status = test_harness(test_vmx_syscall, "vmx_syscall");

	return status;
}

View File

@@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc)
#ifdef __powerpc64__
ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
#else
ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL);
ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
#endif
/* Should segv on return because of invalid context */
segv_expected = 1;