Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "This tree contains various perf fixes on the kernel side, plus three
  hw/event-enablement late additions:

   - Intel Memory Bandwidth Monitoring events and handling
   - the AMD Accumulated Power Mechanism reporting facility
   - more IOMMU events

  ... and a final round of perf tooling updates/fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  perf llvm: Use strerror_r instead of the thread unsafe strerror one
  perf llvm: Use realpath to canonicalize paths
  perf tools: Unexport some methods unused outside strbuf.c
  perf probe: No need to use formatting strbuf method
  perf help: Use asprintf instead of adhoc equivalents
  perf tools: Remove unused perf_pathdup, xstrdup functions
  perf tools: Do not include stringify.h from the kernel sources
  tools include: Copy linux/stringify.h from the kernel
  tools lib traceevent: Remove redundant CPU output
  perf tools: Remove needless 'extern' from function prototypes
  perf tools: Simplify die() mechanism
  perf tools: Remove unused DIE_IF macro
  perf script: Remove lots of unused arguments
  perf thread: Rename perf_event__preprocess_sample_addr to thread__resolve
  perf machine: Rename perf_event__preprocess_sample to machine__resolve
  perf tools: Add cpumode to struct perf_sample
  perf tests: Forward the perf_sample in the dwarf unwind test
  perf tools: Remove misplaced __maybe_unused
  perf list: Fix documentation of :ppp
  perf bench numa: Fix assertion for nodes bitfield
  ...
This commit is contained in:
@@ -376,8 +376,11 @@ static void update_perf_cpu_limits(void)
|
||||
u64 tmp = perf_sample_period_ns;
|
||||
|
||||
tmp *= sysctl_perf_cpu_time_max_percent;
|
||||
do_div(tmp, 100);
|
||||
ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
|
||||
tmp = div_u64(tmp, 100);
|
||||
if (!tmp)
|
||||
tmp = 1;
|
||||
|
||||
WRITE_ONCE(perf_sample_allowed_ns, tmp);
|
||||
}
|
||||
|
||||
static int perf_rotate_context(struct perf_cpu_context *cpuctx);
|
||||
@@ -409,7 +412,13 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
|
||||
if (ret || !write)
|
||||
return ret;
|
||||
|
||||
update_perf_cpu_limits();
|
||||
if (sysctl_perf_cpu_time_max_percent == 100) {
|
||||
printk(KERN_WARNING
|
||||
"perf: Dynamic interrupt throttling disabled, can hang your system!\n");
|
||||
WRITE_ONCE(perf_sample_allowed_ns, 0);
|
||||
} else {
|
||||
update_perf_cpu_limits();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -423,62 +432,68 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
|
||||
#define NR_ACCUMULATED_SAMPLES 128
|
||||
static DEFINE_PER_CPU(u64, running_sample_length);
|
||||
|
||||
static u64 __report_avg;
|
||||
static u64 __report_allowed;
|
||||
|
||||
static void perf_duration_warn(struct irq_work *w)
|
||||
{
|
||||
u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
|
||||
u64 avg_local_sample_len;
|
||||
u64 local_samples_len;
|
||||
|
||||
local_samples_len = __this_cpu_read(running_sample_length);
|
||||
avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
|
||||
|
||||
printk_ratelimited(KERN_WARNING
|
||||
"perf interrupt took too long (%lld > %lld), lowering "
|
||||
"kernel.perf_event_max_sample_rate to %d\n",
|
||||
avg_local_sample_len, allowed_ns >> 1,
|
||||
sysctl_perf_event_sample_rate);
|
||||
"perf: interrupt took too long (%lld > %lld), lowering "
|
||||
"kernel.perf_event_max_sample_rate to %d\n",
|
||||
__report_avg, __report_allowed,
|
||||
sysctl_perf_event_sample_rate);
|
||||
}
|
||||
|
||||
static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
|
||||
|
||||
void perf_sample_event_took(u64 sample_len_ns)
|
||||
{
|
||||
u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
|
||||
u64 avg_local_sample_len;
|
||||
u64 local_samples_len;
|
||||
u64 max_len = READ_ONCE(perf_sample_allowed_ns);
|
||||
u64 running_len;
|
||||
u64 avg_len;
|
||||
u32 max;
|
||||
|
||||
if (allowed_ns == 0)
|
||||
if (max_len == 0)
|
||||
return;
|
||||
|
||||
/* decay the counter by 1 average sample */
|
||||
local_samples_len = __this_cpu_read(running_sample_length);
|
||||
local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
|
||||
local_samples_len += sample_len_ns;
|
||||
__this_cpu_write(running_sample_length, local_samples_len);
|
||||
/* Decay the counter by 1 average sample. */
|
||||
running_len = __this_cpu_read(running_sample_length);
|
||||
running_len -= running_len/NR_ACCUMULATED_SAMPLES;
|
||||
running_len += sample_len_ns;
|
||||
__this_cpu_write(running_sample_length, running_len);
|
||||
|
||||
/*
|
||||
* note: this will be biased artificially low until we have
|
||||
* seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
|
||||
* Note: this will be biased artificially low until we have
|
||||
* seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
|
||||
* from having to maintain a count.
|
||||
*/
|
||||
avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
|
||||
|
||||
if (avg_local_sample_len <= allowed_ns)
|
||||
avg_len = running_len/NR_ACCUMULATED_SAMPLES;
|
||||
if (avg_len <= max_len)
|
||||
return;
|
||||
|
||||
if (max_samples_per_tick <= 1)
|
||||
return;
|
||||
__report_avg = avg_len;
|
||||
__report_allowed = max_len;
|
||||
|
||||
max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
|
||||
sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
|
||||
/*
|
||||
* Compute a throttle threshold 25% above the current average duration.
|
||||
*/
|
||||
avg_len += avg_len / 4;
|
||||
max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
|
||||
if (avg_len < max)
|
||||
max /= (u32)avg_len;
|
||||
else
|
||||
max = 1;
|
||||
|
||||
WRITE_ONCE(perf_sample_allowed_ns, avg_len);
|
||||
WRITE_ONCE(max_samples_per_tick, max);
|
||||
|
||||
sysctl_perf_event_sample_rate = max * HZ;
|
||||
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
|
||||
|
||||
update_perf_cpu_limits();
|
||||
|
||||
if (!irq_work_queue(&perf_duration_work)) {
|
||||
early_printk("perf interrupt took too long (%lld > %lld), lowering "
|
||||
early_printk("perf: interrupt took too long (%lld > %lld), lowering "
|
||||
"kernel.perf_event_max_sample_rate to %d\n",
|
||||
avg_local_sample_len, allowed_ns >> 1,
|
||||
__report_avg, __report_allowed,
|
||||
sysctl_perf_event_sample_rate);
|
||||
}
|
||||
}
|
||||
@@ -4210,6 +4225,14 @@ static void __perf_event_period(struct perf_event *event,
|
||||
active = (event->state == PERF_EVENT_STATE_ACTIVE);
|
||||
if (active) {
|
||||
perf_pmu_disable(ctx->pmu);
|
||||
/*
|
||||
* We could be throttled; unthrottle now to avoid the tick
|
||||
* trying to unthrottle while we already re-started the event.
|
||||
*/
|
||||
if (event->hw.interrupts == MAX_INTERRUPTS) {
|
||||
event->hw.interrupts = 0;
|
||||
perf_log_throttle(event, 1);
|
||||
}
|
||||
event->pmu->stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
@@ -9426,10 +9449,29 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
|
||||
case CPU_UP_PREPARE:
|
||||
/*
|
||||
* This must be done before the CPU comes alive, because the
|
||||
* moment we can run tasks we can encounter (software) events.
|
||||
*
|
||||
* Specifically, someone can have inherited events on kthreadd
|
||||
* or a pre-existing worker thread that gets re-bound.
|
||||
*/
|
||||
perf_event_init_cpu(cpu);
|
||||
break;
|
||||
|
||||
case CPU_DOWN_PREPARE:
|
||||
/*
|
||||
* This must be done before the CPU dies because after that an
|
||||
* active event might want to IPI the CPU and that'll not work
|
||||
* so great for dead CPUs.
|
||||
*
|
||||
* XXX smp_call_function_single() return -ENXIO without a warn
|
||||
* so we could possibly deal with this.
|
||||
*
|
||||
* This is safe against new events arriving because
|
||||
* sys_perf_event_open() serializes against hotplug using
|
||||
* get_online_cpus().
|
||||
*/
|
||||
perf_event_exit_cpu(cpu);
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -746,8 +746,10 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
|
||||
|
||||
rb->user_page = all_buf;
|
||||
rb->data_pages[0] = all_buf + PAGE_SIZE;
|
||||
rb->page_order = ilog2(nr_pages);
|
||||
rb->nr_pages = !!nr_pages;
|
||||
if (nr_pages) {
|
||||
rb->nr_pages = 1;
|
||||
rb->page_order = ilog2(nr_pages);
|
||||
}
|
||||
|
||||
ring_buffer_init(rb, watermark, flags);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user