drm/xe/bmg: Update Wa_22019338487
[ Upstream commit 84c0b4a00610afbde650fdb8ad6db0424f7b2cc3 ]
Limit GT max frequency to 2600MHz and wait for frequency to reduce
before proceeding with a transient flush. This is really only needed for
the transient flush: if L2 flush is needed due to 16023588340 then
there's no need to do this additional wait since we are already using
the bigger hammer.
v2: Use generic names, ensure user set max frequency requests wait
for flush to complete (Rodrigo)
v3:
- User requests wait via wait_var_event_timeout (Lucas)
- Close races on flush + user requests (Lucas)
- Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt
rather than root gt (Lucas)
v4:
- Only apply the freq reducing part if a TDF is needed: L2 flush trumps
the need for waiting a lower frequency
Fixes: aaa08078e7 ("drm/xe/bmg: Apply Wa_22019338487")
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-4-b888388477f2@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit deea6a7d6d803d6bb874a3e6f1b312e560e6c6df)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
beb89ada57
commit
994b0bc2a0
@@ -37,6 +37,7 @@
|
||||
#include "xe_gt_printk.h"
|
||||
#include "xe_gt_sriov_vf.h"
|
||||
#include "xe_guc.h"
|
||||
#include "xe_guc_pc.h"
|
||||
#include "xe_hw_engine_group.h"
|
||||
#include "xe_hwmon.h"
|
||||
#include "xe_irq.h"
|
||||
@@ -871,31 +872,37 @@ void xe_device_td_flush(struct xe_device *xe)
|
||||
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
|
||||
return;
|
||||
|
||||
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
|
||||
gt = xe_root_mmio_gt(xe);
|
||||
if (XE_WA(gt, 16023588340)) {
|
||||
/* A transient flush is not sufficient: flush the L2 */
|
||||
xe_device_l2_flush(xe);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
xe_guc_pc_apply_flush_freq_limit(>->uc.guc.pc);
|
||||
|
||||
/* Execute TDF flush on all graphics GTs */
|
||||
for_each_gt(gt, xe, id) {
|
||||
if (xe_gt_is_media_type(gt))
|
||||
continue;
|
||||
|
||||
for_each_gt(gt, xe, id) {
|
||||
if (xe_gt_is_media_type(gt))
|
||||
continue;
|
||||
if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
|
||||
return;
|
||||
|
||||
if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
|
||||
return;
|
||||
xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
|
||||
/*
|
||||
* FIXME: We can likely do better here with our choice of
|
||||
* timeout. Currently we just assume the worst case, i.e. 150us,
|
||||
* which is believed to be sufficient to cover the worst case
|
||||
* scenario on current platforms if all cache entries are
|
||||
* transient and need to be flushed..
|
||||
*/
|
||||
if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
|
||||
150, NULL, false))
|
||||
xe_gt_err_once(gt, "TD flush timeout\n");
|
||||
|
||||
xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
|
||||
/*
|
||||
* FIXME: We can likely do better here with our choice of
|
||||
* timeout. Currently we just assume the worst case, i.e. 150us,
|
||||
* which is believed to be sufficient to cover the worst case
|
||||
* scenario on current platforms if all cache entries are
|
||||
* transient and need to be flushed..
|
||||
*/
|
||||
if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
|
||||
150, NULL, false))
|
||||
xe_gt_err_once(gt, "TD flush timeout\n");
|
||||
|
||||
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
|
||||
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
|
||||
}
|
||||
|
||||
xe_guc_pc_remove_flush_freq_limit(&xe_root_mmio_gt(xe)->uc.guc.pc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "xe_guc_pc.h"
|
||||
|
||||
#include <linux/delay.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/wait_bit.h>
|
||||
|
||||
#include <drm/drm_managed.h>
|
||||
#include <generated/xe_wa_oob.h>
|
||||
@@ -47,6 +50,12 @@
|
||||
|
||||
#define LNL_MERT_FREQ_CAP 800
|
||||
#define BMG_MERT_FREQ_CAP 2133
|
||||
#define BMG_MIN_FREQ 1200
|
||||
#define BMG_MERT_FLUSH_FREQ_CAP 2600
|
||||
|
||||
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
|
||||
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
|
||||
#define SLPC_ACT_FREQ_TIMEOUT_MS 100
|
||||
|
||||
/**
|
||||
* DOC: GuC Power Conservation (PC)
|
||||
@@ -133,6 +142,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
static int wait_for_flush_complete(struct xe_guc_pc *pc)
|
||||
{
|
||||
const unsigned long timeout = msecs_to_jiffies(30);
|
||||
|
||||
if (!wait_var_event_timeout(&pc->flush_freq_limit,
|
||||
!atomic_read(&pc->flush_freq_limit),
|
||||
timeout))
|
||||
return -ETIMEDOUT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
|
||||
{
|
||||
int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
|
||||
int slept, wait = 10;
|
||||
|
||||
for (slept = 0; slept < timeout_us;) {
|
||||
if (xe_guc_pc_get_act_freq(pc) <= freq)
|
||||
return 0;
|
||||
|
||||
usleep_range(wait, wait << 1);
|
||||
slept += wait;
|
||||
wait <<= 1;
|
||||
if (slept + wait > timeout_us)
|
||||
wait = timeout_us - slept;
|
||||
}
|
||||
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
static int pc_action_reset(struct xe_guc_pc *pc)
|
||||
{
|
||||
struct xe_guc_ct *ct = pc_to_ct(pc);
|
||||
@@ -584,6 +623,11 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (XE_WA(pc_to_gt(pc), 22019338487)) {
|
||||
if (wait_for_flush_complete(pc) != 0)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
mutex_lock(&pc->freq_lock);
|
||||
if (!pc->freq_ready) {
|
||||
/* Might be in the middle of a gt reset */
|
||||
@@ -793,6 +837,106 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
|
||||
{
|
||||
struct xe_gt *gt = pc_to_gt(pc);
|
||||
|
||||
return XE_WA(gt, 22019338487) &&
|
||||
pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
|
||||
* @pc: the xe_guc_pc object
|
||||
*
|
||||
* As per the WA, reduce max GT frequency during L2 cache flush
|
||||
*/
|
||||
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
|
||||
{
|
||||
struct xe_gt *gt = pc_to_gt(pc);
|
||||
u32 max_freq;
|
||||
int ret;
|
||||
|
||||
if (!needs_flush_freq_limit(pc))
|
||||
return;
|
||||
|
||||
mutex_lock(&pc->freq_lock);
|
||||
|
||||
if (!pc->freq_ready) {
|
||||
mutex_unlock(&pc->freq_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = pc_action_query_task_state(pc);
|
||||
if (ret) {
|
||||
mutex_unlock(&pc->freq_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
max_freq = pc_get_max_freq(pc);
|
||||
if (max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
|
||||
ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
|
||||
if (ret) {
|
||||
xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
|
||||
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
|
||||
mutex_unlock(&pc->freq_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_set(&pc->flush_freq_limit, 1);
|
||||
|
||||
/*
|
||||
* If user has previously changed max freq, stash that value to
|
||||
* restore later, otherwise use the current max. New user
|
||||
* requests wait on flush.
|
||||
*/
|
||||
if (pc->user_requested_max != 0)
|
||||
pc->stashed_max_freq = pc->user_requested_max;
|
||||
else
|
||||
pc->stashed_max_freq = max_freq;
|
||||
}
|
||||
|
||||
mutex_unlock(&pc->freq_lock);
|
||||
|
||||
/*
|
||||
* Wait for actual freq to go below the flush cap: even if the previous
|
||||
* max was below cap, the current one might still be above it
|
||||
*/
|
||||
ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
|
||||
if (ret)
|
||||
xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
|
||||
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
|
||||
* @pc: the xe_guc_pc object
|
||||
*
|
||||
* Retrieve the previous GT max frequency value.
|
||||
*/
|
||||
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
|
||||
{
|
||||
struct xe_gt *gt = pc_to_gt(pc);
|
||||
int ret = 0;
|
||||
|
||||
if (!needs_flush_freq_limit(pc))
|
||||
return;
|
||||
|
||||
if (!atomic_read(&pc->flush_freq_limit))
|
||||
return;
|
||||
|
||||
mutex_lock(&pc->freq_lock);
|
||||
|
||||
ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq);
|
||||
if (ret)
|
||||
xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
|
||||
pc->stashed_max_freq, ret);
|
||||
|
||||
atomic_set(&pc->flush_freq_limit, 0);
|
||||
mutex_unlock(&pc->freq_lock);
|
||||
wake_up_var(&pc->flush_freq_limit);
|
||||
}
|
||||
|
||||
static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@@ -34,5 +34,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
|
||||
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
|
||||
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
|
||||
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
|
||||
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
|
||||
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
|
||||
|
||||
#endif /* _XE_GUC_PC_H_ */
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
struct xe_guc_pc {
|
||||
/** @bo: GGTT buffer object that is shared with GuC PC */
|
||||
struct xe_bo *bo;
|
||||
/** @flush_freq_limit: 1 when max freq changes are limited by driver */
|
||||
atomic_t flush_freq_limit;
|
||||
/** @rp0_freq: HW RP0 frequency - The Maximum one */
|
||||
u32 rp0_freq;
|
||||
/** @rpe_freq: HW RPe frequency - The Efficient one */
|
||||
|
||||
Reference in New Issue
Block a user