RAS/AMD/ATL: Include row[13] bit in row retirement
commit 6c44e5354d4d16d9d891a419ca3f57abfe18ce7a upstream.
Based on feedback from hardware folks, row[13] is one of the bits that vary
within a physical row (along with all of the column bits).
If this bit is not included, only half of the physical addresses affected by
a row are calculated.
Add the row[13] bit to the row retirement flow.
Fixes: 3b566b30b4 ("RAS/AMD/ATL: Add MI300 row retirement support")
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250401-fix-fmpm-extra-records-v1-1-840bcf7a8ac5@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
d23fd7a539
commit
b2748ae609
@@ -320,7 +320,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
|
|||||||
* See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
|
* See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
|
||||||
*/
|
*/
|
||||||
#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
|
#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
|
||||||
static void retire_row_mi300(struct atl_err *a_err)
|
static void _retire_row_mi300(struct atl_err *a_err)
|
||||||
{
|
{
|
||||||
unsigned long addr;
|
unsigned long addr;
|
||||||
struct page *p;
|
struct page *p;
|
||||||
@@ -351,6 +351,23 @@ static void retire_row_mi300(struct atl_err *a_err)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In addition to the column bits, the row[13] bit should also be included when
|
||||||
|
* calculating addresses affected by a physical row.
|
||||||
|
*
|
||||||
|
* Instead of running through another loop over a single bit, just run through
|
||||||
|
* the column bits twice and flip the row[13] bit in-between.
|
||||||
|
*
|
||||||
|
* See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value.
|
||||||
|
*/
|
||||||
|
#define MI300_UMC_MCA_ROW13 BIT(23)
|
||||||
|
static void retire_row_mi300(struct atl_err *a_err)
|
||||||
|
{
|
||||||
|
_retire_row_mi300(a_err);
|
||||||
|
a_err->addr ^= MI300_UMC_MCA_ROW13;
|
||||||
|
_retire_row_mi300(a_err);
|
||||||
|
}
|
||||||
|
|
||||||
void amd_retire_dram_row(struct atl_err *a_err)
|
void amd_retire_dram_row(struct atl_err *a_err)
|
||||||
{
|
{
|
||||||
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
|
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
|
||||||
|
|||||||
Reference in New Issue
Block a user