From 6d0fc416c42a98b39a74151376928d577873941c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:28 -0800 Subject: [PATCH 1/9] cxl/trace: Pass UUID explicitly to event traces CXL CPER events are identified by the CPER Section Type GUID. The GUID correlates with the CXL UUID for the event record. It turns out that a CXL CPER record is a strict subset of the CXL event record, only the UUID header field is chopped. In order to unify handling between native and CPER flavors of CXL events, prepare the code for the UUID to be passed in rather than inferred from the record itself. Later patches update the passed in record to only refer to the common data between the formats. Pass the UUID explicitly to each trace event to be able to remove the UUID from the event structures. Originally it was desirable to remove the UUID from the well known event because the UUID value was redundant. However, the trace API was already in place.[1] Signed-off-by: Ira Weiny Link: https://lore.kernel.org/all/36f2d12934d64a278f2c0313cbd01abc@huawei.com [1] Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-1-1bb8a4ca2c7a@intel.com Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 8 ++++---- drivers/cxl/core/trace.h | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 36270dcfb42e..00f429c440df 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -870,19 +870,19 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, struct cxl_event_gen_media *rec = (struct cxl_event_gen_media *)record; - trace_cxl_general_media(cxlmd, type, rec); + trace_cxl_general_media(cxlmd, type, id, rec); } else if (uuid_equal(id, &dram_event_uuid)) { struct cxl_event_dram *rec = (struct cxl_event_dram *)record; - trace_cxl_dram(cxlmd, type, rec); + trace_cxl_dram(cxlmd, type, id, rec); } else if 
(uuid_equal(id, &mem_mod_event_uuid)) { struct cxl_event_mem_module *rec = (struct cxl_event_mem_module *)record; - trace_cxl_memory_module(cxlmd, type, rec); + trace_cxl_memory_module(cxlmd, type, id, rec); } else { /* For unknown record types print just the header */ - trace_cxl_generic_event(cxlmd, type, record); + trace_cxl_generic_event(cxlmd, type, id, record); } } diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a0b5819bc70b..3da16026b8db 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -198,12 +198,12 @@ TRACE_EVENT(cxl_overflow, __field(u8, hdr_length) \ __field(u8, hdr_maint_op_class) -#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ +#define CXL_EVT_TP_fast_assign(cxlmd, l, uuid, hdr) \ __assign_str(memdev, dev_name(&(cxlmd)->dev)); \ __assign_str(host, dev_name((cxlmd)->dev.parent)); \ __entry->log = (l); \ __entry->serial = (cxlmd)->cxlds->serial; \ - memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ + memcpy(&__entry->hdr_uuid, (uuid), sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ __entry->hdr_handle = le16_to_cpu((hdr).handle); \ @@ -225,9 +225,9 @@ TRACE_EVENT(cxl_overflow, TRACE_EVENT(cxl_generic_event, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_event_record_raw *rec), + const uuid_t *uuid, struct cxl_event_record_raw *rec), - TP_ARGS(cxlmd, log, rec), + TP_ARGS(cxlmd, log, uuid, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -235,7 +235,7 @@ TRACE_EVENT(cxl_generic_event, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); ), @@ -315,9 +315,9 @@ TRACE_EVENT(cxl_generic_event, TRACE_EVENT(cxl_general_media, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_event_gen_media *rec), + const uuid_t *uuid, struct cxl_event_gen_media 
*rec), - TP_ARGS(cxlmd, log, rec), + TP_ARGS(cxlmd, log, uuid, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -336,7 +336,7 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); /* General Media */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -398,9 +398,9 @@ TRACE_EVENT(cxl_general_media, TRACE_EVENT(cxl_dram, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_event_dram *rec), + const uuid_t *uuid, struct cxl_event_dram *rec), - TP_ARGS(cxlmd, log, rec), + TP_ARGS(cxlmd, log, uuid, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -422,7 +422,7 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); /* DRAM */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -547,9 +547,9 @@ TRACE_EVENT(cxl_dram, TRACE_EVENT(cxl_memory_module, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - struct cxl_event_mem_module *rec), + const uuid_t *uuid, struct cxl_event_mem_module *rec), - TP_ARGS(cxlmd, log, rec), + TP_ARGS(cxlmd, log, uuid, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -569,7 +569,7 @@ TRACE_EVENT(cxl_memory_module, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); /* Memory Module Event */ __entry->event_type = rec->event_type; From 26a1a86dd093a10d0653429bf013dae6e95dccbf Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:29 -0800 Subject: [PATCH 2/9] cxl/events: Promote CXL event structures to a core header UEFI code can process CXL events through CPER records. Those records use almost the same format as the CXL events. Lift the CXL event structures to a core header to be shared in later patches. 
[jic123: drop "CXL rev 3.0" mention] Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-2-1bb8a4ca2c7a@intel.com [djbw: add F: entry to maintainers for include/linux/cxl-event.h] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- MAINTAINERS | 1 + drivers/cxl/cxlmem.h | 90 +------------------------------------ include/linux/cxl-event.h | 95 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 89 deletions(-) create mode 100644 include/linux/cxl-event.h diff --git a/MAINTAINERS b/MAINTAINERS index 7cef2d2ef8d7..04f6c52d0a8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5245,6 +5245,7 @@ M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained F: drivers/cxl/ +F: include/linux/cxl-event.h F: include/uapi/linux/cxl_mem.h F: tools/testing/cxl/ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index a2fcbca253f3..f0e7ebb84f02 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -579,27 +580,6 @@ struct cxl_mbox_identify { u8 qos_telemetry_caps; } __packed; -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_hdr { - uuid_t id; - u8 length; - u8 flags[3]; - __le16 handle; - __le16 related_handle; - __le64 timestamp; - u8 maint_op_class; - u8 reserved[15]; -} __packed; - -#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_record_raw { - struct cxl_event_record_hdr hdr; - u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; -} __packed; - /* * Get Event Records output payload * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 @@ -641,74 +621,6 @@ struct cxl_mbox_clear_event_payload { } __packed; #define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 -struct cxl_event_gen_media { - 
struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; - u8 device[3]; - u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; -} __packed; - -/* - * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 - */ -#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 -struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; - u8 nibble_mask[3]; - u8 bank_group; - u8 bank; - u8 row[3]; - u8 column[2]; - u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; -} __packed; - -/* - * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 - */ -struct cxl_get_health_info { - u8 health_status; - u8 media_status; - u8 add_status; - u8 life_used; - u8 device_temp[2]; - u8 dirty_shutdown_cnt[4]; - u8 cor_vol_err_cnt[4]; - u8 cor_per_err_cnt[4]; -} __packed; - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -struct cxl_event_mem_module { - struct cxl_event_record_hdr hdr; - u8 event_type; - struct cxl_get_health_info info; - u8 reserved[0x3d]; -} __packed; - struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h new file mode 100644 index 000000000000..0fc068123f8e --- /dev/null +++ b/include/linux/cxl-event.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Intel Corporation. 
*/ +#ifndef _LINUX_CXL_EVENT_H +#define _LINUX_CXL_EVENT_H + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + uuid_t id; + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_record_raw { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + +#endif /* _LINUX_CXL_EVENT_H */ From 
4c115c9c1f81a6efe2bd68fcefec6836f7f3dc71 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:30 -0800 Subject: [PATCH 3/9] cxl/events: Create common event UUID defines Dan points out in review that the cxl_test code could be made better by using UUID defines rather than open coding the UUID values.[1] Create UUID defines and use them rather than open coding them. Suggested-by: Dan Williams Signed-off-by: Ira Weiny Link: http://lore.kernel.org/r/65738d09e30e2_45e0129451@dwillia2-xfh.jf.intel.com.notmuch [1] Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-3-1bb8a4ca2c7a@intel.com [djbw: clang-format uuid definitions] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 30 +++--------------------------- drivers/cxl/cxlmem.h | 24 ++++++++++++++++++++++++ tools/testing/cxl/test/mem.c | 9 +++------ 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 00f429c440df..1ccc3a56e0af 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -836,46 +836,22 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -static const uuid_t gen_media_event_uuid = - UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6); - -/* - * DRAM Event Record - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 - */ -static const uuid_t dram_event_uuid = - UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24); - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -static const uuid_t mem_mod_event_uuid = - UUID_INIT(0xfe927475, 0xdd59, 0x4339, - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74); - static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { uuid_t *id = 
&record->hdr.id; - if (uuid_equal(id, &gen_media_event_uuid)) { + if (uuid_equal(id, &CXL_EVENT_GEN_MEDIA_UUID)) { struct cxl_event_gen_media *rec = (struct cxl_event_gen_media *)record; trace_cxl_general_media(cxlmd, type, id, rec); - } else if (uuid_equal(id, &dram_event_uuid)) { + } else if (uuid_equal(id, &CXL_EVENT_DRAM_UUID)) { struct cxl_event_dram *rec = (struct cxl_event_dram *)record; trace_cxl_dram(cxlmd, type, id, rec); - } else if (uuid_equal(id, &mem_mod_event_uuid)) { + } else if (uuid_equal(id, &CXL_EVENT_MEM_MODULE_UUID)) { struct cxl_event_mem_module *rec = (struct cxl_event_mem_module *)record; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index f0e7ebb84f02..27575513ec68 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -580,6 +580,30 @@ struct cxl_mbox_identify { u8 qos_telemetry_caps; } __packed; +/* + * General Media Event Record UUID + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MEDIA_UUID \ + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, 0x85, 0xa9, 0x08, 0x8b, 0x16, \ + 0x21, 0xeb, 0xa6) + +/* + * DRAM Event Record UUID + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CXL_EVENT_DRAM_UUID \ + UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, \ + 0x5c, 0x96, 0x24) + +/* + * Memory Module Event Record UUID + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CXL_EVENT_MEM_MODULE_UUID \ + UUID_INIT(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, 0x79, 0xba, 0xb1, \ + 0x13, 0xb7, 0x74) + /* * Get Event Records output payload * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index ee61fa3a2411..5a95b04b329a 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -362,8 +362,7 @@ struct cxl_event_record_raw hardware_replace = { struct cxl_event_gen_media gen_media = { .hdr = { - .id = UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6), + .id = 
CXL_EVENT_GEN_MEDIA_UUID, .length = sizeof(struct cxl_event_gen_media), .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, /* .handle = Set dynamically */ @@ -380,8 +379,7 @@ struct cxl_event_gen_media gen_media = { struct cxl_event_dram dram = { .hdr = { - .id = UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), + .id = CXL_EVENT_DRAM_UUID, .length = sizeof(struct cxl_event_dram), .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, /* .handle = Set dynamically */ @@ -400,8 +398,7 @@ struct cxl_event_dram dram = { struct cxl_event_mem_module mem_module = { .hdr = { - .id = UUID_INIT(0xfe927475, 0xdd59, 0x4339, - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), + .id = CXL_EVENT_MEM_MODULE_UUID, .length = sizeof(struct cxl_event_mem_module), /* .handle = Set dynamically */ .related_handle = cpu_to_le16(0), From 207a1f82301de0b4123f00a8d26ea55bb2484757 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:31 -0800 Subject: [PATCH 4/9] cxl/events: Remove passing a UUID to known event traces The UUID data is redundant in the known event trace types. The addition of static defines allows the trace macros to create the UUID data inside the trace thus removing unnecessary code. Have well known trace events use static data to set the uuid field based on the event type. 
Suggested-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-4-1bb8a4ca2c7a@intel.com Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 6 +++--- drivers/cxl/core/trace.h | 28 ++++++++++++++++------------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 1ccc3a56e0af..5f3681de10de 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -846,16 +846,16 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, struct cxl_event_gen_media *rec = (struct cxl_event_gen_media *)record; - trace_cxl_general_media(cxlmd, type, id, rec); + trace_cxl_general_media(cxlmd, type, rec); } else if (uuid_equal(id, &CXL_EVENT_DRAM_UUID)) { struct cxl_event_dram *rec = (struct cxl_event_dram *)record; - trace_cxl_dram(cxlmd, type, id, rec); + trace_cxl_dram(cxlmd, type, rec); } else if (uuid_equal(id, &CXL_EVENT_MEM_MODULE_UUID)) { struct cxl_event_mem_module *rec = (struct cxl_event_mem_module *)record; - trace_cxl_memory_module(cxlmd, type, id, rec); + trace_cxl_memory_module(cxlmd, type, rec); } else { /* For unknown record types print just the header */ trace_cxl_generic_event(cxlmd, type, id, record); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 3da16026b8db..312cfa9e0004 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -181,6 +181,7 @@ TRACE_EVENT(cxl_overflow, * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign; * pass the dev, log, and CXL event header + * NOTE: The uuid must be assigned by the specific trace event * 3) Use CXL_EVT_TP_printk() instead of TP_printk() * * See the generic_event tracepoint as an example. 
@@ -198,12 +199,11 @@ TRACE_EVENT(cxl_overflow, __field(u8, hdr_length) \ __field(u8, hdr_maint_op_class) -#define CXL_EVT_TP_fast_assign(cxlmd, l, uuid, hdr) \ +#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev, dev_name(&(cxlmd)->dev)); \ __assign_str(host, dev_name((cxlmd)->dev.parent)); \ __entry->log = (l); \ __entry->serial = (cxlmd)->cxlds->serial; \ - memcpy(&__entry->hdr_uuid, (uuid), sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ __entry->hdr_handle = le16_to_cpu((hdr).handle); \ @@ -235,7 +235,8 @@ TRACE_EVENT(cxl_generic_event, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, uuid, sizeof(uuid_t)); memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); ), @@ -315,9 +316,9 @@ TRACE_EVENT(cxl_generic_event, TRACE_EVENT(cxl_general_media, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - const uuid_t *uuid, struct cxl_event_gen_media *rec), + struct cxl_event_gen_media *rec), - TP_ARGS(cxlmd, log, uuid, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -336,7 +337,8 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_GEN_MEDIA_UUID, sizeof(uuid_t)); /* General Media */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -398,9 +400,9 @@ TRACE_EVENT(cxl_general_media, TRACE_EVENT(cxl_dram, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - const uuid_t *uuid, struct cxl_event_dram *rec), + struct cxl_event_dram *rec), - TP_ARGS(cxlmd, log, uuid, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -422,7 +424,8 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, 
rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_DRAM_UUID, sizeof(uuid_t)); /* DRAM */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -547,9 +550,9 @@ TRACE_EVENT(cxl_dram, TRACE_EVENT(cxl_memory_module, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - const uuid_t *uuid, struct cxl_event_mem_module *rec), + struct cxl_event_mem_module *rec), - TP_ARGS(cxlmd, log, uuid, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -569,7 +572,8 @@ TRACE_EVENT(cxl_memory_module, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, uuid, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + memcpy(&__entry->hdr_uuid, &CXL_EVENT_MEM_MODULE_UUID, sizeof(uuid_t)); /* Memory Module Event */ __entry->event_type = rec->event_type; From 6eade110754c085cee9e46f4d87d2c3ea4e59e8c Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:32 -0800 Subject: [PATCH 5/9] cxl/events: Separate UUID from event structures The UEFI CXL CPER structure does not include the UUID. Now that the UUID is passed separately to the trace event, there is no need to have the UUID in those structures. Move the UUID from the event record header to the raw structures. Adjust cxl-test to create dummy structures for creating test records. 
Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-5-1bb8a4ca2c7a@intel.com Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- include/linux/cxl-event.h | 6 +- tools/testing/cxl/test/mem.c | 129 ++++++++++++++++++++--------------- 3 files changed, 81 insertions(+), 56 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 5f3681de10de..4c5161896826 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -840,7 +840,7 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { - uuid_t *id = &record->hdr.id; + uuid_t *id = &record->id; if (uuid_equal(id, &CXL_EVENT_GEN_MEDIA_UUID)) { struct cxl_event_gen_media *rec = diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 0fc068123f8e..3d9b5954d0c1 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -8,7 +8,6 @@ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 */ struct cxl_event_record_hdr { - uuid_t id; u8 length; u8 flags[3]; __le16 handle; @@ -18,8 +17,13 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_record_raw { + uuid_t id; struct cxl_event_record_hdr hdr; u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; } __packed; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 5a95b04b329a..9cc2b8ce1efd 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -337,9 +337,9 @@ static void cxl_mock_event_trigger(struct device *dev) } struct cxl_event_record_raw maint_needed = { + .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB, + 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), .hdr = { - .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB, - 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), 
.length = sizeof(struct cxl_event_record_raw), .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, /* .handle = Set dynamically */ @@ -349,9 +349,9 @@ struct cxl_event_record_raw maint_needed = { }; struct cxl_event_record_raw hardware_replace = { + .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E, + 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), .hdr = { - .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E, - 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), .length = sizeof(struct cxl_event_record_raw), .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE, /* .handle = Set dynamically */ @@ -360,61 +360,82 @@ struct cxl_event_record_raw hardware_replace = { .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }; -struct cxl_event_gen_media gen_media = { - .hdr = { - .id = CXL_EVENT_GEN_MEDIA_UUID, - .length = sizeof(struct cxl_event_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), +struct cxl_test_gen_media { + uuid_t id; + struct cxl_event_gen_media rec; +} __packed; + +struct cxl_test_gen_media gen_media = { + .id = CXL_EVENT_GEN_MEDIA_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = */ + .channel = 1, + .rank = 30 }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = */ - .channel = 1, - .rank = 30 }; -struct cxl_event_dram dram = { - .hdr = { - .id = CXL_EVENT_DRAM_UUID, - .length = sizeof(struct cxl_event_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - 
.related_handle = cpu_to_le16(0), +struct cxl_test_dram { + uuid_t id; + struct cxl_event_dram rec; +} __packed; + +struct cxl_test_dram dram = { + .id = CXL_EVENT_DRAM_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = */ + .channel = 1, + .bank_group = 5, + .bank = 2, + .column = {0xDE, 0xAD}, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = */ - .channel = 1, - .bank_group = 5, - .bank = 2, - .column = {0xDE, 0xAD}, }; -struct cxl_event_mem_module mem_module = { - .hdr = { - .id = CXL_EVENT_MEM_MODULE_UUID, - .length = sizeof(struct cxl_event_mem_module), - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), +struct cxl_test_mem_module { + uuid_t id; + struct cxl_event_mem_module rec; +} __packed; + +struct cxl_test_mem_module mem_module = { + .id = CXL_EVENT_MEM_MODULE_UUID, + .rec = { + .hdr = { + .length = sizeof(struct cxl_test_mem_module), + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .event_type = CXL_MMER_TEMP_CHANGE, + .info = { + .health_status = CXL_DHI_HS_PERFORMANCE_DEGRADED, + .media_status = CXL_DHI_MS_ALL_DATA_LOST, + .add_status = (CXL_DHI_AS_CRITICAL << 2) | + (CXL_DHI_AS_WARNING << 4) | + (CXL_DHI_AS_WARNING << 5), + .device_temp = { 0xDE, 0xAD}, + .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, + .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, + .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, + } }, - .event_type = CXL_MMER_TEMP_CHANGE, - .info = { - .health_status = 
CXL_DHI_HS_PERFORMANCE_DEGRADED, - .media_status = CXL_DHI_MS_ALL_DATA_LOST, - .add_status = (CXL_DHI_AS_CRITICAL << 2) | - (CXL_DHI_AS_WARNING << 4) | - (CXL_DHI_AS_WARNING << 5), - .device_temp = { 0xDE, 0xAD}, - .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, - .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - } }; static int mock_set_timestamp(struct cxl_dev_state *cxlds, @@ -436,11 +457,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.validity_flags); + &gen_media.rec.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.validity_flags); + &dram.rec.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, From f9c683386f5bc0364615138ce2b14be50848dbcf Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:33 -0800 Subject: [PATCH 6/9] cxl/events: Create a CXL event union The CXL CPER and event log records share everything but a UUID/GUID in their structures. Define a cxl_event union without the UUID/GUID to be shared between the CPER and event log record formats. Adjust the code to use this union. 
Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-6-1bb8a4ca2c7a@intel.com Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 32 +++++++++++++------------------- drivers/cxl/core/trace.h | 8 ++++---- include/linux/cxl-event.h | 23 +++++++++++++++++------ tools/testing/cxl/test/mem.c | 31 ++++++++++++++++++------------- 4 files changed, 52 insertions(+), 42 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 4c5161896826..06957696247b 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -840,26 +840,17 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { + union cxl_event *evt = &record->event; uuid_t *id = &record->id; - if (uuid_equal(id, &CXL_EVENT_GEN_MEDIA_UUID)) { - struct cxl_event_gen_media *rec = - (struct cxl_event_gen_media *)record; - - trace_cxl_general_media(cxlmd, type, rec); - } else if (uuid_equal(id, &CXL_EVENT_DRAM_UUID)) { - struct cxl_event_dram *rec = (struct cxl_event_dram *)record; - - trace_cxl_dram(cxlmd, type, rec); - } else if (uuid_equal(id, &CXL_EVENT_MEM_MODULE_UUID)) { - struct cxl_event_mem_module *rec = - (struct cxl_event_mem_module *)record; - - trace_cxl_memory_module(cxlmd, type, rec); - } else { - /* For unknown record types print just the header */ - trace_cxl_generic_event(cxlmd, type, id, record); - } + if (uuid_equal(id, &CXL_EVENT_GEN_MEDIA_UUID)) + trace_cxl_general_media(cxlmd, type, &evt->gen_media); + else if (uuid_equal(id, &CXL_EVENT_DRAM_UUID)) + trace_cxl_dram(cxlmd, type, &evt->dram); + else if (uuid_equal(id, &CXL_EVENT_MEM_MODULE_UUID)) + trace_cxl_memory_module(cxlmd, type, &evt->mem_module); + else + trace_cxl_generic_event(cxlmd, type, id, &evt->generic); } static int cxl_clear_event_record(struct cxl_memdev_state *mds, @@ -902,7 +893,10 @@ static int cxl_clear_event_record(struct 
cxl_memdev_state *mds, */ i = 0; for (cnt = 0; cnt < total; cnt++) { - payload->handles[i++] = get_pl->records[cnt].hdr.handle; + struct cxl_event_record_raw *raw = &get_pl->records[cnt]; + struct cxl_event_generic *gen = &raw->event.generic; + + payload->handles[i++] = gen->hdr.handle; dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log, le16_to_cpu(payload->handles[i])); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 312cfa9e0004..89445435303a 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -225,9 +225,9 @@ TRACE_EVENT(cxl_overflow, TRACE_EVENT(cxl_generic_event, TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, - const uuid_t *uuid, struct cxl_event_record_raw *rec), + const uuid_t *uuid, struct cxl_event_generic *gen_rec), - TP_ARGS(cxlmd, log, uuid, rec), + TP_ARGS(cxlmd, log, uuid, gen_rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -235,9 +235,9 @@ TRACE_EVENT(cxl_generic_event, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, gen_rec->hdr); memcpy(&__entry->hdr_uuid, uuid, sizeof(uuid_t)); - memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); + memcpy(__entry->data, gen_rec->data, CXL_EVENT_RECORD_DATA_LENGTH); ), CXL_EVT_TP_printk("%s", diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 3d9b5954d0c1..4d6c05f535f8 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -17,13 +17,8 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_record_raw { - uuid_t id; +struct cxl_event_generic { struct cxl_event_record_hdr hdr; u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; } __packed; @@ -96,4 +91,20 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; +union cxl_event { + struct cxl_event_generic generic; + struct 
cxl_event_gen_media gen_media; + struct cxl_event_dram dram; + struct cxl_event_mem_module mem_module; +} __packed; + +/* + * Common Event Record Format; in event logs + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_raw { + uuid_t id; + union cxl_event event; +} __packed; + #endif /* _LINUX_CXL_EVENT_H */ diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 9cc2b8ce1efd..35ee41e435ab 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -251,7 +251,8 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) { memcpy(&pl->records[i], event_get_current(log), sizeof(pl->records[i])); - pl->records[i].hdr.handle = event_get_cur_event_handle(log); + pl->records[i].event.generic.hdr.handle = + event_get_cur_event_handle(log); log->cur_idx++; } @@ -339,25 +340,29 @@ static void cxl_mock_event_trigger(struct device *dev) struct cxl_event_record_raw maint_needed = { .id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB, 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), - .hdr = { - .length = sizeof(struct cxl_event_record_raw), - .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0xa5b6), + .event.generic = { + .hdr = { + .length = sizeof(struct cxl_event_record_raw), + .flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0xa5b6), + }, + .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }, - .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }; struct cxl_event_record_raw hardware_replace = { .id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E, 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5), - .hdr = { - .length = sizeof(struct cxl_event_record_raw), - .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0xb6a5), + .event.generic = { + .hdr = { + .length = sizeof(struct 
cxl_event_record_raw), + .flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0xb6a5), + }, + .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }, - .data = { 0xDE, 0xAD, 0xBE, 0xEF }, }; struct cxl_test_gen_media { From 671a794c33c6e048ca5cedd5ad6af44d52d5d7e5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:34 -0800 Subject: [PATCH 7/9] acpi/ghes: Process CXL Component Events BIOS can configure memory devices as firmware first. This will send CXL events to the firmware instead of the OS. The firmware can then send these events to the OS via UEFI. UEFI v2.10 section N.2.14 defines a Common Platform Error Record (CPER) format for CXL Component Events. The format is mostly the same as the CXL Common Event Record Format. The difference is the use of a GUID in the Section Type rather than a UUID as part of the event itself. Add GHES support to detect CXL CPER records and call a registered callback with the event. A notifier chain was considered for the callback but the complexity did not justify the use case as only the CXL subsystem requires this event. Enforce that only one callback can be registered at any time. Cc: Ard Biesheuvel Cc: Rafael J. 
Wysocki Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-7-1bb8a4ca2c7a@intel.com [djbw: fixup checkpatch errors] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/acpi/apei/ghes.c | 89 +++++++++++++++++++++++++++++++++++++++ include/linux/cxl-event.h | 50 ++++++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 63ad0541db38..56a5d2ef9e0a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -657,6 +658,78 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } +/* + * Only a single callback can be registered for CXL CPER events. + */ +static DECLARE_RWSEM(cxl_cper_rw_sem); +static cxl_cper_callback cper_callback; + +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) + +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + +static void cxl_cper_post_event(enum cxl_event_type event_type, + struct cxl_cper_event_rec *rec) +{ + if (rec->hdr.length <= sizeof(rec->hdr) || + rec->hdr.length > sizeof(*rec)) { + pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", + rec->hdr.length); + return; + } + + if (!(rec->hdr.validation_bits & 
CPER_CXL_COMP_EVENT_LOG_VALID)) { + pr_err(FW_WARN "CXL CPER invalid event\n"); + return; + } + + guard(rwsem_read)(&cxl_cper_rw_sem); + if (cper_callback) + cper_callback(event_type, rec); +} + +int cxl_cper_register_callback(cxl_cper_callback callback) +{ + guard(rwsem_write)(&cxl_cper_rw_sem); + if (cper_callback) + return -EINVAL; + cper_callback = callback; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); + +int cxl_cper_unregister_callback(cxl_cper_callback callback) +{ + guard(rwsem_write)(&cxl_cper_rw_sem); + if (callback != cper_callback) + return -EINVAL; + cper_callback = NULL; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); + static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -690,6 +763,22 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); + } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); + } else if (guid_equal(sec_type, + &CPER_SEC_CXL_MEM_MODULE_GUID)) { + struct cxl_cper_event_rec *rec = + acpi_hest_get_payload(gdata); + + cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 4d6c05f535f8..95841750a383 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -107,4 +107,54 @@ struct cxl_event_record_raw { union cxl_event event; } __packed; +enum cxl_event_type { + CXL_CPER_EVENT_GEN_MEDIA, + CXL_CPER_EVENT_DRAM, + CXL_CPER_EVENT_MEM_MODULE, +}; + +#define CPER_CXL_DEVICE_ID_VALID BIT(0) +#define 
CPER_CXL_DEVICE_SN_VALID BIT(1) +#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) +struct cxl_cper_event_rec { + struct { + u32 length; + u64 validation_bits; + struct cper_cxl_event_devid { + u16 vendor_id; + u16 device_id; + u8 func_num; + u8 device_num; + u8 bus_num; + u16 segment_num; + u16 slot_num; /* bits 2:0 reserved */ + u8 reserved; + } __packed device_id; + struct cper_cxl_event_sn { + u32 lower_dw; + u32 upper_dw; + } __packed dev_serial_num; + } __packed hdr; + + union cxl_event event; +} __packed; + +typedef void (*cxl_cper_callback)(enum cxl_event_type type, + struct cxl_cper_event_rec *rec); + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_callback(cxl_cper_callback callback); +int cxl_cper_unregister_callback(cxl_cper_callback callback); +#else +static inline int cxl_cper_register_callback(cxl_cper_callback callback) +{ + return 0; +} + +static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) +{ + return 0; +} +#endif + #endif /* _LINUX_CXL_EVENT_H */ From ced085ef369af7a2b6da962ec2fbd01339f60693 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:35 -0800 Subject: [PATCH 8/9] PCI: Introduce cleanup helpers for device reference counts and locks The "goto error" pattern is notorious for introducing subtle resource leaks. Use the new cleanup.h helpers for PCI device reference counts and locks. Similar to the new put_device() and device_lock() cleanup helpers, __free(put_device) and guard(device), define the same for PCI devices, __free(pci_dev_put) and guard(pci_dev). These helpers eliminate the need for "goto free;" and "goto unlock;" patterns. For example, A 'struct pci_dev *' instance declared as: struct pci_dev *pdev __free(pci_dev_put) = NULL; ...will automatically call pci_dev_put() if @pdev is non-NULL when @pdev goes out of scope (automatic variable scope). 
If a function wants to invoke pci_dev_put() on error, but return @pdev on success, it can do: return no_free_ptr(pdev); ...or: return_ptr(pdev); As for the cleanup opportunity, there are 587 open-coded calls to pci_dev_put() in the kernel, 65 of them within 10 lines of a goto statement, and the CXL driver is threatening to add another one. The guard() helper holds the associated lock for the remainder of the current scope in which it was invoked. So, for example: func(...) { if (...) { ... guard(pci_dev)(pdev); /* pci_dev_lock() invoked here */ ... } /* <- implied pci_dev_unlock() triggered here */ } There are 15 invocations of pci_dev_unlock() in the kernel, 5 of them within 10 lines of a goto statement. Again, the CXL driver is threatening to add another. Introduce these helpers to preclude the addition of new, more error-prone goto put; / goto unlock; sequences. For now, these helpers are used in drivers/cxl/pci.c to allow ACPI error reports to be fed back into the CXL driver associated with the PCI device identified in the report. 
Cc: Bjorn Helgaas Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-8-1bb8a4ca2c7a@intel.com [djbw: rewrite changelog] Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index dea043bc1e38..0d23d2e0eb1a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1170,6 +1170,7 @@ int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); struct pci_dev *pci_dev_get(struct pci_dev *dev); void pci_dev_put(struct pci_dev *dev); +DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) void pci_remove_bus(struct pci_bus *b); void pci_stop_and_remove_bus_device(struct pci_dev *dev); void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); @@ -1874,6 +1875,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); void pci_dev_lock(struct pci_dev *dev); int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); +DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), From dc97f6344f205b0dfa144e1b3e16d6dc05383d57 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 20 Dec 2023 16:17:36 -0800 Subject: [PATCH 9/9] cxl/pci: Register for and process CPER events If the firmware has configured CXL event support to be firmware first the OS can process those events through CPER records. The CXL layer has unique DPA to HPA knowledge and standard event trace parsing in place. CPER records contain Bus, Device, Function information which can be used to identify the PCI device which is sending the event. Change the PCI driver registration to include registration of a CXL CPER callback to process events through the trace subsystem. 
Use new scoped based management to simplify the handling of the PCI device object. Tested-by: Smita-Koralahalli Reviewed-by: Smita-Koralahalli Link: https://lore.kernel.org/r/20231220-cxl-cper-v5-9-1bb8a4ca2c7a@intel.com Signed-off-by: Ira Weiny [djbw: use new pci_dev guard, flip init order] Reviewed-by: Jonathan Cameron Acked-by: Ard Biesheuvel Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 40 +++++++++++++++++++-------- drivers/cxl/cxlmem.h | 4 +++ drivers/cxl/pci.c | 58 ++++++++++++++++++++++++++++++++++++++- include/linux/cxl-event.h | 1 + 4 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 06957696247b..23021920aace 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -836,21 +836,37 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); -static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, - enum cxl_event_log_type type, - struct cxl_event_record_raw *record) +void cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt) { - union cxl_event *evt = &record->event; - uuid_t *id = &record->id; - - if (uuid_equal(id, &CXL_EVENT_GEN_MEDIA_UUID)) + if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, &evt->gen_media); - else if (uuid_equal(id, &CXL_EVENT_DRAM_UUID)) + else if (event_type == CXL_CPER_EVENT_DRAM) trace_cxl_dram(cxlmd, type, &evt->dram); - else if (uuid_equal(id, &CXL_EVENT_MEM_MODULE_UUID)) + else if (event_type == CXL_CPER_EVENT_MEM_MODULE) trace_cxl_memory_module(cxlmd, type, &evt->mem_module); else - trace_cxl_generic_event(cxlmd, type, id, &evt->generic); + trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic); +} +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL); + +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + struct cxl_event_record_raw 
*record) +{ + enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC; + const uuid_t *uuid = &record->id; + + if (uuid_equal(uuid, &CXL_EVENT_GEN_MEDIA_UUID)) + ev_type = CXL_CPER_EVENT_GEN_MEDIA; + else if (uuid_equal(uuid, &CXL_EVENT_DRAM_UUID)) + ev_type = CXL_CPER_EVENT_DRAM; + else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID)) + ev_type = CXL_CPER_EVENT_MEM_MODULE; + + cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event); } static int cxl_clear_event_record(struct cxl_memdev_state *mds, @@ -961,8 +977,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, break; for (i = 0; i < nr_rec; i++) - cxl_event_trace_record(cxlmd, type, - &payload->records[i]); + __cxl_event_trace_record(cxlmd, type, + &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) trace_cxl_overflow(cxlmd, type, payload); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 27575513ec68..3c201324a3b3 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -802,6 +802,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); +void cxl_event_trace_record(const struct cxl_memdev *cxlmd, + enum cxl_event_log_type type, + enum cxl_event_type event_type, + const uuid_t *uuid, union cxl_event *evt); int cxl_set_timestamp(struct cxl_memdev_state *mds); int cxl_poison_state_init(struct cxl_memdev_state *mds); int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0155fb66b580..4fd1f207c84e 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. 
*/ +#include #include #include #include @@ -969,6 +970,61 @@ static struct pci_driver cxl_pci_driver = { }, }; +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) +static void cxl_cper_event_call(enum cxl_event_type ev_type, + struct cxl_cper_event_rec *rec) +{ + struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; + struct pci_dev *pdev __free(pci_dev_put) = NULL; + enum cxl_event_log_type log_type; + struct cxl_dev_state *cxlds; + unsigned int devfn; + u32 hdr_flags; + + devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); + pdev = pci_get_domain_bus_and_slot(device_id->segment_num, + device_id->bus_num, devfn); + if (!pdev) + return; + + guard(pci_dev)(pdev); + if (pdev->driver != &cxl_pci_driver) + return; + + cxlds = pci_get_drvdata(pdev); + if (!cxlds) + return; + + /* Fabricate a log type */ + hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); + log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); + + cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, + &uuid_null, &rec->event); +} + +static int __init cxl_pci_driver_init(void) +{ + int rc; + + rc = cxl_cper_register_callback(cxl_cper_event_call); + if (rc) + return rc; + + rc = pci_register_driver(&cxl_pci_driver); + if (rc) + cxl_cper_unregister_callback(cxl_cper_event_call); + + return rc; +} + +static void __exit cxl_pci_driver_exit(void) +{ + pci_unregister_driver(&cxl_pci_driver); + cxl_cper_unregister_callback(cxl_cper_event_call); +} + +module_init(cxl_pci_driver_init); +module_exit(cxl_pci_driver_exit); MODULE_LICENSE("GPL v2"); -module_pci_driver(cxl_pci_driver); MODULE_IMPORT_NS(CXL); diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 95841750a383..91125eca4c8a 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -108,6 +108,7 @@ struct cxl_event_record_raw { } __packed; enum cxl_event_type { + CXL_CPER_EVENT_GENERIC, CXL_CPER_EVENT_GEN_MEDIA, CXL_CPER_EVENT_DRAM, CXL_CPER_EVENT_MEM_MODULE,