From 16e87fe23d4af6df920406494ced5c0f4354567b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 6 Mar 2024 13:01:04 +0800 Subject: [PATCH 01/55] nouveau/dmem: handle kcalloc() allocation failure The kcalloc() in nouveau_dmem_evict_chunk() will return null if the physical memory has run out. As a result, if we dereference src_pfns, dst_pfns or dma_addrs, the null pointer dereference bugs will happen. Moreover, the GPU is going away. If the kcalloc() fails, we could not evict all pages mapping a chunk. So this patch adds a __GFP_NOFAIL flag in kcalloc(). Finally, as there is no need to have physically contiguous memory, this patch switches kcalloc() to kvcalloc() in order to avoid failing allocations. CC: # v6.1 Fixes: 249881232e14 ("nouveau/dmem: evict device private memory during release") Suggested-by: Danilo Krummrich Signed-off-by: Duoming Zhou Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240306050104.11259-1-duoming@zju.edu.cn --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 12feecf71e75..6fb65b01d778 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -378,9 +378,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) dma_addr_t *dma_addrs; struct nouveau_fence *fence; - src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL); - dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL); - dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL); + src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL); + dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL); + dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL); migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT, npages); @@ -406,11 +406,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) migrate_device_pages(src_pfns, dst_pfns, npages); nouveau_dmem_fence_done(&fence); migrate_device_finalize(src_pfns, dst_pfns, npages); - kfree(src_pfns); - kfree(dst_pfns); + kvfree(src_pfns); + kvfree(dst_pfns); for (i = 0; i < npages; i++) dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL); - kfree(dma_addrs); + kvfree(dma_addrs); } void From f34e8bb7d6c6626933fe993e03ed59ae85e16abb Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 14 Mar 2024 22:39:26 -0400 Subject: [PATCH 02/55] drm/sched: fix null-ptr-deref in init entity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug can be triggered by sending an amdgpu_cs_wait_ioctl to the AMDGPU DRM driver on any ASICs with valid context. The bug was reported by Joonkyo Jung . For example the following code: static void Syzkaller2(int fd) { union drm_amdgpu_ctx arg1; union drm_amdgpu_wait_cs arg2; arg1.in.op = AMDGPU_CTX_OP_ALLOC_CTX; ret = drmIoctl(fd, 0x140106442 /* amdgpu_ctx_ioctl */, &arg1); arg2.in.handle = 0x0; arg2.in.timeout = 0x2000000000000; arg2.in.ip_type = AMD_IP_VPE /* 0x9 */; arg2->in.ip_instance = 0x0; arg2.in.ring = 0x0; arg2.in.ctx_id = arg1.out.alloc.ctx_id; drmIoctl(fd, 0xc0206449 /* AMDGPU_WAIT_CS * /, &arg2); } The ioctl AMDGPU_WAIT_CS without previously submitted job could be assumed that the error should be returned, but the following commit 1decbf6bb0b4dc56c9da6c5e57b994ebfc2be3aa modified the logic and allowed to have sched_rq equal to NULL. As a result when there is no job the ioctl AMDGPU_WAIT_CS returns success. The change fixes null-ptr-deref in init entity and the stack below demonstrates the error condition: [ +0.000007] BUG: kernel NULL pointer dereference, address: 0000000000000028 [ +0.007086] #PF: supervisor read access in kernel mode [ +0.005234] #PF: error_code(0x0000) - not-present page [ +0.005232] PGD 0 P4D 0 [ +0.002501] Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI [ +0.005034] CPU: 10 PID: 9229 Comm: amd_basic Tainted: G B W L 6.7.0+ #4 [ +0.007797] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.009798] RIP: 0010:drm_sched_entity_init+0x2d3/0x420 [gpu_sched] [ +0.006426] Code: 80 00 00 00 00 00 00 00 e8 1a 81 82 e0 49 89 9c 24 c0 00 00 00 4c 89 ef e8 4a 80 82 e0 49 8b 5d 00 48 8d 7b 28 e8 3d 80 82 e0 <48> 83 7b 28 00 0f 84 28 01 00 00 4d 8d ac 24 98 00 00 00 49 8d 5c [ +0.019094] RSP: 0018:ffffc90014c1fa40 EFLAGS: 00010282 [ +0.005237] RAX: 0000000000000001 RBX: 0000000000000000 RCX: ffffffff8113f3fa [ +0.007326] RDX: fffffbfff0a7889d RSI: 0000000000000008 RDI: ffffffff853c44e0 [ +0.007264] RBP: ffffc90014c1fa80 R08: 0000000000000001 R09: fffffbfff0a7889c [ +0.007266] R10: ffffffff853c44e7 R11: 0000000000000001 R12: ffff8881a719b010 [ +0.007263] R13: ffff88810d412748 R14: 0000000000000002 R15: 0000000000000000 [ +0.007264] FS: 00007ffff7045540(0000) GS:ffff8883cc900000(0000) knlGS:0000000000000000 [ +0.008236] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.005851] CR2: 0000000000000028 CR3: 000000011912e000 CR4: 0000000000350ef0 [ +0.007175] Call Trace: [ +0.002561] [ +0.002141] ? show_regs+0x6a/0x80 [ +0.003473] ? __die+0x25/0x70 [ +0.003124] ? page_fault_oops+0x214/0x720 [ +0.004179] ? preempt_count_sub+0x18/0xc0 [ +0.004093] ? __pfx_page_fault_oops+0x10/0x10 [ +0.004590] ? srso_return_thunk+0x5/0x5f [ +0.004000] ? vprintk_default+0x1d/0x30 [ +0.004063] ? srso_return_thunk+0x5/0x5f [ +0.004087] ? vprintk+0x5c/0x90 [ +0.003296] ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched] [ +0.005807] ? srso_return_thunk+0x5/0x5f [ +0.004090] ? _printk+0xb3/0xe0 [ +0.003293] ? __pfx__printk+0x10/0x10 [ +0.003735] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20 [ +0.005482] ? do_user_addr_fault+0x345/0x770 [ +0.004361] ? exc_page_fault+0x64/0xf0 [ +0.003972] ? asm_exc_page_fault+0x27/0x30 [ +0.004271] ? add_taint+0x2a/0xa0 [ +0.003476] ? drm_sched_entity_init+0x2d3/0x420 [gpu_sched] [ +0.005812] amdgpu_ctx_get_entity+0x3f9/0x770 [amdgpu] [ +0.009530] ? finish_task_switch.isra.0+0x129/0x470 [ +0.005068] ? __pfx_amdgpu_ctx_get_entity+0x10/0x10 [amdgpu] [ +0.010063] ? __kasan_check_write+0x14/0x20 [ +0.004356] ? srso_return_thunk+0x5/0x5f [ +0.004001] ? mutex_unlock+0x81/0xd0 [ +0.003802] ? srso_return_thunk+0x5/0x5f [ +0.004096] amdgpu_cs_wait_ioctl+0xf6/0x270 [amdgpu] [ +0.009355] ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu] [ +0.009981] ? srso_return_thunk+0x5/0x5f [ +0.004089] ? srso_return_thunk+0x5/0x5f [ +0.004090] ? __srcu_read_lock+0x20/0x50 [ +0.004096] drm_ioctl_kernel+0x140/0x1f0 [drm] [ +0.005080] ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu] [ +0.009974] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ +0.005618] ? srso_return_thunk+0x5/0x5f [ +0.004088] ? __kasan_check_write+0x14/0x20 [ +0.004357] drm_ioctl+0x3da/0x730 [drm] [ +0.004461] ? __pfx_amdgpu_cs_wait_ioctl+0x10/0x10 [amdgpu] [ +0.009979] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ +0.004993] ? srso_return_thunk+0x5/0x5f [ +0.004090] ? __kasan_check_write+0x14/0x20 [ +0.004356] ? srso_return_thunk+0x5/0x5f [ +0.004090] ? _raw_spin_lock_irqsave+0x99/0x100 [ +0.004712] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.005063] ? __pfx_arch_do_signal_or_restart+0x10/0x10 [ +0.005477] ? srso_return_thunk+0x5/0x5f [ +0.004000] ? preempt_count_sub+0x18/0xc0 [ +0.004237] ? srso_return_thunk+0x5/0x5f [ +0.004090] ? _raw_spin_unlock_irqrestore+0x27/0x50 [ +0.005069] amdgpu_drm_ioctl+0x7e/0xe0 [amdgpu] [ +0.008912] __x64_sys_ioctl+0xcd/0x110 [ +0.003918] do_syscall_64+0x5f/0xe0 [ +0.003649] ? noist_exc_debug+0xe6/0x120 [ +0.004095] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ +0.005150] RIP: 0033:0x7ffff7b1a94f [ +0.003647] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ +0.019097] RSP: 002b:00007fffffffe0a0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ +0.007708] RAX: ffffffffffffffda RBX: 000055555558b360 RCX: 00007ffff7b1a94f [ +0.007176] RDX: 000055555558b360 RSI: 00000000c0206449 RDI: 0000000000000003 [ +0.007326] RBP: 00000000c0206449 R08: 000055555556ded0 R09: 000000007fffffff [ +0.007176] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fffffffe5d8 [ +0.007238] R13: 0000000000000003 R14: 000055555555cba8 R15: 00007ffff7ffd040 [ +0.007250] v2: Reworked check to guard against null ptr deref and added helpful comments (Christian) Cc: Christian Koenig Cc: Alex Deucher Cc: Luben Tuikov Cc: Bas Nieuwenhuizen Cc: Joonkyo Jung Cc: Dokyung Song Cc: Cc: Signed-off-by: Vitaly Prosyak Reviewed-by: Christian König Fixes: 56e449603f0a ("drm/sched: Convert the GPU scheduler to variable number of run-queues") Link: https://patchwork.freedesktop.org/patch/msgid/20240315023926.343164-1-vitaly.prosyak@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/scheduler/sched_entity.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 3c4f5a392b06..58c8161289fe 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -71,13 +71,19 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + /* + * It's perfectly valid to initialize an entity without having a valid + * scheduler attached. It's just not valid to use the scheduler before it + * is initialized itself. + */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - if (!sched_list[0]->sched_rq) { - /* Warn drivers not to do this and to fix their DRM - * calling order. + if (num_sched_list && !sched_list[0]->sched_rq) { + /* Since every entry covered by num_sched_list + * should be non-NULL and therefore we warn drivers + * not to do this and to fix their DRM calling order. */ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { From e3f18b0dd1db242791afbc3bd173026163ce0ccc Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 11 Jan 2024 13:38:04 +0100 Subject: [PATCH 03/55] drm/bridge: Select DRM_KMS_HELPER for DRM_PANEL_BRIDGE Since the kconfig symbol of DRM_PANEL_BRIDGE is only adding bridge/panel.o to drm_kms_helper object, we need to select DRM_KMS_HELPER to make sure the file is actually getting built. Otherwise with certain defconfigs e.g. devm_drm_of_get_bridge will not be properly available: aarch64-linux-gnu-ld: drivers/phy/qualcomm/phy-qcom-qmp-combo.o: in function `qmp_combo_bridge_attach': drivers/phy/qualcomm/phy-qcom-qmp-combo.c:3204:(.text+0x8f4): undefined reference to `devm_drm_of_get_bridge' Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20240111-drm-panel-bridge-fixup-v1-1-e06292f6f500@fairphone.com Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240111-drm-panel-bridge-fixup-v1-1-e06292f6f500@fairphone.com --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index efd996f6c138..1d4f010af97b 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -8,6 +8,7 @@ config DRM_BRIDGE config DRM_PANEL_BRIDGE def_bool y depends on DRM_BRIDGE + select DRM_KMS_HELPER select DRM_PANEL help DRM bridge wrapper of DRM panels From dbd9698830ebafcb6f3be6498fd4a6968dcbf89a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 18 Mar 2024 15:16:21 +0100 Subject: [PATCH 04/55] Revert "drm/bridge: Select DRM_KMS_HELPER for DRM_PANEL_BRIDGE" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e3f18b0dd1db242791afbc3bd173026163ce0ccc. Selecting DRM_KMS_HELPER for DRM_PANEL_BRIDGE leads to: WARNING: unmet direct dependencies detected for DRM_KMS_HELPER Depends on [m]: HAS_IOMEM [=y] && DRM [=m] ... and builds with CONFIG_DRM=m will fail with the above kconfig warns and then multiple linker error. Reported-by: Imre Deak Reported-by: Jani Nikula Reported-by: Ville Syrjälä Fixes: e3f18b0dd1db ("drm/bridge: Select DRM_KMS_HELPER for DRM_PANEL_BRIDGE") Acked-by: Lucas De Marchi Acked-by: Jani Nikula Tested-by: Jani Nikula Link: https://lore.kernel.org/r/20240318-revert-select-drm_kms_helper-for-drm_panel_bridge-v1-1-52a42a116286@linaro.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240318-revert-select-drm_kms_helper-for-drm_panel_bridge-v1-1-52a42a116286@linaro.org --- drivers/gpu/drm/bridge/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 1d4f010af97b..efd996f6c138 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -8,7 +8,6 @@ config DRM_BRIDGE config DRM_PANEL_BRIDGE def_bool y depends on DRM_BRIDGE - select DRM_KMS_HELPER select DRM_PANEL help DRM bridge wrapper of DRM panels From 9cbd1dae842737bfafa4b10a87909fa209dde250 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Sat, 16 Mar 2024 12:05:59 +0000 Subject: [PATCH 05/55] drm/dp: Fix divide-by-zero regression on DP MST unplug with nouveau Fix a regression when using nouveau and unplugging a StarTech MSTDP122DP DisplayPort 1.2 MST hub (the same regression does not appear when using a Cable Matters DisplayPort 1.4 MST hub). Trace: divide error: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 2962 Comm: Xorg Not tainted 6.8.0-rc3+ #744 Hardware name: Razer Blade/DANA_MB, BIOS 01.01 08/31/2018 RIP: 0010:drm_dp_bw_overhead+0xb4/0x110 [drm_display_helper] Code: c6 b8 01 00 00 00 75 61 01 c6 41 0f af f3 41 0f af f1 c1 e1 04 48 63 c7 31 d2 89 ff 48 8b 5d f8 c9 48 0f af f1 48 8d 44 06 ff <48> f7 f7 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 45 31 d2 45 31 RSP: 0018:ffffb2c5c211fa30 EFLAGS: 00010206 RAX: ffffffffffffffff RBX: 0000000000000000 RCX: 0000000000f59b00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb2c5c211fa48 R08: 0000000000000001 R09: 0000000000000020 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000023b4a R13: ffff91d37d165800 R14: ffff91d36fac6d80 R15: ffff91d34a764010 FS: 00007f4a1ca3fa80(0000) GS:ffff91d6edbc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559491d49000 CR3: 000000011d180002 CR4: 00000000003706f0 Call Trace: ? show_regs+0x6d/0x80 ? die+0x37/0xa0 ? do_trap+0xd4/0xf0 ? do_error_trap+0x71/0xb0 ? drm_dp_bw_overhead+0xb4/0x110 [drm_display_helper] ? exc_divide_error+0x3a/0x70 ? drm_dp_bw_overhead+0xb4/0x110 [drm_display_helper] ? asm_exc_divide_error+0x1b/0x20 ? drm_dp_bw_overhead+0xb4/0x110 [drm_display_helper] ? drm_dp_calc_pbn_mode+0x2e/0x70 [drm_display_helper] nv50_msto_atomic_check+0xda/0x120 [nouveau] drm_atomic_helper_check_modeset+0xa87/0xdf0 [drm_kms_helper] drm_atomic_helper_check+0x19/0xa0 [drm_kms_helper] nv50_disp_atomic_check+0x13f/0x2f0 [nouveau] drm_atomic_check_only+0x668/0xb20 [drm] ? drm_connector_list_iter_next+0x86/0xc0 [drm] drm_atomic_commit+0x58/0xd0 [drm] ? __pfx___drm_printfn_info+0x10/0x10 [drm] drm_atomic_connector_commit_dpms+0xd7/0x100 [drm] drm_mode_obj_set_property_ioctl+0x1c5/0x450 [drm] ? __pfx_drm_connector_property_set_ioctl+0x10/0x10 [drm] drm_connector_property_set_ioctl+0x3b/0x60 [drm] drm_ioctl_kernel+0xb9/0x120 [drm] drm_ioctl+0x2d0/0x550 [drm] ? __pfx_drm_connector_property_set_ioctl+0x10/0x10 [drm] nouveau_drm_ioctl+0x61/0xc0 [nouveau] __x64_sys_ioctl+0xa0/0xf0 do_syscall_64+0x76/0x140 ? do_syscall_64+0x85/0x140 ? do_syscall_64+0x85/0x140 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7f4a1cd1a94f Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 RSP: 002b:00007ffd2f1df520 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd2f1df5b0 RCX: 00007f4a1cd1a94f RDX: 00007ffd2f1df5b0 RSI: 00000000c01064ab RDI: 000000000000000f RBP: 00000000c01064ab R08: 000056347932deb8 R09: 000056347a7d99c0 R10: 0000000000000000 R11: 0000000000000246 R12: 000056347938a220 R13: 000000000000000f R14: 0000563479d9f3f0 R15: 0000000000000000 Modules linked in: rfcomm xt_conntrack nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack_netlink nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xfrm_user xfrm_algo xt_addrtype nft_compat nf_tables nfnetlink br_netfilter bridge stp llc ccm cmac algif_hash overlay algif_skcipher af_alg bnep binfmt_misc snd_sof_pci_intel_cnl snd_sof_intel_hda_common snd_soc_hdac_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof_intel_hda snd_sof snd_sof_utils snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core snd_compress snd_sof_intel_hda_mlink snd_hda_ext_core iwlmvm intel_rapl_msr intel_rapl_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp mac80211 coretemp kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic uvcvideo libarc4 snd_hda_intel snd_intel_dspcfg snd_hda_codec iwlwifi videobuf2_vmalloc videobuf2_memops uvc irqbypass btusb videobuf2_v4l2 snd_seq_midi crct10dif_pclmul hid_multitouch crc32_pclmul snd_seq_midi_event btrtl snd_hwdep videodev polyval_clmulni polyval_generic snd_rawmidi ghash_clmulni_intel aesni_intel btintel crypto_simd snd_hda_core cryptd snd_seq btbcm ee1004 8250_dw videobuf2_common btmtk rapl nls_iso8859_1 mei_hdcp thunderbolt bluetooth intel_cstate wmi_bmof intel_wmi_thunderbolt cfg80211 snd_pcm mc snd_seq_device i2c_i801 r8169 ecdh_generic snd_timer i2c_smbus ecc snd mei_me intel_lpss_pci mei ahci intel_lpss soundcore realtek libahci idma64 intel_pch_thermal i2c_hid_acpi i2c_hid acpi_pad sch_fq_codel msr parport_pc ppdev lp parport efi_pstore ip_tables x_tables autofs4 dm_crypt raid10 raid456 libcrc32c async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0 joydev input_leds hid_generic usbhid hid nouveau i915 drm_ttm_helper gpu_sched drm_gpuvm drm_exec i2c_algo_bit drm_buddy ttm drm_display_helper drm_kms_helper cec rc_core drm nvme nvme_core mxm_wmi xhci_pci xhci_pci_renesas video wmi pinctrl_cannonlake mac_hid ---[ end trace 0000000000000000 ]--- Fix this by avoiding the divide if bpp is 0. Fixes: c1d6a22b7219 ("drm/dp: Add helpers to calculate the link BW overhead") Cc: stable@vger.kernel.org Acked-by: Imre Deak Signed-off-by: Chris Bainbridge Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/ZfWLJwYikw2K7B6c@debian.local --- drivers/gpu/drm/display/drm_dp_helper.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index b1ca3a1100da..26c188ce5f1c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -3982,6 +3982,13 @@ int drm_dp_bw_overhead(int lane_count, int hactive, u32 overhead = 1000000; int symbol_cycles; + if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) { + DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n", + lane_count, hactive, + bpp_x16 >> 4, (bpp_x16 & 0xf) * 625); + return 0; + } + /* * DP Standard v2.1 2.6.4.1 * SSC downspread and ref clock variation margin: From 2295bd846765c766701e666ed2e4b35396be25e6 Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 20 Mar 2024 04:15:23 +0500 Subject: [PATCH 06/55] dma-buf: Fix NULL pointer dereference in sanitycheck() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If due to a memory allocation failure mock_chain() returns NULL, it is passed to dma_fence_enable_sw_signaling() resulting in NULL pointer dereference there. Call dma_fence_enable_sw_signaling() only if mock_chain() succeeds. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d62c43a953ce ("dma-buf: Enable signaling on fence for selftests") Signed-off-by: Pavel Sakharov Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240319231527.1821372-1-p.sakharov@ispras.ru --- drivers/dma-buf/st-dma-fence-chain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 9c2a0c082a76..ed4b323886e4 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); From fd00fe8cdbb241644131ece133a2eb1c3951f21e Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 11 Mar 2024 16:11:59 +0100 Subject: [PATCH 07/55] drm/xe: Remove unused xe_bo->props struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Property struct is not being used so remove it and related dead code. Fixes: ddfa2d6a846a ("drm/xe/uapi: Kill VM_MADVISE IOCTL") Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: intel-xe@lists.freedesktop.org Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240311151159.10036-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 002d8f0b4f76aabbf8e00c538a124b91625d7260) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 59 +++++--------------------------- drivers/gpu/drm/xe/xe_bo_types.h | 19 ---------- 2 files changed, 9 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 6603a0ea79c5..9c0837b6fdfc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, .mem_type = XE_PL_TT, }; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_TT; } } @@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } places[*c] = place; *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = mem_type; } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo->props.preferred_gt == XE_GT1) { - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - } else { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); - } + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, @@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, { u32 c = 0; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; - - /* The order of placements should indicate preferred location */ - - if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { - try_add_system(xe, bo, bo_flags, &c); - try_add_vram(xe, bo, bo_flags, &c); - } else { - try_add_vram(xe, bo, bo_flags, &c); - try_add_system(xe, bo, bo_flags, &c); - } + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); try_add_stolen(xe, bo, bo_flags, &c); if (!c) @@ -1126,13 +1104,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static bool should_migrate_to_system(struct xe_bo *bo) -{ - struct xe_device *xe = xe_bo_device(bo); - - return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; -} - static vm_fault_t xe_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; @@ -1141,7 +1112,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; vm_fault_t ret; - int idx, r = 0; + int idx; if (needs_rpm) xe_device_mem_access_get(xe); @@ -1153,17 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) if (drm_dev_enter(ddev, &idx)) { trace_xe_bo_cpu_fault(bo); - if (should_migrate_to_system(bo)) { - r = xe_bo_migrate(bo, XE_PL_TT); - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) - ret = VM_FAULT_NOPAGE; - else if (r) - ret = VM_FAULT_SIGBUS; - } - if (!ret) - ret = ttm_bo_vm_fault_reserved(vmf, - vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); @@ -1291,9 +1253,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; - bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; - bo->props.preferred_gt = XE_BO_PROPS_INVALID; - bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = XE_BO_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 14ef13b7b421..86422e113d39 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -56,25 +56,6 @@ struct xe_bo { */ struct list_head client_link; #endif - /** @props: BO user controlled properties */ - struct { - /** @preferred_mem: preferred memory class for this BO */ - s16 preferred_mem_class; - /** @prefered_gt: preferred GT for this BO */ - s16 preferred_gt; - /** @preferred_mem_type: preferred memory type */ - s32 preferred_mem_type; - /** - * @cpu_atomic: the CPU expects to do atomics operations to - * this BO - */ - bool cpu_atomic; - /** - * @device_atomic: the device expects to do atomics operations - * to this BO - */ - bool device_atomic; - } props; /** @freed: List node for delayed put. */ struct llist_node freed; /** @created: Whether the bo has passed initial creation */ From 9c1256369c10e31b5ce6575e4ea27fe2c375fd94 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 14 Mar 2024 12:15:55 +0000 Subject: [PATCH 08/55] drm/xe/guc_submit: use jiffies for job timeout drm_sched_init() expects jiffies for the timeout, but here we are passing the timeout in ms. Convert to jiffies instead. Fixes: eef55700f302 ("drm/xe: Add sysfs for default engine scheduler properties") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240314121554.223229-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 2c5b70f74d61438a071a19370e63c234d2bd8938) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ff77bc8da1b2..e2a4c3b5e9ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1220,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) init_waitqueue_head(&ge->suspend_wait); timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - q->sched_props.job_timeout_ms; + msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, From b7dce525c4fcc92b373136288309f8c9ca6c375f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:33 +0000 Subject: [PATCH 09/55] drm/xe/queue: fix engine_class bounds check The engine_class is the index into the user_to_xe_engine_class, therefore it needs to be less than. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-4-matthew.auld@intel.com (cherry picked from commit fe87b7dfcb204a161d1e38b0e787b2f5ab520f32) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 11e150f4c0c1..ead25d5e723e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -448,7 +448,7 @@ find_hw_engine(struct xe_device *xe, { u32 idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) From 23e1ee3a2317f41f47d4f7255257431c5f8d1c2c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:34 +0000 Subject: [PATCH 10/55] drm/xe/device: fix XE_MAX_GT_PER_TILE check Here XE_MAX_GT_PER_TILE is the total, therefore the gt index should always be less than. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-5-matthew.auld@intel.com (cherry picked from commit a5ef563b1d676548a4c5016540833ff970230964) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 14be34d9f543..5c254ec9c602 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -58,7 +58,7 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) { - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) gt_id = 0; return gt_id ? tile->media_gt : tile->primary_gt; From b45f20fa69cedb6038fdaec31bd600c273c865a5 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 18 Mar 2024 18:05:35 +0000 Subject: [PATCH 11/55] drm/xe/device: fix XE_MAX_TILES_PER_DEVICE check Here XE_MAX_TILES_PER_DEVICE is the gt array size, therefore the gt index should always be less than. v2 (Lucas): - Add fixes tag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Acked-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240318180532.57522-6-matthew.auld@intel.com (cherry picked from commit a96cd71ec7be0790f9fc4039ad21be8d214b03a4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 5c254ec9c602..d413bc2c6be5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) if (MEDIA_VER(xe) >= 13) { gt = xe_tile_get_gt(root_tile, gt_id); } else { - if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) gt_id = 0; gt = xe->tiles[gt_id].primary_gt; From 45c30b2923e5c53e0ef057a8a525b0456adde18e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 21 Mar 2024 11:06:30 +0000 Subject: [PATCH 12/55] drm/xe/query: fix gt_id bounds check The user provided gt_id should always be less than the XE_MAX_GT_PER_TILE. Fixes: 7793d00d1bf5 ("drm/xe: Correlate engine and cpu timestamps with better accuracy") Signed-off-by: Matthew Auld Cc: Nirmoy Das Cc: # v6.8+ Reviewed-by: Nirmoy Das Acked-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240321110629.334701-2-matthew.auld@intel.com (cherry picked from commit 4b275f502a0d3668195762fb55fa00e659ad1b0b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 92bb06c0586e..075f9eaef031 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -132,7 +132,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id > XE_MAX_GT_PER_TILE) + if (eci->gt_id >= XE_MAX_GT_PER_TILE) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); From 0d8cf0c924732a045273c6aca6900a340ac88529 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 22 Mar 2024 07:48:43 -0700 Subject: [PATCH 13/55] drm/xe: Fix END redefinition mips declares an END macro in its headers so it can't be used without namespace in a driver like xe. Instead of coming up with a longer name, just remove the macro and replace its use with 0 since it's still clear what that means: set_offsets() was already using that implicitly when checking the data variable. Reported-by: Guenter Roeck Closes: http://kisskb.ellerman.id.au/kisskb/buildresult/15143996/ Tested-by: Guenter Roeck Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240322145037.196548-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 35b22649eb4155ca6bcffcb2c6e2a1d311aaaf72) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7ad853b0788a..1426febe86eb 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -97,7 +97,6 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END 0 { const u32 base = hwe->mmio_base; @@ -168,7 +167,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 dg2_xcs_offsets[] = { @@ -202,7 +201,7 @@ static const u8 dg2_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END + 0 }; static const u8 gen12_rcs_offsets[] = { @@ -298,7 +297,7 @@ static const u8 gen12_rcs_offsets[] = { REG(0x084), NOP(1), - END + 0 }; static const u8 xehp_rcs_offsets[] = { @@ -339,7 +338,7 @@ static const u8 xehp_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 dg2_rcs_offsets[] = { @@ -382,7 +381,7 @@ static const u8 dg2_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; static const u8 mtl_rcs_offsets[] = { @@ -425,7 +424,7 @@ static const u8 mtl_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END + 0 }; #define XE2_CTX_COMMON \ @@ -471,7 +470,7 @@ static const u8 xe2_rcs_offsets[] = { LRI(1, 0), /* [0x47] */ REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - END + 0 }; static const u8 xe2_bcs_offsets[] = { @@ -482,16 +481,15 @@ static const u8 xe2_bcs_offsets[] = { REG16(0x200), /* [0x42] BCS_SWCTRL */ REG16(0x204), /* [0x44] BLIT_CCTL */ - END + 0 }; static const u8 xe2_xcs_offsets[] = { XE2_CTX_COMMON, - END + 0 }; -#undef END #undef REG16 #undef REG #undef LRI From a8eb93b42d7e068306ca07f51055cbcde893fea3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 22 Mar 2024 09:29:46 +0100 Subject: [PATCH 14/55] fbdev: Select I/O-memory framebuffer ops for SBus Framebuffer I/O on the Sparc Sbus requires read/write helpers for I/O memory. Select FB_IOMEM_FOPS accordingly. Reported-by: Nick Bowler Closes: https://lore.kernel.org/lkml/5bc21364-41da-a339-676e-5bb0f4faebfb@draconx.ca/ Signed-off-by: Thomas Zimmermann Fixes: 8813e86f6d82 ("fbdev: Remove default file-I/O implementations") Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Daniel Vetter Cc: Helge Deller Cc: Sam Ravnborg Cc: Arnd Bergmann Cc: Geert Uytterhoeven Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: # v6.8+ Reviewed-by: Javier Martinez Canillas Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20240322083005.24269-1-tzimmermann@suse.de --- drivers/video/fbdev/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index e3179e987cdb..197b6d5268e9 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -494,6 +494,7 @@ config FB_SBUS_HELPERS select FB_CFB_COPYAREA select FB_CFB_FILLRECT select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS config FB_BW2 bool "BWtwo support" @@ -514,6 +515,7 @@ config FB_CG6 depends on (FB = y) && (SPARC && FB_SBUS) select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the CGsix (GX, TurboGX) frame buffer. @@ -523,6 +525,7 @@ config FB_FFB depends on FB_SBUS && SPARC64 select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_IOMEM_FOPS help This is the frame buffer device driver for the Creator, Creator3D, and Elite3D graphics boards. From 905f7d53a1bc105b22d9ffd03dc11b6b42ec6ba6 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 4 Mar 2024 18:09:52 +0800 Subject: [PATCH 15/55] drm/rockchip: vop2: Remove AR30 and AB30 format support The Alpha blending for 30 bit RGB/BGR are not functioning properly for rk3568/rk3588, so remove it from the format list. Fixes: bfd8a5c228fa ("drm/rockchip: vop2: Add more supported 10bit formats") Signed-off-by: Andy Yan Acked-by: Sascha Hauer Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240304100952.3592984-1-andyshrk@163.com --- drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 48170694ac6b..18efb3fe1c00 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -17,9 +17,7 @@ static const uint32_t formats_cluster[] = { DRM_FORMAT_XRGB2101010, - DRM_FORMAT_ARGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ABGR2101010, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, DRM_FORMAT_XBGR8888, From 075ec164740172ec7f1fc80c2cb79f7ec1c9451a Mon Sep 17 00:00:00 2001 From: lima1002 Date: Thu, 25 Jan 2024 15:53:16 +0800 Subject: [PATCH 16/55] drm/amd/swsmu: add smu 14.0.1 vcn and jpeg msg add new vcn and jpeg msg v2: squash in updates (Alex) v3: rework code for better compat with other smu14.x variants (Alex) Reviewed-by: Alex Deucher Signed-off-by: lima1002 Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h | 28 +++++------ drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 10 ++++ .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 50 ++++++++++++++++--- .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 21 +++++--- 4 files changed, 82 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h index 8a8a57c56bc0..ca7ce4251482 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h @@ -54,14 +54,14 @@ #define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team #define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version -#define PPSMC_MSG_SPARE0 0x04 ///< SPARE -#define PPSMC_MSG_SPARE1 0x05 ///< SPARE -#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN -#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default -#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display +#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1 +#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default +#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0 +#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default +#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display #define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz -#define PPSMC_MSG_SPARE2 0x0A ///< SPARE -#define PPSMC_MSG_SPARE3 0x0B ///< SPARE +#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display +#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload #define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer #define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer @@ -71,7 +71,7 @@ #define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW #define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK #define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK -#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK) +#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0) #define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU @@ -84,17 +84,17 @@ #define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK #define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK -#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK) +#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0) #define PPSMC_MSG_spare_0x20 0x20 -#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg -#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default +#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0 +#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default #define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK #define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK #define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity -#define PPSMC_MSG_Reserved 0x26 ///< Not used -#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp -#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp +#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1 +#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default +#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1) #define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default #define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM #define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a941fdbf78b6..af427cc7dbb8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -115,6 +115,10 @@ __SMU_DUMMY_MAP(PowerDownVcn), \ __SMU_DUMMY_MAP(PowerUpJpeg), \ __SMU_DUMMY_MAP(PowerDownJpeg), \ + __SMU_DUMMY_MAP(PowerUpJpeg0), \ + __SMU_DUMMY_MAP(PowerDownJpeg0), \ + __SMU_DUMMY_MAP(PowerUpJpeg1), \ + __SMU_DUMMY_MAP(PowerDownJpeg1), \ __SMU_DUMMY_MAP(BacoAudioD3PME), \ __SMU_DUMMY_MAP(ArmD3), \ __SMU_DUMMY_MAP(RunDcBtc), \ @@ -135,6 +139,8 @@ __SMU_DUMMY_MAP(PowerUpSdma), \ __SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \ __SMU_DUMMY_MAP(SetHardMinVcn), \ + __SMU_DUMMY_MAP(SetHardMinVcn0), \ + __SMU_DUMMY_MAP(SetHardMinVcn1), \ __SMU_DUMMY_MAP(SetAllowFclkSwitch), \ __SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \ __SMU_DUMMY_MAP(ActiveProcessNotify), \ @@ -150,6 +156,8 @@ __SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \ __SMU_DUMMY_MAP(SetSoftMinVcn), \ + __SMU_DUMMY_MAP(SetSoftMinVcn0), \ + __SMU_DUMMY_MAP(SetSoftMinVcn1), \ __SMU_DUMMY_MAP(EnablePostCode), \ __SMU_DUMMY_MAP(GetGfxclkFrequency), \ __SMU_DUMMY_MAP(GetFclkFrequency), \ @@ -161,6 +169,8 @@ __SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMaxVcn), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn0), \ + __SMU_DUMMY_MAP(SetSoftMaxVcn1), \ __SMU_DUMMY_MAP(PowerGateMmHub), \ __SMU_DUMMY_MAP(UpdatePmeRestore), \ __SMU_DUMMY_MAP(GpuChangeState), \ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index b06a3cc43305..9e39f99154f9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1402,9 +1402,22 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, if (adev->vcn.harvest_config & (1 << i)) continue; - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, + i << 16U, NULL); + else if (i == 1) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + i << 16U, NULL); + } + if (ret) return ret; } @@ -1415,9 +1428,34 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, int smu_v14_0_set_jpeg_enable(struct smu_context *smu, bool enable) { - return smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, - 0, NULL); + struct amdgpu_device *adev = smu->adev; + int i, ret = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) { + if (i == 0) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0, + i << 16U, NULL); + else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1, + i << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg, + i << 16U, NULL); + } + + if (ret) + return ret; + } + + return ret; } int smu_v14_0_run_btc(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 9310c4758e38..d6de6d97286c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -70,9 +70,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), - MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1), - MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1), + MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1), + MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1), + MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1), + MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1), + MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1), + MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1), MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -83,7 +86,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1), MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1), MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1), - MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1), + MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1), + MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1), MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), @@ -91,9 +95,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1), MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1), MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1), - MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1), - MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), - MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1), + MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1), + MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1), + MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1), + MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1), + MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1), + MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1), MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1), MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1), MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1), From 9d7993a7ab9651afd5fb295a4992e511b2b727aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 12 Feb 2024 10:40:55 -0500 Subject: [PATCH 17/55] drm/amdkfd: Check cgroup when returning DMABuf info Check cgroup permissions when returning DMA-buf info and based on cgroup info return the GPU id of the GPU that have access to the BO. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dfa8c69532d4..f9631f4b1a02 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, /* Find a KFD GPU device that supports the get_dmabuf_info query */ for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) - if (dev) + if (dev && !kfd_devcgroup_check_permission(dev)) break; if (!dev) return -EINVAL; @@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, if (xcp_id >= 0) args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; else - args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id; + args->gpu_id = dev->id; args->flags = flags; /* Copy metadata buffer to user mode */ From a99d81937526d60796a4462de459a85146851ccf Mon Sep 17 00:00:00 2001 From: Natanel Roizenman Date: Wed, 6 Mar 2024 12:38:54 -0500 Subject: [PATCH 18/55] drm/amd/display: Increase Z8 watermark times. Increase Z8 watermark times from 210->250us and 320->350us. Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Natanel Roizenman Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 80bebfc268db..21e0eef3269b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .num_states = 5, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index dc9e1b758ed6..7bd67f6b1595 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -145,8 +145,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .num_states = 5, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 210.0, - .sr_enter_plus_exit_z8_time_us = 320.0, + .sr_exit_z8_time_us = 250.0, + .sr_enter_plus_exit_z8_time_us = 350.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, From 72d72e8fddbcd6c98e1b02d32cf6f2b04e10bd1c Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 5 Mar 2024 17:41:15 -0500 Subject: [PATCH 19/55] drm/amd/display: Prevent crash when disable stream [Why] Disabling stream encoder invokes a function that no longer exists. [How] Check if the function declaration is NULL in disable stream encoder. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 9d5df4c0da59..0ba1feaf96c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) if (dccg) { dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, From 02c825dcc621b0178d548cacc56e3fd0313b5fd9 Mon Sep 17 00:00:00 2001 From: Xi Liu Date: Thu, 7 Mar 2024 11:51:56 -0500 Subject: [PATCH 20/55] drm/amd/display: increase bb clock for DCN351 [Why and how] Bounding box clocks for DCN351 should be increased as per request Reviewed-by: Swapnil Patel Acked-by: Wayne Lin Signed-off-by: Xi Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn351/dcn351_fpu.c | 90 ++++++++++++++++--- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index 7bd67f6b1595..b6246406a042 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -98,51 +98,110 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .clock_limits = { { .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 400.0, + .fabricclk_mhz = 400.0, + .socclk_mhz = 600.0, + .dram_speed_mts = 3200.0, + .dispclk_mhz = 600.0, + .dppclk_mhz = 600.0, .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, + .dscclk_mhz = 200.0, .dtbclk_mhz = 600.0, }, { .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 600.0, + .fabricclk_mhz = 1000.0, + .socclk_mhz = 733.0, + .dram_speed_mts = 6400.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 738.0, + .fabricclk_mhz = 1200.0, + .socclk_mhz = 880.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 800.0, + .dppclk_mhz = 800.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, + .dscclk_mhz = 266.7, .dtbclk_mhz = 600.0, }, { .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, + .dcfclk_mhz = 800.0, + .fabricclk_mhz = 1400.0, + .socclk_mhz = 978.0, + .dram_speed_mts = 7500.0, + .dispclk_mhz = 960.0, + .dppclk_mhz = 960.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, + .dscclk_mhz = 320.0, .dtbclk_mhz = 600.0, }, { .state = 4, + .dcfclk_mhz = 873.0, + .fabricclk_mhz = 1600.0, + .socclk_mhz = 1100.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1066.7, + .dppclk_mhz = 1066.7, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 355.6, + .dtbclk_mhz = 600.0, + }, + { + .state = 5, + .dcfclk_mhz = 960.0, + .fabricclk_mhz = 1700.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, .dispclk_mhz = 1200.0, .dppclk_mhz = 1200.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, + .dscclk_mhz = 400.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 6, + .dcfclk_mhz = 1067.0, + .fabricclk_mhz = 1850.0, + .socclk_mhz = 1257.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1371.4, + .dppclk_mhz = 1371.4, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 457.1, + .dtbclk_mhz = 600.0, + }, + { + .state = 7, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 2000.0, + .socclk_mhz = 1467.0, + .dram_speed_mts = 8533.0, + .dispclk_mhz = 1600.0, + .dppclk_mhz = 1600.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 533.3, .dtbclk_mhz = 600.0, }, }, - .num_states = 5, + .num_states = 8, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, .sr_exit_z8_time_us = 250.0, @@ -177,6 +236,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .num_chans = 4, + .dram_clock_change_latency_us = 11.72, + .dispclk_dppclk_vco_speed_mhz = 2400.0, }; /* From eed14eb48ee176fe0144c6a999d00c855d0b199b Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Wed, 13 Mar 2024 16:53:49 +0800 Subject: [PATCH 21/55] drm/amdgpu/vpe: power on vpe when hw_init To fix mode2 reset failure. Should power on VPE when hw_init. Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 7a65a2b128ec..6695481f870f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; From 1210e2f1033dc56b666c9f6dfb761a2d3f9f5d6c Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 20 Mar 2024 15:53:47 -0400 Subject: [PATCH 22/55] drm/amdkfd: fix TLB flush after unmap for GFX9.4.2 TLB flush after unmap accidentially was removed on gfx9.4.2. It is to add it back. Signed-off-by: Eric Huang Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 42d40560cd30..a81ef232fdef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd, static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } From 78aca9ee5e012e130dbfbd7191bc2302b0cf3b37 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 12 Mar 2024 11:21:32 -0400 Subject: [PATCH 23/55] Revert "drm/amd/display: Fix sending VSC (+ colorimetry) packets for DP/eDP displays without PSR" This causes flicker on a bunch of eDP panels. The info_packet code also caused regressions on other OSes that we haven't' seen on Linux yet, but that is likely due to the fact that we haven't had a chance to test those environments on Linux. We'll need to revisit this. This reverts commit 202260f64519e591b5cd99626e441b6559f571a3. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3207 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3151 Signed-off-by: Harry Wentland Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++----- .../amd/display/modules/info_packet/info_packet.c | 13 +++++-------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2851719d7121..71d2d44681b2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6305,9 +6305,8 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); - else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || - stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - stream->signal == SIGNAL_TYPE_EDP) { + + if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) { // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet @@ -6323,9 +6322,8 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; - if (stream->link->psr_settings.psr_feature_enabled) - aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } finish: dc_sink_release(sink); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 738ee763f24a..84f9b412a4f1 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -147,15 +147,12 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */ - if (stream->link->psr_settings.psr_feature_enabled) { - if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) - vsc_packet_revision = vsc_packet_rev2; - } - - if (stream->link->replay_settings.config.replay_supported) + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->replay_settings.config.replay_supported) + vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) + vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ if (stream->use_vsc_sdp_for_colorimetry) From 0cac183b98d8a8c692c98e8dba37df15a9e9210d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 1 Mar 2024 12:25:17 -0500 Subject: [PATCH 24/55] drm/amdkfd: range check cp bad op exception interrupts Due to a CP interrupt bug, bad packet garbage exception codes are raised. Do a range check so that the debugger and runtime do not receive garbage codes. Update the user api to guard exception code type checking as well. Signed-off-by: Jonathan Kim Tested-by: Jesse Zhang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 3 ++- .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 ++- include/uapi/linux/kfd_ioctl.h | 17 ++++++++++++++--- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..40a21be6c07c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..fe2ad0c0de95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..c4c6a29052ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, sq_int_data, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9ce46edc62a5..2040a470ddb4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code { KFD_EC_MASK(EC_DEVICE_NEW)) #define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) +#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)) /* Checks for exception code types for KFD search */ +#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX) #define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) #define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET)) /* Runtime enable states */ From ca299b4512d4b4f516732a48ce9aa19d91f4473e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 20 Mar 2024 13:32:21 -0500 Subject: [PATCH 25/55] drm/amd: Flush GFXOFF requests in prepare stage If the system hasn't entered GFXOFF when suspend starts it can cause hangs accessing GC and RLC during the suspend stage. Cc: # 6.1.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.1.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.1.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.1.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.6.y: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Cc: # 6.6.y: cb11ca3233aa ("drm/amd: Add concept of running prepare_suspend() sequence for IP blocks") Cc: # 6.6.y: 2ceec37b0e3d ("drm/amd: Add missing kernel doc for prepare_suspend()") Cc: # 6.6.y: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Cc: # 6.1+ Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132 Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5dc24c971b41..aa16d51dd842 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4539,6 +4539,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; From 1202f794cdaa4f0ba6a456bc034f2db6cfcf5579 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 21 Mar 2024 16:09:21 -0400 Subject: [PATCH 26/55] drm/amd/display: fix IPX enablement We need to re-enable idle power optimizations after entering PSR. Since, we get kicked out of idle power optimizations before entering PSR (entering PSR requires us to write to DCN registers, which isn't allowed while we are in IPS). Fixes: a9b1a4f684b3 ("drm/amd/display: Add more checks for exiting idle in DC") Tested-by: Mark Broadworth Reviewed-by: Nicholas Kazlauskas Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 8 +++++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 1f08c6564c3b..286ecd28cc6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -141,9 +141,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) * amdgpu_dm_psr_enable() - enable psr f/w * @stream: stream state * - * Return: true if success */ -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) +void amdgpu_dm_psr_enable(struct dc_stream_state *stream) { struct dc_link *link = stream->link; unsigned int vsync_rate_hz = 0; @@ -190,7 +189,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1) power_opt |= psr_power_opt_z10_static_screen; - return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt); + + if (link->ctx->dc->caps.ips_support) + dc_allow_idle_optimizations(link->ctx->dc, true); } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index 6806b3c9c84b..1fdfd183c0d9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -32,7 +32,7 @@ #define AMDGPU_DM_PSR_ENTRY_DELAY 5 void amdgpu_dm_set_psr_caps(struct dc_link *link); -bool amdgpu_dm_psr_enable(struct dc_stream_state *stream); +void amdgpu_dm_psr_enable(struct dc_stream_state *stream); bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable(struct dc_stream_state *stream); bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm); From 09d62c7beb3b98c03b4fc2205bfa7b80c249157d Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Wed, 21 Feb 2024 16:47:06 -0500 Subject: [PATCH 27/55] drm/amd/display: Update dcn351 to latest dcn35 config [why & how] There were some fixes in dcn35 that need to be ported over to dcn351 to prevent any regression. Signed-off-by: Sung Joon Kim Reviewed-by: Liu, Xi (Alex) Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c | 9 ++++++--- .../gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c | 2 +- .../amd/display/dc/resource/dcn351/dcn351_resource.c | 11 ++++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index b6246406a042..b3ffab77cf88 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -402,6 +402,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -414,6 +416,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } @@ -613,6 +617,7 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; } + /*dcn351 does not support z9/z10*/ if (context->stream_count == 0 || plane_count == 0) { support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; @@ -626,11 +631,9 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context) dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; - /*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/ - if (is_pwrseq0 && (is_psr || is_replay)) + if (is_pwrseq0 && (is_psr || is_replay)) support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW; - } context->bw_ctx.bw.dcn.clk.zstate_support = support; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index ab17fa1c64e8..670255c9bc82 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .prepare_bandwidth = dcn35_prepare_bandwidth, .optimize_bandwidth = dcn35_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn35_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn35_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 5b486400dfdb..909e14261f9b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -700,6 +700,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, + .disable_optc_power_gate = true, /*should the same as above two*/ + .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/ .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, @@ -742,12 +744,13 @@ static const struct dc_debug_options debug_defaults_drv = { }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ + .minimum_z8_residency_time = 2100, .using_dml2 = true, .support_eDP1_5 = true, .enable_hpo_pg_support = false, .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = true, - .disable_idle_power_optimizations = true, + .disable_idle_power_optimizations = false, .dmcub_emulation = false, .disable_boot_optimizations = false, .disable_unbounded_requesting = false, @@ -758,8 +761,10 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 + .ips2_eval_delay_us = 2000, + .ips2_entry_delay_us = 800, + .disable_dmub_reallow_idle = true, + .static_screen_wait_frames = 2, }; static const struct dc_panel_config panel_config_defaults = { From 25358e04a43c33e6cd8dce528da1d624de915864 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Thu, 29 Feb 2024 09:52:05 -0500 Subject: [PATCH 28/55] drm/amd/display: Send DTBCLK disable message on first commit [Why] Previous patch to allow DTBCLK disable didn't address boot case. Driver thinks DTBCLK is disabled by default, so we don't send disable message to PMFW. DTBCLK is then enabled at idle desktop on boot, burning power. [How] Set dtbclk_en to true on boot so that disable message is sent during first commit. Fixes: 27750e176a4f ("drm/amd/display: Allow DTBCLK disable for DCN35") Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Taimur Hassan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index c378b879c76d..ee5c4ee1ed66 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -73,6 +73,8 @@ #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 + #define REG(reg_name) \ (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -411,9 +413,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs static void init_clk_states(struct clk_mgr *clk_mgr) { + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD) + clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; From fe869c2e53484a29ab241667606240b91db920ef Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 12 Mar 2024 15:31:59 -0400 Subject: [PATCH 29/55] drm/amd/display: fix a dereference of a NULL pointer [why&how] In some platform out_transfer_func may not be popualted. We need to check for null before dereferencing it. Fixes: d2dea1f14038 ("drm/amd/display: Generalize new minimal transition path") Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e7dc128f6284..03b554e912a2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3024,7 +3024,8 @@ static void backup_planes_and_stream_state( scratch->blend_tf[i] = *status->plane_states[i]->blend_tf; } scratch->stream_state = *stream; - scratch->out_transfer_func = *stream->out_transfer_func; + if (stream->out_transfer_func) + scratch->out_transfer_func = *stream->out_transfer_func; } static void restore_planes_and_stream_state( @@ -3046,7 +3047,8 @@ static void restore_planes_and_stream_state( *status->plane_states[i]->blend_tf = scratch->blend_tf[i]; } *stream = scratch->stream_state; - *stream->out_transfer_func = scratch->out_transfer_func; + if (stream->out_transfer_func) + *stream->out_transfer_func = scratch->out_transfer_func; } static bool update_planes_and_stream_state(struct dc *dc, From edfa93d87fc46913868481fe8ed3fb62c891ffb5 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 16 Feb 2024 19:37:03 -0500 Subject: [PATCH 30/55] drm/amd/display: Remove MPC rate control logic from DCN30 and above [Why] MPC flow rate control is not needed for DCN30 and above. Current logic that uses it can result in underflow for certain edge cases (such as DSC N422 + ODM combine + 422 left edge pixel). [How] Remove MPC flow rate control logic and programming for DCN30 and above. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Acked-by: Tom Chung Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c | 54 +++++++++++-------- .../gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h | 14 ++--- .../gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 5 +- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 41 -------------- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 41 -------------- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 41 -------------- 6 files changed, 41 insertions(+), 155 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index bf3386cd444d..5ebb57303130 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -44,6 +44,36 @@ #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) +void mpc3_mpc_init(struct mpc *mpc) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + int opp_id; + + mpc1_mpc_init(mpc); + + for (opp_id = 0; opp_id < MAX_OPP; opp_id++) { + if (REG(MUX[opp_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); + } +} + +void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + mpc1_mpc_init_single_inst(mpc, mpcc_id); + + /* assuming mpc out mux is connected to opp with the same index at this + * point in time (e.g. transitioning from vbios to driver) + */ + if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id])) + /* disable mpc out rate and flow control */ + REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE, + 1, MPC_OUT_FLOW_CONTROL_COUNT, 0); +} + bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id) @@ -80,25 +110,6 @@ void mpc3_disable_dwb_mux( MPC_DWB0_MUX, 0xf); } -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control) -{ - struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); - - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_RATE_CONTROL_DISABLE, !enable, - MPC_OUT_RATE_CONTROL, rate_2x_mode); - - if (flow_control) - REG_UPDATE_2(MUX[opp_id], - MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode, - MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1); -} - enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id) { /*Contrary to DCN2 and DCN1 wherein a single status register field holds this info; @@ -1490,8 +1501,8 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .read_mpcc_state = mpc3_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, - .mpc_init = mpc1_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init = mpc3_mpc_init, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1508,7 +1519,6 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc3_program_shaper, .acquire_rmu = mpcc3_acquire_rmu, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h index 9cb96ae95a2f..ce93003dae01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h @@ -1007,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30, int num_mpcc, int num_rmu); +void mpc3_mpc_init( + struct mpc *mpc); + +void mpc3_mpc_init_single_inst( + struct mpc *mpc, + unsigned int mpcc_id); + bool mpc3_program_shaper( struct mpc *mpc, const struct pwl_params *params, @@ -1078,13 +1085,6 @@ bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id); -void mpc3_set_out_rate_control( - struct mpc *mpc, - int opp_id, - bool enable, - bool rate_2x_mode, - struct mpc_dwb_flow_control *flow_control); - void mpc3_power_on_ogam_lut( struct mpc *mpc, int mpcc_id, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index e789e654c387..e408e859b355 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc) struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); int mpcc_id; - mpc1_mpc_init(mpc); + mpc3_mpc_init(mpc); if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) { if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) { @@ -991,7 +991,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, .mpc_init = mpc32_mpc_init, - .mpc_init_single_inst = mpc1_mpc_init_single_inst, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, .update_blending = mpc2_update_blending, .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, @@ -1008,7 +1008,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, - .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc32_program_shaper, .program_3dlut = mpc32_program_3dlut, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 3a9cc8ac0c07..093f4387553c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -69,29 +69,6 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -183,10 +160,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -199,20 +172,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index c0b526cf1786..7668229438da 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -966,29 +966,6 @@ void dcn32_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -1103,10 +1080,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -1119,20 +1092,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 4b92df23ff0d..a5560b3fc39b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -358,29 +358,6 @@ void dcn35_init_hw(struct dc *dc) } } -static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, - int opp_cnt) -{ - bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing); - int flow_ctrl_cnt; - - if (opp_cnt >= 2) - hblank_halved = true; - - flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable - - stream->timing.h_border_left - - stream->timing.h_border_right; - - if (hblank_halved) - flow_ctrl_cnt /= 2; - - /* ODM combine 4:1 case */ - if (opp_cnt == 4) - flow_ctrl_cnt /= 2; - - return flow_ctrl_cnt; -} - static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; @@ -474,10 +451,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * struct pipe_ctx *odm_pipe; int opp_cnt = 0; int opp_inst[MAX_PIPES] = {0}; - bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing)); - struct mpc_dwb_flow_control flow_control; - struct mpc *mpc = dc->res_pool->mpc; - int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -490,20 +463,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1; - flow_control.flow_ctrl_mode = 0; - flow_control.flow_ctrl_cnt0 = 0x80; - flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt); - if (mpc->funcs->set_out_rate_control) { - for (i = 0; i < opp_cnt; ++i) { - mpc->funcs->set_out_rate_control( - mpc, opp_inst[i], - true, - rate_control_2x_pclk, - &flow_control); - } - } - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, From 2f10d4a51bbcd938f1f02f16c304ad1d54717b96 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 13 Mar 2024 18:35:13 -0400 Subject: [PATCH 31/55] drm/amd/display: Fix bounds check for dcn35 DcfClocks [Why] NumFclkLevelsEnabled is used for DcfClocks bounds check instead of designated NumDcfClkLevelsEnabled. That can cause array index out-of-bounds access. [How] Use designated variable for dcn35 DcfClocks bounds check. Fixes: a8edc9cc0b14 ("drm/amd/display: Fix array-index-out-of-bounds in dcn35_clkmgr") Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Sun peng Li Acked-by: Tom Chung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index ee5c4ee1ed66..101fe96287cb 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -714,7 +714,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk clock_table->NumFclkLevelsEnabled; max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : clock_table->NumDcfClkLevelsEnabled; for (i = 0; i < num_dcfclk; i++) { int j; From 0ccc2b30f4feadc0b1a282dbcc06e396382e5d74 Mon Sep 17 00:00:00 2001 From: Xi Liu Date: Tue, 27 Feb 2024 13:39:00 -0500 Subject: [PATCH 32/55] drm/amd/display: Set DCN351 BB and IP the same as DCN35 [WHY & HOW] DCN351 and DCN35 should use the same bounding box and IP settings. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Xi Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 17a58f41fc6a..a20f28a5d2e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -228,17 +228,13 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s break; case dml_project_dcn35: + case dml_project_dcn351: out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; out->dispclk_dppclk_vco_speed_mhz = 3600; break; - case dml_project_dcn351: - out->num_chans = 16; - out->round_trip_ping_latency_dcfclk_cycles = 1100; - out->smn_latency_us = 2; - break; } /* ---Overrides if available--- */ if (dml2->config.bbox_overrides.dram_num_chan) From 6b154c00cd5378abfd8930a823a7c23bf0750206 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 7 Mar 2024 11:13:19 +0800 Subject: [PATCH 33/55] drm/amdgpu/umsch: update UMSCH 4.0 FW interface Align with FW changes. Signed-off-by: Lang Yu Reviewed-by: Veerabadhran Gopalakrishnan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h | 20 +++++++++---------- .../drm/amd/include/umsch_mm_4_0_api_def.h | 13 ++++++++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h index beadb9e42850..ca83e9e5c3ff 100644 --- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h +++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h @@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES { uint32_t enable_level_process_quantum_check : 1; uint32_t is_vcn0_enabled : 1; uint32_t is_vcn1_enabled : 1; - uint32_t reserved : 27; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 26; }; uint32_t uint32_all; }; @@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE { struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; struct UMSCH_API_STATUS api_status; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE { uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND { uint32_t suspend_fence_value; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -356,6 +362,7 @@ union UMSCHAPI__RESUME { enum UMSCH_ENGINE_TYPE engine_type; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY { union UMSCH_AFFINITY affinity; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; @@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL { uint64_t context_quantum; uint64_t context_csa_addr; struct UMSCH_API_STATUS api_status; + uint32_t context_csa_array_index; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; From 68a2afbccaba588403f18197cdbfc43e5f98c336 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 7 Mar 2024 13:57:06 +0800 Subject: [PATCH 34/55] drm/amdgpu: enable UMSCH 4.0.6 Share same codes with 4.0.5 and enable collaborate mode for VPE. Signed-off-by: Lang Yu Reviewed-by: Veerabadhran Gopalakrishnan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 12 ++++++++++-- drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c | 7 +++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a07e4b87d4ca..fdd36fb027ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2237,6 +2237,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ab820cf52668..0df97c3e3a70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..84368cf1e175 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -248,7 +248,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +271,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +348,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; From 8678b1060ae2b75feb60b87e5b75e17374e3c1c5 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 7 Mar 2024 17:07:37 -0500 Subject: [PATCH 35/55] drm/amdgpu: fix deadlock while reading mqd from debugfs An errant disk backup on my desktop got into debugfs and triggered the following deadlock scenario in the amdgpu debugfs files. The machine also hard-resets immediately after those lines are printed (although I wasn't able to reproduce that part when reading by hand): [ 1318.016074][ T1082] ====================================================== [ 1318.016607][ T1082] WARNING: possible circular locking dependency detected [ 1318.017107][ T1082] 6.8.0-rc7-00015-ge0c8221b72c0 #17 Not tainted [ 1318.017598][ T1082] ------------------------------------------------------ [ 1318.018096][ T1082] tar/1082 is trying to acquire lock: [ 1318.018585][ T1082] ffff98c44175d6a0 (&mm->mmap_lock){++++}-{3:3}, at: __might_fault+0x40/0x80 [ 1318.019084][ T1082] [ 1318.019084][ T1082] but task is already holding lock: [ 1318.020052][ T1082] ffff98c4c13f55f8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: amdgpu_debugfs_mqd_read+0x6a/0x250 [amdgpu] [ 1318.020607][ T1082] [ 1318.020607][ T1082] which lock already depends on the new lock. [ 1318.020607][ T1082] [ 1318.022081][ T1082] [ 1318.022081][ T1082] the existing dependency chain (in reverse order) is: [ 1318.023083][ T1082] [ 1318.023083][ T1082] -> #2 (reservation_ww_class_mutex){+.+.}-{3:3}: [ 1318.024114][ T1082] __ww_mutex_lock.constprop.0+0xe0/0x12f0 [ 1318.024639][ T1082] ww_mutex_lock+0x32/0x90 [ 1318.025161][ T1082] dma_resv_lockdep+0x18a/0x330 [ 1318.025683][ T1082] do_one_initcall+0x6a/0x350 [ 1318.026210][ T1082] kernel_init_freeable+0x1a3/0x310 [ 1318.026728][ T1082] kernel_init+0x15/0x1a0 [ 1318.027242][ T1082] ret_from_fork+0x2c/0x40 [ 1318.027759][ T1082] ret_from_fork_asm+0x11/0x20 [ 1318.028281][ T1082] [ 1318.028281][ T1082] -> #1 (reservation_ww_class_acquire){+.+.}-{0:0}: [ 1318.029297][ T1082] dma_resv_lockdep+0x16c/0x330 [ 1318.029790][ T1082] do_one_initcall+0x6a/0x350 [ 1318.030263][ T1082] kernel_init_freeable+0x1a3/0x310 [ 1318.030722][ T1082] kernel_init+0x15/0x1a0 [ 1318.031168][ T1082] ret_from_fork+0x2c/0x40 [ 1318.031598][ T1082] ret_from_fork_asm+0x11/0x20 [ 1318.032011][ T1082] [ 1318.032011][ T1082] -> #0 (&mm->mmap_lock){++++}-{3:3}: [ 1318.032778][ T1082] __lock_acquire+0x14bf/0x2680 [ 1318.033141][ T1082] lock_acquire+0xcd/0x2c0 [ 1318.033487][ T1082] __might_fault+0x58/0x80 [ 1318.033814][ T1082] amdgpu_debugfs_mqd_read+0x103/0x250 [amdgpu] [ 1318.034181][ T1082] full_proxy_read+0x55/0x80 [ 1318.034487][ T1082] vfs_read+0xa7/0x360 [ 1318.034788][ T1082] ksys_read+0x70/0xf0 [ 1318.035085][ T1082] do_syscall_64+0x94/0x180 [ 1318.035375][ T1082] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 1318.035664][ T1082] [ 1318.035664][ T1082] other info that might help us debug this: [ 1318.035664][ T1082] [ 1318.036487][ T1082] Chain exists of: [ 1318.036487][ T1082] &mm->mmap_lock --> reservation_ww_class_acquire --> reservation_ww_class_mutex [ 1318.036487][ T1082] [ 1318.037310][ T1082] Possible unsafe locking scenario: [ 1318.037310][ T1082] [ 1318.037838][ T1082] CPU0 CPU1 [ 1318.038101][ T1082] ---- ---- [ 1318.038350][ T1082] lock(reservation_ww_class_mutex); [ 1318.038590][ T1082] lock(reservation_ww_class_acquire); [ 1318.038839][ T1082] lock(reservation_ww_class_mutex); [ 1318.039083][ T1082] rlock(&mm->mmap_lock); [ 1318.039328][ T1082] [ 1318.039328][ T1082] *** DEADLOCK *** [ 1318.039328][ T1082] [ 1318.040029][ T1082] 1 lock held by tar/1082: [ 1318.040259][ T1082] #0: ffff98c4c13f55f8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: amdgpu_debugfs_mqd_read+0x6a/0x250 [amdgpu] [ 1318.040560][ T1082] [ 1318.040560][ T1082] stack backtrace: [ 1318.041053][ T1082] CPU: 22 PID: 1082 Comm: tar Not tainted 6.8.0-rc7-00015-ge0c8221b72c0 #17 3316c85d50e282c5643b075d1f01a4f6365e39c2 [ 1318.041329][ T1082] Hardware name: Gigabyte Technology Co., Ltd. B650 AORUS PRO AX/B650 AORUS PRO AX, BIOS F20 12/14/2023 [ 1318.041614][ T1082] Call Trace: [ 1318.041895][ T1082] [ 1318.042175][ T1082] dump_stack_lvl+0x4a/0x80 [ 1318.042460][ T1082] check_noncircular+0x145/0x160 [ 1318.042743][ T1082] __lock_acquire+0x14bf/0x2680 [ 1318.043022][ T1082] lock_acquire+0xcd/0x2c0 [ 1318.043301][ T1082] ? __might_fault+0x40/0x80 [ 1318.043580][ T1082] ? __might_fault+0x40/0x80 [ 1318.043856][ T1082] __might_fault+0x58/0x80 [ 1318.044131][ T1082] ? __might_fault+0x40/0x80 [ 1318.044408][ T1082] amdgpu_debugfs_mqd_read+0x103/0x250 [amdgpu 8fe2afaa910cbd7654c8cab23563a94d6caebaab] [ 1318.044749][ T1082] full_proxy_read+0x55/0x80 [ 1318.045042][ T1082] vfs_read+0xa7/0x360 [ 1318.045333][ T1082] ksys_read+0x70/0xf0 [ 1318.045623][ T1082] do_syscall_64+0x94/0x180 [ 1318.045913][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.046201][ T1082] ? lockdep_hardirqs_on+0x7d/0x100 [ 1318.046487][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.046773][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047057][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047337][ T1082] ? do_syscall_64+0xa0/0x180 [ 1318.047611][ T1082] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 1318.047887][ T1082] RIP: 0033:0x7f480b70a39d [ 1318.048162][ T1082] Code: 91 ba 0d 00 f7 d8 64 89 02 b8 ff ff ff ff eb b2 e8 18 a3 01 00 0f 1f 84 00 00 00 00 00 80 3d a9 3c 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 53 48 83 [ 1318.048769][ T1082] RSP: 002b:00007ffde77f5c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 1318.049083][ T1082] RAX: ffffffffffffffda RBX: 0000000000000800 RCX: 00007f480b70a39d [ 1318.049392][ T1082] RDX: 0000000000000800 RSI: 000055c9f2120c00 RDI: 0000000000000008 [ 1318.049703][ T1082] RBP: 0000000000000800 R08: 000055c9f2120a94 R09: 0000000000000007 [ 1318.050011][ T1082] R10: 0000000000000000 R11: 0000000000000246 R12: 000055c9f2120c00 [ 1318.050324][ T1082] R13: 0000000000000008 R14: 0000000000000008 R15: 0000000000000800 [ 1318.050638][ T1082] amdgpu_debugfs_mqd_read() holds a reservation when it calls put_user(), which may fault and acquire the mmap_sem. This violates the established locking order. Bounce the mqd data through a kernel buffer to get put_user() out of the illegal section. Fixes: 445d85e3c1df ("drm/amdgpu: add debugfs interface for reading MQDs") Cc: stable@vger.kernel.org # v6.5+ Reviewed-by: Shashank Sharma Signed-off-by: Johannes Weiner Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 46 +++++++++++++++--------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { From 4be9075fec0a639384ed19975634b662bfab938f Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 12 Mar 2024 10:35:12 +0100 Subject: [PATCH 36/55] drm/vmwgfx: Create debugfs ttm_resource_manager entry only if needed The driver creates /sys/kernel/debug/dri/0/mob_ttm even when the corresponding ttm_resource_manager is not allocated. This leads to a crash when trying to read from this file. Add a check to create mob_ttm, system_mob_ttm, and gmr_ttm debug file only when the corresponding ttm_resource_manager is allocated. crash> bt PID: 3133409 TASK: ffff8fe4834a5000 CPU: 3 COMMAND: "grep" #0 [ffffb954506b3b20] machine_kexec at ffffffffb2a6bec3 #1 [ffffb954506b3b78] __crash_kexec at ffffffffb2bb598a #2 [ffffb954506b3c38] crash_kexec at ffffffffb2bb68c1 #3 [ffffb954506b3c50] oops_end at ffffffffb2a2a9b1 #4 [ffffb954506b3c70] no_context at ffffffffb2a7e913 #5 [ffffb954506b3cc8] __bad_area_nosemaphore at ffffffffb2a7ec8c #6 [ffffb954506b3d10] do_page_fault at ffffffffb2a7f887 #7 [ffffb954506b3d40] page_fault at ffffffffb360116e [exception RIP: ttm_resource_manager_debug+0x11] RIP: ffffffffc04afd11 RSP: ffffb954506b3df0 RFLAGS: 00010246 RAX: ffff8fe41a6d1200 RBX: 0000000000000000 RCX: 0000000000000940 RDX: 0000000000000000 RSI: ffffffffc04b4338 RDI: 0000000000000000 RBP: ffffb954506b3e08 R8: ffff8fee3ffad000 R9: 0000000000000000 R10: ffff8fe41a76a000 R11: 0000000000000001 R12: 00000000ffffffff R13: 0000000000000001 R14: ffff8fe5bb6f3900 R15: ffff8fe41a6d1200 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffffb954506b3e00] ttm_resource_manager_show at ffffffffc04afde7 [ttm] #9 [ffffb954506b3e30] seq_read at ffffffffb2d8f9f3 RIP: 00007f4c4eda8985 RSP: 00007ffdbba9e9f8 RFLAGS: 00000246 RAX: ffffffffffffffda RBX: 000000000037e000 RCX: 00007f4c4eda8985 RDX: 000000000037e000 RSI: 00007f4c41573000 RDI: 0000000000000003 RBP: 000000000037e000 R8: 0000000000000000 R9: 000000000037fe30 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4c41573000 R13: 0000000000000003 R14: 00007f4c41572010 R15: 0000000000000003 ORIG_RAX: 0000000000000000 CS: 0033 SS: 002b Signed-off-by: Jocelyn Falempe Fixes: af4a25bbe5e7 ("drm/vmwgfx: Add debugfs entries for various ttm resource managers") Cc: Reviewed-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240312093551.196609-1-jfalempe@redhat.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d3e308fdfd5b..c7d90f96d16a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1444,12 +1444,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw) root, "system_ttm"); ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM), root, "vram_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), - root, "gmr_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), - root, "mob_ttm"); - ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), - root, "system_mob_ttm"); + if (vmw->has_gmr) + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR), + root, "gmr_ttm"); + if (vmw->has_mob) { + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB), + root, "mob_ttm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM), + root, "system_mob_ttm"); + } } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, From 197aa825fdc4a3500f8d06518a4975f5461b4b19 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 28 Mar 2024 15:38:48 +1000 Subject: [PATCH 37/55] drm/i915: add bug.h include to i915_memcpy.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is stopping me building here for some reason, /home/airlied/devel/kernel/dim/drm-fixes/drivers/gpu/drm/i915/i915_memcpy.c: In function ‘i915_unaligned_memcpy_from_wc’: /home/airlied/devel/kernel/dim/drm-fixes/drivers/gpu/drm/i915/i915_memcpy.c:33:25: error: implicit declaration of function ‘BUG_ON’; did you mean ‘CI_BUG_ON’? [-Werror=implicit-function-declaration] 33 | #define CI_BUG_ON(expr) BUG_ON(expr) | ^~~~~~ /home/airlied/devel/kernel/dim/drm-fixes/drivers/gpu/drm/i915/i915_memcpy.c:144:9: note: in expansion of macro ‘CI_BUG_ON’ 144 | CI_BUG_ON(!i915_has_memcpy_from_wc()); | ^~~~~~~~~ engage maintainer overrides :-) Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_memcpy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index ba82277254b7..783de079ef44 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "i915_memcpy.h" From 7cd78fd7e29644641b848d69a585f2aea45f0991 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 27 Mar 2024 18:55:55 +0100 Subject: [PATCH 38/55] drm/qxl: remove unused `count` variable from `qxl_surface_id_alloc()` Clang 14 in an (essentially) defconfig loongarch64 build for next-20240326 reports [1]: drivers/gpu/drm/qxl/qxl_cmd.c:424:6: error: variable 'count' set but not used [-Werror,-Wunused-but-set-variable] The variable is already unused in the version that got into the tree. Thus remove the unused variable. Fixes: f64122c1f6ad ("drm: add new QXL driver. (v1.4)") Closes: https://lore.kernel.org/lkml/CANiq72mjc5t4n25SQvYSrOEhxxpXYPZ4pPzneSJHEnc3qApu2Q@mail.gmail.com/ [1] Closes: https://lore.kernel.org/all/20240327163331.GB1153323@dev-arch.thelio-3990X/ Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240327175556.233126-1-ojeda@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/qxl/qxl_cmd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 281edab518cd..d6ea01f3797b 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -421,7 +421,6 @@ int qxl_surface_id_alloc(struct qxl_device *qdev, { uint32_t handle; int idr_ret; - int count = 0; again: idr_preload(GFP_ATOMIC); spin_lock(&qdev->surf_id_idr_lock); @@ -433,7 +432,6 @@ again: handle = idr_ret; if (handle >= qdev->rom->n_surfaces) { - count++; spin_lock(&qdev->surf_id_idr_lock); idr_remove(&qdev->surf_id_idr, handle); spin_unlock(&qdev->surf_id_idr_lock); From aba2a144c0bf1ecdcbc520525712fb661392e509 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 27 Mar 2024 18:55:56 +0100 Subject: [PATCH 39/55] drm/qxl: remove unused variable from `qxl_process_single_command()` Clang 14 in an (essentially) defconfig loongarch64 build for next-20240327 reports [1]: drivers/gpu/drm/qxl/qxl_ioctl.c:148:14: error: variable 'num_relocs' set but not used [-Werror,-Wunused-but-set-variable] The variable was originally used in the `out_free_bos` label, but commit 74d9a6335dce ("drm/qxl: Simplify cleaning qxl processing command") removed the use that happened in that label. Thus remove the unused variable. Fixes: 74d9a6335dce ("drm/qxl: Simplify cleaning qxl processing command") Closes: https://lore.kernel.org/lkml/CANiq72kqqQfUxLkHJYqeBAhpc6YcX7bfR96gmmbF=j8hEOykqw@mail.gmail.com/ [1] Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/r/20240327175556.233126-2-ojeda@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/qxl/qxl_ioctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index dd0f834d881c..506ae1f5e099 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -145,7 +145,7 @@ static int qxl_process_single_command(struct qxl_device *qdev, struct qxl_release *release; struct qxl_bo *cmd_bo; void *fb_cmd; - int i, ret, num_relocs; + int i, ret; int unwritten; switch (cmd->type) { @@ -200,7 +200,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, } /* fill out reloc info structs */ - num_relocs = 0; for (i = 0; i < cmd->relocs_num; ++i) { struct drm_qxl_reloc reloc; struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs); @@ -230,7 +229,6 @@ static int qxl_process_single_command(struct qxl_device *qdev, reloc_info[i].dst_bo = cmd_bo; reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset; } - num_relocs++; /* reserve and validate the reloc dst bo */ if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) { From 5acb32b1ad6672fb2985d26b5660a9f3726b0632 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 8 Mar 2024 16:46:43 +0200 Subject: [PATCH 40/55] drm/i915: Add includes for BUG_ON/BUILD_BUG_ON in i915_memcpy.c Add standalone includes for BUG_ON and BUILD_BUG_ON to avoid build failure after linux-next include refactoring. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240308144643.137831-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 4df6ac223cad36e7384ed00fe6efc114279f0df6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_memcpy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index ba82277254b7..cc41974cee74 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "i915_memcpy.h" From f378ab7870046704fb92e64d50a67dda2cae8420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 26 Feb 2024 21:32:50 +0200 Subject: [PATCH 41/55] drm/i915: Stop doing double audio enable/disable on SDVO and g4x+ DP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like I misplaced a few hunks when I moved the audio enable/disable out from the encoder enable/disable hooks. So we are now doing a double audio enable/disable on SDVO and g4x+ DP. Probably harmless as doing it twice shouldn't really change anything, but let's do it just once, as intended. Fixes: cff742cc6851 ("drm/i915: Hoist the encoder->audio_{enable,disable}() calls higher up") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240226193251.29619-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 315bd0a0825776d6c66d474bf572db64fa019ad8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/g4x_dp.c | 2 -- drivers/gpu/drm/i915/display/intel_sdvo.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index dfe0b07a122d..06ec04e667e3 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -717,7 +717,6 @@ static void g4x_enable_dp(struct intel_atomic_state *state, { intel_enable_dp(state, encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void vlv_enable_dp(struct intel_atomic_state *state, @@ -726,7 +725,6 @@ static void vlv_enable_dp(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { intel_edp_backlight_on(pipe_config, conn_state); - encoder->audio_enable(encoder, pipe_config, conn_state); } static void g4x_pre_enable_dp(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 5f9e748adc89..0cd9c183f621 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1842,8 +1842,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; - encoder->audio_disable(encoder, old_crtc_state, conn_state); - intel_sdvo_set_active_outputs(intel_sdvo, 0); if (0) intel_sdvo_set_encoder_power_state(intel_sdvo, @@ -1935,8 +1933,6 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_set_encoder_power_state(intel_sdvo, DRM_MODE_DPMS_ON); intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag); - - encoder->audio_enable(encoder, pipe_config, conn_state); } static enum drm_mode_status From cf48bddd31deefb9ab07de9a4d0150da6610198a Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Wed, 28 Feb 2024 16:02:25 +0200 Subject: [PATCH 42/55] drm/i915/display: Disable AuxCCS framebuffers if built for Xe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AuxCCS framebuffers don't work on Xe driver hence disable them from plane capabilities until they are fixed. FlatCCS framebuffers work and they are left enabled. CCS is left untouched for i915 driver. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/933 Signed-off-by: Juha-Pekka Heikkila Reviewed-by: José Roberto de Souza Tested-by: José Roberto de Souza Acked-by: Jani Nikula Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240228140225.858145-1-juhapekka.heikkila@gmail.com (cherry picked from commit b7232a730fbf043f54fb46fbf4a6e92936770e79) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e941e2e4fd14..860574d04f88 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2295,6 +2295,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915, if (HAS_4TILE(i915)) caps |= INTEL_PLANE_CAP_TILING_4; + if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915)) + return caps; + if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) { caps |= INTEL_PLANE_CAP_CCS_RC; if (DISPLAY_VER(i915) >= 12) From 18846627ef1210dcd55d65342b055ea97a46ffff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Mar 2024 10:36:59 +0200 Subject: [PATCH 43/55] drm/i915/dsi: Go back to the previous INIT_OTP/DISPLAY_ON order, mostly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reinstate commit 88b065943cb5 ("drm/i915/dsi: Do display on sequence later on icl+"), for the most part. Turns out some machines (eg. Chuwi Minibook X) really do need that updated order. It is also the order the Windows driver uses. However we can't just undo the revert since that would again break Lenovo 82TQ. After staring at the VBT sequences for both machines I've concluded that the Lenovo 82TQ sequences look somewhat broken: - INIT_OTP is not present at all - what should be in INIT_OTP is found in DISPLAY_ON - what should be in DISPLAY_ON is found in BACKLIGHT_ON (along with the actual backlight stuff) The Chuwi Minibook X on the other hand has a full complement of sequences in its VBT. So let's try to deal with the broken sequences in the Lenovo 82TQ VBT by simply swapping the (non-existent) INIT_OTP sequence with the DISPLAY_ON sequence. Thus we execute DISPLAY_ON when intending to execute INIT_OTP, and execute nothing at all when intending to execute DISPLAY_ON. That should be 100% equivalent to the revert, for such broken VBTs. Cc: stable@vger.kernel.org Fixes: 6992eb815d08 ("Revert "drm/i915/dsi: Do display on sequence later on icl+"") References: https://gitlab.freedesktop.org/drm/intel/-/issues/10071 Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10334 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240305083659.8396-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula (cherry picked from commit 94ae4612ea336bfc3c12b3fc68467c6711a4f39b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/icl_dsi.c | 3 +- drivers/gpu/drm/i915/display/intel_bios.c | 43 +++++++++++++++++++---- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index eda4a8b88590..ac456a2275db 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index fe52c06271ef..9328601e4e9f 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. */ -static void fixup_mipi_sequences(struct drm_i915_private *i915, - struct intel_panel *panel) +static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) { u8 *init_otp; int len; - /* Limit this to VLV for now. */ - if (!IS_VALLEYVIEW(i915)) - return; - /* Limit this to v1 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || panel->vbt.dsi.seq_version != 1) @@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915, panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; } +/* + * Some machines (eg. Lenovo 82TQ) appear to have broken + * VBT sequences: + * - INIT_OTP is not present at all + * - what should be in INIT_OTP is in DISPLAY_ON + * - what should be in DISPLAY_ON is in BACKLIGHT_ON + * (along with the actual backlight stuff) + * + * To make those work we simply swap DISPLAY_ON and INIT_OTP. + * + * TODO: Do we need to limit this to specific machines, + * or examine the contents of the sequences to + * avoid false positives? + */ +static void icl_fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] && + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) { + drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); + + swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP], + panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]); + } +} + +static void fixup_mipi_sequences(struct drm_i915_private *i915, + struct intel_panel *panel) +{ + if (DISPLAY_VER(i915) >= 11) + icl_fixup_mipi_sequences(i915, panel); + else if (IS_VALLEYVIEW(i915)) + vlv_fixup_mipi_sequences(i915, panel); +} + static void parse_mipi_sequence(struct drm_i915_private *i915, struct intel_panel *panel) From 186bce682772e7346bf7ced5325b5f4ff050ccfb Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 28 Feb 2024 16:07:38 +0530 Subject: [PATCH 44/55] drm/i915/mtl: Update workaround 14018575942 Applying WA 14018575942 only on Compute engine has impact on some apps like chrome. Updating this WA to apply on Render engine as well as it is helping with performance on Chrome. Note: There is no concern from media team thus not applying WA on media engines. We will revisit if any issues reported from media team. V2(Matt): - Use correct WA number Fixes: 668f37e1ee11 ("drm/i915/mtl: Update workaround 14018778641") Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240228103738.2018458-1-tejas.upadhyay@intel.com (cherry picked from commit 71271280175aa0ed6673e40cce7c01296bcd05f6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d67d44611c28..25413809b9dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1653,6 +1653,7 @@ static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ From 09ae0f4543acc1b623fc2b7ab2489ae5c09fbede Mon Sep 17 00:00:00 2001 From: Bhanuprakash Modem Date: Wed, 28 Feb 2024 11:25:02 +0530 Subject: [PATCH 45/55] drm/i915/drrs: Refactor CPU transcoder DRRS check Rename cpu_transcoder_has_drrs() to intel_cpu_transcoder_has_drrs() and move it to intel_drrs.[ch]. V2: - Move helpers to intel_drrs.[ch] (Jani) - Fix commit message (Jani) Cc: Jani Nikula Cc: Ankit Nautiyal Cc: Mitul Golani Signed-off-by: Bhanuprakash Modem Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240228055502.2857819-1-bhanuprakash.modem@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 2d04f8158548103c082190c8dbf6a19097e2423e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp.c | 12 ++---------- drivers/gpu/drm/i915/display/intel_drrs.c | 9 +++++++++ drivers/gpu/drm/i915/display/intel_drrs.h | 3 +++ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f0c3ed37b350..f98ef4b42a44 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -67,6 +67,7 @@ #include "intel_dp_tunnel.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" +#include "intel_drrs.h" #include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" @@ -2683,15 +2684,6 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA); } -static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915, - enum transcoder cpu_transcoder) -{ - if (HAS_DOUBLE_BUFFERED_M_N(i915)) - return true; - - return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); -} - static bool can_enable_drrs(struct intel_connector *connector, const struct intel_crtc_state *pipe_config, const struct drm_display_mode *downclock_mode) @@ -2714,7 +2706,7 @@ static bool can_enable_drrs(struct intel_connector *connector, if (pipe_config->has_pch_encoder) return false; - if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) + if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder)) return false; return downclock_mode && diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 169ef38ff188..4743495ad41f 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -63,6 +63,15 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type) return str[drrs_type]; } +bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder) +{ + if (HAS_DOUBLE_BUFFERED_M_N(i915)) + return true; + + return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder); +} + static void intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc, enum drrs_refresh_rate refresh_rate) diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h index 8ef5f93a80ff..0982f95eab72 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.h +++ b/drivers/gpu/drm/i915/display/intel_drrs.h @@ -9,12 +9,15 @@ #include enum drrs_type; +enum transcoder; struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; struct intel_connector; +bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915, + enum transcoder cpu_transcoder); const char *intel_drrs_type_str(enum drrs_type drrs_type); bool intel_drrs_is_active(struct intel_crtc *crtc); void intel_drrs_activate(const struct intel_crtc_state *crtc_state); From 0f8c7a7dd3d39fb640018b5cd977054d52c0bab2 Mon Sep 17 00:00:00 2001 From: Bhanuprakash Modem Date: Tue, 27 Feb 2024 18:08:33 +0530 Subject: [PATCH 46/55] drm/i915/display/debugfs: Fix duplicate checks in i915_drrs_status Remove duplicate checks for debugfs entry "DRRS capable:". Fixes: 20af10845864 ("drm/i915/display/debugfs: New entry "DRRS capable" to i915_drrs_status") Cc: Jani Nikula Cc: Ankit Nautiyal Cc: Mitul Golani Signed-off-by: Bhanuprakash Modem Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240227123833.2799647-2-bhanuprakash.modem@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 3d81fceb60f20fe2ceed2198636ee6dc9ef46775) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_drrs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 4743495ad41f..597f8bd6aa1a 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -321,9 +321,8 @@ static int intel_drrs_debugfs_status_show(struct seq_file *m, void *unused) mutex_lock(&crtc->drrs.mutex); seq_printf(m, "DRRS capable: %s\n", - str_yes_no(crtc_state->has_drrs || - HAS_DOUBLE_BUFFERED_M_N(i915) || - intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder))); + str_yes_no(intel_cpu_transcoder_has_drrs(i915, + crtc_state->cpu_transcoder))); seq_printf(m, "DRRS enabled: %s\n", str_yes_no(crtc_state->has_drrs)); From f7d3b9277ff7eb8e84e6f8554d1c2dd78278a572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Mar 2024 06:08:04 +0200 Subject: [PATCH 47/55] drm/i915/vrr: Generate VRR "safe window" for DSB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like TRANS_CHICKEN bit 31 means something totally different depending on the platform: TGL: generate VRR "safe window" for DSB ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR So far we've only set this on ADL/DG2, but when using DSB+VRR we also need to set it on TGL. And a quick test on MTL says it doesn't need this bit for either of those purposes, even though it's still documented as valid in bspec. Cc: stable@vger.kernel.org Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240306040806.21697-2-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna (cherry picked from commit 810e4519a1b34b5a0ff0eab32e5b184f533c5ee9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_vrr.c | 7 ++++--- drivers/gpu/drm/i915/i915_reg.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5d905f932cb4..eb5bd0743902 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -187,10 +187,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; /* - * TRANS_SET_CONTEXT_LATENCY with VRR enabled - * requires this chicken bit on ADL/DG2. + * This bit seems to have two meanings depending on the platform: + * TGL: generate VRR "safe window" for DSB vblank waits + * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ - if (DISPLAY_VER(dev_priv) == 13) + if (IS_DISPLAY_VER(dev_priv, 12, 13)) intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e00557e1a57f..3b2e49ce29ba 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4599,7 +4599,7 @@ #define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \ _MTL_CHICKEN_TRANS_A, \ _MTL_CHICKEN_TRANS_B) -#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */ +#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */ #define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */ #define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) #define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x) From f12751168f1a49ebb84b8056cf038973c53b284f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Mar 2024 06:08:05 +0200 Subject: [PATCH 48/55] drm/i915/dsb: Fix DSB vblank waits when using VRR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like the undelayed vblank gets signalled exactly when the active period ends. That is a problem for DSB+VRR when we are already in vblank and expect DSB to start executing as soon as we send the push. Instead of starting, the DSB just keeps on waiting for the undelayed vblank which won't signal until the end of the next frame's active period, which is far too late. The end result is that DSB won't have even started executing by the time the flips/etc. have completed. We then wait for an extra 1ms, after which we terminate the DSB and report a timeout: [drm] *ERROR* [CRTC:80:pipe A] DSB 0 timed out waiting for idle (current head=0xfedf4000, head=0x0, tail=0x1080) To fix this let's configure DSB to use the so called VRR "safe window" instead of the undelayed vblank to trigger the DSB vblank logic, when VRR is enabled. Cc: stable@vger.kernel.org Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240306040806.21697-3-ville.syrjala@linux.intel.com Reviewed-by: Animesh Manna (cherry picked from commit 41429d9b68367596eb3d6d5961e6295c284622a7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dsb.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index d62e050185e7..e4515bf92038 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); } +static u32 dsb_chicken(struct intel_crtc *crtc) +{ + if (crtc->mode_flags & I915_MODE_FLAG_VRR) + return DSB_CTRL_WAIT_SAFE_WINDOW | + DSB_CTRL_NO_WAIT_VBLANK | + DSB_INST_WAIT_SAFE_WINDOW | + DSB_INST_NO_WAIT_VBLANK; + else + return 0; +} + static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, int dewake_scanline) { @@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id), ctrl | DSB_ENABLE); + intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id), + dsb_chicken(crtc)); + intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); From b212b79768ccde74429f872c37618c543fa11333 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 11 Mar 2024 21:34:58 +0100 Subject: [PATCH 49/55] drm/i915/hwmon: Fix locking inversion in sysfs getter In i915 hwmon sysfs getter path we now take a hwmon_lock, then acquire an rpm wakeref. That results in lock inversion: <4> [197.079335] ====================================================== <4> [197.085473] WARNING: possible circular locking dependency detected <4> [197.091611] 6.8.0-rc7-Patchwork_129026v7-gc4dc92fb1152+ #1 Not tainted <4> [197.098096] ------------------------------------------------------ <4> [197.104231] prometheus-node/839 is trying to acquire lock: <4> [197.109680] ffffffff82764d80 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc+0x9a/0x350 <4> [197.116939] but task is already holding lock: <4> [197.122730] ffff88811b772a40 (&hwmon->hwmon_lock){+.+.}-{3:3}, at: hwm_energy+0x4b/0x100 [i915] <4> [197.131543] which lock already depends on the new lock. ... <4> [197.507922] Chain exists of: fs_reclaim --> >->reset.mutex --> &hwmon->hwmon_lock <4> [197.518528] Possible unsafe locking scenario: <4> [197.524411] CPU0 CPU1 <4> [197.528916] ---- ---- <4> [197.533418] lock(&hwmon->hwmon_lock); <4> [197.537237] lock(>->reset.mutex); <4> [197.543376] lock(&hwmon->hwmon_lock); <4> [197.549682] lock(fs_reclaim); ... <4> [197.632548] Call Trace: <4> [197.634990] <4> [197.637088] dump_stack_lvl+0x64/0xb0 <4> [197.640738] check_noncircular+0x15e/0x180 <4> [197.652968] check_prev_add+0xe9/0xce0 <4> [197.656705] __lock_acquire+0x179f/0x2300 <4> [197.660694] lock_acquire+0xd8/0x2d0 <4> [197.673009] fs_reclaim_acquire+0xa1/0xd0 <4> [197.680478] __kmalloc+0x9a/0x350 <4> [197.689063] acpi_ns_internalize_name.part.0+0x4a/0xb0 <4> [197.694170] acpi_ns_get_node_unlocked+0x60/0xf0 <4> [197.720608] acpi_ns_get_node+0x3b/0x60 <4> [197.724428] acpi_get_handle+0x57/0xb0 <4> [197.728164] acpi_has_method+0x20/0x50 <4> [197.731896] acpi_pci_set_power_state+0x43/0x120 <4> [197.736485] pci_power_up+0x24/0x1c0 <4> [197.740047] pci_pm_default_resume_early+0x9/0x30 <4> [197.744725] pci_pm_runtime_resume+0x2d/0x90 <4> [197.753911] __rpm_callback+0x3c/0x110 <4> [197.762586] rpm_callback+0x58/0x70 <4> [197.766064] rpm_resume+0x51e/0x730 <4> [197.769542] rpm_resume+0x267/0x730 <4> [197.773020] rpm_resume+0x267/0x730 <4> [197.776498] rpm_resume+0x267/0x730 <4> [197.779974] __pm_runtime_resume+0x49/0x90 <4> [197.784055] __intel_runtime_pm_get+0x19/0xa0 [i915] <4> [197.789070] hwm_energy+0x55/0x100 [i915] <4> [197.793183] hwm_read+0x9a/0x310 [i915] <4> [197.797124] hwmon_attr_show+0x36/0x120 <4> [197.800946] dev_attr_show+0x15/0x60 <4> [197.804509] sysfs_kf_seq_show+0xb5/0x100 Acquire the wakeref before the lock and hold it as long as the lock is also held. Follow that pattern across the whole source file where similar lock inversion can happen. v2: Keep hardware read under the lock so the whole operation of updating energy from hardware is still atomic (Guenter), - instead, acquire the rpm wakeref before the lock and hold it as long as the lock is held, - use the same aproach for other similar places across the i915_hwmon.c source file (Rodrigo). Fixes: 1b44019a93e2 ("drm/i915/guc: Disable PL1 power limit when loading GuC firmware") Signed-off-by: Janusz Krzysztofik Cc: Rodrigo Vivi Cc: Guenter Roeck Cc: # v6.5+ Reviewed-by: Ashutosh Dixit Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240311203500.518675-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 71b218771426ea84c0e0148a2b7ac52c1f76e792) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 37 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8c3f443c8347..b758fd110c20 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, struct intel_uncore *uncore = ddat->uncore; intel_wakeref_t wakeref; - mutex_lock(&hwmon->hwmon_lock); + with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) intel_uncore_rmw(uncore, reg, clear, set); - mutex_unlock(&hwmon->hwmon_lock); + mutex_unlock(&hwmon->hwmon_lock); + } } /* @@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy) else rgaddr = hwmon->rg.energy_status_all; - mutex_lock(&hwmon->hwmon_lock); + with_intel_runtime_pm(uncore->rpm, wakeref) { + mutex_lock(&hwmon->hwmon_lock); - with_intel_runtime_pm(uncore->rpm, wakeref) reg_val = intel_uncore_read(uncore, rgaddr); - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; - ei->reg_val_prev = reg_val; + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + ei->reg_val_prev = reg_val; - *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, - hwmon->scl_shift_energy); - mutex_unlock(&hwmon->hwmon_lock); + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); + mutex_unlock(&hwmon->hwmon_lock); + } } static ssize_t @@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) /* Block waiting for GuC reset to complete when needed */ for (;;) { + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); mutex_lock(&hwmon->hwmon_lock); prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE); @@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) } mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); schedule(); } finish_wait(&ddat->waitq, &wait); if (ret) - goto unlock; - - wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + goto exit; /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { @@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: - intel_runtime_pm_put(ddat->uncore->rpm, wakeref); -unlock: mutex_unlock(&hwmon->hwmon_lock); + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); return ret; } From e41d769f1a7a1dc533c35ef7b366be3dbf432a1c Mon Sep 17 00:00:00 2001 From: Jonathon Hall Date: Wed, 13 Mar 2024 09:54:25 -0400 Subject: [PATCH 50/55] drm/i915: Do not match JSL in ehl_combo_pll_div_frac_wa_needed() Since commit 0c65dc062611 ("drm/i915/jsl: s/JSL/JASPERLAKE for platform/subplatform defines"), boot freezes on a Jasper Lake tablet (Librem 11), usually with graphical corruption on the eDP display, but sometimes just a black screen. This commit was included in 6.6 and later. That commit was intended to refactor EHL and JSL macros, but the change to ehl_combo_pll_div_frac_wa_needed() started matching JSL incorrectly when it was only intended to match EHL. It replaced: return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || with: return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) && IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || Remove IS_JASPERLAKE() to fix the regression. Signed-off-by: Jonathon Hall Cc: stable@vger.kernel.org Fixes: 0c65dc062611 ("drm/i915/jsl: s/JSL/JASPERLAKE for platform/subplatform defines") Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240313135424.3731410-1-jonathon.hall@puri.sm Signed-off-by: Jani Nikula (cherry picked from commit 1ef48859317b2a77672dea8682df133abf9c44ed) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index ff480f171f75..b6d24410740f 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2554,7 +2554,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params, static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { - return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) && + return ((IS_ELKHARTLAKE(i915) && IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) && i915->display.dpll.ref_clks.nssc == 38400; From d392e1b9c2e8c60550a2a467732107f0f98b8e97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 20 Mar 2024 14:05:47 -0700 Subject: [PATCH 51/55] drm/i915: Do not print 'pxp init failed with 0' when it succeed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is misleading, if the intention was to also print something in case it succeed it should have a different string. Cc: Alan Previn Signed-off-by: José Roberto de Souza Fixes: 698e19da2914 ("drm/i915: Skip pxp init if gt is wedged") Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240320210547.71937-1-jose.souza@intel.com (cherry picked from commit d437099ab21cd4c6ce5d578b765df642d759c929) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 9ee902d5b72c..4b9233c07a22 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -800,7 +800,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_cleanup_modeset2; ret = intel_pxp_init(i915); - if (ret != -ENODEV) + if (ret && ret != -ENODEV) drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); ret = intel_display_driver_probe(i915); From 0e45882ca829b26b915162e8e86dbb1095768e9e Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 5 Mar 2024 15:35:06 +0100 Subject: [PATCH 52/55] drm/i915/vma: Fix UAF on destroy against retire race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Object debugging tools were sporadically reporting illegal attempts to free a still active i915 VMA object when parking a GT believed to be idle. [161.359441] ODEBUG: free active (active state 0) object: ffff88811643b958 object type: i915_active hint: __i915_vma_active+0x0/0x50 [i915] [161.360082] WARNING: CPU: 5 PID: 276 at lib/debugobjects.c:514 debug_print_object+0x80/0xb0 ... [161.360304] CPU: 5 PID: 276 Comm: kworker/5:2 Not tainted 6.5.0-rc1-CI_DRM_13375-g003f860e5577+ #1 [161.360314] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 [161.360322] Workqueue: i915-unordered __intel_wakeref_put_work [i915] [161.360592] RIP: 0010:debug_print_object+0x80/0xb0 ... [161.361347] debug_object_free+0xeb/0x110 [161.361362] i915_active_fini+0x14/0x130 [i915] [161.361866] release_references+0xfe/0x1f0 [i915] [161.362543] i915_vma_parked+0x1db/0x380 [i915] [161.363129] __gt_park+0x121/0x230 [i915] [161.363515] ____intel_wakeref_put_last+0x1f/0x70 [i915] That has been tracked down to be happening when another thread is deactivating the VMA inside __active_retire() helper, after the VMA's active counter has been already decremented to 0, but before deactivation of the VMA's object is reported to the object debugging tool. We could prevent from that race by serializing i915_active_fini() with __active_retire() via ref->tree_lock, but that wouldn't stop the VMA from being used, e.g. from __i915_vma_retire() called at the end of __active_retire(), after that VMA has been already freed by a concurrent i915_vma_destroy() on return from the i915_active_fini(). Then, we should rather fix the issue at the VMA level, not in i915_active. Since __i915_vma_parked() is called from __gt_park() on last put of the GT's wakeref, the issue could be addressed by holding the GT wakeref long enough for __active_retire() to complete before that wakeref is released and the GT parked. I believe the issue was introduced by commit d93939730347 ("drm/i915: Remove the vma refcount") which moved a call to i915_active_fini() from a dropped i915_vma_release(), called on last put of the removed VMA kref, to i915_vma_parked() processing path called on last put of a GT wakeref. However, its visibility to the object debugging tool was suppressed by a bug in i915_active that was fixed two weeks later with commit e92eb246feb9 ("drm/i915/active: Fix missing debug object activation"). A VMA associated with a request doesn't acquire a GT wakeref by itself. Instead, it depends on a wakeref held directly by the request's active intel_context for a GT associated with its VM, and indirectly on that intel_context's engine wakeref if the engine belongs to the same GT as the VMA's VM. Those wakerefs are released asynchronously to VMA deactivation. Fix the issue by getting a wakeref for the VMA's GT when activating it, and putting that wakeref only after the VMA is deactivated. However, exclude global GTT from that processing path, otherwise the GPU never goes idle. Since __i915_vma_retire() may be called from atomic contexts, use async variant of wakeref put. Also, to avoid circular locking dependency, take care of acquiring the wakeref before VM mutex when both are needed. v7: Add inline comments with justifications for: - using untracked variants of intel_gt_pm_get/put() (Nirmoy), - using async variant of _put(), - not getting the wakeref in case of a global GTT, - always getting the first wakeref outside vm->mutex. v6: Since __i915_vma_active/retire() callbacks are not serialized, storing a wakeref tracking handle inside struct i915_vma is not safe, and there is no other good place for that. Use untracked variants of intel_gt_pm_get/put_async(). v5: Replace "tile" with "GT" across commit description (Rodrigo), - avoid mentioning multi-GT case in commit description (Rodrigo), - explain why we need to take a temporary wakeref unconditionally inside i915_vma_pin_ww() (Rodrigo). v4: Refresh on top of commit 5e4e06e4087e ("drm/i915: Track gt pm wakerefs") (Andi), - for more easy backporting, split out removal of former insufficient workarounds and move them to separate patches (Nirmoy). - clean up commit message and description a bit. v3: Identify root cause more precisely, and a commit to blame, - identify and drop former workarounds, - update commit message and description. v2: Get the wakeref before VM mutex to avoid circular locking dependency, - drop questionable Fixes: tag. Fixes: d93939730347 ("drm/i915: Remove the vma refcount") Closes: https://gitlab.freedesktop.org/drm/intel/issues/8875 Signed-off-by: Janusz Krzysztofik Cc: Thomas Hellström Cc: Nirmoy Das Cc: Andi Shyti Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Nirmoy Das Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240305143747.335367-6-janusz.krzysztofik@linux.intel.com (cherry picked from commit f3c71b2ded5c4367144a810ef25f998fd1d6c381) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 50 ++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d09aad34ba37..b70715b1411d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,6 +34,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" #include "gt/intel_tlb.h" @@ -103,12 +104,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref) static int __i915_vma_active(struct i915_active *ref) { - return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_tryget(vma)) + return -ENOENT; + + /* + * Exclude global GTT VMA from holding a GT wakeref + * while active, otherwise GPU never goes idle. + */ + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we and our _retire() counterpart can be + * called asynchronously, storing a wakeref tracking + * handle inside struct i915_vma is not safe, and + * there is no other good place for that. Hence, + * use untracked variants of intel_gt_pm_get/put(). + */ + intel_gt_pm_get_untracked(vma->vm->gt); + } + + return 0; } static void __i915_vma_retire(struct i915_active *ref) { - i915_vma_put(active_to_vma(ref)); + struct i915_vma *vma = active_to_vma(ref); + + if (!i915_vma_is_ggtt(vma)) { + /* + * Since we can be called from atomic contexts, + * use an async variant of intel_gt_pm_put(). + */ + intel_gt_pm_put_async_untracked(vma->vm->gt); + } + + i915_vma_put(vma); } static struct i915_vma * @@ -1404,7 +1435,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, struct i915_vma_work *work = NULL; struct dma_fence *moving = NULL; struct i915_vma_resource *vma_res = NULL; - intel_wakeref_t wakeref = 0; + intel_wakeref_t wakeref; unsigned int bound; int err; @@ -1424,8 +1455,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + /* + * In case of a global GTT, we must hold a runtime-pm wakeref + * while global PTEs are updated. In other cases, we hold + * the rpm reference while the VMA is active. Since runtime + * resume may require allocations, which are forbidden inside + * vm->mutex, get the first rpm wakeref outside of the mutex. + */ + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { /* lock VM */ @@ -1561,8 +1598,7 @@ err_fence: if (work) dma_fence_work_commit_imm(&work->base); err_rpm: - if (wakeref) - intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); + intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); if (moving) dma_fence_put(moving); From 4a3859ea5240365d21f6053ee219bb240d520895 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2024 14:58:47 +0100 Subject: [PATCH 53/55] drm/i915/gt: Reset queue_priority_hint on parking Originally, with strict in order execution, we could complete execution only when the queue was empty. Preempt-to-busy allows replacement of an active request that may complete before the preemption is processed by HW. If that happens, the request is retired from the queue, but the queue_priority_hint remains set, preventing direct submission until after the next CS interrupt is processed. This preempt-to-busy race can be triggered by the heartbeat, which will also act as the power-management barrier and upon completion allow us to idle the HW. We may process the completion of the heartbeat, and begin parking the engine before the CS event that restores the queue_priority_hint, causing us to fail the assertion that it is MIN. <3>[ 166.210729] __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 166.210781] Dumping ftrace buffer: <0>[ 166.210795] --------------------------------- ... <0>[ 167.302811] drm_fdin-1097 2..s1. 165741070us : trace_ports: 0000:00:02.0 rcs0: promote { ccid:20 1217:2 prio 0 } <0>[ 167.302861] drm_fdin-1097 2d.s2. 165741072us : execlists_submission_tasklet: 0000:00:02.0 rcs0: preempting last=1217:2, prio=0, hint=2147483646 <0>[ 167.302928] drm_fdin-1097 2d.s2. 165741072us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 1217:2, current 0 <0>[ 167.302992] drm_fdin-1097 2d.s2. 165741073us : __i915_request_submit: 0000:00:02.0 rcs0: fence 3:4660, current 4659 <0>[ 167.303044] drm_fdin-1097 2d.s1. 165741076us : execlists_submission_tasklet: 0000:00:02.0 rcs0: context:3 schedule-in, ccid:40 <0>[ 167.303095] drm_fdin-1097 2d.s1. 165741077us : trace_ports: 0000:00:02.0 rcs0: submit { ccid:40 3:4660* prio 2147483646 } <0>[ 167.303159] kworker/-89 11..... 165741139us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence c90:2, current 2 <0>[ 167.303208] kworker/-89 11..... 165741148us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:c90 unpin <0>[ 167.303272] kworker/-89 11..... 165741159us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 1217:2, current 2 <0>[ 167.303321] kworker/-89 11..... 165741166us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:1217 unpin <0>[ 167.303384] kworker/-89 11..... 165741170us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 3:4660, current 4660 <0>[ 167.303434] kworker/-89 11d..1. 165741172us : __intel_context_retire: 0000:00:02.0 rcs0: context:1216 retire runtime: { total:56028ns, avg:56028ns } <0>[ 167.303484] kworker/-89 11..... 165741198us : __engine_park: 0000:00:02.0 rcs0: parked <0>[ 167.303534] -0 5d.H3. 165741207us : execlists_irq_handler: 0000:00:02.0 rcs0: semaphore yield: 00000040 <0>[ 167.303583] kworker/-89 11..... 165741397us : __intel_context_retire: 0000:00:02.0 rcs0: context:1217 retire runtime: { total:325575ns, avg:0ns } <0>[ 167.303756] kworker/-89 11..... 165741777us : __intel_context_retire: 0000:00:02.0 rcs0: context:c90 retire runtime: { total:0ns, avg:0ns } <0>[ 167.303806] kworker/-89 11..... 165742017us : __engine_park: __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 167.303811] --------------------------------- <4>[ 167.304722] ------------[ cut here ]------------ <2>[ 167.304725] kernel BUG at drivers/gpu/drm/i915/gt/intel_engine_pm.c:283! <4>[ 167.304731] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 167.304734] CPU: 11 PID: 89 Comm: kworker/11:1 Tainted: G W 6.8.0-rc2-CI_DRM_14193-gc655e0fd2804+ #1 <4>[ 167.304736] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 <4>[ 167.304738] Workqueue: i915-unordered retire_work_handler [i915] <4>[ 167.304839] RIP: 0010:__engine_park+0x3fd/0x680 [i915] <4>[ 167.304937] Code: 00 48 c7 c2 b0 e5 86 a0 48 8d 3d 00 00 00 00 e8 79 48 d4 e0 bf 01 00 00 00 e8 ef 0a d4 e0 31 f6 bf 09 00 00 00 e8 03 49 c0 e0 <0f> 0b 0f 0b be 01 00 00 00 e8 f5 61 fd ff 31 c0 e9 34 fd ff ff 48 <4>[ 167.304940] RSP: 0018:ffffc9000059fce0 EFLAGS: 00010246 <4>[ 167.304942] RAX: 0000000000000200 RBX: 0000000000000000 RCX: 0000000000000006 <4>[ 167.304944] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 <4>[ 167.304946] RBP: ffff8881330ca1b0 R08: 0000000000000001 R09: 0000000000000001 <4>[ 167.304947] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881330ca000 <4>[ 167.304948] R13: ffff888110f02aa0 R14: ffff88812d1d0205 R15: ffff88811277d4f0 <4>[ 167.304950] FS: 0000000000000000(0000) GS:ffff88844f780000(0000) knlGS:0000000000000000 <4>[ 167.304952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 167.304953] CR2: 00007fc362200c40 CR3: 000000013306e003 CR4: 0000000000770ef0 <4>[ 167.304955] PKRU: 55555554 <4>[ 167.304957] Call Trace: <4>[ 167.304958] <4>[ 167.305573] ____intel_wakeref_put_last+0x1d/0x80 [i915] <4>[ 167.305685] i915_request_retire.part.0+0x34f/0x600 [i915] <4>[ 167.305800] retire_requests+0x51/0x80 [i915] <4>[ 167.305892] intel_gt_retire_requests_timeout+0x27f/0x700 [i915] <4>[ 167.305985] process_scheduled_works+0x2db/0x530 <4>[ 167.305990] worker_thread+0x18c/0x350 <4>[ 167.305993] kthread+0xfe/0x130 <4>[ 167.305997] ret_from_fork+0x2c/0x50 <4>[ 167.306001] ret_from_fork_asm+0x1b/0x30 <4>[ 167.306004] It is necessary for the queue_priority_hint to be lower than the next request submission upon waking up, as we rely on the hint to decide when to kick the tasklet to submit that first request. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154 Signed-off-by: Chris Wilson Signed-off-by: Janusz Krzysztofik Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240318135906.716055-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 98850e96cf811dc2d0a7d0af491caff9f5d49c1e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 96bdb93a948d..fb7bff27b45a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42aade0faf2d..b061a0a0d6b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq) From 582dc04b0658ef3b90aeb49cbdd9747c2f1eccc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 25 Mar 2024 19:57:38 +0200 Subject: [PATCH 54/55] drm/i915: Pre-populate the cursor physical dma address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling i915_gem_object_get_dma_address() from the vblank evade critical section triggers might_sleep(). While we know that we've already pinned the framebuffer and thus i915_gem_object_get_dma_address() will in fact not sleep in this case, it seems reasonable to keep the unconditional might_sleep() for maximum coverage. So let's instead pre-populate the dma address during fb pinning, which all happens before we enter the vblank evade critical section. We can use u32 for the dma address as this class of hardware doesn't support >32bit addresses. Cc: stable@vger.kernel.org Fixes: 0225a90981c8 ("drm/i915: Make cursor plane registers unlocked") Reported-by: Borislav Petkov Closes: https://lore.kernel.org/intel-gfx/20240227100342.GAZd2zfmYcPS_SndtO@fat_crate.local/ Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240325175738.3440-1-ville.syrjala@linux.intel.com Tested-by: Borislav Petkov (AMD) Reviewed-by: Chaitanya Kumar Borah (cherry picked from commit c1289a5c3594cf04caa94ebf0edeb50c62009f1f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_cursor.c | 4 +--- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_fb_pin.c | 10 ++++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index f8b33999d43f..0d3da55e1c24 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = i915_gem_object_get_dma_address(obj, 0); + base = plane_state->phys_dma_addr; else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e67cd5b02e84..9104f18753b4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -727,6 +727,7 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; + u32 phys_dma_addr; /* for cursor_needs_physical */ /* Plane pxp decryption state */ bool decrypt; diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 7b42aef37d2f..b6df9baf481b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) return PTR_ERR(vma); plane_state->ggtt_vma = vma; + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. + */ + if (phys_cursor) + plane_state->phys_dma_addr = + i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); } else { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); From 32e39bab59934bfd3f37097d4dd85ac5eb0fd549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 19 Mar 2024 11:24:42 +0200 Subject: [PATCH 55/55] drm/i915/bios: Tolerate devdata==NULL in intel_bios_encoder_supports_dp_dual_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have no VBT, or the VBT didn't declare the encoder in question, we won't have the 'devdata' for the encoder. Instead of oopsing just bail early. We won't be able to tell whether the port is DP++ or not, but so be it. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10464 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240319092443.15769-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 26410896206342c8a80d2b027923e9ee7d33b733) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9328601e4e9f..52bd3576835b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3382,6 +3382,9 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da { const struct child_device_config *child = &devdata->child; + if (!devdata) + return false; + if (!intel_bios_encoder_supports_dp(devdata) || !intel_bios_encoder_supports_hdmi(devdata)) return false;