diff options
23 files changed, 1419 insertions, 0 deletions
diff --git a/queue-6.12/drm-amd-display-add-null-pointer-check-for-get_first_active_display.patch b/queue-6.12/drm-amd-display-add-null-pointer-check-for-get_first_active_display.patch new file mode 100644 index 0000000000..7a5d3d5e22 --- /dev/null +++ b/queue-6.12/drm-amd-display-add-null-pointer-check-for-get_first_active_display.patch @@ -0,0 +1,40 @@ +From c3e9826a22027a21d998d3e64882fa377b613006 Mon Sep 17 00:00:00 2001 +From: Wentao Liang <vulab@iscas.ac.cn> +Date: Mon, 26 May 2025 10:37:31 +0800 +Subject: drm/amd/display: Add null pointer check for get_first_active_display() + +From: Wentao Liang <vulab@iscas.ac.cn> + +commit c3e9826a22027a21d998d3e64882fa377b613006 upstream. + +The function mod_hdcp_hdcp1_enable_encryption() calls the function +get_first_active_display(), but does not check its return value. +The return value is a null pointer if the display list is empty. +This will lead to a null pointer dereference in +mod_hdcp_hdcp1_enable_encryption(). + +Add a null pointer check for get_first_active_display() and return +MOD_HDCP_STATUS_DISPLAY_NOT_FOUND if the function returns null. 
+ +Fixes: 2deade5ede56 ("drm/amd/display: Remove hdcp display state with mst fix") +Signed-off-by: Wentao Liang <vulab@iscas.ac.cn> +Reviewed-by: Alex Hung <alex.hung@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Cc: stable@vger.kernel.org # v5.8 +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c ++++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +@@ -368,6 +368,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enab + struct mod_hdcp_display *display = get_first_active_display(hdcp); + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + ++ if (!display) ++ return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; ++ + mutex_lock(&psp->hdcp_context.mutex); + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; + memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); diff --git a/queue-6.12/drm-amd-display-check-dce_hwseq-before-dereferencing-it.patch b/queue-6.12/drm-amd-display-check-dce_hwseq-before-dereferencing-it.patch new file mode 100644 index 0000000000..54efae73cb --- /dev/null +++ b/queue-6.12/drm-amd-display-check-dce_hwseq-before-dereferencing-it.patch @@ -0,0 +1,38 @@ +From b669507b637eb6b1aaecf347f193efccc65d756e Mon Sep 17 00:00:00 2001 +From: Alex Hung <alex.hung@amd.com> +Date: Tue, 3 Jun 2025 18:30:55 -0600 +Subject: drm/amd/display: Check dce_hwseq before dereferencing it + +From: Alex Hung <alex.hung@amd.com> + +commit b669507b637eb6b1aaecf347f193efccc65d756e upstream. + +[WHAT] + +hws was checked for null earlier in dce110_blank_stream, indicating hws +can be null, and should be checked whenever it is used. 
+ +Cc: Mario Limonciello <mario.limonciello@amd.com> +Cc: Alex Deucher <alexander.deucher@amd.com> +Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com> +Signed-off-by: Alex Hung <alex.hung@amd.com> +Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit 79db43611ff61280b6de58ce1305e0b2ecf675ad) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +@@ -1217,7 +1217,7 @@ void dce110_blank_stream(struct pipe_ctx + struct dce_hwseq *hws = link->dc->hwseq; + + if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { +- if (!link->skip_implict_edp_power_control) ++ if (!link->skip_implict_edp_power_control && hws) + hws->funcs.edp_backlight_control(link, false); + link->dc->hwss.set_abm_immediate_disable(pipe_ctx); + } diff --git a/queue-6.12/drm-amd-display-correct-non-oled-pre_t11_delay.patch b/queue-6.12/drm-amd-display-correct-non-oled-pre_t11_delay.patch new file mode 100644 index 0000000000..d4a2fc04e9 --- /dev/null +++ b/queue-6.12/drm-amd-display-correct-non-oled-pre_t11_delay.patch @@ -0,0 +1,53 @@ +From 893f07452bca56ff146a6be02b3294a9ea23d18a Mon Sep 17 00:00:00 2001 +From: Zhongwei Zhang <Zhongwei.Zhang@amd.com> +Date: Tue, 13 May 2025 16:45:59 +0800 +Subject: drm/amd/display: Correct non-OLED pre_T11_delay. + +From: Zhongwei Zhang <Zhongwei.Zhang@amd.com> + +commit 893f07452bca56ff146a6be02b3294a9ea23d18a upstream. + +[Why] +Only OLED panels require a non-zero pre_T11_delay by default. +Others should be controlled by the power sequence. + +[How] +For non-OLED panels, the pre_T11_delay in code should be zero. +The same applies to post_T7_delay. 
+ +Cc: Mario Limonciello <mario.limonciello@amd.com> +Cc: Alex Deucher <alexander.deucher@amd.com> +Cc: stable@vger.kernel.org +Reviewed-by: Charlene Liu <charlene.liu@amd.com> +Signed-off-by: Zhongwei Zhang <Zhongwei.Zhang@amd.com> +Signed-off-by: Wayne Lin <wayne.lin@amd.com> +Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +@@ -951,8 +951,8 @@ void dce110_edp_backlight_control( + struct dc_context *ctx = link->ctx; + struct bp_transmitter_control cntl = { 0 }; + uint8_t pwrseq_instance = 0; +- unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; +- unsigned int post_T7_delay = OLED_POST_T7_DELAY; ++ unsigned int pre_T11_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_PRE_T11_DELAY : 0); ++ unsigned int post_T7_delay = (link->dpcd_sink_ext_caps.bits.oled ? 
OLED_POST_T7_DELAY : 0); + + if (dal_graphics_object_id_get_connector_id(link->link_enc->connector) + != CONNECTOR_ID_EDP) { +@@ -1067,7 +1067,8 @@ void dce110_edp_backlight_control( + if (!enable) { + /*follow oem panel config's requirement*/ + pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms; +- msleep(pre_T11_delay); ++ if (pre_T11_delay) ++ msleep(pre_T11_delay); + } + } + diff --git a/queue-6.12/drm-amd-display-fix-mpv-playback-corruption-on-weston.patch b/queue-6.12/drm-amd-display-fix-mpv-playback-corruption-on-weston.patch new file mode 100644 index 0000000000..9ca81b9a1e --- /dev/null +++ b/queue-6.12/drm-amd-display-fix-mpv-playback-corruption-on-weston.patch @@ -0,0 +1,54 @@ +From 8724a5380c4390eed81e271d22f34ff06453ded9 Mon Sep 17 00:00:00 2001 +From: Alex Hung <alex.hung@amd.com> +Date: Thu, 29 May 2025 10:59:19 -0600 +Subject: drm/amd/display: Fix mpv playback corruption on weston + +From: Alex Hung <alex.hung@amd.com> + +commit 8724a5380c4390eed81e271d22f34ff06453ded9 upstream. + +[WHAT] +Severe video playback corruption is observed in the following setup: + +weston 14.0.90 (built from source) + mpv v0.40.0 with command: +mpv bbb_sunflower_1080p_60fps_normal.mp4 --vo=gpu + +[HOW] +ABGR16161616 needs to be included in dml2/2.1 translation. 
+ +Cc: Mario Limonciello <mario.limonciello@amd.com> +Cc: Alex Deucher <alexander.deucher@amd.com> +Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> +Reviewed-by: Harry Wentland <harry.wentland@amd.com> +Reviewed-by: Austin Zheng <austin.zheng@amd.com> +Signed-off-by: Alex Hung <alex.hung@amd.com> +Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit d023de809f85307ca819a9dbbceee6ae1f50e2ad) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 1 + + drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c ++++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +@@ -762,6 +762,7 @@ static void populate_dml21_plane_config_ + plane->pixel_format = dml2_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: ++ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + plane->pixel_format = dml2_444_64; +--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c ++++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +@@ -909,6 +909,7 @@ static void populate_dml_surface_cfg_fro + out->SourcePixelFormat[location] = dml_420_10; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: ++ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + out->SourcePixelFormat[location] = dml_444_64; diff --git a/queue-6.12/drm-amd-display-fix-rmcm-programming-seq-errors.patch b/queue-6.12/drm-amd-display-fix-rmcm-programming-seq-errors.patch new file mode 100644 index 0000000000..bfa26b9f18 --- /dev/null +++ 
b/queue-6.12/drm-amd-display-fix-rmcm-programming-seq-errors.patch @@ -0,0 +1,41 @@ +From 158f9944ac05dafd2d3a23d0688e6cf40ef68b90 Mon Sep 17 00:00:00 2001 +From: Yihan Zhu <Yihan.Zhu@amd.com> +Date: Tue, 27 May 2025 16:47:40 -0400 +Subject: drm/amd/display: Fix RMCM programming seq errors + +From: Yihan Zhu <Yihan.Zhu@amd.com> + +commit 158f9944ac05dafd2d3a23d0688e6cf40ef68b90 upstream. + +[WHY & HOW] +Fix RMCM programming sequence errors and mapping issues to pass the RMCM +test. + +Cc: Mario Limonciello <mario.limonciello@amd.com> +Cc: Alex Deucher <alexander.deucher@amd.com> +Reviewed-by: Dmytro Laktyushkin <dmytro.laktyushkin@amd.com> +Signed-off-by: Yihan Zhu <Yihan.Zhu@amd.com> +Signed-off-by: Alex Hung <alex.hung@amd.com> +Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit 11baa4975025033547f45f5894087a0dda6efbb8) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c ++++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +@@ -4651,7 +4651,10 @@ static void calculate_tdlut_setting( + //the tdlut is fetched during the 2 row times of prefetch. 
+ if (p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); +- *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; ++ if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) ++ *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; ++ else ++ *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + } + diff --git a/queue-6.12/drm-amdgpu-add-kicker-device-detection.patch b/queue-6.12/drm-amdgpu-add-kicker-device-detection.patch new file mode 100644 index 0000000000..2c9486c455 --- /dev/null +++ b/queue-6.12/drm-amdgpu-add-kicker-device-detection.patch @@ -0,0 +1,77 @@ +From 0bbf5fd86c585d437b75003f11365b324360a5d6 Mon Sep 17 00:00:00 2001 +From: Frank Min <Frank.Min@amd.com> +Date: Wed, 4 Jun 2025 21:00:44 +0800 +Subject: drm/amdgpu: Add kicker device detection + +From: Frank Min <Frank.Min@amd.com> + +commit 0bbf5fd86c585d437b75003f11365b324360a5d6 upstream. + +1. add kicker device list +2. 
add kicker device checking helper function + +Signed-off-by: Frank Min <Frank.Min@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit 09aa2b408f4ab689c3541d22b0968de0392ee406) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 17 +++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 6 ++++++ + 2 files changed, 23 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +@@ -30,6 +30,10 @@ + + #define AMDGPU_UCODE_NAME_MAX (128) + ++static const struct kicker_device kicker_device_list[] = { ++ {0x744B, 0x00}, ++}; ++ + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) + { + DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); +@@ -1383,6 +1387,19 @@ static const char *amdgpu_ucode_legacy_n + return NULL; + } + ++bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { ++ if (adev->pdev->device == kicker_device_list[i].device && ++ adev->pdev->revision == kicker_device_list[i].revision) ++ return true; ++ } ++ ++ return false; ++} ++ + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) + { + int maj, min, rev; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +@@ -595,6 +595,11 @@ struct amdgpu_firmware { + uint64_t fw_buf_mc; + }; + ++struct kicker_device{ ++ unsigned short device; ++ u8 revision; ++}; ++ + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); + void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); + void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); +@@ -622,5 +627,6 @@ amdgpu_ucode_get_load_type(struct amdgpu + const 
char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); + + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); ++bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); + + #endif diff --git a/queue-6.12/drm-amdgpu-amdgpu_vram_mgr_new-clamp-lpfn-to-total-vram.patch b/queue-6.12/drm-amdgpu-amdgpu_vram_mgr_new-clamp-lpfn-to-total-vram.patch new file mode 100644 index 0000000000..975dac8738 --- /dev/null +++ b/queue-6.12/drm-amdgpu-amdgpu_vram_mgr_new-clamp-lpfn-to-total-vram.patch @@ -0,0 +1,36 @@ +From 4d2f6b4e4c7ed32e7fa39fcea37344a9eab99094 Mon Sep 17 00:00:00 2001 +From: John Olender <john.olender@gmail.com> +Date: Tue, 29 Apr 2025 07:24:28 -0400 +Subject: drm/amdgpu: amdgpu_vram_mgr_new(): Clamp lpfn to total vram + +From: John Olender <john.olender@gmail.com> + +commit 4d2f6b4e4c7ed32e7fa39fcea37344a9eab99094 upstream. + +The drm_mm allocator tolerated being passed end > mm->size, but the +drm_buddy allocator does not. + +Restore the pre-buddy-allocator behavior of allowing such placements. 
+ +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3448 +Signed-off-by: John Olender <john.olender@gmail.com> +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +@@ -463,7 +463,7 @@ static int amdgpu_vram_mgr_new(struct tt + int r; + + lpfn = (u64)place->lpfn << PAGE_SHIFT; +- if (!lpfn) ++ if (!lpfn || lpfn > man->size) + lpfn = man->size; + + fpfn = (u64)place->fpfn << PAGE_SHIFT; diff --git a/queue-6.12/drm-amdgpu-fix-sdma-utc_l1-handling-during-start-stop-sequences.patch b/queue-6.12/drm-amdgpu-fix-sdma-utc_l1-handling-during-start-stop-sequences.patch new file mode 100644 index 0000000000..599ef0a051 --- /dev/null +++ b/queue-6.12/drm-amdgpu-fix-sdma-utc_l1-handling-during-start-stop-sequences.patch @@ -0,0 +1,60 @@ +From 7f3b16f3f229e37cc3e02e9e4e7106c523b119e9 Mon Sep 17 00:00:00 2001 +From: Jesse Zhang <jesse.zhang@amd.com> +Date: Mon, 16 Jun 2025 19:21:41 +0800 +Subject: drm/amdgpu: Fix SDMA UTC_L1 handling during start/stop sequences + +From: Jesse Zhang <jesse.zhang@amd.com> + +commit 7f3b16f3f229e37cc3e02e9e4e7106c523b119e9 upstream. + +This commit makes two key fixes to SDMA v4.4.2 handling: + +1. disable UTC_L1 in sdma_cntl register when stopping SDMA engines + by reading the current value before modifying UTC_L1_ENABLE bit. + +2. 
Ensure UTC_L1_ENABLE is consistently managed by: + - Adding the missing register write when enabling UTC_L1 during start + - Keeping UTC_L1 enabled by default as per hardware requirements + +v2: Correct SDMA_CNTL setting (Philip) + +Suggested-by: Jonathan Kim <jonathan.kim@amd.com> +Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com> +Acked-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit 375bf564654e85a7b1b0657b191645b3edca1bda) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +@@ -485,7 +485,7 @@ static void sdma_v4_4_2_inst_gfx_stop(st + { + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; +- u32 rb_cntl, ib_cntl; ++ u32 rb_cntl, ib_cntl, sdma_cntl; + int i; + + for_each_inst(i, inst_mask) { +@@ -497,6 +497,9 @@ static void sdma_v4_4_2_inst_gfx_stop(st + ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); ++ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); ++ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); ++ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); +@@ -953,6 +956,7 @@ static int sdma_v4_4_2_inst_start(struct + /* set utc l1 enable flag always to 1 */ + temp = RREG32_SDMA(i, regSDMA_CNTL); + temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); ++ WREG32_SDMA(i, regSDMA_CNTL, temp); + + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { + /* enable context empty interrupt during initialization */ diff --git a/queue-6.12/drm-amdgpu-switch-job-hw_fence-to-amdgpu_fence.patch 
b/queue-6.12/drm-amdgpu-switch-job-hw_fence-to-amdgpu_fence.patch new file mode 100644 index 0000000000..9c5cf803d8 --- /dev/null +++ b/queue-6.12/drm-amdgpu-switch-job-hw_fence-to-amdgpu_fence.patch @@ -0,0 +1,206 @@ +From ebe43542702c3d15d1a1d95e8e13b1b54076f05a Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Mon, 2 Jun 2025 11:31:52 -0400 +Subject: drm/amdgpu: switch job hw_fence to amdgpu_fence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher <alexander.deucher@amd.com> + +commit ebe43542702c3d15d1a1d95e8e13b1b54076f05a upstream. + +Use the amdgpu fence container so we can store additional +data in the fence. This also fixes the start_time handling +for MCBP since we were casting the fence to an amdgpu_fence +and it wasn't. + +Fixes: 3f4c175d62d8 ("drm/amdgpu: MCBP based on DRM scheduler (v9)") +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +(cherry picked from commit bf1cd14f9e2e1fdf981eed273ddd595863f5288c) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 - + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 - + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 ++++++---------------------- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 +++++------ + drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 - + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++++++ + 6 files changed, 32 insertions(+), 32 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +@@ -1902,7 +1902,7 @@ no_preempt: + continue; + } + job = to_amdgpu_job(s_job); +- if (preempted && (&job->hw_fence) == fence) ++ if (preempted && (&job->hw_fence.base) == fence) + /* mark the job as preempted */ + job->preemption_status |= AMDGPU_IB_PREEMPTED; + } +--- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -5861,7 +5861,7 @@ int amdgpu_device_gpu_recover(struct amd + * + * job->base holds a reference to parent fence + */ +- if (job && dma_fence_is_signaled(&job->hw_fence)) { ++ if (job && dma_fence_is_signaled(&job->hw_fence.base)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -41,22 +41,6 @@ + #include "amdgpu_trace.h" + #include "amdgpu_reset.h" + +-/* +- * Fences mark an event in the GPUs pipeline and are used +- * for GPU/CPU synchronization. When the fence is written, +- * it is expected that all buffers associated with that fence +- * are no longer in use by the associated ring on the GPU and +- * that the relevant GPU caches have been flushed. +- */ +- +-struct amdgpu_fence { +- struct dma_fence base; +- +- /* RB, DMA, etc. */ +- struct amdgpu_ring *ring; +- ktime_t start_timestamp; +-}; +- + static struct kmem_cache *amdgpu_fence_slab; + + int amdgpu_fence_slab_init(void) +@@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring + am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); + if (am_fence == NULL) + return -ENOMEM; +- fence = &am_fence->base; +- am_fence->ring = ring; + } else { + /* take use of job-embedded fence */ +- fence = &job->hw_fence; ++ am_fence = &job->hw_fence; + } ++ fence = &am_fence->base; ++ am_fence->ring = ring; + + seq = ++ring->fence_drv.sync_seq; + if (job && job->job_run_counter) { +@@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fence + * it right here or we won't be able to track them in fence_drv + * and they will remain unsignaled during sa_bo free. 
+ */ +- job = container_of(old, struct amdgpu_job, hw_fence); ++ job = container_of(old, struct amdgpu_job, hw_fence.base); + if (!job->base.s_fence && !dma_fence_is_signaled(old)) + dma_fence_signal(old); + RCU_INIT_POINTER(*ptr, NULL); +@@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_time + + static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) + { +- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); ++ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); + + return (const char *)to_amdgpu_ring(job->base.sched)->name; + } +@@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signalin + */ + static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) + { +- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); ++ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); + + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); +@@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + + /* free job if fence has a parent job */ +- kfree(container_of(f, struct amdgpu_job, hw_fence)); ++ kfree(container_of(f, struct amdgpu_job, hw_fence.base)); + } + + /** +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -259,8 +259,8 @@ void amdgpu_job_free_resources(struct am + /* Check if any fences where initialized */ + if (job->base.s_fence && job->base.s_fence->finished.ops) + f = &job->base.s_fence->finished; +- else if (job->hw_fence.ops) +- f = &job->hw_fence; ++ else if (job->hw_fence.base.ops) ++ f = &job->hw_fence.base; + else + f = NULL; + +@@ -277,10 +277,10 @@ static void amdgpu_job_free_cb(struct dr + amdgpu_sync_free(&job->explicit_sync); + + /* only put the hw fence if has embedded fence */ +- if (!job->hw_fence.ops) ++ if 
(!job->hw_fence.base.ops) + kfree(job); + else +- dma_fence_put(&job->hw_fence); ++ dma_fence_put(&job->hw_fence.base); + } + + void amdgpu_job_set_gang_leader(struct amdgpu_job *job, +@@ -309,10 +309,10 @@ void amdgpu_job_free(struct amdgpu_job * + if (job->gang_submit != &job->base.s_fence->scheduled) + dma_fence_put(job->gang_submit); + +- if (!job->hw_fence.ops) ++ if (!job->hw_fence.base.ops) + kfree(job); + else +- dma_fence_put(&job->hw_fence); ++ dma_fence_put(&job->hw_fence.base); + } + + struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +@@ -48,7 +48,7 @@ struct amdgpu_job { + struct drm_sched_job base; + struct amdgpu_vm *vm; + struct amdgpu_sync explicit_sync; +- struct dma_fence hw_fence; ++ struct amdgpu_fence hw_fence; + struct dma_fence *gang_submit; + uint32_t preamble_status; + uint32_t preemption_status; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +@@ -126,6 +126,22 @@ struct amdgpu_fence_driver { + struct dma_fence **fences; + }; + ++/* ++ * Fences mark an event in the GPUs pipeline and are used ++ * for GPU/CPU synchronization. When the fence is written, ++ * it is expected that all buffers associated with that fence ++ * are no longer in use by the associated ring on the GPU and ++ * that the relevant GPU caches have been flushed. ++ */ ++ ++struct amdgpu_fence { ++ struct dma_fence base; ++ ++ /* RB, DMA, etc. 
*/ ++ struct amdgpu_ring *ring; ++ ktime_t start_timestamp; ++}; ++ + extern const struct drm_sched_backend_ops amdgpu_sched_ops; + + void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); diff --git a/queue-6.12/drm-bridge-cdns-dsi-check-return-value-when-getting-default-phy-config.patch b/queue-6.12/drm-bridge-cdns-dsi-check-return-value-when-getting-default-phy-config.patch new file mode 100644 index 0000000000..e3291ddc9d --- /dev/null +++ b/queue-6.12/drm-bridge-cdns-dsi-check-return-value-when-getting-default-phy-config.patch @@ -0,0 +1,43 @@ +From c6a7ef0d4856b9629df390e9935d7fd67fe39f81 Mon Sep 17 00:00:00 2001 +From: Aradhya Bhatia <a-bhatia1@ti.com> +Date: Sat, 29 Mar 2025 17:09:15 +0530 +Subject: drm/bridge: cdns-dsi: Check return value when getting default PHY config + +From: Aradhya Bhatia <a-bhatia1@ti.com> + +commit c6a7ef0d4856b9629df390e9935d7fd67fe39f81 upstream. + +Check for the return value of the phy_mipi_dphy_get_default_config() +call, and in case of an error, return back the same. 
+ +Fixes: fced5a364dee ("drm/bridge: cdns: Convert to phy framework") +Cc: stable@vger.kernel.org +Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> +Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com> +Signed-off-by: Aradhya Bhatia <aradhya.bhatia@linux.dev> +Link: https://lore.kernel.org/r/20250329113925.68204-5-aradhya.bhatia@linux.dev +Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c ++++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +@@ -575,9 +575,11 @@ static int cdns_dsi_check_conf(struct cd + if (ret) + return ret; + +- phy_mipi_dphy_get_default_config(mode_clock * 1000, +- mipi_dsi_pixel_format_to_bpp(output->dev->format), +- nlanes, phy_cfg); ++ ret = phy_mipi_dphy_get_default_config(mode_clock * 1000, ++ mipi_dsi_pixel_format_to_bpp(output->dev->format), ++ nlanes, phy_cfg); ++ if (ret) ++ return ret; + + ret = cdns_dsi_adjust_phy_config(dsi, dsi_cfg, phy_cfg, mode, mode_valid_check); + if (ret) diff --git a/queue-6.12/drm-bridge-cdns-dsi-fix-connecting-to-next-bridge.patch b/queue-6.12/drm-bridge-cdns-dsi-fix-connecting-to-next-bridge.patch new file mode 100644 index 0000000000..680855a6ba --- /dev/null +++ b/queue-6.12/drm-bridge-cdns-dsi-fix-connecting-to-next-bridge.patch @@ -0,0 +1,47 @@ +From 688eb4d465484bc2a3471a6a6f06f833b58c7867 Mon Sep 17 00:00:00 2001 +From: Aradhya Bhatia <a-bhatia1@ti.com> +Date: Sat, 29 Mar 2025 17:09:12 +0530 +Subject: drm/bridge: cdns-dsi: Fix connecting to next bridge + +From: Aradhya Bhatia <a-bhatia1@ti.com> + +commit 688eb4d465484bc2a3471a6a6f06f833b58c7867 upstream. 
+ +Fix the OF node pointer passed to the of_drm_find_bridge() call to find +the next bridge in the display chain. + +The code to find the next panel (and create its panel-bridge) works +fine, but to find the next (non-panel) bridge does not. + +To find the next bridge in the pipeline, we need to pass "np" - the OF +node pointer of the next entity in the devicetree chain. Passing +"of_node" to of_drm_find_bridge (which is what the code does currently) +will fetch the bridge for the cdns-dsi which is not what's required. + +Fix that. + +Fixes: e19233955d9e ("drm/bridge: Add Cadence DSI driver") +Cc: stable@vger.kernel.org +Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> +Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com> +Signed-off-by: Aradhya Bhatia <aradhya.bhatia@linux.dev> +Link: https://lore.kernel.org/r/20250329113925.68204-2-aradhya.bhatia@linux.dev +Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c ++++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +@@ -958,7 +958,7 @@ static int cdns_dsi_attach(struct mipi_d + bridge = drm_panel_bridge_add_typed(panel, + DRM_MODE_CONNECTOR_DSI); + } else { +- bridge = of_drm_find_bridge(dev->dev.of_node); ++ bridge = of_drm_find_bridge(np); + if (!bridge) + bridge = ERR_PTR(-EINVAL); + } diff --git a/queue-6.12/drm-bridge-cdns-dsi-fix-phy-de-init-and-flag-it-so.patch b/queue-6.12/drm-bridge-cdns-dsi-fix-phy-de-init-and-flag-it-so.patch new file mode 100644 index 0000000000..0b6c491dbe --- /dev/null +++ b/queue-6.12/drm-bridge-cdns-dsi-fix-phy-de-init-and-flag-it-so.patch @@ -0,0 +1,56 @@ +From fd2611c13f69cbbc6b81d9fc7502abf4f7031d21 Mon Sep 17 
00:00:00 2001 +From: Aradhya Bhatia <a-bhatia1@ti.com> +Date: Sat, 29 Mar 2025 17:09:13 +0530 +Subject: drm/bridge: cdns-dsi: Fix phy de-init and flag it so + +From: Aradhya Bhatia <a-bhatia1@ti.com> + +commit fd2611c13f69cbbc6b81d9fc7502abf4f7031d21 upstream. + +The driver code doesn't have a Phy de-initialization path as yet, and so +it does not clear the phy_initialized flag while suspending. This is a +problem because after resume the driver looks at this flag to determine +if a Phy re-initialization is required or not. It is in fact required +because the hardware is resuming from a suspend, but the driver does not +carry out any re-initialization causing the D-Phy to not work at all. + +Call the counterparts of phy_init() and phy_power_on(), that are +phy_exit() and phy_power_off(), from _bridge_post_disable(), and clear +the flags so that the Phy can be initialized again when required. + +Fixes: fced5a364dee ("drm/bridge: cdns: Convert to phy framework") +Cc: stable@vger.kernel.org +Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> +Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com> +Signed-off-by: Aradhya Bhatia <aradhya.bhatia@linux.dev> +Link: https://lore.kernel.org/r/20250329113925.68204-3-aradhya.bhatia@linux.dev +Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c ++++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +@@ -681,6 +681,11 @@ static void cdns_dsi_bridge_post_disable + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + ++ dsi->phy_initialized = false; ++ dsi->link_initialized = false; ++ 
phy_power_off(dsi->dphy); ++ phy_exit(dsi->dphy); ++ + pm_runtime_put(dsi->base.dev); + } + +@@ -1153,7 +1158,6 @@ static int __maybe_unused cdns_dsi_suspe + clk_disable_unprepare(dsi->dsi_sys_clk); + clk_disable_unprepare(dsi->dsi_p_clk); + reset_control_assert(dsi->dsi_p_rst); +- dsi->link_initialized = false; + return 0; + } + diff --git a/queue-6.12/drm-bridge-cdns-dsi-fix-the-clock-variable-for-mode_valid.patch b/queue-6.12/drm-bridge-cdns-dsi-fix-the-clock-variable-for-mode_valid.patch new file mode 100644 index 0000000000..15b56779ce --- /dev/null +++ b/queue-6.12/drm-bridge-cdns-dsi-fix-the-clock-variable-for-mode_valid.patch @@ -0,0 +1,55 @@ +From 132bdcec399be6ae947582249a134b38cf56731c Mon Sep 17 00:00:00 2001 +From: Aradhya Bhatia <a-bhatia1@ti.com> +Date: Sat, 29 Mar 2025 17:09:14 +0530 +Subject: drm/bridge: cdns-dsi: Fix the clock variable for mode_valid() + +From: Aradhya Bhatia <a-bhatia1@ti.com> + +commit 132bdcec399be6ae947582249a134b38cf56731c upstream. + +The crtc_* mode parameters do not get generated (duplicated in this +case) from the regular parameters before the mode validation phase +begins. + +The rest of the code conditionally uses the crtc_* parameters only +during the bridge enable phase, but sticks to the regular parameters +for mode validation. In this singular instance, however, the driver +tries to use the crtc_clock parameter even during the mode validation, +causing the validation to fail. + +Allow the D-Phy config checks to use mode->clock instead of +mode->crtc_clock during mode_valid checks, like everywhere else in the +driver. 
+ +Fixes: fced5a364dee ("drm/bridge: cdns: Convert to phy framework") +Cc: stable@vger.kernel.org +Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> +Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com> +Signed-off-by: Aradhya Bhatia <aradhya.bhatia@linux.dev> +Link: https://lore.kernel.org/r/20250329113925.68204-4-aradhya.bhatia@linux.dev +Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c ++++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +@@ -568,13 +568,14 @@ static int cdns_dsi_check_conf(struct cd + struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; + unsigned long dsi_hss_hsa_hse_hbp; + unsigned int nlanes = output->dev->lanes; ++ int mode_clock = (mode_valid_check ? 
mode->clock : mode->crtc_clock); + int ret; + + ret = cdns_dsi_mode2cfg(dsi, mode, dsi_cfg, mode_valid_check); + if (ret) + return ret; + +- phy_mipi_dphy_get_default_config(mode->crtc_clock * 1000, ++ phy_mipi_dphy_get_default_config(mode_clock * 1000, + mipi_dsi_pixel_format_to_bpp(output->dev->format), + nlanes, phy_cfg); + diff --git a/queue-6.12/drm-bridge-cdns-dsi-wait-for-clk-and-data-lanes-to-be-ready.patch b/queue-6.12/drm-bridge-cdns-dsi-wait-for-clk-and-data-lanes-to-be-ready.patch new file mode 100644 index 0000000000..b54e9081e8 --- /dev/null +++ b/queue-6.12/drm-bridge-cdns-dsi-wait-for-clk-and-data-lanes-to-be-ready.patch @@ -0,0 +1,70 @@ +From 47c03e6660e96cbba0239125b1d4a9db3c724b1d Mon Sep 17 00:00:00 2001 +From: Aradhya Bhatia <a-bhatia1@ti.com> +Date: Sat, 29 Mar 2025 17:09:16 +0530 +Subject: drm/bridge: cdns-dsi: Wait for Clk and Data Lanes to be ready + +From: Aradhya Bhatia <a-bhatia1@ti.com> + +commit 47c03e6660e96cbba0239125b1d4a9db3c724b1d upstream. + +Once the DSI Link and DSI Phy are initialized, the code needs to wait +for Clk and Data Lanes to be ready, before continuing configuration. +This is in accordance with the DSI Start-up procedure, found in the +Technical Reference Manual of Texas Instruments' J721E SoC[0] which +houses this DSI TX controller. + +If the previous bridge (or crtc/encoder) are configured prematurely, +the input signal FIFO gets corrupted. This introduces a color-shift on the +display. + +Allow the driver to wait for the clk and data lanes to get ready during +DSI enable.
+ +[0]: See section 12.6.5.7.3 "Start-up Procedure" in J721E SoC TRM + TRM Link: http://www.ti.com/lit/pdf/spruil1 + +Fixes: e19233955d9e ("drm/bridge: Add Cadence DSI driver") +Cc: stable@vger.kernel.org +Tested-by: Dominik Haller <d.haller@phytec.de> +Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> +Signed-off-by: Aradhya Bhatia <a-bhatia1@ti.com> +Signed-off-by: Aradhya Bhatia <aradhya.bhatia@linux.dev> +Link: https://lore.kernel.org/r/20250329113925.68204-6-aradhya.bhatia@linux.dev +Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c ++++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +@@ -769,7 +769,7 @@ static void cdns_dsi_bridge_enable(struc + struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; + unsigned long tx_byte_period; + struct cdns_dsi_cfg dsi_cfg; +- u32 tmp, reg_wakeup, div; ++ u32 tmp, reg_wakeup, div, status; + int nlanes; + + if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0)) +@@ -786,6 +786,19 @@ static void cdns_dsi_bridge_enable(struc + cdns_dsi_hs_init(dsi); + cdns_dsi_init_link(dsi); + ++ /* ++ * Now that the DSI Link and DSI Phy are initialized, ++ * wait for the CLK and Data Lanes to be ready. 
++ */ ++ tmp = CLK_LANE_RDY; ++ for (int i = 0; i < nlanes; i++) ++ tmp |= DATA_LANE_RDY(i); ++ ++ if (readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status, ++ (tmp == (status & tmp)), 100, 500000)) ++ dev_err(dsi->base.dev, ++ "Timed Out: DSI-DPhy Clock and Data Lanes not ready.\n"); ++ + writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), + dsi->regs + VID_HSIZE1); + writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact), diff --git a/queue-6.12/drm-i915-dsi-fix-off-by-one-in-bxt_mipi_trans_vtotal.patch b/queue-6.12/drm-i915-dsi-fix-off-by-one-in-bxt_mipi_trans_vtotal.patch new file mode 100644 index 0000000000..42f09713bb --- /dev/null +++ b/queue-6.12/drm-i915-dsi-fix-off-by-one-in-bxt_mipi_trans_vtotal.patch @@ -0,0 +1,46 @@ +From c464ce6af332e7c802c36cd337cacf81db05400c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> +Date: Fri, 14 Mar 2025 17:01:34 +0200 +Subject: drm/i915/dsi: Fix off by one in BXT_MIPI_TRANS_VTOTAL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä <ville.syrjala@linux.intel.com> + +commit c464ce6af332e7c802c36cd337cacf81db05400c upstream. + +BXT_MIPI_TRANS_VTOTAL must be programmed with vtotal-1 +instead of vtotal. Make it so. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> +Link: https://patchwork.freedesktop.org/patch/msgid/20250314150136.22564-1-ville.syrjala@linux.intel.com +Reviewed-by: Jani Nikula <jani.nikula@intel.com> +(cherry picked from commit 7b3685c9b38c3097f465efec8b24dbed63258cf6) +Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/i915/display/vlv_dsi.c ++++ b/drivers/gpu/drm/i915/display/vlv_dsi.c +@@ -1059,7 +1059,7 @@ static void bxt_dsi_get_pipe_config(stru + BXT_MIPI_TRANS_VACTIVE(port)); + adjusted_mode->crtc_vtotal = + intel_de_read(display, +- BXT_MIPI_TRANS_VTOTAL(port)); ++ BXT_MIPI_TRANS_VTOTAL(port)) + 1; + + hactive = adjusted_mode->crtc_hdisplay; + hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port)); +@@ -1264,7 +1264,7 @@ static void set_dsi_timings(struct intel + intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port), + adjusted_mode->crtc_vdisplay); + intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port), +- adjusted_mode->crtc_vtotal); ++ adjusted_mode->crtc_vtotal - 1); + } + + intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port), diff --git a/queue-6.12/drm-i915-gem-allow-exec_capture-on-recoverable-contexts-on-dg1.patch b/queue-6.12/drm-i915-gem-allow-exec_capture-on-recoverable-contexts-on-dg1.patch new file mode 100644 index 0000000000..b4da89de59 --- /dev/null +++ b/queue-6.12/drm-i915-gem-allow-exec_capture-on-recoverable-contexts-on-dg1.patch @@ -0,0 +1,46 @@ +From 25eeba495b2fc16037647c1a51bcdf6fc157af5c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> +Date: Mon, 12 May 2025 21:22:15 +0200 +Subject: drm/i915/gem: Allow EXEC_CAPTURE on recoverable contexts on DG1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä <ville.syrjala@linux.intel.com> + +commit 25eeba495b2fc16037647c1a51bcdf6fc157af5c upstream. + +The intel-media-driver is currently broken on DG1 because +it uses EXEC_CAPTURE with recoverable contexts. Relax the +check to allow that. + +I've also submitted a fix for the intel-media-driver: +https://github.com/intel/media-driver/pull/1920 + +Cc: stable@vger.kernel.org # v6.0+ +Cc: Matthew Auld <matthew.auld@intel.com> +Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Testcase: igt/gem_exec_capture/capture-invisible +Fixes: 71b1669ea9bd ("drm/i915/uapi: tweak error capture on recoverable contexts") +Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> +Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> +Signed-off-by: Andi Shyti <andi.shyti@kernel.org> +Link: https://lore.kernel.org/r/20250411144313.11660-2-ville.syrjala@linux.intel.com +(cherry picked from commit d6e020819612a4a06207af858e0978be4d3e3140) +Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +@@ -2014,7 +2014,7 @@ static int eb_capture_stage(struct i915_ + continue; + + if (i915_gem_context_is_recoverable(eb->gem_context) && +- (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) ++ GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 10)) + return -EINVAL; + + for_each_batch_create_order(eb, j) { diff --git a/queue-6.12/drm-xe-fix-early-wedge-on-guc-load-failure.patch b/queue-6.12/drm-xe-fix-early-wedge-on-guc-load-failure.patch new file mode 100644 index 0000000000..ba52fd4f5d --- /dev/null +++ b/queue-6.12/drm-xe-fix-early-wedge-on-guc-load-failure.patch @@ -0,0 +1,114 @@ +From a39d082c3553d35b4fe5585e1e2fb221c130cae8 Mon Sep 17
00:00:00 2001 +From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> +Date: Wed, 11 Jun 2025 14:44:54 -0700 +Subject: drm/xe: Fix early wedge on GuC load failure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> + +commit a39d082c3553d35b4fe5585e1e2fb221c130cae8 upstream. + +When the GuC fails to load we declare the device wedged. However, the +very first GuC load attempt on GT0 (from xe_gt_init_hwconfig) is done +before the GT1 GuC objects are initialized, so things go bad when the +wedge code attempts to cleanup GT1. To fix this, check the initialization +status in the functions called during wedge. + +Fixes: 7dbe8af13c18 ("drm/xe: Wedge the entire device") +Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> +Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> +Cc: Matthew Brost <matthew.brost@intel.com> +Cc: Jonathan Cavitt <jonathan.cavitt@intel.com> +Cc: Lucas De Marchi <lucas.demarchi@intel.com> +Cc: Zhanjun Dong <zhanjun.dong@intel.com> +Cc: stable@vger.kernel.org # v6.12+: 1e1981b16bb1: drm/xe: Fix taking invalid lock on wedge +Cc: stable@vger.kernel.org # v6.12+ +Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com> +Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com> +Link: https://lore.kernel.org/r/20250611214453.1159846-2-daniele.ceraolospurio@intel.com +Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> +(cherry picked from commit 0b93b7dcd9eb888a6ac7546560877705d4ad61bf) +Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++++++ + drivers/gpu/drm/xe/xe_guc_ct.c | 7 +++++-- + drivers/gpu/drm/xe/xe_guc_ct.h | 5 +++++ + drivers/gpu/drm/xe/xe_guc_submit.c | 3 +++ + 4 files changed, 21 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c ++++ 
b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +@@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct + int pending_seqno; + + /* ++ * we can get here before the CTs are even initialized if we're wedging ++ * very early, in which case there are not going to be any pending ++ * fences so we can bail immediately. ++ */ ++ if (!xe_guc_ct_initialized(&gt->uc.guc.ct)) ++ return; ++ ++ /* + * CT channel is already disabled at this point. No new TLB requests can + * appear. + */ +--- a/drivers/gpu/drm/xe/xe_guc_ct.c ++++ b/drivers/gpu/drm/xe/xe_guc_ct.c +@@ -454,6 +454,9 @@ void xe_guc_ct_disable(struct xe_guc_ct + */ + void xe_guc_ct_stop(struct xe_guc_ct *ct) + { ++ if (!xe_guc_ct_initialized(ct)) ++ return; ++ + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + stop_g2h_handler(ct); + } +@@ -638,7 +641,7 @@ static int __guc_ct_send_locked(struct x + u16 seqno; + int ret; + +- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); ++ xe_gt_assert(gt, xe_guc_ct_initialized(ct)); + xe_gt_assert(gt, !g2h_len || !g2h_fence); + xe_gt_assert(gt, !num_g2h || !g2h_fence); + xe_gt_assert(gt, !g2h_len || num_g2h); +@@ -1209,7 +1212,7 @@ static int g2h_read(struct xe_guc_ct *ct + u32 action; + u32 *hxg; + +- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); ++ xe_gt_assert(gt, xe_guc_ct_initialized(ct)); + lockdep_assert_held(&ct->fast_lock); + + if (ct->state == XE_GUC_CT_STATE_DISABLED) +--- a/drivers/gpu/drm/xe/xe_guc_ct.h ++++ b/drivers/gpu/drm/xe/xe_guc_ct.h +@@ -23,6 +23,11 @@ void xe_guc_ct_snapshot_print(struct xe_ + void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); + void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); + ++static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) ++{ ++ return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; ++} ++ + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) + { + return ct->state == XE_GUC_CT_STATE_ENABLED; +---
a/drivers/gpu/drm/xe/xe_guc_submit.c ++++ b/drivers/gpu/drm/xe/xe_guc_submit.c +@@ -1722,6 +1722,9 @@ int xe_guc_submit_reset_prepare(struct x + { + int ret; + ++ if (!guc->submission_state.initialized) ++ return 0; ++ + /* + * Using an atomic here rather than submission_state.lock as this + * function can be called while holding the CT lock (engine reset diff --git a/queue-6.12/drm-xe-fix-memset-on-iomem.patch b/queue-6.12/drm-xe-fix-memset-on-iomem.patch new file mode 100644 index 0000000000..d8d9cd480a --- /dev/null +++ b/queue-6.12/drm-xe-fix-memset-on-iomem.patch @@ -0,0 +1,38 @@ +From 87a15c89d8c7b00b0fc94e0d4f554f7ee2fe6961 Mon Sep 17 00:00:00 2001 +From: Lucas De Marchi <lucas.demarchi@intel.com> +Date: Thu, 12 Jun 2025 15:14:12 -0700 +Subject: drm/xe: Fix memset on iomem +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lucas De Marchi <lucas.demarchi@intel.com> + +commit 87a15c89d8c7b00b0fc94e0d4f554f7ee2fe6961 upstream. + +It should rather use xe_map_memset() as the BO is created with +XE_BO_FLAG_VRAM_IF_DGFX in xe_guc_pc_init(). 
+ +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Cc: stable@vger.kernel.org +Reviewed-by: Matthew Brost <matthew.brost@intel.com> +Link: https://lore.kernel.org/r/20250612-vmap-vaddr-v1-1-26238ed443eb@intel.com +Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> +(cherry picked from commit 21cf47d89fba353b2d5915ba4718040c4cb955d3) +Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/xe/xe_guc_pc.c ++++ b/drivers/gpu/drm/xe/xe_guc_pc.c +@@ -975,7 +975,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc + goto out; + } + +- memset(pc->bo->vmap.vaddr, 0, size); ++ xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); + slpc_shared_data_write(pc, header.size, size); + + ret = pc_action_reset(pc); diff --git a/queue-6.12/drm-xe-fix-taking-invalid-lock-on-wedge.patch b/queue-6.12/drm-xe-fix-taking-invalid-lock-on-wedge.patch new file mode 100644 index 0000000000..199cad5b05 --- /dev/null +++ b/queue-6.12/drm-xe-fix-taking-invalid-lock-on-wedge.patch @@ -0,0 +1,70 @@ +From 1e1981b16bb1bbe2fafa57ed439b45cb5b34e32d Mon Sep 17 00:00:00 2001 +From: Lucas De Marchi <lucas.demarchi@intel.com> +Date: Wed, 2 Apr 2025 22:38:05 -0700 +Subject: drm/xe: Fix taking invalid lock on wedge + +From: Lucas De Marchi <lucas.demarchi@intel.com> + +commit 1e1981b16bb1bbe2fafa57ed439b45cb5b34e32d upstream. + +If device wedges on e.g. GuC upload, the submission is not yet enabled +and the state is not even initialized. Protect the wedge call so it does +nothing in this case. It fixes the following splat: + + [] xe 0000:bf:00.0: [drm] device wedged, needs recovery + [] ------------[ cut here ]------------ + [] DEBUG_LOCKS_WARN_ON(lock->magic != lock) + [] WARNING: CPU: 48 PID: 312 at kernel/locking/mutex.c:564 __mutex_lock+0x8a1/0xe60 + ... 
+ [] RIP: 0010:__mutex_lock+0x8a1/0xe60 + [] mutex_lock_nested+0x1b/0x30 + [] xe_guc_submit_wedge+0x80/0x2b0 [xe] + +Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> +Link: https://lore.kernel.org/r/20250402-warn-after-wedge-v1-1-93e971511fa5@intel.com +Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++++++ + drivers/gpu/drm/xe/xe_guc_types.h | 5 +++++ + 2 files changed, 14 insertions(+) + +--- a/drivers/gpu/drm/xe/xe_guc_submit.c ++++ b/drivers/gpu/drm/xe/xe_guc_submit.c +@@ -309,6 +309,8 @@ int xe_guc_submit_init(struct xe_guc *gu + + primelockdep(guc); + ++ guc->submission_state.initialized = true; ++ + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + } + +@@ -837,6 +839,13 @@ void xe_guc_submit_wedge(struct xe_guc * + + xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); + ++ /* ++ * If device is being wedged even before submission_state is ++ * initialized, there's nothing to do here. 
++ */ ++ if (!guc->submission_state.initialized) ++ return; ++ + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { +--- a/drivers/gpu/drm/xe/xe_guc_types.h ++++ b/drivers/gpu/drm/xe/xe_guc_types.h +@@ -74,6 +74,11 @@ struct xe_guc { + struct mutex lock; + /** @submission_state.enabled: submission is enabled */ + bool enabled; ++ /** ++ * @submission_state.initialized: mark when submission state is ++ * even initialized - before that not even the lock is valid ++ */ ++ bool initialized; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; + } submission_state; diff --git a/queue-6.12/drm-xe-guc_submit-add-back-fix.patch b/queue-6.12/drm-xe-guc_submit-add-back-fix.patch new file mode 100644 index 0000000000..5b7cb3f3bc --- /dev/null +++ b/queue-6.12/drm-xe-guc_submit-add-back-fix.patch @@ -0,0 +1,50 @@ +From 2e824747cfbdf1fba88df5e5800d284b2602ae8f Mon Sep 17 00:00:00 2001 +From: Matthew Auld <matthew.auld@intel.com> +Date: Tue, 3 Jun 2025 18:42:14 +0100 +Subject: drm/xe/guc_submit: add back fix +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Auld <matthew.auld@intel.com> + +commit 2e824747cfbdf1fba88df5e5800d284b2602ae8f upstream. + +Daniele noticed that the fix in commit 2d2be279f1ca ("drm/xe: fix UAF +around queue destruction") looks to have been unintentionally removed as +part of handling a conflict in some past merge commit. Add it back. 
+ +Fixes: ac44ff7cec33 ("Merge tag 'drm-xe-fixes-2024-10-10' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes") +Reported-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> +Signed-off-by: Matthew Auld <matthew.auld@intel.com> +Cc: Matthew Brost <matthew.brost@intel.com> +Cc: <stable@vger.kernel.org> # v6.12+ +Reviewed-by: Matthew Brost <matthew.brost@intel.com> +Link: https://lore.kernel.org/r/20250603174213.1543579-2-matthew.auld@intel.com +(cherry picked from commit 9d9fca62dc49d96f97045b6d8e7402a95f8cf92a) +Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_guc_submit.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/gpu/drm/xe/xe_guc_submit.c ++++ b/drivers/gpu/drm/xe/xe_guc_submit.c +@@ -227,6 +227,17 @@ static bool exec_queue_killed_or_banned_ + static void guc_submit_fini(struct drm_device *drm, void *arg) + { + struct xe_guc *guc = arg; ++ struct xe_device *xe = guc_to_xe(guc); ++ struct xe_gt *gt = guc_to_gt(guc); ++ int ret; ++ ++ ret = wait_event_timeout(guc->submission_state.fini_wq, ++ xa_empty(&guc->submission_state.exec_queue_lookup), ++ HZ * 5); ++ ++ drain_workqueue(xe->destroy_wq); ++ ++ xe_gt_assert(gt, ret); + + xa_destroy(&guc->submission_state.exec_queue_lookup); + } diff --git a/queue-6.12/drm-xe-sched-stop-re-submitting-signalled-jobs.patch b/queue-6.12/drm-xe-sched-stop-re-submitting-signalled-jobs.patch new file mode 100644 index 0000000000..d6423ade6a --- /dev/null +++ b/queue-6.12/drm-xe-sched-stop-re-submitting-signalled-jobs.patch @@ -0,0 +1,98 @@ +From 0ee54d5cacc0276ec631ac149825a24b59c51c38 Mon Sep 17 00:00:00 2001 +From: Matthew Auld <matthew.auld@intel.com> +Date: Wed, 28 May 2025 12:33:29 +0100 +Subject: drm/xe/sched: stop re-submitting signalled jobs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Auld 
<matthew.auld@intel.com> + +commit 0ee54d5cacc0276ec631ac149825a24b59c51c38 upstream. + +Customer is reporting a really subtle issue where we get random DMAR +faults, hangs and other nasties for kernel migration jobs when stressing +stuff like s2idle/s3/s4. The explosions seems to happen somewhere +after resuming the system with splats looking something like: + +PM: suspend exit +rfkill: input handler disabled +xe 0000:00:02.0: [drm] GT0: Engine reset: engine_class=bcs, logical_mask: 0x2, guc_id=0 +xe 0000:00:02.0: [drm] GT0: Timedout job: seqno=24496, lrc_seqno=24496, guc_id=0, flags=0x13 in no process [-1] +xe 0000:00:02.0: [drm] GT0: Kernel-submitted job timed out + +The likely cause appears to be a race between suspend cancelling the +worker that processes the free_job()'s, such that we still have pending +jobs to be freed after the cancel. Following from this, on resume the +pending_list will now contain at least one already complete job, but it +looks like we call drm_sched_resubmit_jobs(), which will then call +run_job() on everything still on the pending_list. But if the job was +already complete, then all the resources tied to the job, like the bb +itself, any memory that is being accessed, the iommu mappings etc. might +be long gone since those are usually tied to the fence signalling. + +This scenario can be seen in ftrace when running a slightly modified +xe_pm IGT (kernel was only modified to inject artificial latency into +free_job to make the race easier to hit): + +xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ... 
+xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13 +xe_exec_queue_stop: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=1, guc_state=0x0, flags=0x4 +xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=0, guc_state=0x0, flags=0x3 +xe_exec_queue_stop: dev=0000:00:02.0, 1:0x1, gt=1, width=1, guc_id=1, guc_state=0x0, flags=0x3 +xe_exec_queue_stop: dev=0000:00:02.0, 4:0x1, gt=1, width=1, guc_id=2, guc_state=0x0, flags=0x3 +xe_exec_queue_resubmit: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x0, flags=0x13 +xe_sched_job_run: dev=0000:00:02.0, fence=0xffff888276cc8540, seqno=0, lrc_seqno=0, gt=0, guc_id=0, batch_addr=0x000000146910 ... +..... +xe_exec_queue_memory_cat_error: dev=0000:00:02.0, 3:0x2, gt=0, width=1, guc_id=0, guc_state=0x3, flags=0x13 + +So the job_run() is clearly triggered twice for the same job, even +though the first must have already signalled to completion during +suspend. We can also see a CAT error after the re-submit. + +To prevent this only resubmit jobs on the pending_list that have not yet +signalled. + +v2: + - Make sure to re-arm the fence callbacks with sched_start(). +v3 (Matt B): + - Stop using drm_sched_resubmit_jobs(), which appears to be deprecated + and just open-code a simple loop such that we skip calling run_job() + on anything already signalled. 
+ +Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4856 +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Signed-off-by: Matthew Auld <matthew.auld@intel.com> +Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Cc: Matthew Brost <matthew.brost@intel.com> +Cc: William Tseng <william.tseng@intel.com> +Cc: <stable@vger.kernel.org> # v6.8+ +Reviewed-by: Matthew Brost <matthew.brost@intel.com> +Reviewed-by: Tejas Upadhyay <tejas.upadhyay@intel.com> +Link: https://lore.kernel.org/r/20250528113328.289392-2-matthew.auld@intel.com +(cherry picked from commit 38fafa9f392f3110d2de431432d43f4eef99cd1b) +Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h ++++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h +@@ -51,7 +51,15 @@ static inline void xe_sched_tdr_queue_im + + static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) + { +- drm_sched_resubmit_jobs(&sched->base); ++ struct drm_sched_job *s_job; ++ ++ list_for_each_entry(s_job, &sched->base.pending_list, list) { ++ struct drm_sched_fence *s_fence = s_job->s_fence; ++ struct dma_fence *hw_fence = s_fence->parent; ++ ++ if (hw_fence && !dma_fence_is_signaled(hw_fence)) ++ sched->base.ops->run_job(s_job); ++ } + } + + static inline bool diff --git a/queue-6.12/drm-xe-vm-move-rebind_work-init-earlier.patch b/queue-6.12/drm-xe-vm-move-rebind_work-init-earlier.patch new file mode 100644 index 0000000000..a5e35ded71 --- /dev/null +++ b/queue-6.12/drm-xe-vm-move-rebind_work-init-earlier.patch @@ -0,0 +1,59 @@ +From a63e99b4d6d3a0353ef47146dd5bd562f08e1786 Mon Sep 17 00:00:00 2001 +From: Matthew Auld <matthew.auld@intel.com> +Date: Wed, 14 May 2025 16:24:25 +0100 +Subject: drm/xe/vm: move rebind_work init earlier +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Auld <matthew.auld@intel.com> + +commit a63e99b4d6d3a0353ef47146dd5bd562f08e1786 upstream. + +In xe_vm_close_and_put() we need to be able to call +flush_work(rebind_work), however during vm creation we can call this on +the error path, before having actually set up the worker, leading to a +splat from flush_work(). + +It looks like we can simply move the worker init step earlier to fix +this. + +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Signed-off-by: Matthew Auld <matthew.auld@intel.com> +Cc: Matthew Brost <matthew.brost@intel.com> +Cc: <stable@vger.kernel.org> # v6.8+ +Reviewed-by: Matthew Brost <matthew.brost@intel.com> +Link: https://lore.kernel.org/r/20250514152424.149591-3-matthew.auld@intel.com +(cherry picked from commit 96af397aa1a2d1032a6e28ff3f4bc0ab4be40e1d) +Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/xe/xe_vm.c ++++ b/drivers/gpu/drm/xe/xe_vm.c +@@ -1477,8 +1477,10 @@ struct xe_vm *xe_vm_create(struct xe_dev + * scheduler drops all the references of it, hence protecting the VM + * for this case is necessary. 
+ */ +- if (flags & XE_VM_FLAG_LR_MODE) ++ if (flags & XE_VM_FLAG_LR_MODE) { ++ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + xe_pm_runtime_get_noresume(xe); ++ } + + vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!vm_resv_obj) { +@@ -1523,10 +1525,8 @@ struct xe_vm *xe_vm_create(struct xe_dev + vm->batch_invalidate_tlb = true; + } + +- if (vm->flags & XE_VM_FLAG_LR_MODE) { +- INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); ++ if (vm->flags & XE_VM_FLAG_LR_MODE) + vm->batch_invalidate_tlb = false; +- } + + /* Fill pt_root after allocating scratch tables */ + for_each_tile(tile, xe, id) { diff --git a/queue-6.12/series b/queue-6.12/series index 6794f5cbff..6e2c7e29be 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -162,3 +162,25 @@ drm-tegra-fix-a-possible-null-pointer-dereference.patch drm-udl-unregister-device-before-cleaning-up-on-disconnect.patch drm-msm-gpu-fix-crash-when-throttling-gpu-immediately-during-boot.patch drm-amdkfd-fix-race-in-gws-queue-scheduling.patch +drm-bridge-cdns-dsi-fix-the-clock-variable-for-mode_valid.patch +drm-bridge-cdns-dsi-fix-phy-de-init-and-flag-it-so.patch +drm-bridge-cdns-dsi-fix-connecting-to-next-bridge.patch +drm-bridge-cdns-dsi-check-return-value-when-getting-default-phy-config.patch +drm-bridge-cdns-dsi-wait-for-clk-and-data-lanes-to-be-ready.patch +drm-amd-display-add-null-pointer-check-for-get_first_active_display.patch +drm-amdgpu-amdgpu_vram_mgr_new-clamp-lpfn-to-total-vram.patch +drm-amd-display-correct-non-oled-pre_t11_delay.patch +drm-xe-vm-move-rebind_work-init-earlier.patch +drm-xe-sched-stop-re-submitting-signalled-jobs.patch +drm-xe-guc_submit-add-back-fix.patch +drm-i915-gem-allow-exec_capture-on-recoverable-contexts-on-dg1.patch +drm-amd-display-fix-rmcm-programming-seq-errors.patch +drm-amdgpu-add-kicker-device-detection.patch +drm-amd-display-check-dce_hwseq-before-dereferencing-it.patch +drm-xe-fix-memset-on-iomem.patch 
+drm-xe-fix-taking-invalid-lock-on-wedge.patch +drm-xe-fix-early-wedge-on-guc-load-failure.patch +drm-i915-dsi-fix-off-by-one-in-bxt_mipi_trans_vtotal.patch +drm-amdgpu-fix-sdma-utc_l1-handling-during-start-stop-sequences.patch +drm-amdgpu-switch-job-hw_fence-to-amdgpu_fence.patch +drm-amd-display-fix-mpv-playback-corruption-on-weston.patch |