mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'amd-drm-next-6.14-2025-01-10' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.14-2025-01-10: amdgpu: - Fix max surface handling in DC - clang fixes - DCN 3.5 fixes - DCN 4.0.1 fixes - DC CRC fixes - DML updates - DSC fixes - PSR fixes - DC add some divide by 0 checks - SMU13 updates - SR-IOV fixes - RAS fixes - Cleaner shader support for gfx10.3 dGPUs - fix drm buddy trim handling - SDMA engine reset updates - Fix RB bitmap setup - Fix doorbell ttm cleanup - Add CEC notifier support - DPIA updates - MST fixes amdkfd: - Shader debugger fixes - Trap handler cleanup - Cleanup includes - Eviction fence wq fix Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250110172731.2960668-1-alexander.deucher@amd.com
This commit is contained in:
@@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
|
||||
uint32_t low, high;
|
||||
uint64_t queue_addr = 0;
|
||||
|
||||
if (!amdgpu_gpu_recovery)
|
||||
return 0;
|
||||
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
|
||||
|
||||
@@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
|
||||
uint32_t low, high, pipe_reset_data = 0;
|
||||
uint64_t queue_addr = 0;
|
||||
|
||||
if (!amdgpu_gpu_recovery)
|
||||
return 0;
|
||||
|
||||
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
|
||||
|
||||
|
||||
@@ -88,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
|
||||
{
|
||||
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
|
||||
|
||||
if (aobj) {
|
||||
amdgpu_hmm_unregister(aobj);
|
||||
ttm_bo_put(&aobj->tbo);
|
||||
}
|
||||
amdgpu_hmm_unregister(aobj);
|
||||
ttm_bo_put(&aobj->tbo);
|
||||
}
|
||||
|
||||
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
|
||||
|
||||
@@ -3020,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block)
|
||||
struct amdgpu_device *adev = ip_block->adev;
|
||||
|
||||
mutex_lock(&adev->firmware.mutex);
|
||||
/*
|
||||
* This sequence is just used on hw_init only once, no need on
|
||||
* resume.
|
||||
*/
|
||||
|
||||
ret = amdgpu_ucode_init_bo(adev);
|
||||
if (ret)
|
||||
goto failed;
|
||||
@@ -3148,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block)
|
||||
|
||||
mutex_lock(&adev->firmware.mutex);
|
||||
|
||||
ret = amdgpu_ucode_init_bo(adev);
|
||||
if (ret)
|
||||
goto failed;
|
||||
|
||||
ret = psp_hw_start(psp);
|
||||
if (ret)
|
||||
goto failed;
|
||||
@@ -3891,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
struct amdgpu_ip_block *ip_block;
|
||||
uint32_t fw_ver;
|
||||
int ret;
|
||||
|
||||
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
|
||||
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
|
||||
if (!ip_block || !ip_block->status.late_initialized) {
|
||||
dev_info(adev->dev, "PSP block is not ready yet\n.");
|
||||
return -EBUSY;
|
||||
}
|
||||
@@ -3923,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
|
||||
struct amdgpu_bo *fw_buf_bo = NULL;
|
||||
uint64_t fw_pri_mc_addr;
|
||||
void *fw_pri_cpu_addr;
|
||||
struct amdgpu_ip_block *ip_block;
|
||||
|
||||
if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
|
||||
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
|
||||
if (!ip_block || !ip_block->status.late_initialized) {
|
||||
dev_err(adev->dev, "PSP block is not ready yet.");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@@ -2832,8 +2832,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
|
||||
mutex_lock(&con->recovery_lock);
|
||||
data = con->eh_data;
|
||||
if (!data)
|
||||
if (!data) {
|
||||
/* Returning 0 as the absence of eh_data is acceptable */
|
||||
goto free;
|
||||
}
|
||||
|
||||
for (i = 0; i < pages; i++) {
|
||||
if (from_rom &&
|
||||
@@ -2845,26 +2847,34 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
* one row
|
||||
*/
|
||||
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
|
||||
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
bps[i].retired_page <<
|
||||
AMDGPU_GPU_PAGE_SHIFT)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
else
|
||||
} else {
|
||||
find_pages_per_pa = true;
|
||||
}
|
||||
} else {
|
||||
/* unsupported cases */
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
|
||||
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
|
||||
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (from_rom && !find_pages_per_pa) {
|
||||
if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
|
||||
/* bad page in any NPS mode in eeprom */
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data))
|
||||
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
|
||||
ret = -EINVAL;
|
||||
goto free;
|
||||
}
|
||||
} else {
|
||||
/* legacy bad page in eeprom, generated only in
|
||||
* NPS1 mode
|
||||
@@ -2881,6 +2891,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
|
||||
/* non-nps1 mode, old RAS TA
|
||||
* can't support it
|
||||
*/
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -362,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
|
||||
if (!adev)
|
||||
return -ENODEV;
|
||||
|
||||
mask = (1 << adev->sdma.num_instances) - 1;
|
||||
mask = BIT_ULL(adev->sdma.num_instances) - 1;
|
||||
if ((val & mask) == 0)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; ++i) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
if (val & (1 << i))
|
||||
if (val & BIT_ULL(i))
|
||||
ring->sched.ready = true;
|
||||
else
|
||||
ring->sched.ready = false;
|
||||
|
||||
@@ -2066,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
|
||||
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
|
||||
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
|
||||
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
|
||||
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
|
||||
ttm_device_fini(&adev->mman.bdev);
|
||||
adev->mman.initialized = false;
|
||||
DRM_INFO("amdgpu: ttm finalized\n");
|
||||
|
||||
@@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
else
|
||||
remaining_size -= size;
|
||||
}
|
||||
mutex_unlock(&mgr->lock);
|
||||
|
||||
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
|
||||
struct drm_buddy_block *dcc_block;
|
||||
@@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
|
||||
(u64)vres->base.size,
|
||||
&vres->blocks);
|
||||
}
|
||||
mutex_unlock(&mgr->lock);
|
||||
|
||||
vres->base.start = 0;
|
||||
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "clearstate_gfx10.h"
|
||||
#include "v10_structs.h"
|
||||
#include "gfx_v10_0.h"
|
||||
#include "gfx_v10_0_cleaner_shader.h"
|
||||
#include "nbio_v2_3.h"
|
||||
|
||||
/*
|
||||
@@ -4738,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
break;
|
||||
}
|
||||
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
|
||||
case IP_VERSION(10, 3, 0):
|
||||
case IP_VERSION(10, 3, 2):
|
||||
case IP_VERSION(10, 3, 4):
|
||||
case IP_VERSION(10, 3, 5):
|
||||
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
|
||||
adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
|
||||
if (adev->gfx.me_fw_version >= 64 &&
|
||||
adev->gfx.pfp_fw_version >= 100 &&
|
||||
adev->gfx.mec_fw_version >= 122) {
|
||||
adev->gfx.enable_cleaner_shader = true;
|
||||
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
|
||||
if (r) {
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
adev->gfx.enable_cleaner_shader = false;
|
||||
break;
|
||||
|
||||
56
drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
Normal file
56
drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
 * Cleaner shader for gfx_10_3_0, stored as pre-encoded 32-bit words.
 * The table is const and file-local; consumers take its size with sizeof().
 * NOTE(review): presumably the assembled form of
 * gfx_v10_3_0_cleaner_shader.asm (clears LDS, SGPRs and VGPRs) - confirm
 * against the assembler output before changing any word by hand.
 */
|
||||
static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
|
||||
0xb0804004, 0xbf8a0000,
|
||||
0xbe8203b8, 0xbefc0380,
|
||||
0x7e008480, 0x7e028480,
|
||||
0x7e048480, 0x7e068480,
|
||||
0x7e088480, 0x7e0a8480,
|
||||
0x7e0c8480, 0x7e0e8480,
|
||||
0xbefc0302, 0x80828802,
|
||||
0xbf84fff5, 0xbe8203ff,
|
||||
0x80000000, 0x87020002,
|
||||
0xbf840012, 0xbefe03c1,
|
||||
0xbeff03c1, 0xd7650001,
|
||||
0x0001007f, 0xd7660001,
|
||||
0x0002027e, 0x16020288,
|
||||
0xbe8203bf, 0xbefc03c1,
|
||||
0xd9382000, 0x00020201,
|
||||
0xd9386040, 0x00040401,
|
||||
0xd70f6a01, 0x000202ff,
|
||||
0x00000400, 0x80828102,
|
||||
0xbf84fff7, 0xbefc03ff,
|
||||
0x00000068, 0xbe803080,
|
||||
0xbe813080, 0xbe823080,
|
||||
0xbe833080, 0x80fc847c,
|
||||
0xbf84fffa, 0xbeea0480,
|
||||
0xbeec0480, 0xbeee0480,
|
||||
0xbef00480, 0xbef20480,
|
||||
0xbef40480, 0xbef60480,
|
||||
0xbef80480, 0xbefa0480,
|
||||
0xbf810000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
};
|
||||
124
drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
Normal file
124
drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
Normal file
@@ -0,0 +1,124 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// This shader clears LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
|
||||
// To turn this shader program on for compilation, change this to main and rename the lower shader main to main_1
|
||||
|
||||
// GFX10.3 : Clear SGPRs, VGPRs and LDS
|
||||
// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
|
||||
// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
|
||||
// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
|
||||
// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
|
||||
// Each wave clears SGPRs 0 - 107
|
||||
// Each wave clears VGPRs 0 - 63
|
||||
// The first wave of the workgroup clears its 64KB of LDS
|
||||
// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
|
||||
// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
|
||||
|
||||
|
||||
shader main
|
||||
asic(GFX10)
|
||||
type(CS)
|
||||
wave_size(32)
|
||||
// Note: original source code from SQ team
|
||||
|
||||
//
|
||||
// Create 32 waves in a threadgroup (CS waves)
|
||||
// Each allocates 64 VGPRs
|
||||
// The workgroup allocates all of LDS (64kbytes)
|
||||
//
|
||||
// Takes about 2500 clocks to run.
|
||||
// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
|
||||
//
|
||||
S_BARRIER
|
||||
s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
|
||||
s_mov_b32 m0, 0
|
||||
//
|
||||
// CLEAR VGPRs
|
||||
//
|
||||
label_0005:
|
||||
v_movreld_b32 v0, 0
|
||||
v_movreld_b32 v1, 0
|
||||
v_movreld_b32 v2, 0
|
||||
v_movreld_b32 v3, 0
|
||||
v_movreld_b32 v4, 0
|
||||
v_movreld_b32 v5, 0
|
||||
v_movreld_b32 v6, 0
|
||||
v_movreld_b32 v7, 0
|
||||
s_mov_b32 m0, s2
|
||||
s_sub_u32 s2, s2, 8
|
||||
s_cbranch_scc0 label_0005
|
||||
//
|
||||
s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
|
||||
s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
|
||||
s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
|
||||
// CLEAR LDS
|
||||
//
|
||||
s_mov_b32 exec_lo, 0xffffffff
|
||||
s_mov_b32 exec_hi, 0xffffffff
|
||||
v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
|
||||
v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
|
||||
v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
|
||||
s_mov_b32 s2, 0x00000003f // 64 loop iterations
|
||||
s_mov_b32 m0, 0xffffffff
|
||||
// Clear all of LDS space
|
||||
// Each FirstWave of WorkGroup clears 64kbyte block
|
||||
|
||||
label_001F:
|
||||
ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
|
||||
ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
|
||||
v_add_co_u32 v1, vcc, 0x00000400, v1
|
||||
s_sub_u32 s2, s2, 1
|
||||
s_cbranch_scc0 label_001F
|
||||
|
||||
//
|
||||
// CLEAR SGPRs
|
||||
//
|
||||
label_0023:
|
||||
s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
|
||||
label_sgpr_loop:
|
||||
s_movreld_b32 s0, 0
|
||||
s_movreld_b32 s1, 0
|
||||
s_movreld_b32 s2, 0
|
||||
s_movreld_b32 s3, 0
|
||||
s_sub_u32 m0, m0, 4
|
||||
s_cbranch_scc0 label_sgpr_loop
|
||||
|
||||
// Clear flat_scratch, vcc and ttmp0-ttmp15
|
||||
s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
|
||||
s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
|
||||
s_mov_b64 vcc, 0 //clear vcc
|
||||
s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
|
||||
s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
|
||||
s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
|
||||
s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
|
||||
s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
|
||||
s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
|
||||
s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
|
||||
s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
|
||||
|
||||
s_endpgm
|
||||
|
||||
end
|
||||
|
||||
|
||||
@@ -1891,6 +1891,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
|
||||
|
||||
static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 rb_bitmap_per_sa;
|
||||
u32 rb_bitmap_width_per_sa;
|
||||
u32 max_sa;
|
||||
u32 active_sa_bitmap;
|
||||
@@ -1908,9 +1909,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
|
||||
adev->gfx.config.max_sh_per_se;
|
||||
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
|
||||
adev->gfx.config.max_sh_per_se;
|
||||
rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
|
||||
|
||||
for (i = 0; i < max_sa; i++) {
|
||||
if (active_sa_bitmap & (1 << i))
|
||||
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
|
||||
active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
|
||||
}
|
||||
|
||||
active_rb_bitmap &= global_active_rb_bitmap;
|
||||
|
||||
@@ -1442,11 +1442,19 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Add queue reset mask when FW fully supports it */
|
||||
adev->gfx.gfx_supported_reset =
|
||||
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
|
||||
adev->gfx.compute_supported_reset =
|
||||
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
|
||||
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
|
||||
case IP_VERSION(12, 0, 0):
|
||||
case IP_VERSION(12, 0, 1):
|
||||
if ((adev->gfx.me_fw_version >= 2660) &&
|
||||
(adev->gfx.mec_fw_version >= 2920)) {
|
||||
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
|
||||
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (!adev->enable_mes_kiq) {
|
||||
r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
|
||||
@@ -1615,6 +1623,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
|
||||
|
||||
static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 rb_bitmap_per_sa;
|
||||
u32 rb_bitmap_width_per_sa;
|
||||
u32 max_sa;
|
||||
u32 active_sa_bitmap;
|
||||
@@ -1632,12 +1641,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
|
||||
adev->gfx.config.max_sh_per_se;
|
||||
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
|
||||
adev->gfx.config.max_sh_per_se;
|
||||
rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
|
||||
|
||||
for (i = 0; i < max_sa; i++) {
|
||||
if (active_sa_bitmap & (1 << i))
|
||||
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
|
||||
active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
|
||||
}
|
||||
|
||||
active_rb_bitmap |= global_active_rb_bitmap;
|
||||
active_rb_bitmap &= global_active_rb_bitmap;
|
||||
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
|
||||
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
|
||||
}
|
||||
|
||||
@@ -5639,8 +5639,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t temp, data;
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
/* It is disabled by HW by default */
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
|
||||
@@ -5734,8 +5732,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
|
||||
gfx_v8_0_wait_for_rlc_serdes(adev);
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
}
|
||||
|
||||
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -5745,8 +5741,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
|
||||
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
|
||||
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
|
||||
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
|
||||
@@ -5827,12 +5821,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
}
|
||||
|
||||
gfx_v8_0_wait_for_rlc_serdes(adev);
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
}
|
||||
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
if (enable) {
|
||||
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
|
||||
* === MGCG + MGLS + TS(CG/LS) ===
|
||||
@@ -5846,6 +5840,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
|
||||
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -4964,8 +4964,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t data, def;
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
/* It is disabled by HW by default */
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
|
||||
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
|
||||
@@ -5030,8 +5028,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
|
||||
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -5042,8 +5038,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
if (!adev->gfx.num_gfx_rings)
|
||||
return;
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
/* Enable 3D CGCG/CGLS */
|
||||
if (enable) {
|
||||
/* write cmd to clear cgcg/cgls ov */
|
||||
@@ -5085,8 +5079,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
|
||||
if (def != data)
|
||||
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
}
|
||||
|
||||
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
|
||||
@@ -5094,8 +5086,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
{
|
||||
uint32_t def, data;
|
||||
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
|
||||
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
|
||||
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
|
||||
/* unset CGCG override */
|
||||
@@ -5137,13 +5127,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
|
||||
if (def != data)
|
||||
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
|
||||
}
|
||||
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
}
|
||||
|
||||
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
|
||||
if (enable) {
|
||||
/* CGCG/CGLS should be enabled after MGCG/MGLS
|
||||
* === MGCG + MGLS ===
|
||||
@@ -5163,6 +5152,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
|
||||
/* === MGCG + MGLS === */
|
||||
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
|
||||
}
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -579,11 +579,16 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
|
||||
{
|
||||
int err;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
|
||||
AMDGPU_UCODE_REQUIRED,
|
||||
"amdgpu/%s_sjt_mec.bin", chip_name);
|
||||
else
|
||||
|
||||
if (err)
|
||||
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
|
||||
AMDGPU_UCODE_REQUIRED,
|
||||
"amdgpu/%s_mec.bin", chip_name);
|
||||
} else
|
||||
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
|
||||
AMDGPU_UCODE_REQUIRED,
|
||||
"amdgpu/%s_mec.bin", chip_name);
|
||||
|
||||
@@ -1602,7 +1602,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
|
||||
int i, r;
|
||||
u32 inst_mask;
|
||||
|
||||
if ((adev->flags & AMD_IS_APU) || amdgpu_sriov_vf(adev))
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return -EINVAL;
|
||||
|
||||
/* stop queue */
|
||||
|
||||
@@ -34,41 +34,24 @@
|
||||
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
|
||||
* sp3 gfx11.sp3 -hex gfx11.hex
|
||||
*
|
||||
* gfx12:
|
||||
* cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3
|
||||
* sp3 gfx12.sp3 -hex gfx12.hex
|
||||
*/
|
||||
|
||||
#define CHIP_NAVI10 26
|
||||
#define CHIP_SIENNA_CICHLID 30
|
||||
#define CHIP_PLUM_BONITO 36
|
||||
#define CHIP_GFX12 37
|
||||
|
||||
#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
|
||||
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
|
||||
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
|
||||
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
|
||||
#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12)
|
||||
#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
|
||||
#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
|
||||
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
|
||||
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
#define S_COHERENCE glc:1
|
||||
#define V_COHERENCE slc:1 glc:1
|
||||
#define S_WAITCNT_0 s_waitcnt 0
|
||||
#else
|
||||
#define S_COHERENCE scope:SCOPE_SYS
|
||||
#define V_COHERENCE scope:SCOPE_SYS
|
||||
#define S_WAITCNT_0 s_wait_idle
|
||||
|
||||
#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO
|
||||
#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI
|
||||
#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
|
||||
#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
|
||||
#define HW_REG_MODE HW_REG_WAVE_MODE
|
||||
#endif
|
||||
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
|
||||
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
|
||||
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
|
||||
@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
|
||||
var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
|
||||
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
|
||||
var S_SAVE_PC_HI_HT_MASK = 0x01000000
|
||||
#else
|
||||
var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
|
||||
var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
|
||||
var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
|
||||
var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
|
||||
var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
|
||||
var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
|
||||
var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
|
||||
var SQ_WAVE_STATUS_WAVE64_SIZE = 1
|
||||
var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
|
||||
var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV
|
||||
var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
|
||||
var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK
|
||||
var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
|
||||
#endif
|
||||
|
||||
var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
|
||||
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
|
||||
@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
|
||||
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
|
||||
#endif
|
||||
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
|
||||
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
|
||||
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
|
||||
@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
|
||||
var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
|
||||
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
|
||||
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
|
||||
#else
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
|
||||
var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
|
||||
var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
|
||||
var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
|
||||
|
||||
var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
|
||||
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
|
||||
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
|
||||
var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
|
||||
SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
|
||||
SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
|
||||
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
|
||||
SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
|
||||
SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
|
||||
var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
|
||||
var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
|
||||
var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
|
||||
var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
|
||||
var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
|
||||
var BARRIER_STATE_SIGNAL_OFFSET = 16
|
||||
var BARRIER_STATE_VALID_OFFSET = 0
|
||||
#endif
|
||||
|
||||
// bits [31:24] unused by SPI debug data
|
||||
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
|
||||
@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER:
|
||||
|
||||
L_HALTED:
|
||||
// Host trap may occur while wave is halted.
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
|
||||
#else
|
||||
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
|
||||
#endif
|
||||
s_cbranch_scc1 L_FETCH_2ND_TRAP
|
||||
|
||||
L_CHECK_SAVE:
|
||||
@@ -336,7 +266,6 @@ L_NOT_HALTED:
|
||||
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
|
||||
// Maskable exceptions only cause the wave to enter the trap handler if
|
||||
// their respective bit in mode.excp_en is set.
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
|
||||
s_cbranch_scc0 L_CHECK_TRAP_ID
|
||||
|
||||
@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH:
|
||||
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
|
||||
s_and_b32 ttmp2, ttmp2, ttmp3
|
||||
s_cbranch_scc1 L_FETCH_2ND_TRAP
|
||||
#else
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
|
||||
s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
|
||||
s_cbranch_scc0 L_NOT_ADDR_WATCH
|
||||
s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
|
||||
|
||||
L_NOT_ADDR_WATCH:
|
||||
s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
|
||||
s_and_b32 ttmp2, ttmp3, ttmp2
|
||||
s_cbranch_scc1 L_FETCH_2ND_TRAP
|
||||
#endif
|
||||
|
||||
L_CHECK_TRAP_ID:
|
||||
// Check trap_id != 0
|
||||
@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID:
|
||||
#if SINGLE_STEP_MISSED_WORKAROUND
|
||||
// Prioritize single step exception over context save.
|
||||
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
|
||||
#else
|
||||
// WAVE_TRAP_CTRL is already in ttmp3.
|
||||
s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
|
||||
#endif
|
||||
s_cbranch_scc1 L_FETCH_2ND_TRAP
|
||||
#endif
|
||||
|
||||
@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP:
|
||||
s_cbranch_scc1 L_TRAP_CASE
|
||||
|
||||
// Host trap will not cause trap re-entry.
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
|
||||
#else
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
|
||||
#endif
|
||||
s_cbranch_scc1 L_EXIT_TRAP
|
||||
s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
|
||||
|
||||
@@ -457,16 +365,7 @@ L_EXIT_TRAP:
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
|
||||
#else
|
||||
// STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
|
||||
// Only restore fields which the trap handler changes.
|
||||
s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT
|
||||
s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
|
||||
SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status
|
||||
#endif
|
||||
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
|
||||
L_SAVE:
|
||||
@@ -478,14 +377,6 @@ L_SAVE:
|
||||
s_endpgm
|
||||
L_HAVE_VGPRS:
|
||||
#endif
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
|
||||
s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
|
||||
s_cbranch_scc0 L_HAVE_VGPRS
|
||||
s_endpgm
|
||||
L_HAVE_VGPRS:
|
||||
#endif
|
||||
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
s_mov_b32 s_save_tmp, 0
|
||||
s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
||||
@@ -671,19 +562,6 @@ L_SAVE_HWREG:
|
||||
s_mov_b32 m0, 0x0 //Next lane of v2 to write to
|
||||
#endif
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Ensure no further changes to barrier or LDS state.
|
||||
// STATE_PRIV.BARRIER_COMPLETE may change up to this point.
|
||||
s_barrier_signal -2
|
||||
s_barrier_wait -2
|
||||
|
||||
// Re-read final state of BARRIER_COMPLETE field for save.
|
||||
s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG)
|
||||
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
|
||||
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
|
||||
s_or_b32 s_save_status, s_save_status, s_save_tmp
|
||||
#endif
|
||||
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
|
||||
@@ -707,21 +585,6 @@ L_SAVE_HWREG:
|
||||
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
|
||||
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
|
||||
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_get_barrier_state s_save_tmp, -1
|
||||
s_wait_kmcnt (0)
|
||||
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
#endif
|
||||
|
||||
#if NO_SQC_STORE
|
||||
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
|
||||
s_mov_b32 exec_lo, 0xFFFF
|
||||
@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL:
|
||||
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
|
||||
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_LDS_DONE
|
||||
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_barrier //LDS is used? wait for other waves in the same TG
|
||||
#endif
|
||||
s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
|
||||
s_cbranch_scc0 L_SAVE_LDS_DONE //FIRST_WAVE bit clear in s_save_pc_hi? skip the LDS save
|
||||
|
||||
@@ -1081,11 +942,6 @@ L_RESTORE:
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
|
||||
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Save s_restore_spi_init_hi for later use.
|
||||
s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
|
||||
#endif
|
||||
|
||||
//determine whether it is wave32 or wave64
|
||||
get_wave_size2(s_restore_size)
|
||||
|
||||
@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR:
|
||||
// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
|
||||
// Clear DEBUG_EN before and restore MODE after the barrier.
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
|
||||
#endif
|
||||
|
||||
/* restore HW registers */
|
||||
L_RESTORE_HWREG:
|
||||
@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG:
|
||||
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Restore s_restore_spi_init_hi before the saved value gets clobbered.
|
||||
s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
|
||||
#endif
|
||||
|
||||
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG:
|
||||
|
||||
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
S_WAITCNT_0
|
||||
s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
|
||||
|
||||
read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
S_WAITCNT_0
|
||||
s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
|
||||
|
||||
// Only the first wave needs to restore the workgroup barrier.
|
||||
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
|
||||
// Skip over WAVE_STATUS, since there is no state to restore from it
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
|
||||
|
||||
read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
S_WAITCNT_0
|
||||
|
||||
s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
|
||||
// extract the saved signal count from s_restore_tmp
|
||||
s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
|
||||
|
||||
// We need to call s_barrier_signal repeatedly to restore the signal
|
||||
// count of the work group barrier. The member count is already
|
||||
// initialized with the number of waves in the work group.
|
||||
L_BARRIER_RESTORE_LOOP:
|
||||
s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
s_barrier_signal -1
|
||||
s_add_i32 s_restore_tmp, s_restore_tmp, -1
|
||||
s_branch L_BARRIER_RESTORE_LOOP
|
||||
|
||||
L_SKIP_BARRIER_RESTORE:
|
||||
#endif
|
||||
|
||||
s_mov_b32 m0, s_restore_m0
|
||||
s_mov_b32 exec_lo, s_restore_exec_lo
|
||||
s_mov_b32 exec_hi, s_restore_exec_hi
|
||||
@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV:
|
||||
|
||||
s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Make barrier and LDS state visible to all waves in the group.
|
||||
// STATE_PRIV.BARRIER_COMPLETE may change after this point.
|
||||
s_barrier_signal -2
|
||||
s_barrier_wait -2
|
||||
#endif
|
||||
|
||||
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
|
||||
|
||||
L_END_PGM:
|
||||
@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes
|
||||
end
|
||||
|
||||
// get_wave_size2(s_reg): read the hardware WAVE64 field into s_reg, then
// shift it left by S_WAVE_SIZE bits.
// When ASIC_FAMILY < CHIP_GFX12 the field is read from IB_STS2; otherwise it
// is read from WAVE_STATUS.
function get_wave_size2(s_reg)
|
||||
#if ASIC_FAMILY < CHIP_GFX12
|
||||
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
|
||||
#else
|
||||
s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
|
||||
#endif
|
||||
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE // NOTE(review): presumably callers test bit S_WAVE_SIZE of the result - confirm
|
||||
end
|
||||
|
||||
|
||||
1126
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
Normal file
1126
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
Normal file
File diff suppressed because it is too large
Load Diff
@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
|
||||
{
|
||||
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
|
||||
uint32_t flags = pdd->process->dbg_flags;
|
||||
struct amdgpu_device *adev = pdd->dev->adev;
|
||||
int r;
|
||||
|
||||
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
|
||||
return 0;
|
||||
|
||||
if (!pdd->proc_ctx_cpu_ptr) {
|
||||
r = amdgpu_amdkfd_alloc_gtt_mem(adev,
|
||||
AMDGPU_MES_PROC_CTX_SIZE,
|
||||
&pdd->proc_ctx_bo,
|
||||
&pdd->proc_ctx_gpu_addr,
|
||||
&pdd->proc_ctx_cpu_ptr,
|
||||
false);
|
||||
if (r) {
|
||||
dev_err(adev->dev,
|
||||
"failed to allocate process context bo\n");
|
||||
return r;
|
||||
}
|
||||
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
|
||||
}
|
||||
|
||||
return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
|
||||
pdd->watch_points, flags, sq_trap_en);
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/kfd_ioctl.h>
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/kfifo.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user