Merge tag 'drm-xe-next-2025-01-10' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Driver Changes:
- SRIOV VF: Avoid reading inaccessible registers (Jakub, Marcin)
- Introduce RPa frequency information (Rodrigo)
- Remove unnecessary force wakes on SLPC code (Vinay)
- Fix all typos in xe (Nitin)
- Adding steering info support for GuC register lists (Jesus)
- Remove unused xe_pciids.h harder, add missing PCI ID (Jani)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z4E0tlTAA6MZ7PF2@intel.com
Committed by Dave Airlie on 2025-01-13 11:07:31 +10:00
40 changed files with 162 additions and 302 deletions

drivers/gpu/drm/xe/Kconfig.debug

@@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM
bool "Enable passing SYS/VRAM addresses to user space"
default n
help
Pass object location trough uapi. Intended for extended
Pass object location through uapi. Intended for extended
testing and development only.
Recommended for driver developers only.
@@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT
 	  Choose this option when debugging error paths that
 	  are hit during checks for userptr invalidations.
-	  Recomended for driver developers only.
+	  Recommended for driver developers only.
 	  If in doubt, say "N".

drivers/gpu/drm/xe/abi/guc_capture_abi.h

@@ -25,7 +25,7 @@ enum guc_state_capture_type {
 #define GUC_STATE_CAPTURE_TYPE_MAX	(GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)

-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
 enum guc_capture_list_class_type {
 	GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
 	GUC_CAPTURE_LIST_CLASS_VIDEO = 1,

drivers/gpu/drm/xe/abi/guc_klvs_abi.h

@@ -132,7 +132,7 @@ enum {
  * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
  *      This config sets whether strict scheduling is enabled whereby any VF
  *      that doesnt have work to submit is still allocated a fixed execution
- *      time-slice to ensure active VFs execution is always consitent even
+ *      time-slice to ensure active VFs execution is always consistent even
  *      during other VF reprovisiong / rebooting events. Changing this KLV
  *      impacts all VFs and takes effect on the next VF-Switch event.
  *
@@ -207,7 +207,7 @@ enum {
  *      of and this will never be perfectly-exact (accumulated nano-second
  *      granularity) since the GPUs clock time runs off a different crystal
  *      from the CPUs clock. Changing this KLV on a VF that is currently
- *      running a context wont take effect until a new context is scheduled in.
+ *      running a context won't take effect until a new context is scheduled in.
  *      That said, when the PF is changing this value from 0x0 to
  *      a non-zero value, it might never take effect if the VF is running an
  *      infinitely long compute or shader kernel. In such a scenario, the
@@ -227,7 +227,7 @@ enum {
  *      HW is capable and this will never be perfectly-exact (accumulated
  *      nano-second granularity) since the GPUs clock time runs off a
  *      different crystal from the CPUs clock. Changing this KLV on a VF
- *      that is currently running a context wont take effect until a new
+ *      that is currently running a context won't take effect until a new
  *      context is scheduled in.
  *      That said, when the PF is changing this value from 0x0 to
  *      a non-zero value, it might never take effect if the VF is running an
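
The KLVs documented in this file are plain key/length/value dwords in GuC-visible memory. As a minimal sketch (assuming this file's GUC_KLV_0_KEY/GUC_KLV_0_LEN masks; the 0x8001 key is SCHED_IF_IDLE per the doc above, and this is not an actual driver helper), enabling strict scheduling would be encoded as:

    u32 klv[2];

    /* 16-bit key in the upper word, value length in dwords in the lower word */
    klv[0] = FIELD_PREP(GUC_KLV_0_KEY, 0x8001) | FIELD_PREP(GUC_KLV_0_LEN, 1);
    klv[1] = 1; /* 1 = strict scheduling enabled */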

drivers/gpu/drm/xe/regs/xe_reg_defs.h

@@ -13,7 +13,7 @@
 /**
  * struct xe_reg - Register definition
  *
- * Register defintion to be used by the individual register. Although the same
+ * Register definition to be used by the individual register. Although the same
  * definition is used for xe_reg and xe_reg_mcr, they use different internal
  * APIs for accesses.
  */

drivers/gpu/drm/xe/regs/xe_regs.h

@@ -44,12 +44,16 @@
 #define MTL_RP_STATE_CAP			XE_REG(0x138000)
+#define MTL_GT_RPA_FREQUENCY			XE_REG(0x138008)
 #define MTL_GT_RPE_FREQUENCY			XE_REG(0x13800c)

 #define MTL_MEDIAP_STATE_CAP			XE_REG(0x138020)
 #define MTL_RPN_CAP_MASK			REG_GENMASK(24, 16)
 #define MTL_RP0_CAP_MASK			REG_GENMASK(8, 0)

+#define MTL_MPA_FREQUENCY			XE_REG(0x138028)
+#define MTL_RPA_MASK				REG_GENMASK(8, 0)
+
 #define MTL_MPE_FREQUENCY			XE_REG(0x13802c)
 #define MTL_RPE_MASK				REG_GENMASK(8, 0)
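
These additions expose the RPa ("achievable") frequency alongside the existing RP0/RPe/RPn points. A rough sketch of the decode, assuming the usual xe_mmio_read32()/REG_FIELD_GET() helpers (the real readout lives in the GuC PC code):

    u32 reg, freq;

    if (xe_gt_is_media_type(gt))
        reg = xe_mmio_read32(&gt->mmio, MTL_MPA_FREQUENCY);
    else
        reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY);

    freq = REG_FIELD_GET(MTL_RPA_MASK, reg); /* 9-bit frequency field */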

drivers/gpu/drm/xe/xe_mocs.c

@@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt,
 		mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
 	} else {
-		/* Just re-use value read on previous iteration */
+		/* Just reuse value read on previous iteration */
 		reg_val >>= 16;
 	}

drivers/gpu/drm/xe/xe_bb.c

@@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	/*
 	 * We need to allocate space for the requested number of dwords,
 	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
-	 * space to accomodate the platform-specific hardware prefetch
+	 * space to accommodate the platform-specific hardware prefetch
 	 * requirements.
 	 */
 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
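
Per this comment, the suballocation sizes out to four bytes per requested dword, one MI_BATCH_BUFFER_END dword, and the prefetch slack; roughly (a sketch, with bb_prefetch() standing in for the platform-specific lookup):

    size = 4 * (dwords + 1) + bb_prefetch(gt);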

drivers/gpu/drm/xe/xe_bo.c

@@ -786,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	 * / resume, some of the pinned memory is required for the
 	 * device to resume / use the GPU to move other evicted memory
 	 * (user memory) around. This likely could be optimized a bit
-	 * futher where we find the minimum set of pinned memory
+	 * further where we find the minimum set of pinned memory
 	 * required for resume but for simplity doing a memcpy for all
 	 * pinned memory.
 	 */
@@ -875,7 +875,7 @@ out:
  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
  * @bo: The buffer object to move.
  *
- * On successful completion, the object memory will be moved to sytem memory.
+ * On successful completion, the object memory will be moved to system memory.
  *
  * This is needed to for special handling of pinned VRAM object during
  * suspend-resume.
@@ -1370,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = {
 /**
  * xe_bo_alloc - Allocate storage for a struct xe_bo
  *
- * This funcition is intended to allocate storage to be used for input
+ * This function is intended to allocate storage to be used for input
  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
  * created is needed before the call to __xe_bo_create_locked().
  * If __xe_bo_create_locked ends up never to be called, then the
@@ -2412,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
  * @force_alloc: Set force_alloc in ttm_operation_ctx
  *
  * On successful completion, the object memory will be moved to evict
- * placement. Ths function blocks until the object has been fully moved.
+ * placement. This function blocks until the object has been fully moved.
  *
  * Return: 0 on success. Negative error code on failure.
  */

drivers/gpu/drm/xe/xe_bo_doc.h

@@ -41,7 +41,7 @@
  * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
  * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
  * user BOs are evictable and user BOs are never pinned by XE. The allocation of
- * the backing store can be defered from creation time until first use which is
+ * the backing store can be deferred from creation time until first use which is
  * either mmap, bind, or pagefault.
  *
  * Private BOs
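
Deferred backing means a create call along these lines allocates no pages up front (a hypothetical libdrm-style sketch; vram_mask is an assumed bitmask of target memory regions):

    struct drm_xe_gem_create create = {
        .size = 2 * 1024 * 1024,
        .placement = vram_mask,
        .cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
    };

    drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
    /* create.handle is usable immediately; backing pages can come later */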

drivers/gpu/drm/xe/xe_devcoredump.c

@@ -48,7 +48,7 @@
  *
  * **Coredump release**:
  * After a coredump is generated, it stays in kernel memory until released by
- * userpace by writing anything to it, or after an internal timer expires. The
+ * userspace by writing anything to it, or after an internal timer expires. The
  * exact timeout may vary and should not be relied upon. Example to release
  * a coredump:
  *
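
The release example the comment refers to is an ordinary write to the devcoredump node, e.g. (standard devcoredump path, card number may vary):

    $ echo 1 > /sys/class/drm/card0/device/devcoredump/data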

drivers/gpu/drm/xe/xe_device.c

@@ -606,7 +606,7 @@ static int probe_has_flat_ccs(struct xe_device *xe)
 	u32 reg;

 	/* Always enabled/disabled, no runtime check to do */
-	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe))
 		return 0;

 	gt = xe_root_mmio_gt(xe);
@@ -999,7 +999,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
  * xe_device_declare_wedged - Declare device wedged
  * @xe: xe device instance
  *
- * This is a final state that can only be cleared with a mudule
+ * This is a final state that can only be cleared with a module
  * re-probe (unbind + bind).
  * In this state every IOCTL will be blocked so the GT cannot be used.
  * In general it will be called upon any critical error such as gt reset

drivers/gpu/drm/xe/xe_drm_client.c

@@ -385,7 +385,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
  * @p: The drm_printer ptr
  * @file: The drm_file ptr
  *
- * This is callabck for drm fdinfo interface. Register this callback
+ * This is callback for drm fdinfo interface. Register this callback
  * in drm driver ops for show_fdinfo.
  *
  * Return: void
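
Once registered, these per-client stats are readable from procfs; illustrative output only, since the exact drm-* keys depend on the driver and the engines present:

    $ cat /proc/$(pidof app)/fdinfo/4
    drm-driver:     xe
    drm-client-id:  23
    drm-cycles-rcs: ...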

drivers/gpu/drm/xe/xe_exec.c

@@ -33,7 +33,7 @@
  *
  * In XE we avoid all of this complication by not allowing a BO list to be
  * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
- * seperate operations, and using the DRM scheduler to flow control the ring.
+ * separate operations, and using the DRM scheduler to flow control the ring.
  * Let's deep dive on each of these.
  *
  * We can get away from a BO list by forcing the user to use in / out fences on

drivers/gpu/drm/xe/xe_ggtt.c

@@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 	/*
 	 * So we don't need to worry about 64K GGTT layout when dealing with
-	 * scratch entires, rather keep the scratch page in system memory on
+	 * scratch entries, rather keep the scratch page in system memory on
 	 * platforms where 64K pages are needed for VRAM.
 	 */
 	flags = XE_BO_FLAG_PINNED;

drivers/gpu/drm/xe/xe_gt.h

@@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);
 /**
  * xe_gt_record_user_engines - save data related to engines available to
- * usersapce
+ * userspace
  * @gt: GT structure
  *
  * Walk the available HW engines from gt->info.engine_mask and calculate data

drivers/gpu/drm/xe/xe_gt_freq.c

@@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(rpe_freq);

+static ssize_t rpa_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_rpa_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(rpa_freq);
+
 static ssize_t rpn_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
@@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = {
 	&dev_attr_act_freq.attr,
 	&dev_attr_cur_freq.attr,
 	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpa_freq.attr,
 	&dev_attr_rpe_freq.attr,
 	&dev_attr_rpn_freq.attr,
 	&dev_attr_min_freq.attr,
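
With the attribute wired up, the new value reads back next to rp0/rpe/rpn, e.g. (illustrative path, following the per-GT freq0 sysfs layout):

    $ cat /sys/class/drm/card0/device/tile0/gt0/freq0/rpa_freq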

drivers/gpu/drm/xe/xe_gt_mcr.c

@@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
  * @group: steering group ID
  * @instance: steering instance ID
  *
- * Return: the coverted DSS id.
+ * Return: the converted DSS id.
  */
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
 {
@@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
  * Returns true if the caller should steer to the @group/@instance values
  * returned. Returns false if the caller need not perform any steering
  */
-static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
-						 struct xe_reg_mcr reg_mcr,
-						 u8 *group, u8 *instance)
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance)
 {
 	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	const struct xe_mmio_range *implicit_ranges;

drivers/gpu/drm/xe/xe_gt_mcr.h

@@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 			       u32 value);

+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance);
+
 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
 void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);

drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c

@@ -2161,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
  *
  * This function can only be called on PF.
  *
- * Return: mininum size of the buffer or the number of bytes saved,
+ * Return: minimum size of the buffer or the number of bytes saved,
  * or a negative error code on failure.
  */
 ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)

drivers/gpu/drm/xe/xe_guc_ads.c

@@ -29,6 +29,7 @@
#include "xe_platform_types.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "xe_gt_mcr.h"
/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX 8
@@ -696,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
 	};

+	if (reg.mcr) {
+		struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
+		u8 group, instance;
+
+		bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
+								  &group, &instance);
+
+		if (steer) {
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
+			entry.flags |= GUC_REGSET_STEERING_NEEDED;
+		}
+	}
+
 	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
 			 &entry, sizeof(entry));
 }
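
The flags packed here mirror what xe_gt_mcr_get_nonterminated_steering() reports: for an MCR register whose default steering is not guaranteed to terminate at a populated instance, the GuC has to apply the group/instance steering itself when it processes the register list, so the steering data travels with each regset entry.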

Some files were not shown because too many files have changed in this diff.