Merge tag 'drm-xe-next-2025-01-10' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Driver Changes:
- SRIOV VF: Avoid reading inaccessible registers (Jakub, Marcin)
- Introduce RPa frequency information (Rodrigo)
- Remove unnecessary force wakes on SLPC code (Vinay)
- Fix all typos in xe (Nitin)
- Adding steering info support for GuC register lists (Jesus)
- Remove unused xe_pciids.h harder, add missing PCI ID (Jani)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z4E0tlTAA6MZ7PF2@intel.com
@@ -66,7 +66,7 @@ config DRM_XE_DEBUG_MEM
 	bool "Enable passing SYS/VRAM addresses to user space"
 	default n
 	help
-	  Pass object location trough uapi. Intended for extended
+	  Pass object location through uapi. Intended for extended
 	  testing and development only.

 	  Recommended for driver developers only.

@@ -104,5 +104,5 @@ config DRM_XE_USERPTR_INVAL_INJECT
 	  Choose this option when debugging error paths that
 	  are hit during checks for userptr invalidations.

-	  Recomended for driver developers only.
+	  Recommended for driver developers only.
 	  If in doubt, say "N".

@@ -25,7 +25,7 @@ enum guc_state_capture_type {

 #define GUC_STATE_CAPTURE_TYPE_MAX	(GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)

-/* Class indecies for capture_class and capture_instance arrays */
+/* Class indices for capture_class and capture_instance arrays */
 enum guc_capture_list_class_type {
 	GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
 	GUC_CAPTURE_LIST_CLASS_VIDEO = 1,

@@ -132,7 +132,7 @@ enum {
  * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
  *      This config sets whether strict scheduling is enabled whereby any VF
  *      that doesn’t have work to submit is still allocated a fixed execution
- *      time-slice to ensure active VFs execution is always consitent even
+ *      time-slice to ensure active VFs execution is always consistent even
  *      during other VF reprovisiong / rebooting events. Changing this KLV
  *      impacts all VFs and takes effect on the next VF-Switch event.
  *
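For context on the KLV mechanism this comment documents: the PF provisions such a policy by sending a key/length/value blob to the GuC. The sketch below packs a GUC_KLV_VGT_POLICY_SCHED_IF_IDLE entry, assuming the usual GuC KLV layout (16-bit key in the upper half of the header dword, dword-count length in the lower half, value dwords following). The key 0x8001 comes from the hunk above; the one-dword length, macro names, and helper are illustrative assumptions, not the kernel's actual definitions.

#include <stdint.h>

/* Key taken from the documentation above; length and layout are assumptions. */
#define VGT_POLICY_SCHED_IF_IDLE_KEY  0x8001u
#define VGT_POLICY_SCHED_IF_IDLE_LEN  1u      /* value is a single dword */

/* Pack one KLV entry into klv[] (needs room for 2 dwords); returns dwords written. */
static inline uint32_t pack_sched_if_idle_klv(uint32_t *klv, uint32_t enable)
{
        klv[0] = (VGT_POLICY_SCHED_IF_IDLE_KEY << 16) | VGT_POLICY_SCHED_IF_IDLE_LEN;
        klv[1] = enable;      /* 1 = strict time-slicing even for idle VFs, 0 = default */
        return 2;
}
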
@@ -207,7 +207,7 @@ enum {
  * of and this will never be perfectly-exact (accumulated nano-second
  * granularity) since the GPUs clock time runs off a different crystal
  * from the CPUs clock. Changing this KLV on a VF that is currently
- * running a context wont take effect until a new context is scheduled in.
+ * running a context won't take effect until a new context is scheduled in.
  * That said, when the PF is changing this value from 0x0 to
  * a non-zero value, it might never take effect if the VF is running an
  * infinitely long compute or shader kernel. In such a scenario, the

@@ -227,7 +227,7 @@ enum {
  * HW is capable and this will never be perfectly-exact (accumulated
  * nano-second granularity) since the GPUs clock time runs off a
  * different crystal from the CPUs clock. Changing this KLV on a VF
- * that is currently running a context wont take effect until a new
+ * that is currently running a context won't take effect until a new
  * context is scheduled in.
  * That said, when the PF is changing this value from 0x0 to
  * a non-zero value, it might never take effect if the VF is running an

@@ -13,7 +13,7 @@
 /**
  * struct xe_reg - Register definition
  *
- * Register defintion to be used by the individual register. Although the same
+ * Register definition to be used by the individual register. Although the same
  * definition is used for xe_reg and xe_reg_mcr, they use different internal
  * APIs for accesses.
  */

@@ -44,12 +44,16 @@

 #define MTL_RP_STATE_CAP		XE_REG(0x138000)

+#define MTL_GT_RPA_FREQUENCY		XE_REG(0x138008)
 #define MTL_GT_RPE_FREQUENCY		XE_REG(0x13800c)

 #define MTL_MEDIAP_STATE_CAP		XE_REG(0x138020)
 #define MTL_RPN_CAP_MASK		REG_GENMASK(24, 16)
 #define MTL_RP0_CAP_MASK		REG_GENMASK(8, 0)

+#define MTL_MPA_FREQUENCY		XE_REG(0x138028)
+#define MTL_RPA_MASK			REG_GENMASK(8, 0)
+
 #define MTL_MPE_FREQUENCY		XE_REG(0x13802c)
 #define MTL_RPE_MASK			REG_GENMASK(8, 0)

@@ -58,7 +58,7 @@ static void read_l3cc_table(struct xe_gt *gt,

 			mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
 		} else {
-			/* Just re-use value read on previous iteration */
+			/* Just reuse value read on previous iteration */
 			reg_val >>= 16;
 		}

@@ -41,7 +41,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
 	/*
 	 * We need to allocate space for the requested number of dwords,
 	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
-	 * space to accomodate the platform-specific hardware prefetch
+	 * space to accommodate the platform-specific hardware prefetch
 	 * requirements.
 	 */
 	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,

@@ -786,7 +786,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		 * / resume, some of the pinned memory is required for the
 		 * device to resume / use the GPU to move other evicted memory
 		 * (user memory) around. This likely could be optimized a bit
-		 * futher where we find the minimum set of pinned memory
+		 * further where we find the minimum set of pinned memory
 		 * required for resume but for simplity doing a memcpy for all
 		 * pinned memory.
 		 */
@@ -875,7 +875,7 @@ out:
  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
  * @bo: The buffer object to move.
  *
- * On successful completion, the object memory will be moved to sytem memory.
+ * On successful completion, the object memory will be moved to system memory.
  *
  * This is needed to for special handling of pinned VRAM object during
  * suspend-resume.
@@ -1370,7 +1370,7 @@ static const struct drm_gem_object_funcs xe_gem_object_funcs = {
 /**
  * xe_bo_alloc - Allocate storage for a struct xe_bo
  *
- * This funcition is intended to allocate storage to be used for input
+ * This function is intended to allocate storage to be used for input
  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
  * created is needed before the call to __xe_bo_create_locked().
  * If __xe_bo_create_locked ends up never to be called, then the
@@ -2412,7 +2412,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
  * @force_alloc: Set force_alloc in ttm_operation_ctx
  *
  * On successful completion, the object memory will be moved to evict
- * placement. Ths function blocks until the object has been fully moved.
+ * placement. This function blocks until the object has been fully moved.
  *
  * Return: 0 on success. Negative error code on failure.
  */

@@ -41,7 +41,7 @@
  * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
  * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
  * user BOs are evictable and user BOs are never pinned by XE. The allocation of
- * the backing store can be defered from creation time until first use which is
+ * the backing store can be deferred from creation time until first use which is
  * either mmap, bind, or pagefault.
  *
  * Private BOs

@@ -48,7 +48,7 @@
  *
  * **Coredump release**:
  * After a coredump is generated, it stays in kernel memory until released by
- * userpace by writing anything to it, or after an internal timer expires. The
+ * userspace by writing anything to it, or after an internal timer expires. The
  * exact timeout may vary and should not be relied upon. Example to release
  * a coredump:
  *

@@ -606,7 +606,7 @@ static int probe_has_flat_ccs(struct xe_device *xe)
 	u32 reg;

 	/* Always enabled/disabled, no runtime check to do */
-	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
+	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe))
 		return 0;

 	gt = xe_root_mmio_gt(xe);

@@ -999,7 +999,7 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
  * xe_device_declare_wedged - Declare device wedged
  * @xe: xe device instance
  *
- * This is a final state that can only be cleared with a mudule
+ * This is a final state that can only be cleared with a module
  * re-probe (unbind + bind).
  * In this state every IOCTL will be blocked so the GT cannot be used.
  * In general it will be called upon any critical error such as gt reset

@@ -385,7 +385,7 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
  * @p: The drm_printer ptr
  * @file: The drm_file ptr
  *
- * This is callabck for drm fdinfo interface. Register this callback
+ * This is callback for drm fdinfo interface. Register this callback
  * in drm driver ops for show_fdinfo.
  *
  * Return: void

@@ -33,7 +33,7 @@
  *
  * In XE we avoid all of this complication by not allowing a BO list to be
  * passed into an exec, using the dma-buf implicit sync uAPI, have binds as
- * seperate operations, and using the DRM scheduler to flow control the ring.
+ * separate operations, and using the DRM scheduler to flow control the ring.
  * Let's deep dive on each of these.
  *
  * We can get away from a BO list by forcing the user to use in / out fences on

@@ -362,7 +362,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)

 	/*
 	 * So we don't need to worry about 64K GGTT layout when dealing with
-	 * scratch entires, rather keep the scratch page in system memory on
+	 * scratch entries, rather keep the scratch page in system memory on
 	 * platforms where 64K pages are needed for VRAM.
 	 */
 	flags = XE_BO_FLAG_PINNED;

@@ -37,7 +37,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt);

 /**
  * xe_gt_record_user_engines - save data related to engines available to
- * usersapce
+ * userspace
  * @gt: GT structure
  *
  * Walk the available HW engines from gt->info.engine_mask and calculate data

@@ -115,6 +115,20 @@ static ssize_t rpe_freq_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(rpe_freq);

+static ssize_t rpa_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_rpa_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
+
+	return sysfs_emit(buf, "%d\n", freq);
+}
+static DEVICE_ATTR_RO(rpa_freq);
+
 static ssize_t rpn_freq_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
 {
@@ -202,6 +216,7 @@ static const struct attribute *freq_attrs[] = {
 	&dev_attr_act_freq.attr,
 	&dev_attr_cur_freq.attr,
 	&dev_attr_rp0_freq.attr,
+	&dev_attr_rpa_freq.attr,
 	&dev_attr_rpe_freq.attr,
 	&dev_attr_rpn_freq.attr,
 	&dev_attr_min_freq.attr,

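Once registered above, rpa_freq should appear as a read-only file next to rp0/rpe/rpn in the GT frequency directory in sysfs. A minimal userspace sketch for reading it follows; the exact path (card, tile, and gt indices) is an assumption and varies per system.

#include <stdio.h>

int main(void)
{
        /* Hypothetical path; adjust card/tile/gt numbers for the target system. */
        const char *path = "/sys/class/drm/card0/device/tile0/gt0/freq0/rpa_freq";
        FILE *f = fopen(path, "r");
        unsigned int freq_mhz;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%u", &freq_mhz) != 1) {
                fprintf(stderr, "unexpected contents in %s\n", path);
                fclose(f);
                return 1;
        }
        fclose(f);

        printf("RPa frequency: %u MHz\n", freq_mhz);
        return 0;
}

Build with any C compiler and run it as a quick sanity check that the new attribute is present and reporting a plausible frequency.
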
@@ -371,7 +371,7 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
  * @group: steering group ID
  * @instance: steering instance ID
  *
- * Return: the coverted DSS id.
+ * Return: the converted DSS id.
  */
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance)
 {

@@ -550,9 +550,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
  * Returns true if the caller should steer to the @group/@instance values
  * returned. Returns false if the caller need not perform any steering
  */
-static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
-						 struct xe_reg_mcr reg_mcr,
-						 u8 *group, u8 *instance)
+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance)
 {
 	const struct xe_reg reg = to_xe_reg(reg_mcr);
 	const struct xe_mmio_range *implicit_ranges;

@@ -26,6 +26,10 @@ void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
			       u32 value);

+bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
+					  struct xe_reg_mcr reg_mcr,
+					  u8 *group, u8 *instance);
+
 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
 void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
 u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);

@@ -2161,7 +2161,7 @@ bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid)
  *
  * This function can only be called on PF.
  *
- * Return: mininum size of the buffer or the number of bytes saved,
+ * Return: minimum size of the buffer or the number of bytes saved,
  * or a negative error code on failure.
  */
 ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)

@@ -29,6 +29,7 @@
 #include "xe_platform_types.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
+#include "xe_gt_mcr.h"

 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
@@ -696,6 +697,20 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
 	};

+	if (reg.mcr) {
+		struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
+		u8 group, instance;
+
+		bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
+								  &group, &instance);
+
+		if (steer) {
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
+			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
+			entry.flags |= GUC_REGSET_STEERING_NEEDED;
+		}
+	}
+
 	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
			 &entry, sizeof(entry));
 }

Some files were not shown because too many files have changed in this diff.