mirror of
https://github.com/archr-linux/Arch-R.git
synced 2026-03-31 14:41:55 -07:00
mesa (RPi5): drop upstream patches in 23.3.0-rc1
This commit is contained in:
@@ -1,332 +0,0 @@
|
||||
From f62aa2640f92796ff5216da0a5d3c8f46a2855b4 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Mon, 26 Apr 2021 00:02:21 +0200
|
||||
Subject: [PATCH 001/142] broadcom(cle,clif,common,simulator): add 7.1 version
|
||||
on the list of versions to build
|
||||
|
||||
This adds 7.1 to the list of available V3D_VERSION, and first changes
|
||||
on the simulator needed to get it working.
|
||||
|
||||
Note that we needed to touch all those 4 codebases because it is
|
||||
needed if we want to use V3D_DEBUG=clif with the simulator, which is
|
||||
the easiest way to see which packets a Vulkan program is using.
|
||||
|
||||
About the simulator, this commit only handles the rename of some
|
||||
registers. Any additional changes needed to get a proper support for
|
||||
v71 will be handled in following commits.
|
||||
---
|
||||
src/broadcom/cle/meson.build | 3 +-
|
||||
src/broadcom/cle/v3dx_pack.h | 2 +
|
||||
src/broadcom/clif/clif_private.h | 2 +
|
||||
src/broadcom/common/v3d_device_info.c | 1 +
|
||||
src/broadcom/common/v3d_macros.h | 3 +
|
||||
src/broadcom/meson.build | 2 +-
|
||||
src/broadcom/simulator/v3d_simulator.c | 81 +++++++++++++++++++------
|
||||
src/broadcom/simulator/v3d_simulator.h | 5 ++
|
||||
src/broadcom/simulator/v3dx_simulator.c | 31 ++++++++--
|
||||
9 files changed, 106 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build
|
||||
index 31a0d5bfa94..8ac32b313e4 100644
|
||||
--- a/src/broadcom/cle/meson.build
|
||||
+++ b/src/broadcom/cle/meson.build
|
||||
@@ -23,7 +23,8 @@ v3d_versions = [
|
||||
[21, 21],
|
||||
[33, 33],
|
||||
[41, 33],
|
||||
- [42, 33]
|
||||
+ [42, 33],
|
||||
+ [71, 33]
|
||||
]
|
||||
|
||||
v3d_xml_files = []
|
||||
diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h
|
||||
index 5762e5aaa70..e5a1eb26698 100644
|
||||
--- a/src/broadcom/cle/v3dx_pack.h
|
||||
+++ b/src/broadcom/cle/v3dx_pack.h
|
||||
@@ -37,6 +37,8 @@
|
||||
# include "cle/v3d_packet_v41_pack.h"
|
||||
#elif (V3D_VERSION == 42)
|
||||
# include "cle/v3d_packet_v42_pack.h"
|
||||
+#elif (V3D_VERSION == 71)
|
||||
+# include "cle/v3d_packet_v71_pack.h"
|
||||
#else
|
||||
# error "Need to add a pack header include for this v3d version"
|
||||
#endif
|
||||
diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h
|
||||
index 6ace62b0310..cda407a00bf 100644
|
||||
--- a/src/broadcom/clif/clif_private.h
|
||||
+++ b/src/broadcom/clif/clif_private.h
|
||||
@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
+ const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
|
||||
static inline void
|
||||
out(struct clif_dump *clif, const char *fmt, ...)
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 272190eb2e5..7e0862f1f02 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -66,6 +66,7 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
case 33:
|
||||
case 41:
|
||||
case 42:
|
||||
+ case 71:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,
|
||||
diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h
|
||||
index fe89398208a..b4291fb5350 100644
|
||||
--- a/src/broadcom/common/v3d_macros.h
|
||||
+++ b/src/broadcom/common/v3d_macros.h
|
||||
@@ -41,6 +41,9 @@
|
||||
#elif (V3D_VERSION == 42)
|
||||
# define V3DX(x) V3D42_##x
|
||||
# define v3dX(x) v3d42_##x
|
||||
+#elif (V3D_VERSION == 71)
|
||||
+# define V3DX(x) V3D71_##x
|
||||
+# define v3dX(x) v3d71_##x
|
||||
#else
|
||||
# error "Need to add prefixing macros for this v3d version"
|
||||
#endif
|
||||
diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build
|
||||
index 2c10e46b188..73cb7aa0575 100644
|
||||
--- a/src/broadcom/meson.build
|
||||
+++ b/src/broadcom/meson.build
|
||||
@@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')
|
||||
|
||||
subdir('cle')
|
||||
|
||||
-v3d_versions = ['33', '41', '42']
|
||||
+v3d_versions = ['33', '41', '42', '71']
|
||||
v3d_libs = []
|
||||
|
||||
if with_gallium_v3d or with_broadcom_vk
|
||||
diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c
|
||||
index eea5d3f050e..5cceb1a82cc 100644
|
||||
--- a/src/broadcom/simulator/v3d_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3d_simulator.c
|
||||
@@ -490,10 +490,20 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
|
||||
|
||||
v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
- else
|
||||
- v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
+ v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
|
||||
util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *,
|
||||
sim_bo) {
|
||||
@@ -635,10 +645,17 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
|
||||
static int
|
||||
v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args)
|
||||
{
|
||||
- if (sim_state.ver >= 41)
|
||||
- return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
return v3d33_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ case 71:
|
||||
+ return v3d71_simulator_get_param_ioctl(sim_state.v3d, args);
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -652,10 +669,20 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args)
|
||||
v3d_simulator_copy_in_handle(file, args->bo_handles[2]);
|
||||
v3d_simulator_copy_in_handle(file, args->bo_handles[3]);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Unsupported V3D version\n");
|
||||
+ }
|
||||
|
||||
v3d_simulator_copy_out_handle(file, args->bo_handles[0]);
|
||||
|
||||
@@ -682,11 +709,19 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
|
||||
|
||||
v3d_simulator_perfmon_switch(fd, args->perfmon_id);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
- file->gmp->ofs);
|
||||
- else
|
||||
- ret = -1;
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
+ file->gmp->ofs);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args,
|
||||
+ file->gmp->ofs);
|
||||
+ break;
|
||||
+ default:
|
||||
+ ret = -1;
|
||||
+ }
|
||||
|
||||
for (int i = 0; i < args->bo_handle_count; i++)
|
||||
v3d_simulator_copy_out_handle(file, bo_handles[i]);
|
||||
@@ -880,10 +915,20 @@ v3d_simulator_init_global()
|
||||
|
||||
util_dynarray_init(&sim_state.bin_oom, NULL);
|
||||
|
||||
- if (sim_state.ver >= 41)
|
||||
- v3d41_simulator_init_regs(sim_state.v3d);
|
||||
- else
|
||||
+ switch(sim_state.ver) {
|
||||
+ case 33:
|
||||
v3d33_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ case 41:
|
||||
+ case 42:
|
||||
+ v3d41_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ case 71:
|
||||
+ v3d71_simulator_init_regs(sim_state.v3d);
|
||||
+ break;
|
||||
+ default:
|
||||
+ unreachable("Not supported V3D version\n");
|
||||
+ }
|
||||
}
|
||||
|
||||
struct v3d_simulator_file *
|
||||
diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h
|
||||
index ddb079c1455..1472c313a03 100644
|
||||
--- a/src/broadcom/simulator/v3d_simulator.h
|
||||
+++ b/src/broadcom/simulator/v3d_simulator.h
|
||||
@@ -52,6 +52,11 @@ uint32_t v3d_simulator_get_mem_free(void);
|
||||
# define v3dX(x) v3d41_##x
|
||||
# include "v3dx_simulator.h"
|
||||
# undef v3dX
|
||||
+
|
||||
+# define v3dX(x) v3d71_##x
|
||||
+# include "v3dx_simulator.h"
|
||||
+# undef v3dX
|
||||
+
|
||||
#endif
|
||||
|
||||
#endif
|
||||
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
|
||||
index c9322f0397b..723796b16c9 100644
|
||||
--- a/src/broadcom/simulator/v3dx_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3dx_simulator.c
|
||||
@@ -46,11 +46,15 @@
|
||||
|
||||
#define HW_REGISTER_RO(x) (x)
|
||||
#define HW_REGISTER_RW(x) (x)
|
||||
-#if V3D_VERSION >= 41
|
||||
+#if V3D_VERSION == 71
|
||||
+#include "libs/core/v3d/registers/7.1.5.1/v3d.h"
|
||||
+#else
|
||||
+#if V3D_VERSION == 41 || V3D_VERSION == 42
|
||||
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
|
||||
#else
|
||||
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
|
||||
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
|
||||
@@ -310,16 +314,17 @@ v3d_isr_core(struct v3d_hw *v3d,
|
||||
return;
|
||||
}
|
||||
|
||||
+#if V3D_VERSION <= 42
|
||||
if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
|
||||
fprintf(stderr, "GMP violation at 0x%08x\n",
|
||||
V3D_READ(V3D_GMP_VIO_ADDR));
|
||||
- abort();
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Unexpected ISR with core status 0x%08x\n",
|
||||
core_status);
|
||||
}
|
||||
abort();
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -396,6 +401,18 @@ v3d_isr_hub(struct v3d_hw *v3d)
|
||||
}
|
||||
|
||||
handle_mmu_interruptions(v3d, hub_status);
|
||||
+
|
||||
+#if V3D_VERSION == 71
|
||||
+ if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
|
||||
+ fprintf(stderr, "GMP violation at 0x%08x\n",
|
||||
+ V3D_READ(V3D_GMP_VIO_ADDR));
|
||||
+ } else {
|
||||
+ fprintf(stderr,
|
||||
+ "Unexpected ISR with status 0x%08x\n",
|
||||
+ hub_status);
|
||||
+ }
|
||||
+ abort();
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -436,8 +453,11 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
||||
* for tracing. Perhaps we should evaluate to do the same here and add
|
||||
* some debug options.
|
||||
*/
|
||||
- uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
|
||||
- V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
|
||||
+ uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
|
||||
+#if V3D_VERSION <= 42
|
||||
+ core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
|
||||
+#endif
|
||||
+
|
||||
V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
|
||||
V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
|
||||
|
||||
@@ -447,6 +467,9 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
||||
V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
|
||||
V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
|
||||
|
||||
+#if V3D_VERSION == 71
|
||||
+ hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
|
||||
+#endif
|
||||
V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
|
||||
V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
From 9e85edd1b347b0e779b393f463f42044a720bcff Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 28 Sep 2021 13:16:49 +0200
|
||||
Subject: [PATCH 002/142] broadcom/simulator: reset CFG7 for compute dispatch
|
||||
in v71
|
||||
|
||||
This register is new in 7.x, it doesn't seem that we need to
|
||||
do anything specific for now, but let's make sure it is reset
|
||||
every time.
|
||||
---
|
||||
src/broadcom/simulator/v3dx_simulator.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
|
||||
index 723796b16c9..f23b0538de3 100644
|
||||
--- a/src/broadcom/simulator/v3dx_simulator.c
|
||||
+++ b/src/broadcom/simulator/v3dx_simulator.c
|
||||
@@ -227,6 +227,9 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
|
||||
+#if V3D_VERSION >= 71
|
||||
+ V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
|
||||
+#endif
|
||||
/* CFG0 kicks off the job */
|
||||
V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,65 +0,0 @@
|
||||
From 569cbe4229df737ce5915c4be2cad534707fb4f7 Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Tue, 9 Nov 2021 08:50:51 +0100
|
||||
Subject: [PATCH 004/142] broadcom/common: retrieve V3D revision number
|
||||
|
||||
The subrev field from the hub ident3 register is bumped with every
|
||||
hardware revision doing backwards incompatible changes so we want to
|
||||
keep track of this.
|
||||
|
||||
Instead of modifying the 'ver' field info to accommodate subrev info,
|
||||
which would require a lot of changes, simply add a new 'rev' field in
|
||||
devinfo that we can use when we need to make changes based on the
|
||||
revision number of a hardware release.
|
||||
---
|
||||
src/broadcom/common/v3d_device_info.c | 14 +++++++++++++-
|
||||
src/broadcom/common/v3d_device_info.h | 3 +++
|
||||
2 files changed, 16 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 7e0862f1f02..7512fe3a06b 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
struct drm_v3d_get_param ident1 = {
|
||||
.param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
|
||||
};
|
||||
+ struct drm_v3d_get_param hub_ident3 = {
|
||||
+ .param = DRM_V3D_PARAM_V3D_HUB_IDENT3,
|
||||
+ };
|
||||
int ret;
|
||||
|
||||
ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
|
||||
@@ -76,5 +79,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
return false;
|
||||
}
|
||||
|
||||
- return true;
|
||||
+ ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3);
|
||||
+ if (ret != 0) {
|
||||
+ fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n",
|
||||
+ strerror(errno));
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ devinfo->rev = (hub_ident3.value >> 8) & 0xff;
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
|
||||
index 97abd9b8d9f..32cb65cf81f 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.h
|
||||
+++ b/src/broadcom/common/v3d_device_info.h
|
||||
@@ -34,6 +34,9 @@ struct v3d_device_info {
|
||||
/** Simple V3D version: major * 10 + minor */
|
||||
uint8_t ver;
|
||||
|
||||
+ /** V3D revision number */
|
||||
+ uint8_t rev;
|
||||
+
|
||||
/** Size of the VPM, in bytes. */
|
||||
int vpm_size;
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
From c260843c882d25bd31e308566b45d4517fda0fa2 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 17 Nov 2021 14:40:47 +0100
|
||||
Subject: [PATCH 005/142] broadcom/common: add some common v71 helpers
|
||||
|
||||
---
|
||||
src/broadcom/common/v3d_util.c | 27 +++++++++++++++++++++++++++
|
||||
src/broadcom/common/v3d_util.h | 27 +++++++++++++++++++++++++++
|
||||
2 files changed, 54 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
|
||||
index 57872a923d3..26f5c6b336f 100644
|
||||
--- a/src/broadcom/common/v3d_util.c
|
||||
+++ b/src/broadcom/common/v3d_util.c
|
||||
@@ -170,3 +170,30 @@ v3d_hw_prim_type(enum mesa_prim prim_type)
|
||||
unreachable("Unsupported primitive type");
|
||||
}
|
||||
}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_internal_bpp_words(uint32_t internal_bpp)
|
||||
+{
|
||||
+ switch (internal_bpp) {
|
||||
+ case 0 /* V3D_INTERNAL_BPP_32 */:
|
||||
+ return 1;
|
||||
+ case 1 /* V3D_INTERNAL_BPP_64 */:
|
||||
+ return 2;
|
||||
+ case 2 /* V3D_INTERNAL_BPP_128 */:
|
||||
+ return 4;
|
||||
+ default:
|
||||
+ unreachable("Unsupported internal BPP");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
|
||||
+ uint32_t bpp)
|
||||
+{
|
||||
+ /* stride in multiples of 128 bits, and covers 2 rows. This is the
|
||||
+ * reason we divide by 2 instead of 4, as we divide number of 32-bit
|
||||
+ * words per row by 2.
|
||||
+ */
|
||||
+
|
||||
+ return (tile_width * bpp) / 2;
|
||||
+}
|
||||
diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
|
||||
index eb802b77f67..864fc949ffa 100644
|
||||
--- a/src/broadcom/common/v3d_util.h
|
||||
+++ b/src/broadcom/common/v3d_util.h
|
||||
@@ -24,6 +24,7 @@
|
||||
#ifndef V3D_UTIL_H
|
||||
#define V3D_UTIL_H
|
||||
|
||||
+#include "util/macros.h"
|
||||
#include "common/v3d_device_info.h"
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
@@ -46,4 +47,30 @@ v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle);
|
||||
uint32_t
|
||||
v3d_hw_prim_type(enum mesa_prim prim_type);
|
||||
|
||||
+uint32_t
|
||||
+v3d_internal_bpp_words(uint32_t internal_bpp);
|
||||
+
|
||||
+/* Some configuration packets want the size on log2, but starting at 0 for
|
||||
+ * size 8.
|
||||
+ */
|
||||
+static inline uint8_t
|
||||
+log2_tile_size(uint32_t size)
|
||||
+{
|
||||
+ switch(size) {
|
||||
+ case 8:
|
||||
+ return 0;
|
||||
+ case 16:
|
||||
+ return 1;
|
||||
+ case 32:
|
||||
+ return 2;
|
||||
+ case 64:
|
||||
+ return 3;
|
||||
+ default:
|
||||
+ unreachable("Unsupported tile width/height");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+uint32_t
|
||||
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
|
||||
+ uint32_t bpp);
|
||||
#endif
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
From a5211a4d71acc53183d2a90eb1694d8cce6eb44f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 01:03:11 +0200
|
||||
Subject: [PATCH 006/142] broadcom/qpu: add comments on waddr not used on V3D
|
||||
7.x
|
||||
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.h | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 2e133472698..45a0cad9760 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -88,11 +88,11 @@ enum v3d_qpu_uf {
|
||||
};
|
||||
|
||||
enum v3d_qpu_waddr {
|
||||
- V3D_QPU_WADDR_R0 = 0,
|
||||
- V3D_QPU_WADDR_R1 = 1,
|
||||
- V3D_QPU_WADDR_R2 = 2,
|
||||
- V3D_QPU_WADDR_R3 = 3,
|
||||
- V3D_QPU_WADDR_R4 = 4,
|
||||
+ V3D_QPU_WADDR_R0 = 0, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R1 = 1, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R5 = 5,
|
||||
V3D_QPU_WADDR_NOP = 6,
|
||||
V3D_QPU_WADDR_TLB = 7,
|
||||
@@ -108,12 +108,12 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_SYNC = 16,
|
||||
V3D_QPU_WADDR_SYNCU = 17,
|
||||
V3D_QPU_WADDR_SYNCB = 18,
|
||||
- V3D_QPU_WADDR_RECIP = 19,
|
||||
- V3D_QPU_WADDR_RSQRT = 20,
|
||||
- V3D_QPU_WADDR_EXP = 21,
|
||||
- V3D_QPU_WADDR_LOG = 22,
|
||||
- V3D_QPU_WADDR_SIN = 23,
|
||||
- V3D_QPU_WADDR_RSQRT2 = 24,
|
||||
+ V3D_QPU_WADDR_RECIP = 19, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_RSQRT = 20, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_EXP = 21, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_LOG = 22, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_SIN = 23, /* Reserved on V3D 7.x */
|
||||
+ V3D_QPU_WADDR_RSQRT2 = 24, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_TMUC = 32,
|
||||
V3D_QPU_WADDR_TMUS = 33,
|
||||
V3D_QPU_WADDR_TMUT = 34,
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
From 0ccf3043e4a584e5592bb7fad737d5d98ed23db0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 01:00:47 +0200
|
||||
Subject: [PATCH 007/142] broadcom/qpu: set V3D 7.x names for some waddr
|
||||
aliasing
|
||||
|
||||
V3D 7.x got rid of the accumulator, but still uses the values for
|
||||
WADDR_R5 and WADDR_R5REP, so let's return a proper name and add some
|
||||
aliases.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 8 ++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 6 ++++--
|
||||
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 60dabf74e8e..7759fb0efdf 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -35,6 +35,14 @@ v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
|
||||
if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
|
||||
return "tmu";
|
||||
|
||||
+ /* V3D 7.x QUAD and REP aliases R5 and R5REPT in the table below
|
||||
+ */
|
||||
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
|
||||
+ return "quad";
|
||||
+
|
||||
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
|
||||
+ return "rep";
|
||||
+
|
||||
static const char *waddr_magic[] = {
|
||||
[V3D_QPU_WADDR_R0] = "r0",
|
||||
[V3D_QPU_WADDR_R1] = "r1",
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 45a0cad9760..19bf721dbe1 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -93,7 +93,8 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
|
||||
V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
|
||||
- V3D_QPU_WADDR_R5 = 5,
|
||||
+ V3D_QPU_WADDR_R5 = 5, /* V3D 4.x */
|
||||
+ V3D_QPU_WADDR_QUAD = 5, /* V3D 7.x */
|
||||
V3D_QPU_WADDR_NOP = 6,
|
||||
V3D_QPU_WADDR_TLB = 7,
|
||||
V3D_QPU_WADDR_TLBU = 8,
|
||||
@@ -129,7 +130,8 @@ enum v3d_qpu_waddr {
|
||||
V3D_QPU_WADDR_TMUHSCM = 44,
|
||||
V3D_QPU_WADDR_TMUHSF = 45,
|
||||
V3D_QPU_WADDR_TMUHSLOD = 46,
|
||||
- V3D_QPU_WADDR_R5REP = 55,
|
||||
+ V3D_QPU_WADDR_R5REP = 55, /* V3D 4.x */
|
||||
+ V3D_QPU_WADDR_REP = 55, /* V3D 7.x */
|
||||
};
|
||||
|
||||
struct v3d_qpu_flags {
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,241 +0,0 @@
|
||||
From 18de3cc85cf8bbe294e044f7a12abe14e554de0a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Sun, 19 Sep 2021 03:20:18 +0200
|
||||
Subject: [PATCH 008/142] broadcom/compiler: rename small_imm to small_imm_b
|
||||
|
||||
Current small_imm is associated with the "B" read address.
|
||||
|
||||
We do this change in advance for v71 support, where we will have 4
|
||||
different small_imm (a/b/c/d), so we start with a renaming.
|
||||
---
|
||||
src/broadcom/compiler/qpu_schedule.c | 22 +++++++++----------
|
||||
.../compiler/vir_opt_small_immediates.c | 4 ++--
|
||||
src/broadcom/compiler/vir_to_qpu.c | 2 +-
|
||||
src/broadcom/qpu/qpu_disasm.c | 2 +-
|
||||
src/broadcom/qpu/qpu_instr.h | 2 +-
|
||||
src/broadcom/qpu/qpu_pack.c | 22 +++++++++----------
|
||||
6 files changed, 27 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
|
||||
index 3b32b48f86f..a10fa03ed10 100644
|
||||
--- a/src/broadcom/compiler/qpu_schedule.c
|
||||
+++ b/src/broadcom/compiler/qpu_schedule.c
|
||||
@@ -160,7 +160,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
|
||||
break;
|
||||
case V3D_QPU_MUX_B:
|
||||
- if (!n->inst->qpu.sig.small_imm) {
|
||||
+ if (!n->inst->qpu.sig.small_imm_b) {
|
||||
add_read_dep(state,
|
||||
state->last_rf[n->inst->qpu.raddr_b], n);
|
||||
}
|
||||
@@ -615,7 +615,7 @@ qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
|
||||
return true;
|
||||
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
|
||||
- !inst->sig.small_imm && (inst->raddr_b == waddr))
|
||||
+ !inst->sig.small_imm_b && (inst->raddr_b == waddr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -790,11 +790,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
|
||||
uint64_t raddrs_used = 0;
|
||||
if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
|
||||
raddrs_used |= (1ll << a->raddr_a);
|
||||
- if (!a->sig.small_imm && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
|
||||
+ if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
|
||||
raddrs_used |= (1ll << a->raddr_b);
|
||||
if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
|
||||
raddrs_used |= (1ll << b->raddr_a);
|
||||
- if (!b->sig.small_imm && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
|
||||
+ if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
|
||||
raddrs_used |= (1ll << b->raddr_b);
|
||||
|
||||
return raddrs_used;
|
||||
@@ -816,16 +816,16 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
if (naddrs > 2)
|
||||
return false;
|
||||
|
||||
- if ((add_instr->sig.small_imm || mul_instr->sig.small_imm)) {
|
||||
+ if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
|
||||
if (naddrs > 1)
|
||||
return false;
|
||||
|
||||
- if (add_instr->sig.small_imm && mul_instr->sig.small_imm)
|
||||
+ if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
|
||||
if (add_instr->raddr_b != mul_instr->raddr_b)
|
||||
return false;
|
||||
|
||||
- result->sig.small_imm = true;
|
||||
- result->raddr_b = add_instr->sig.small_imm ?
|
||||
+ result->sig.small_imm_b = true;
|
||||
+ result->raddr_b = add_instr->sig.small_imm_b ?
|
||||
add_instr->raddr_b : mul_instr->raddr_b;
|
||||
}
|
||||
|
||||
@@ -836,7 +836,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
||||
raddrs_used &= ~(1ll << raddr_a);
|
||||
result->raddr_a = raddr_a;
|
||||
|
||||
- if (!result->sig.small_imm) {
|
||||
+ if (!result->sig.small_imm_b) {
|
||||
if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
|
||||
raddr_a == add_instr->raddr_b) {
|
||||
if (add_instr->alu.add.a == V3D_QPU_MUX_B)
|
||||
@@ -1025,7 +1025,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
merge.sig.ldtmu |= b->sig.ldtmu;
|
||||
merge.sig.ldvary |= b->sig.ldvary;
|
||||
merge.sig.ldvpm |= b->sig.ldvpm;
|
||||
- merge.sig.small_imm |= b->sig.small_imm;
|
||||
+ merge.sig.small_imm_b |= b->sig.small_imm_b;
|
||||
merge.sig.ldtlb |= b->sig.ldtlb;
|
||||
merge.sig.ldtlbu |= b->sig.ldtlbu;
|
||||
merge.sig.ucb |= b->sig.ucb;
|
||||
@@ -1614,7 +1614,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
return false;
|
||||
|
||||
if (inst->raddr_b < 3 &&
|
||||
- !inst->sig.small_imm &&
|
||||
+ !inst->sig.small_imm_b &&
|
||||
v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
|
||||
return false;
|
||||
}
|
||||
diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
index 47d7722968d..df0d6c36c9b 100644
|
||||
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
|
||||
@@ -80,7 +80,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
*/
|
||||
struct v3d_qpu_sig new_sig = inst->qpu.sig;
|
||||
uint32_t sig_packed;
|
||||
- new_sig.small_imm = true;
|
||||
+ new_sig.small_imm_b = true;
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
|
||||
continue;
|
||||
|
||||
@@ -89,7 +89,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
||||
vir_dump_inst(c, inst);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
- inst->qpu.sig.small_imm = true;
|
||||
+ inst->qpu.sig.small_imm_b = true;
|
||||
inst->qpu.raddr_b = packed;
|
||||
|
||||
inst->src[i].file = QFILE_SMALL_IMM;
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index 45e6bfa1470..15c2e3674c2 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -94,7 +94,7 @@ static void
|
||||
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
{
|
||||
if (src.smimm) {
|
||||
- assert(instr->sig.small_imm);
|
||||
+ assert(instr->sig.small_imm_b);
|
||||
*mux = V3D_QPU_MUX_B;
|
||||
return;
|
||||
}
|
||||
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
|
||||
index 28fb2357b97..6aca3c28e78 100644
|
||||
--- a/src/broadcom/qpu/qpu_disasm.c
|
||||
+++ b/src/broadcom/qpu/qpu_disasm.c
|
||||
@@ -62,7 +62,7 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
|
||||
if (mux == V3D_QPU_MUX_A) {
|
||||
append(disasm, "rf%d", instr->raddr_a);
|
||||
} else if (mux == V3D_QPU_MUX_B) {
|
||||
- if (instr->sig.small_imm) {
|
||||
+ if (instr->sig.small_imm_b) {
|
||||
uint32_t val;
|
||||
ASSERTED bool ok =
|
||||
v3d_qpu_small_imm_unpack(disasm->devinfo,
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 19bf721dbe1..9cd831863b4 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -50,7 +50,7 @@ struct v3d_qpu_sig {
|
||||
bool ldvpm:1;
|
||||
bool ldtlb:1;
|
||||
bool ldtlbu:1;
|
||||
- bool small_imm:1;
|
||||
+ bool small_imm_b:1;
|
||||
bool ucb:1;
|
||||
bool rotate:1;
|
||||
bool wrtmuc:1;
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index a875683c6f8..beac591d3c1 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -112,7 +112,7 @@
|
||||
#define LDTMU .ldtmu = true
|
||||
#define LDVARY .ldvary = true
|
||||
#define LDVPM .ldvpm = true
|
||||
-#define SMIMM .small_imm = true
|
||||
+#define SMIMM_B .small_imm_b = true
|
||||
#define LDTLB .ldtlb = true
|
||||
#define LDTLBU .ldtlbu = true
|
||||
#define UCB .ucb = true
|
||||
@@ -135,8 +135,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
[12] = { LDVARY, LDTMU, },
|
||||
[13] = { THRSW, LDVARY, LDTMU, },
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
/* 18-21 reserved */
|
||||
@@ -148,8 +148,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
[27] = { THRSW, LDVPM, LDUNIF },
|
||||
[28] = { LDVPM, LDTMU, },
|
||||
[29] = { THRSW, LDVPM, LDTMU, },
|
||||
- [30] = { SMIMM, LDVPM, },
|
||||
- [31] = { SMIMM, },
|
||||
+ [30] = { SMIMM_B, LDVPM, },
|
||||
+ [31] = { SMIMM_B, },
|
||||
};
|
||||
|
||||
static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
@@ -167,8 +167,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
[10] = { LDVARY, LDUNIF },
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
/* 12-13 reserved */
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
@@ -178,7 +178,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
[22] = { UCB, },
|
||||
[23] = { ROT, },
|
||||
/* 24-30 reserved */
|
||||
- [31] = { SMIMM, LDTMU, },
|
||||
+ [31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
@@ -197,8 +197,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
[12] = { LDUNIFRF },
|
||||
[13] = { THRSW, LDUNIFRF },
|
||||
- [14] = { SMIMM, LDVARY, },
|
||||
- [15] = { SMIMM, },
|
||||
+ [14] = { SMIMM_B, LDVARY },
|
||||
+ [15] = { SMIMM_B, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
@@ -210,7 +210,7 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[24] = { LDUNIFA},
|
||||
[25] = { LDUNIFARF },
|
||||
/* 26-30 reserved */
|
||||
- [31] = { SMIMM, LDTMU, },
|
||||
+ [31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
bool
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
From 0e87405fe73694c173b7ce14c3d60611f241922c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 5 Aug 2021 00:50:12 +0200
|
||||
Subject: [PATCH 009/142] broadcom/compiler: add small_imm a/c/d on v3d_qpu_sig
|
||||
|
||||
small_imm_a, small_imm_c and small_imm_d added on top of the already
|
||||
existing small_imm_b, as V3D 7.1 defines 4 small immediates, tied to
|
||||
the 4 raddr. Note that this is only the definition, plus an instruction
|
||||
validation rule checking that they are not used before v71. Any real use is
|
||||
still pending.
|
||||
---
|
||||
src/broadcom/compiler/qpu_validate.c | 5 +++++
|
||||
src/broadcom/qpu/qpu_instr.h | 5 ++++-
|
||||
2 files changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
|
||||
index 2cc7a0eb0ae..12788692432 100644
|
||||
--- a/src/broadcom/compiler/qpu_validate.c
|
||||
+++ b/src/broadcom/compiler/qpu_validate.c
|
||||
@@ -115,6 +115,11 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return;
|
||||
|
||||
+ if (devinfo->ver < 71) {
|
||||
+ if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
|
||||
+ fail_instr(state, "small imm a/c/d added after V3D 7.1");
|
||||
+ }
|
||||
+
|
||||
/* LDVARY writes r5 two instructions later and LDUNIF writes
|
||||
* r5 one instruction later, which is illegal to have
|
||||
* together.
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 9cd831863b4..13b3f37d43f 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -50,10 +50,13 @@ struct v3d_qpu_sig {
|
||||
bool ldvpm:1;
|
||||
bool ldtlb:1;
|
||||
bool ldtlbu:1;
|
||||
- bool small_imm_b:1;
|
||||
bool ucb:1;
|
||||
bool rotate:1;
|
||||
bool wrtmuc:1;
|
||||
+ bool small_imm_a:1; /* raddr_a (add a), since V3D 7.x */
|
||||
+ bool small_imm_b:1; /* raddr_b (add b) */
|
||||
+ bool small_imm_c:1; /* raddr_c (mul a), since V3D 7.x */
|
||||
+ bool small_imm_d:1; /* raddr_d (mul b), since V3D 7.x */
|
||||
};
|
||||
|
||||
enum v3d_qpu_cond {
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
From eca19c911d9af3b0ab3b563ea65dc455e3d27987 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 4 Aug 2021 01:11:16 +0200
|
||||
Subject: [PATCH 010/142] broadcom/qpu: add v71 signal map
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Compared with v41, the differences are:
|
||||
* 14, 15, 30 and 31 are now about immediate a, b, c, d respectively
|
||||
* 23 is now reserved. On v42 this was for rotate signals, that are
|
||||
gone on v71.
|
||||
|
||||
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
|
||||
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 47 ++++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 44 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index beac591d3c1..2820d9d4c56 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -112,12 +112,15 @@
|
||||
#define LDTMU .ldtmu = true
|
||||
#define LDVARY .ldvary = true
|
||||
#define LDVPM .ldvpm = true
|
||||
-#define SMIMM_B .small_imm_b = true
|
||||
#define LDTLB .ldtlb = true
|
||||
#define LDTLBU .ldtlbu = true
|
||||
#define UCB .ucb = true
|
||||
#define ROT .rotate = true
|
||||
#define WRTMUC .wrtmuc = true
|
||||
+#define SMIMM_A .small_imm_a = true
|
||||
+#define SMIMM_B .small_imm_b = true
|
||||
+#define SMIMM_C .small_imm_c = true
|
||||
+#define SMIMM_D .small_imm_d = true
|
||||
|
||||
static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
/* MISC R3 R4 R5 */
|
||||
@@ -213,6 +216,40 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
[31] = { SMIMM_B, LDTMU, },
|
||||
};
|
||||
|
||||
+
|
||||
+static const struct v3d_qpu_sig v71_sig_map[] = {
|
||||
+ /* MISC phys RF0 */
|
||||
+ [0] = { },
|
||||
+ [1] = { THRSW, },
|
||||
+ [2] = { LDUNIF },
|
||||
+ [3] = { THRSW, LDUNIF },
|
||||
+ [4] = { LDTMU, },
|
||||
+ [5] = { THRSW, LDTMU, },
|
||||
+ [6] = { LDTMU, LDUNIF },
|
||||
+ [7] = { THRSW, LDTMU, LDUNIF },
|
||||
+ [8] = { LDVARY, },
|
||||
+ [9] = { THRSW, LDVARY, },
|
||||
+ [10] = { LDVARY, LDUNIF },
|
||||
+ [11] = { THRSW, LDVARY, LDUNIF },
|
||||
+ [12] = { LDUNIFRF },
|
||||
+ [13] = { THRSW, LDUNIFRF },
|
||||
+ [14] = { SMIMM_A, },
|
||||
+ [15] = { SMIMM_B, },
|
||||
+ [16] = { LDTLB, },
|
||||
+ [17] = { LDTLBU, },
|
||||
+ [18] = { WRTMUC },
|
||||
+ [19] = { THRSW, WRTMUC },
|
||||
+ [20] = { LDVARY, WRTMUC },
|
||||
+ [21] = { THRSW, LDVARY, WRTMUC },
|
||||
+ [22] = { UCB, },
|
||||
+ /* 23 reserved */
|
||||
+ [24] = { LDUNIFA},
|
||||
+ [25] = { LDUNIFARF },
|
||||
+ /* 26-29 reserved */
|
||||
+ [30] = { SMIMM_C, },
|
||||
+ [31] = { SMIMM_D, },
|
||||
+};
|
||||
+
|
||||
bool
|
||||
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
uint32_t packed_sig,
|
||||
@@ -221,7 +258,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
|
||||
return false;
|
||||
|
||||
- if (devinfo->ver >= 41)
|
||||
+ if (devinfo->ver >= 71)
|
||||
+ *sig = v71_sig_map[packed_sig];
|
||||
+ else if (devinfo->ver >= 41)
|
||||
*sig = v41_sig_map[packed_sig];
|
||||
else if (devinfo->ver == 40)
|
||||
*sig = v40_sig_map[packed_sig];
|
||||
@@ -240,7 +279,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
|
||||
{
|
||||
static const struct v3d_qpu_sig *map;
|
||||
|
||||
- if (devinfo->ver >= 41)
|
||||
+ if (devinfo->ver >= 71)
|
||||
+ map = v71_sig_map;
|
||||
+ else if (devinfo->ver >= 41)
|
||||
map = v41_sig_map;
|
||||
else if (devinfo->ver == 40)
|
||||
map = v40_sig_map;
|
||||
--
|
||||
2.39.2
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,45 +0,0 @@
|
||||
From 52ea09792ff8a438ccdecac47b8415657be90098 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 6 Aug 2021 01:33:32 +0200
|
||||
Subject: [PATCH 012/142] broadcom/qpu: add raddr on v3d_qpu_input
|
||||
|
||||
On V3D 7.x mux are not used, and raddr_a/b/c/d are used instead
|
||||
|
||||
This is not perfect, as for v71, the raddr_a/b defined at qpu_instr
|
||||
became superfluous. But the alternative would be to define two
|
||||
different structs, or even having them defined based on version
|
||||
ifdefs, so this is a reasonable compromise.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.h | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 53a51bfb3e1..9e56e2d6a99 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -295,7 +295,10 @@ enum v3d_qpu_mux {
|
||||
};
|
||||
|
||||
struct v3d_qpu_input {
|
||||
- enum v3d_qpu_mux mux;
|
||||
+ union {
|
||||
+ enum v3d_qpu_mux mux; /* V3D 4.x */
|
||||
+ uint8_t raddr; /* V3D 7.x */
|
||||
+ };
|
||||
enum v3d_qpu_input_unpack unpack;
|
||||
};
|
||||
|
||||
@@ -385,8 +388,8 @@ struct v3d_qpu_instr {
|
||||
struct v3d_qpu_sig sig;
|
||||
uint8_t sig_addr;
|
||||
bool sig_magic; /* If the signal writes to a magic address */
|
||||
- uint8_t raddr_a;
|
||||
- uint8_t raddr_b;
|
||||
+ uint8_t raddr_a; /* V3D 4.x */
|
||||
+ uint8_t raddr_b; /* V3D 4.x*/
|
||||
struct v3d_qpu_flags flags;
|
||||
|
||||
union {
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
From 3e5ad0881c2789619cdf65f40a44d5481e28e800 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 12 Aug 2021 02:24:02 +0200
|
||||
Subject: [PATCH 013/142] broadcom/qpu: defining shift/mask for raddr_c/d
|
||||
|
||||
On V3D 7.x raddr_c/d take the place of the mul_a/b and add_a/b fields
|
||||
---
|
||||
src/broadcom/qpu/qpu_pack.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
|
||||
index 6e975793fc0..4f106909729 100644
|
||||
--- a/src/broadcom/qpu/qpu_pack.c
|
||||
+++ b/src/broadcom/qpu/qpu_pack.c
|
||||
@@ -84,6 +84,9 @@
|
||||
#define V3D_QPU_MUL_A_SHIFT 18
|
||||
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
|
||||
|
||||
+#define V3D_QPU_RADDR_C_SHIFT 18
|
||||
+#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)
|
||||
+
|
||||
#define V3D_QPU_ADD_B_SHIFT 15
|
||||
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
|
||||
|
||||
@@ -98,6 +101,9 @@
|
||||
#define V3D_QPU_BRANCH_BDI_SHIFT 12
|
||||
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
|
||||
|
||||
+#define V3D_QPU_RADDR_D_SHIFT 12
|
||||
+#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)
|
||||
+
|
||||
#define V3D_QPU_RADDR_A_SHIFT 6
|
||||
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
From 81febf14fe05ad26e992275b911e8bc1e1416ebc Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 17 Sep 2021 01:04:31 +0200
|
||||
Subject: [PATCH 014/142] broadcom/common: add has_accumulators field on
|
||||
v3d_device_info
|
||||
|
||||
Even if we can just check for the version on the code, checking for
|
||||
this field makes several places more readable. So for example, on the
|
||||
register allocation code we don't assign an accumulator because we
|
||||
don't have accumulators on that hw, instead of because hw version is a
|
||||
given one.
|
||||
---
|
||||
src/broadcom/common/v3d_device_info.c | 2 ++
|
||||
src/broadcom/common/v3d_device_info.h | 3 +++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
|
||||
index 7512fe3a06b..7bc2b662cfc 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.c
|
||||
+++ b/src/broadcom/common/v3d_device_info.c
|
||||
@@ -65,6 +65,8 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
||||
int qups = (ident1.value >> 8) & 0xf;
|
||||
devinfo->qpu_count = nslc * qups;
|
||||
|
||||
+ devinfo->has_accumulators = devinfo->ver < 71;
|
||||
+
|
||||
switch (devinfo->ver) {
|
||||
case 33:
|
||||
case 41:
|
||||
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
|
||||
index 32cb65cf81f..8dfc7858727 100644
|
||||
--- a/src/broadcom/common/v3d_device_info.h
|
||||
+++ b/src/broadcom/common/v3d_device_info.h
|
||||
@@ -42,6 +42,9 @@ struct v3d_device_info {
|
||||
|
||||
/* NSLC * QUPS from the core's IDENT registers. */
|
||||
int qpu_count;
|
||||
+
|
||||
+ /* If the hw has accumulator registers */
|
||||
+ bool has_accumulators;
|
||||
};
|
||||
|
||||
typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg);
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
From 7d42eca87b6e144697810405308d99d200dca62a Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Wed, 15 Sep 2021 10:56:43 +0200
|
||||
Subject: [PATCH 015/142] broadcom/qpu: add qpu_writes_rf0_implicitly helper
|
||||
|
||||
On v71 rf0 replaces r5 as the register that gets updated implicitly
|
||||
with uniform loads, and gets the C coefficient with ldvary. This
|
||||
helper returns whether rf0 gets implicitly updated.
|
||||
---
|
||||
src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
|
||||
src/broadcom/qpu/qpu_instr.h | 2 ++
|
||||
2 files changed, 14 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
|
||||
index 7ece8b5e570..8de99c611d5 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.c
|
||||
+++ b/src/broadcom/qpu/qpu_instr.c
|
||||
@@ -920,6 +920,18 @@ v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
}
|
||||
|
||||
+bool
|
||||
+v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *inst)
|
||||
+{
|
||||
+ if (devinfo->ver >= 71 &&
|
||||
+ (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
bool
|
||||
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
|
||||
{
|
||||
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
|
||||
index 9e56e2d6a99..a25be8e0ee6 100644
|
||||
--- a/src/broadcom/qpu/qpu_instr.h
|
||||
+++ b/src/broadcom/qpu/qpu_instr.h
|
||||
@@ -473,6 +473,8 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
+bool v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
|
||||
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
--
|
||||
2.39.2
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,261 +0,0 @@
|
||||
From ebba9019461083687f6afd23ff0d4646c1a667cb Mon Sep 17 00:00:00 2001
|
||||
From: Iago Toral Quiroga <itoral@igalia.com>
|
||||
Date: Sun, 29 Jan 2023 00:27:11 +0100
|
||||
Subject: [PATCH 017/142] broadcom/compiler: update node/temp translation for
|
||||
v71
|
||||
|
||||
As the offset applied needs to take into account if we have
|
||||
accumulators or not.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 68 +++++++++----------
|
||||
1 file changed, 34 insertions(+), 34 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index b22f915d1df..aa9473d124b 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -39,30 +39,31 @@
|
||||
CLASS_BITS_R5)
|
||||
|
||||
static inline uint32_t
|
||||
-temp_to_node(uint32_t temp)
|
||||
+temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
- return temp + ACC_COUNT;
|
||||
+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
-node_to_temp(uint32_t node)
|
||||
+node_to_temp(struct v3d_compile *c, uint32_t node)
|
||||
{
|
||||
- assert(node >= ACC_COUNT);
|
||||
- return node - ACC_COUNT;
|
||||
+ assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
|
||||
+ (!c->devinfo->has_accumulators && node >= 0));
|
||||
+ return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
-get_temp_class_bits(struct v3d_ra_node_info *nodes,
|
||||
+get_temp_class_bits(struct v3d_compile *c,
|
||||
uint32_t temp)
|
||||
{
|
||||
- return nodes->info[temp_to_node(temp)].class_bits;
|
||||
+ return c->nodes.info[temp_to_node(c, temp)].class_bits;
|
||||
}
|
||||
|
||||
static inline void
|
||||
-set_temp_class_bits(struct v3d_ra_node_info *nodes,
|
||||
+set_temp_class_bits(struct v3d_compile *c,
|
||||
uint32_t temp, uint8_t class_bits)
|
||||
{
|
||||
- nodes->info[temp_to_node(temp)].class_bits = class_bits;
|
||||
+ c->nodes.info[temp_to_node(c, temp)].class_bits = class_bits;
|
||||
}
|
||||
|
||||
static struct ra_class *
|
||||
@@ -84,7 +85,7 @@ static inline struct ra_class *
|
||||
choose_reg_class_for_temp(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
assert(temp < c->num_temps && temp < c->nodes.alloc_count);
|
||||
- return choose_reg_class(c, get_temp_class_bits(&c->nodes, temp));
|
||||
+ return choose_reg_class(c, get_temp_class_bits(c, temp));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
@@ -313,7 +314,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
|
||||
|
||||
for (unsigned i = 0; i < c->num_temps; i++) {
|
||||
if (BITSET_TEST(c->spillable, i)) {
|
||||
- ra_set_node_spill_cost(c->g, temp_to_node(i),
|
||||
+ ra_set_node_spill_cost(c->g, temp_to_node(c, i),
|
||||
spill_costs[i]);
|
||||
}
|
||||
}
|
||||
@@ -482,7 +483,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
|
||||
c->temp_start[i] < ip && c->temp_end[i] >= ip :
|
||||
c->temp_start[i] <= ip && c->temp_end[i] > ip;
|
||||
if (thrsw_cross) {
|
||||
- ra_set_node_class(c->g, temp_to_node(i),
|
||||
+ ra_set_node_class(c->g, temp_to_node(c, i),
|
||||
choose_reg_class(c, CLASS_BITS_PHYS));
|
||||
}
|
||||
}
|
||||
@@ -509,8 +510,7 @@ v3d_emit_tmu_spill(struct v3d_compile *c,
|
||||
* same register class bits as the original.
|
||||
*/
|
||||
if (inst == position) {
|
||||
- uint8_t class_bits = get_temp_class_bits(&c->nodes,
|
||||
- inst->dst.index);
|
||||
+ uint8_t class_bits = get_temp_class_bits(c, inst->dst.index);
|
||||
inst->dst = vir_get_temp(c);
|
||||
add_node(c, inst->dst.index, class_bits);
|
||||
} else {
|
||||
@@ -574,7 +574,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
reconstruct_op = orig_def->qpu.alu.add.op;
|
||||
}
|
||||
|
||||
- uint32_t spill_node = temp_to_node(spill_temp);
|
||||
+ uint32_t spill_node = temp_to_node(c, spill_temp);
|
||||
|
||||
/* We must disable the ldunif optimization if we are spilling uniforms */
|
||||
bool had_disable_ldunif_opt = c->disable_ldunif_opt;
|
||||
@@ -739,12 +739,12 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
* update node priorities based one new liveness data.
|
||||
*/
|
||||
uint32_t sb_temp =c->spill_base.index;
|
||||
- uint32_t sb_node = temp_to_node(sb_temp);
|
||||
+ uint32_t sb_node = temp_to_node(c, sb_temp);
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_end[i] == -1)
|
||||
continue;
|
||||
|
||||
- uint32_t node_i = temp_to_node(i);
|
||||
+ uint32_t node_i = temp_to_node(c, i);
|
||||
c->nodes.info[node_i].priority =
|
||||
c->temp_end[i] - c->temp_start[i];
|
||||
|
||||
@@ -752,7 +752,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
j < c->num_temps; j++) {
|
||||
if (interferes(c->temp_start[i], c->temp_end[i],
|
||||
c->temp_start[j], c->temp_end[j])) {
|
||||
- uint32_t node_j = temp_to_node(j);
|
||||
+ uint32_t node_j = temp_to_node(c, j);
|
||||
ra_add_node_interference(c->g, node_i, node_j);
|
||||
}
|
||||
}
|
||||
@@ -958,7 +958,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
+ temp_to_node(c, i),
|
||||
acc_nodes[3]);
|
||||
}
|
||||
}
|
||||
@@ -968,7 +968,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
+ temp_to_node(c, i),
|
||||
acc_nodes[4]);
|
||||
}
|
||||
}
|
||||
@@ -987,7 +987,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* decides whether the LDVPM is in or out)
|
||||
*/
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_PHYS);
|
||||
break;
|
||||
}
|
||||
@@ -1002,7 +1002,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* phys regfile.
|
||||
*/
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_PHYS);
|
||||
break;
|
||||
}
|
||||
@@ -1024,7 +1024,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
*/
|
||||
assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
- uint32_t node = temp_to_node(inst->dst.index);
|
||||
+ uint32_t node = temp_to_node(c, inst->dst.index);
|
||||
ra_set_node_reg(c->g, node,
|
||||
PHYS_INDEX + inst->src[0].index);
|
||||
break;
|
||||
@@ -1043,9 +1043,9 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
*/
|
||||
if (!inst->qpu.sig.ldunif) {
|
||||
uint8_t class_bits =
|
||||
- get_temp_class_bits(&c->nodes, inst->dst.index) &
|
||||
+ get_temp_class_bits(c, inst->dst.index) &
|
||||
~CLASS_BITS_R5;
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
class_bits);
|
||||
|
||||
} else {
|
||||
@@ -1054,7 +1054,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* loads interfere with each other.
|
||||
*/
|
||||
if (c->devinfo->ver < 40) {
|
||||
- set_temp_class_bits(&c->nodes, inst->dst.index,
|
||||
+ set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_R5);
|
||||
}
|
||||
}
|
||||
@@ -1064,7 +1064,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
if (inst->qpu.sig.thrsw) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
- set_temp_class_bits(&c->nodes, i,
|
||||
+ set_temp_class_bits(c, i,
|
||||
CLASS_BITS_PHYS);
|
||||
}
|
||||
}
|
||||
@@ -1125,7 +1125,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->nodes.info[i].priority = 0;
|
||||
c->nodes.info[i].class_bits = 0;
|
||||
} else {
|
||||
- uint32_t t = node_to_temp(i);
|
||||
+ uint32_t t = node_to_temp(c, i);
|
||||
c->nodes.info[i].priority =
|
||||
c->temp_end[t] - c->temp_start[t];
|
||||
c->nodes.info[i].class_bits = CLASS_BITS_ANY;
|
||||
@@ -1143,7 +1143,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
- ra_set_node_class(c->g, temp_to_node(i),
|
||||
+ ra_set_node_class(c->g, temp_to_node(c, i),
|
||||
choose_reg_class_for_temp(c, i));
|
||||
}
|
||||
|
||||
@@ -1153,8 +1153,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (interferes(c->temp_start[i], c->temp_end[i],
|
||||
c->temp_start[j], c->temp_end[j])) {
|
||||
ra_add_node_interference(c->g,
|
||||
- temp_to_node(i),
|
||||
- temp_to_node(j));
|
||||
+ temp_to_node(c, i),
|
||||
+ temp_to_node(c, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1171,7 +1171,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (c->spill_size <
|
||||
V3D_CHANNELS * sizeof(uint32_t) * force_register_spills) {
|
||||
int node = v3d_choose_spill_node(c);
|
||||
- uint32_t temp = node_to_temp(node);
|
||||
+ uint32_t temp = node_to_temp(c, node);
|
||||
if (node != -1) {
|
||||
v3d_spill_reg(c, acc_nodes, temp);
|
||||
continue;
|
||||
@@ -1186,7 +1186,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
if (node == -1)
|
||||
goto spill_fail;
|
||||
|
||||
- uint32_t temp = node_to_temp(node);
|
||||
+ uint32_t temp = node_to_temp(c, node);
|
||||
enum temp_spill_type spill_type =
|
||||
get_spill_type_for_temp(c, temp);
|
||||
if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
|
||||
@@ -1201,7 +1201,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
/* Allocation was successful, build the 'temp -> reg' map */
|
||||
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
- int ra_reg = ra_get_node_reg(c->g, temp_to_node(i));
|
||||
+ int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
|
||||
if (ra_reg < PHYS_INDEX) {
|
||||
temp_registers[i].magic = true;
|
||||
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
From 9b2dfe0286212aba3687a06023cc5b4ce9944ee0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Mon, 23 Aug 2021 02:18:43 +0200
|
||||
Subject: [PATCH 018/142] broadcom/compiler: phys index depends on hw version
|
||||
|
||||
For 7.1 there are no accumulators, so we replace the macro with a
|
||||
function call.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++-----
|
||||
1 file changed, 29 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index aa9473d124b..a358b616e13 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -28,9 +28,19 @@
|
||||
|
||||
#define ACC_INDEX 0
|
||||
#define ACC_COUNT 6
|
||||
-#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
|
||||
-#define PHYS_COUNT 64
|
||||
|
||||
+#define PHYS_COUNT 64
|
||||
+
|
||||
+static uint8_t
|
||||
+get_phys_index(const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->has_accumulators)
|
||||
+ return ACC_INDEX + ACC_COUNT;
|
||||
+ else
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* ACC as accumulator */
|
||||
#define CLASS_BITS_PHYS (1 << 0)
|
||||
#define CLASS_BITS_ACC (1 << 1)
|
||||
#define CLASS_BITS_R5 (1 << 4)
|
||||
@@ -771,9 +781,11 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
}
|
||||
|
||||
struct v3d_ra_select_callback_data {
|
||||
+ uint32_t phys_index;
|
||||
uint32_t next_acc;
|
||||
uint32_t next_phys;
|
||||
struct v3d_ra_node_info *nodes;
|
||||
+ const struct v3d_device_info *devinfo;
|
||||
};
|
||||
|
||||
/* Choosing accumulators improves chances of merging QPU instructions
|
||||
@@ -794,7 +806,7 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
static const int available_rf_threshold = 5;
|
||||
int available_rf = 0 ;
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
- if (BITSET_TEST(regs, PHYS_INDEX + i))
|
||||
+ if (BITSET_TEST(regs, v3d_ra->phys_index + i))
|
||||
available_rf++;
|
||||
if (available_rf >= available_rf_threshold)
|
||||
break;
|
||||
@@ -854,7 +866,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
{
|
||||
for (int i = 0; i < PHYS_COUNT; i++) {
|
||||
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
|
||||
- int phys = PHYS_INDEX + phys_off;
|
||||
+ int phys = v3d_ra->phys_index + phys_off;
|
||||
|
||||
if (BITSET_TEST(regs, phys)) {
|
||||
v3d_ra->next_phys = phys_off + 1;
|
||||
@@ -896,8 +908,9 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
* register file can be divided up for fragment shader threading.
|
||||
*/
|
||||
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
|
||||
+ uint8_t phys_index = get_phys_index(compiler->devinfo);
|
||||
|
||||
- compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
|
||||
+ compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
|
||||
false);
|
||||
if (!compiler->regs)
|
||||
return false;
|
||||
@@ -912,8 +925,8 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
compiler->reg_class_phys[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
|
||||
- for (int i = PHYS_INDEX;
|
||||
- i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
|
||||
+ for (int i = phys_index;
|
||||
+ i < phys_index + (PHYS_COUNT >> threads); i++) {
|
||||
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_phys[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
@@ -1026,7 +1039,8 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
uint32_t node = temp_to_node(c, inst->dst.index);
|
||||
ra_set_node_reg(c->g, node,
|
||||
- PHYS_INDEX + inst->src[0].index);
|
||||
+ get_phys_index(c->devinfo) +
|
||||
+ inst->src[0].index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1086,13 +1100,17 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->num_temps + ACC_COUNT),
|
||||
};
|
||||
|
||||
+ uint32_t phys_index = get_phys_index(c->devinfo);
|
||||
+
|
||||
struct v3d_ra_select_callback_data callback_data = {
|
||||
+ .phys_index = phys_index,
|
||||
.next_acc = 0,
|
||||
/* Start at RF3, to try to keep the TLB writes from using
|
||||
* RF0-2.
|
||||
*/
|
||||
.next_phys = 3,
|
||||
.nodes = &c->nodes,
|
||||
+ .devinfo = c->devinfo,
|
||||
};
|
||||
|
||||
vir_calculate_live_intervals(c);
|
||||
@@ -1139,6 +1157,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
vir_for_each_inst_inorder(inst, c) {
|
||||
inst->ip = ip++;
|
||||
update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
|
||||
+
|
||||
}
|
||||
|
||||
/* Set the register classes for all our temporaries in the graph */
|
||||
@@ -1202,13 +1221,13 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
|
||||
for (uint32_t i = 0; i < c->num_temps; i++) {
|
||||
int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
|
||||
- if (ra_reg < PHYS_INDEX) {
|
||||
+ if (ra_reg < phys_index) {
|
||||
temp_registers[i].magic = true;
|
||||
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
|
||||
ra_reg - ACC_INDEX);
|
||||
} else {
|
||||
temp_registers[i].magic = false;
|
||||
- temp_registers[i].index = ra_reg - PHYS_INDEX;
|
||||
+ temp_registers[i].index = ra_reg - phys_index;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
From da0a3deadf86a46c8323267d3f6a49e442835608 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Fri, 17 Sep 2021 01:07:06 +0200
|
||||
Subject: [PATCH 019/142] broadcom/compiler: don't favor/select accum registers
|
||||
for hw not supporting it
|
||||
|
||||
Note that what we do is to just return false on the favor/select accum
|
||||
methods. We could just avoid calling them, but as the select is called
|
||||
more than once, it is just easier this way.
|
||||
---
|
||||
src/broadcom/compiler/vir_register_allocate.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
|
||||
index a358b616e13..1f495180784 100644
|
||||
--- a/src/broadcom/compiler/vir_register_allocate.c
|
||||
+++ b/src/broadcom/compiler/vir_register_allocate.c
|
||||
@@ -797,6 +797,9 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
BITSET_WORD *regs,
|
||||
int priority)
|
||||
{
|
||||
+ if (!v3d_ra->devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
/* Favor accumulators if we have less that this number of physical
|
||||
* registers. Accumulators have more restrictions (like being
|
||||
* invalidated through thrsw), so running out of physical registers
|
||||
@@ -832,6 +835,9 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
|
||||
BITSET_WORD *regs,
|
||||
unsigned int *out)
|
||||
{
|
||||
+ if (!v3d_ra->devinfo->has_accumulators)
|
||||
+ return false;
|
||||
+
|
||||
/* Choose r5 for our ldunifs if possible (nobody else can load to that
|
||||
* reg, and it keeps the QPU cond field free from being occupied by
|
||||
* ldunifrf).
|
||||
--
|
||||
2.39.2
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
From 6c04d7c917da6b38f8b2b4306ab03ed2ab7e6ce0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
|
||||
Date: Thu, 9 Sep 2021 00:28:53 +0200
|
||||
Subject: [PATCH 020/142] broadcom/vir: implement is_no_op_mov for v71
|
||||
|
||||
Did some refactoring/splitting.
|
||||
---
|
||||
src/broadcom/compiler/vir_to_qpu.c | 66 ++++++++++++++++++++++++------
|
||||
1 file changed, 53 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
|
||||
index c8b6e0a91a0..08970d52954 100644
|
||||
--- a/src/broadcom/compiler/vir_to_qpu.c
|
||||
+++ b/src/broadcom/compiler/vir_to_qpu.c
|
||||
@@ -129,19 +129,8 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
}
|
||||
|
||||
static bool
|
||||
-is_no_op_mov(struct qinst *qinst)
|
||||
+v3d33_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
{
|
||||
- static const struct v3d_qpu_sig no_sig = {0};
|
||||
-
|
||||
- /* Make sure it's just a lone MOV. */
|
||||
- if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
|
||||
- qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
|
||||
- qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
|
||||
- memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
- /* Check if it's a MOV from a register to itself. */
|
||||
enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
|
||||
if (qinst->qpu.alu.mul.magic_write) {
|
||||
if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
|
||||
@@ -168,6 +157,57 @@ is_no_op_mov(struct qinst *qinst)
|
||||
return false;
|
||||
}
|
||||
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+v3d71_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
+{
|
||||
+ if (qinst->qpu.alu.mul.magic_write)
|
||||
+ return false;
|
||||
+
|
||||
+ enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
|
||||
+ int raddr;
|
||||
+
|
||||
+ raddr = qinst->qpu.alu.mul.a.raddr;
|
||||
+ if (raddr != waddr)
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+mov_src_and_dst_equal(struct qinst *qinst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ if (devinfo->ver < 71)
|
||||
+ return v3d33_mov_src_and_dst_equal(qinst);
|
||||
+ else
|
||||
+ return v3d71_mov_src_and_dst_equal(qinst);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+static bool
|
||||
+is_no_op_mov(struct qinst *qinst,
|
||||
+ const struct v3d_device_info *devinfo)
|
||||
+{
|
||||
+ static const struct v3d_qpu_sig no_sig = {0};
|
||||
+
|
||||
+ /* Make sure it's just a lone MOV. We only check for M_MOV. Although
|
||||
+ * for V3D 7.x there is also A_MOV, we don't need to check for it as
|
||||
+ * we always emit using M_MOV. We could use A_MOV later on the
|
||||
+ * schedule to improve performance
|
||||
+ */
|
||||
+ if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
|
||||
+ qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
|
||||
+ qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
|
||||
+ memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (!mov_src_and_dst_equal(qinst, devinfo))
|
||||
+ return false;
|
||||
+
|
||||
/* No packing or flags updates, or we need to execute the
|
||||
* instruction.
|
||||
*/
|
||||
@@ -324,7 +364,7 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
qinst->qpu.alu.mul.waddr = dst.index;
|
||||
qinst->qpu.alu.mul.magic_write = dst.magic;
|
||||
|
||||
- if (is_no_op_mov(qinst)) {
|
||||
+ if (is_no_op_mov(qinst, c->devinfo)) {
|
||||
vir_remove_instruction(c, qinst);
|
||||
continue;
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user