mesa (RPi5): drop upstream patches in 23.3.0-rc1

This commit is contained in:
Rudi Heitbaum
2023-10-27 13:24:32 +00:00
parent b15bc650f2
commit 4e2a40b1d2
138 changed files with 0 additions and 17791 deletions

View File

@@ -1,332 +0,0 @@
From f62aa2640f92796ff5216da0a5d3c8f46a2855b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Mon, 26 Apr 2021 00:02:21 +0200
Subject: [PATCH 001/142] broadcom(cle,clif,common,simulator): add 7.1 version
on the list of versions to build
This adds 7.1 to the list of available V3D_VERSION, and first changes
on the simulator needed to get it working.
Note that we needed to touch all four of these codebases because that
is required in order to use V3D_DEBUG=clif with the simulator, which is
the easiest way to see which packets a Vulkan program is using.
About the simulator, this commit only handles the renaming of some
registers. Any additional changes needed to get proper support for
v71 will be handled in following commits.
---
src/broadcom/cle/meson.build | 3 +-
src/broadcom/cle/v3dx_pack.h | 2 +
src/broadcom/clif/clif_private.h | 2 +
src/broadcom/common/v3d_device_info.c | 1 +
src/broadcom/common/v3d_macros.h | 3 +
src/broadcom/meson.build | 2 +-
src/broadcom/simulator/v3d_simulator.c | 81 +++++++++++++++++++------
src/broadcom/simulator/v3d_simulator.h | 5 ++
src/broadcom/simulator/v3dx_simulator.c | 31 ++++++++--
9 files changed, 106 insertions(+), 24 deletions(-)
diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build
index 31a0d5bfa94..8ac32b313e4 100644
--- a/src/broadcom/cle/meson.build
+++ b/src/broadcom/cle/meson.build
@@ -23,7 +23,8 @@ v3d_versions = [
[21, 21],
[33, 33],
[41, 33],
- [42, 33]
+ [42, 33],
+ [71, 33]
]
v3d_xml_files = []
diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h
index 5762e5aaa70..e5a1eb26698 100644
--- a/src/broadcom/cle/v3dx_pack.h
+++ b/src/broadcom/cle/v3dx_pack.h
@@ -37,6 +37,8 @@
# include "cle/v3d_packet_v41_pack.h"
#elif (V3D_VERSION == 42)
# include "cle/v3d_packet_v42_pack.h"
+#elif (V3D_VERSION == 71)
+# include "cle/v3d_packet_v71_pack.h"
#else
# error "Need to add a pack header include for this v3d version"
#endif
diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h
index 6ace62b0310..cda407a00bf 100644
--- a/src/broadcom/clif/clif_private.h
+++ b/src/broadcom/clif/clif_private.h
@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
const uint8_t *cl, uint32_t *size, bool reloc_mode);
bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
const uint8_t *cl, uint32_t *size, bool reloc_mode);
+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
+ const uint8_t *cl, uint32_t *size, bool reloc_mode);
static inline void
out(struct clif_dump *clif, const char *fmt, ...)
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
index 272190eb2e5..7e0862f1f02 100644
--- a/src/broadcom/common/v3d_device_info.c
+++ b/src/broadcom/common/v3d_device_info.c
@@ -66,6 +66,7 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
case 33:
case 41:
case 42:
+ case 71:
break;
default:
fprintf(stderr,
diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h
index fe89398208a..b4291fb5350 100644
--- a/src/broadcom/common/v3d_macros.h
+++ b/src/broadcom/common/v3d_macros.h
@@ -41,6 +41,9 @@
#elif (V3D_VERSION == 42)
# define V3DX(x) V3D42_##x
# define v3dX(x) v3d42_##x
+#elif (V3D_VERSION == 71)
+# define V3DX(x) V3D71_##x
+# define v3dX(x) v3d71_##x
#else
# error "Need to add prefixing macros for this v3d version"
#endif
diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build
index 2c10e46b188..73cb7aa0575 100644
--- a/src/broadcom/meson.build
+++ b/src/broadcom/meson.build
@@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')
subdir('cle')
-v3d_versions = ['33', '41', '42']
+v3d_versions = ['33', '41', '42', '71']
v3d_libs = []
if with_gallium_v3d or with_broadcom_vk
diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c
index eea5d3f050e..5cceb1a82cc 100644
--- a/src/broadcom/simulator/v3d_simulator.c
+++ b/src/broadcom/simulator/v3d_simulator.c
@@ -490,10 +490,20 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
- if (sim_state.ver >= 41)
- v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
- else
- v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
+ switch(sim_state.ver) {
+ case 33:
+ v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
+ break;
+ case 41:
+ case 42:
+ v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
+ break;
+ case 71:
+ v3d71_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
+ break;
+ default:
+ unreachable("Unsupported V3D version\n");
+ }
util_dynarray_foreach(&sim_state.bin_oom, struct v3d_simulator_bo *,
sim_bo) {
@@ -635,10 +645,17 @@ v3d_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args)
static int
v3d_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args)
{
- if (sim_state.ver >= 41)
- return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
- else
+ switch(sim_state.ver) {
+ case 33:
return v3d33_simulator_get_param_ioctl(sim_state.v3d, args);
+ case 41:
+ case 42:
+ return v3d41_simulator_get_param_ioctl(sim_state.v3d, args);
+ case 71:
+ return v3d71_simulator_get_param_ioctl(sim_state.v3d, args);
+ default:
+ unreachable("Unsupported V3D version\n");
+ }
}
static int
@@ -652,10 +669,20 @@ v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args)
v3d_simulator_copy_in_handle(file, args->bo_handles[2]);
v3d_simulator_copy_in_handle(file, args->bo_handles[3]);
- if (sim_state.ver >= 41)
- ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
- else
+ switch(sim_state.ver) {
+ case 33:
ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args);
+ break;
+ case 41:
+ case 42:
+ ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args);
+ break;
+ case 71:
+ ret = v3d71_simulator_submit_tfu_ioctl(sim_state.v3d, args);
+ break;
+ default:
+ unreachable("Unsupported V3D version\n");
+ }
v3d_simulator_copy_out_handle(file, args->bo_handles[0]);
@@ -682,11 +709,19 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
v3d_simulator_perfmon_switch(fd, args->perfmon_id);
- if (sim_state.ver >= 41)
- ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
- file->gmp->ofs);
- else
- ret = -1;
+ switch(sim_state.ver) {
+ case 41:
+ case 42:
+ ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
+ file->gmp->ofs);
+ break;
+ case 71:
+ ret = v3d71_simulator_submit_csd_ioctl(sim_state.v3d, args,
+ file->gmp->ofs);
+ break;
+ default:
+ ret = -1;
+ }
for (int i = 0; i < args->bo_handle_count; i++)
v3d_simulator_copy_out_handle(file, bo_handles[i]);
@@ -880,10 +915,20 @@ v3d_simulator_init_global()
util_dynarray_init(&sim_state.bin_oom, NULL);
- if (sim_state.ver >= 41)
- v3d41_simulator_init_regs(sim_state.v3d);
- else
+ switch(sim_state.ver) {
+ case 33:
v3d33_simulator_init_regs(sim_state.v3d);
+ break;
+ case 41:
+ case 42:
+ v3d41_simulator_init_regs(sim_state.v3d);
+ break;
+ case 71:
+ v3d71_simulator_init_regs(sim_state.v3d);
+ break;
+ default:
+ unreachable("Not supported V3D version\n");
+ }
}
struct v3d_simulator_file *
diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h
index ddb079c1455..1472c313a03 100644
--- a/src/broadcom/simulator/v3d_simulator.h
+++ b/src/broadcom/simulator/v3d_simulator.h
@@ -52,6 +52,11 @@ uint32_t v3d_simulator_get_mem_free(void);
# define v3dX(x) v3d41_##x
# include "v3dx_simulator.h"
# undef v3dX
+
+# define v3dX(x) v3d71_##x
+# include "v3dx_simulator.h"
+# undef v3dX
+
#endif
#endif
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
index c9322f0397b..723796b16c9 100644
--- a/src/broadcom/simulator/v3dx_simulator.c
+++ b/src/broadcom/simulator/v3dx_simulator.c
@@ -46,11 +46,15 @@
#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
-#if V3D_VERSION >= 41
+#if V3D_VERSION == 71
+#include "libs/core/v3d/registers/7.1.5.1/v3d.h"
+#else
+#if V3D_VERSION == 41 || V3D_VERSION == 42
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif
+#endif
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
@@ -310,16 +314,17 @@ v3d_isr_core(struct v3d_hw *v3d,
return;
}
+#if V3D_VERSION <= 42
if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
fprintf(stderr, "GMP violation at 0x%08x\n",
V3D_READ(V3D_GMP_VIO_ADDR));
- abort();
} else {
fprintf(stderr,
"Unexpected ISR with core status 0x%08x\n",
core_status);
}
abort();
+#endif
}
static void
@@ -396,6 +401,18 @@ v3d_isr_hub(struct v3d_hw *v3d)
}
handle_mmu_interruptions(v3d, hub_status);
+
+#if V3D_VERSION == 71
+ if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
+ fprintf(stderr, "GMP violation at 0x%08x\n",
+ V3D_READ(V3D_GMP_VIO_ADDR));
+ } else {
+ fprintf(stderr,
+ "Unexpected ISR with status 0x%08x\n",
+ hub_status);
+ }
+ abort();
+#endif
}
static void
@@ -436,8 +453,11 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
* for tracing. Perhaps we should evaluate to do the same here and add
* some debug options.
*/
- uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
- V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
+ uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
+#if V3D_VERSION <= 42
+ core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
+#endif
+
V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
@@ -447,6 +467,9 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
+#if V3D_VERSION == 71
+ hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
+#endif
V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
--
2.39.2

View File

@@ -1,30 +0,0 @@
From 9e85edd1b347b0e779b393f463f42044a720bcff Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 28 Sep 2021 13:16:49 +0200
Subject: [PATCH 002/142] broadcom/simulator: reset CFG7 for compute dispatch
in v71
This register is new in 7.x, it doesn't seem that we need to
do anything specific for now, but let's make sure it is reset
every time.
---
src/broadcom/simulator/v3dx_simulator.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
index 723796b16c9..f23b0538de3 100644
--- a/src/broadcom/simulator/v3dx_simulator.c
+++ b/src/broadcom/simulator/v3dx_simulator.c
@@ -227,6 +227,9 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
+#if V3D_VERSION >= 71
+ V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
+#endif
/* CFG0 kicks off the job */
V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
--
2.39.2

View File

@@ -1,65 +0,0 @@
From 569cbe4229df737ce5915c4be2cad534707fb4f7 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 9 Nov 2021 08:50:51 +0100
Subject: [PATCH 004/142] broadcom/common: retrieve V3D revision number
The subrev field from the hub ident3 register is bumped with every
hardware revision doing backwards incompatible changes so we want to
keep track of this.
Instead of modifying the 'ver' field info to accommodate subrev info,
which would require a lot of changes, simply add a new 'rev' field in
devinfo that we can use when we need to make changes based on the
revision number of a hardware release.
---
src/broadcom/common/v3d_device_info.c | 14 +++++++++++++-
src/broadcom/common/v3d_device_info.h | 3 +++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
index 7e0862f1f02..7512fe3a06b 100644
--- a/src/broadcom/common/v3d_device_info.c
+++ b/src/broadcom/common/v3d_device_info.c
@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
struct drm_v3d_get_param ident1 = {
.param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
};
+ struct drm_v3d_get_param hub_ident3 = {
+ .param = DRM_V3D_PARAM_V3D_HUB_IDENT3,
+ };
int ret;
ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
@@ -76,5 +79,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
return false;
}
- return true;
+ ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ devinfo->rev = (hub_ident3.value >> 8) & 0xff;
+
+ return true;
}
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
index 97abd9b8d9f..32cb65cf81f 100644
--- a/src/broadcom/common/v3d_device_info.h
+++ b/src/broadcom/common/v3d_device_info.h
@@ -34,6 +34,9 @@ struct v3d_device_info {
/** Simple V3D version: major * 10 + minor */
uint8_t ver;
+ /** V3D revision number */
+ uint8_t rev;
+
/** Size of the VPM, in bytes. */
int vpm_size;
--
2.39.2

View File

@@ -1,91 +0,0 @@
From c260843c882d25bd31e308566b45d4517fda0fa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Wed, 17 Nov 2021 14:40:47 +0100
Subject: [PATCH 005/142] broadcom/common: add some common v71 helpers
---
src/broadcom/common/v3d_util.c | 27 +++++++++++++++++++++++++++
src/broadcom/common/v3d_util.h | 27 +++++++++++++++++++++++++++
2 files changed, 54 insertions(+)
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
index 57872a923d3..26f5c6b336f 100644
--- a/src/broadcom/common/v3d_util.c
+++ b/src/broadcom/common/v3d_util.c
@@ -170,3 +170,30 @@ v3d_hw_prim_type(enum mesa_prim prim_type)
unreachable("Unsupported primitive type");
}
}
+
+uint32_t
+v3d_internal_bpp_words(uint32_t internal_bpp)
+{
+ switch (internal_bpp) {
+ case 0 /* V3D_INTERNAL_BPP_32 */:
+ return 1;
+ case 1 /* V3D_INTERNAL_BPP_64 */:
+ return 2;
+ case 2 /* V3D_INTERNAL_BPP_128 */:
+ return 4;
+ default:
+ unreachable("Unsupported internal BPP");
+ }
+}
+
+uint32_t
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
+ uint32_t bpp)
+{
+ /* stride in multiples of 128 bits, and covers 2 rows. This is the
+ * reason we divide by 2 instead of 4, as we divide number of 32-bit
+ * words per row by 2.
+ */
+
+ return (tile_width * bpp) / 2;
+}
diff --git a/src/broadcom/common/v3d_util.h b/src/broadcom/common/v3d_util.h
index eb802b77f67..864fc949ffa 100644
--- a/src/broadcom/common/v3d_util.h
+++ b/src/broadcom/common/v3d_util.h
@@ -24,6 +24,7 @@
#ifndef V3D_UTIL_H
#define V3D_UTIL_H
+#include "util/macros.h"
#include "common/v3d_device_info.h"
#include "pipe/p_defines.h"
@@ -46,4 +47,30 @@ v3d_translate_pipe_swizzle(enum pipe_swizzle swizzle);
uint32_t
v3d_hw_prim_type(enum mesa_prim prim_type);
+uint32_t
+v3d_internal_bpp_words(uint32_t internal_bpp);
+
+/* Some configuration packets want the size on log2, but starting at 0 for
+ * size 8.
+ */
+static inline uint8_t
+log2_tile_size(uint32_t size)
+{
+ switch(size) {
+ case 8:
+ return 0;
+ case 16:
+ return 1;
+ case 32:
+ return 2;
+ case 64:
+ return 3;
+ default:
+ unreachable("Unsupported tile width/height");
+ }
+}
+
+uint32_t
+v3d_compute_rt_row_row_stride_128_bits(uint32_t tile_width,
+ uint32_t bpp);
#endif
--
2.39.2

View File

@@ -1,53 +0,0 @@
From a5211a4d71acc53183d2a90eb1694d8cce6eb44f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 5 Aug 2021 01:03:11 +0200
Subject: [PATCH 006/142] broadcom/qpu: add comments on waddr not used on V3D
7.x
---
src/broadcom/qpu/qpu_instr.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 2e133472698..45a0cad9760 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -88,11 +88,11 @@ enum v3d_qpu_uf {
};
enum v3d_qpu_waddr {
- V3D_QPU_WADDR_R0 = 0,
- V3D_QPU_WADDR_R1 = 1,
- V3D_QPU_WADDR_R2 = 2,
- V3D_QPU_WADDR_R3 = 3,
- V3D_QPU_WADDR_R4 = 4,
+ V3D_QPU_WADDR_R0 = 0, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_R1 = 1, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
V3D_QPU_WADDR_R5 = 5,
V3D_QPU_WADDR_NOP = 6,
V3D_QPU_WADDR_TLB = 7,
@@ -108,12 +108,12 @@ enum v3d_qpu_waddr {
V3D_QPU_WADDR_SYNC = 16,
V3D_QPU_WADDR_SYNCU = 17,
V3D_QPU_WADDR_SYNCB = 18,
- V3D_QPU_WADDR_RECIP = 19,
- V3D_QPU_WADDR_RSQRT = 20,
- V3D_QPU_WADDR_EXP = 21,
- V3D_QPU_WADDR_LOG = 22,
- V3D_QPU_WADDR_SIN = 23,
- V3D_QPU_WADDR_RSQRT2 = 24,
+ V3D_QPU_WADDR_RECIP = 19, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_RSQRT = 20, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_EXP = 21, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_LOG = 22, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_SIN = 23, /* Reserved on V3D 7.x */
+ V3D_QPU_WADDR_RSQRT2 = 24, /* Reserved on V3D 7.x */
V3D_QPU_WADDR_TMUC = 32,
V3D_QPU_WADDR_TMUS = 33,
V3D_QPU_WADDR_TMUT = 34,
--
2.39.2

View File

@@ -1,60 +0,0 @@
From 0ccf3043e4a584e5592bb7fad737d5d98ed23db0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 5 Aug 2021 01:00:47 +0200
Subject: [PATCH 007/142] broadcom/qpu: set V3D 7.x names for some waddr
aliasing
V3D 7.x got rid of the accumulator, but still uses the values for
WADDR_R5 and WADDR_R5REP, so let's return a proper name and add some
aliases.
---
src/broadcom/qpu/qpu_instr.c | 8 ++++++++
src/broadcom/qpu/qpu_instr.h | 6 ++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 60dabf74e8e..7759fb0efdf 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -35,6 +35,14 @@ v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
return "tmu";
+ /* V3D 7.x QUAD and REP aliases R5 and R5REP in the table below
+ */
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
+ return "quad";
+
+ if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
+ return "rep";
+
static const char *waddr_magic[] = {
[V3D_QPU_WADDR_R0] = "r0",
[V3D_QPU_WADDR_R1] = "r1",
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 45a0cad9760..19bf721dbe1 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -93,7 +93,8 @@ enum v3d_qpu_waddr {
V3D_QPU_WADDR_R2 = 2, /* Reserved on V3D 7.x */
V3D_QPU_WADDR_R3 = 3, /* Reserved on V3D 7.x */
V3D_QPU_WADDR_R4 = 4, /* Reserved on V3D 7.x */
- V3D_QPU_WADDR_R5 = 5,
+ V3D_QPU_WADDR_R5 = 5, /* V3D 4.x */
+ V3D_QPU_WADDR_QUAD = 5, /* V3D 7.x */
V3D_QPU_WADDR_NOP = 6,
V3D_QPU_WADDR_TLB = 7,
V3D_QPU_WADDR_TLBU = 8,
@@ -129,7 +130,8 @@ enum v3d_qpu_waddr {
V3D_QPU_WADDR_TMUHSCM = 44,
V3D_QPU_WADDR_TMUHSF = 45,
V3D_QPU_WADDR_TMUHSLOD = 46,
- V3D_QPU_WADDR_R5REP = 55,
+ V3D_QPU_WADDR_R5REP = 55, /* V3D 4.x */
+ V3D_QPU_WADDR_REP = 55, /* V3D 7.x */
};
struct v3d_qpu_flags {
--
2.39.2

View File

@@ -1,241 +0,0 @@
From 18de3cc85cf8bbe294e044f7a12abe14e554de0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Sun, 19 Sep 2021 03:20:18 +0200
Subject: [PATCH 008/142] broadcom/compiler: rename small_imm to small_imm_b
Current small_imm is associated with the "B" read address.
We do this change in advance for v71 support, where we will have 4
different small_imm (a/b/c/d), so we start with a renaming.
---
src/broadcom/compiler/qpu_schedule.c | 22 +++++++++----------
.../compiler/vir_opt_small_immediates.c | 4 ++--
src/broadcom/compiler/vir_to_qpu.c | 2 +-
src/broadcom/qpu/qpu_disasm.c | 2 +-
src/broadcom/qpu/qpu_instr.h | 2 +-
src/broadcom/qpu/qpu_pack.c | 22 +++++++++----------
6 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index 3b32b48f86f..a10fa03ed10 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -160,7 +160,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
break;
case V3D_QPU_MUX_B:
- if (!n->inst->qpu.sig.small_imm) {
+ if (!n->inst->qpu.sig.small_imm_b) {
add_read_dep(state,
state->last_rf[n->inst->qpu.raddr_b], n);
}
@@ -615,7 +615,7 @@ qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
return true;
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
- !inst->sig.small_imm && (inst->raddr_b == waddr))
+ !inst->sig.small_imm_b && (inst->raddr_b == waddr))
return true;
return false;
@@ -790,11 +790,11 @@ qpu_raddrs_used(const struct v3d_qpu_instr *a,
uint64_t raddrs_used = 0;
if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
raddrs_used |= (1ll << a->raddr_a);
- if (!a->sig.small_imm && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
+ if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
raddrs_used |= (1ll << a->raddr_b);
if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
raddrs_used |= (1ll << b->raddr_a);
- if (!b->sig.small_imm && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
+ if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
raddrs_used |= (1ll << b->raddr_b);
return raddrs_used;
@@ -816,16 +816,16 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
if (naddrs > 2)
return false;
- if ((add_instr->sig.small_imm || mul_instr->sig.small_imm)) {
+ if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
if (naddrs > 1)
return false;
- if (add_instr->sig.small_imm && mul_instr->sig.small_imm)
+ if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
if (add_instr->raddr_b != mul_instr->raddr_b)
return false;
- result->sig.small_imm = true;
- result->raddr_b = add_instr->sig.small_imm ?
+ result->sig.small_imm_b = true;
+ result->raddr_b = add_instr->sig.small_imm_b ?
add_instr->raddr_b : mul_instr->raddr_b;
}
@@ -836,7 +836,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
raddrs_used &= ~(1ll << raddr_a);
result->raddr_a = raddr_a;
- if (!result->sig.small_imm) {
+ if (!result->sig.small_imm_b) {
if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
raddr_a == add_instr->raddr_b) {
if (add_instr->alu.add.a == V3D_QPU_MUX_B)
@@ -1025,7 +1025,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.ldtmu |= b->sig.ldtmu;
merge.sig.ldvary |= b->sig.ldvary;
merge.sig.ldvpm |= b->sig.ldvpm;
- merge.sig.small_imm |= b->sig.small_imm;
+ merge.sig.small_imm_b |= b->sig.small_imm_b;
merge.sig.ldtlb |= b->sig.ldtlb;
merge.sig.ldtlbu |= b->sig.ldtlbu;
merge.sig.ucb |= b->sig.ucb;
@@ -1614,7 +1614,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
return false;
if (inst->raddr_b < 3 &&
- !inst->sig.small_imm &&
+ !inst->sig.small_imm_b &&
v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
return false;
}
diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c
index 47d7722968d..df0d6c36c9b 100644
--- a/src/broadcom/compiler/vir_opt_small_immediates.c
+++ b/src/broadcom/compiler/vir_opt_small_immediates.c
@@ -80,7 +80,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
*/
struct v3d_qpu_sig new_sig = inst->qpu.sig;
uint32_t sig_packed;
- new_sig.small_imm = true;
+ new_sig.small_imm_b = true;
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig_packed))
continue;
@@ -89,7 +89,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
vir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
- inst->qpu.sig.small_imm = true;
+ inst->qpu.sig.small_imm_b = true;
inst->qpu.raddr_b = packed;
inst->src[i].file = QFILE_SMALL_IMM;
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
index 45e6bfa1470..15c2e3674c2 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -94,7 +94,7 @@ static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
if (src.smimm) {
- assert(instr->sig.small_imm);
+ assert(instr->sig.small_imm_b);
*mux = V3D_QPU_MUX_B;
return;
}
diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c
index 28fb2357b97..6aca3c28e78 100644
--- a/src/broadcom/qpu/qpu_disasm.c
+++ b/src/broadcom/qpu/qpu_disasm.c
@@ -62,7 +62,7 @@ v3d_qpu_disasm_raddr(struct disasm_state *disasm,
if (mux == V3D_QPU_MUX_A) {
append(disasm, "rf%d", instr->raddr_a);
} else if (mux == V3D_QPU_MUX_B) {
- if (instr->sig.small_imm) {
+ if (instr->sig.small_imm_b) {
uint32_t val;
ASSERTED bool ok =
v3d_qpu_small_imm_unpack(disasm->devinfo,
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 19bf721dbe1..9cd831863b4 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -50,7 +50,7 @@ struct v3d_qpu_sig {
bool ldvpm:1;
bool ldtlb:1;
bool ldtlbu:1;
- bool small_imm:1;
+ bool small_imm_b:1;
bool ucb:1;
bool rotate:1;
bool wrtmuc:1;
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index a875683c6f8..beac591d3c1 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -112,7 +112,7 @@
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
-#define SMIMM .small_imm = true
+#define SMIMM_B .small_imm_b = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
@@ -135,8 +135,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[11] = { THRSW, LDVARY, LDUNIF },
[12] = { LDVARY, LDTMU, },
[13] = { THRSW, LDVARY, LDTMU, },
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY, },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
/* 18-21 reserved */
@@ -148,8 +148,8 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[27] = { THRSW, LDVPM, LDUNIF },
[28] = { LDVPM, LDTMU, },
[29] = { THRSW, LDVPM, LDTMU, },
- [30] = { SMIMM, LDVPM, },
- [31] = { SMIMM, },
+ [30] = { SMIMM_B, LDVPM, },
+ [31] = { SMIMM_B, },
};
static const struct v3d_qpu_sig v40_sig_map[] = {
@@ -167,8 +167,8 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
[10] = { LDVARY, LDUNIF },
[11] = { THRSW, LDVARY, LDUNIF },
/* 12-13 reserved */
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY, },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
@@ -178,7 +178,7 @@ static const struct v3d_qpu_sig v40_sig_map[] = {
[22] = { UCB, },
[23] = { ROT, },
/* 24-30 reserved */
- [31] = { SMIMM, LDTMU, },
+ [31] = { SMIMM_B, LDTMU, },
};
static const struct v3d_qpu_sig v41_sig_map[] = {
@@ -197,8 +197,8 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[11] = { THRSW, LDVARY, LDUNIF },
[12] = { LDUNIFRF },
[13] = { THRSW, LDUNIFRF },
- [14] = { SMIMM, LDVARY, },
- [15] = { SMIMM, },
+ [14] = { SMIMM_B, LDVARY },
+ [15] = { SMIMM_B, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
@@ -210,7 +210,7 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[24] = { LDUNIFA},
[25] = { LDUNIFARF },
/* 26-30 reserved */
- [31] = { SMIMM, LDTMU, },
+ [31] = { SMIMM_B, LDTMU, },
};
bool
--
2.39.2

View File

@@ -1,53 +0,0 @@
From 0e87405fe73694c173b7ce14c3d60611f241922c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 5 Aug 2021 00:50:12 +0200
Subject: [PATCH 009/142] broadcom/compiler: add small_imm a/c/d on v3d_qpu_sig
small_imm_a, small_imm_c and small_imm_d added on top of the already
existing small_imm_b, as V3D 7.1 defines 4 small immediates, tied to
the 4 raddr. Note that this only adds the definition, plus an instruction
validation rule to check that they are not used before v71. Any real use is
still pending.
---
src/broadcom/compiler/qpu_validate.c | 5 +++++
src/broadcom/qpu/qpu_instr.h | 5 ++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c
index 2cc7a0eb0ae..12788692432 100644
--- a/src/broadcom/compiler/qpu_validate.c
+++ b/src/broadcom/compiler/qpu_validate.c
@@ -115,6 +115,11 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
return;
+ if (devinfo->ver < 71) {
+ if (inst->sig.small_imm_a || inst->sig.small_imm_c || inst->sig.small_imm_d)
+ fail_instr(state, "small imm a/c/d added after V3D 7.1");
+ }
+
/* LDVARY writes r5 two instructions later and LDUNIF writes
* r5 one instruction later, which is illegal to have
* together.
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 9cd831863b4..13b3f37d43f 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -50,10 +50,13 @@ struct v3d_qpu_sig {
bool ldvpm:1;
bool ldtlb:1;
bool ldtlbu:1;
- bool small_imm_b:1;
bool ucb:1;
bool rotate:1;
bool wrtmuc:1;
+ bool small_imm_a:1; /* raddr_a (add a), since V3D 7.x */
+ bool small_imm_b:1; /* raddr_b (add b) */
+ bool small_imm_c:1; /* raddr_c (mul a), since V3D 7.x */
+ bool small_imm_d:1; /* raddr_d (mul b), since V3D 7.x */
};
enum v3d_qpu_cond {
--
2.39.2

View File

@@ -1,106 +0,0 @@
From eca19c911d9af3b0ab3b563ea65dc455e3d27987 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Wed, 4 Aug 2021 01:11:16 +0200
Subject: [PATCH 010/142] broadcom/qpu: add v71 signal map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Compared with v41, the differences are:
* 14, 15, 30 and 31 are now about immediate a, b, c, d respectively
* 23 is now reserved. On v42 this was for rotate signals, which are
gone on v71.
Signed-off-by: Alejandro Piñeiro <apinheiro@igalia.com>
Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
---
src/broadcom/qpu/qpu_pack.c | 47 ++++++++++++++++++++++++++++++++++---
1 file changed, 44 insertions(+), 3 deletions(-)
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index beac591d3c1..2820d9d4c56 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -112,12 +112,15 @@
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
-#define SMIMM_B .small_imm_b = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
+#define SMIMM_A .small_imm_a = true
+#define SMIMM_B .small_imm_b = true
+#define SMIMM_C .small_imm_c = true
+#define SMIMM_D .small_imm_d = true
static const struct v3d_qpu_sig v33_sig_map[] = {
/* MISC R3 R4 R5 */
@@ -213,6 +216,40 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[31] = { SMIMM_B, LDTMU, },
};
+
+static const struct v3d_qpu_sig v71_sig_map[] = {
+ /* MISC phys RF0 */
+ [0] = { },
+ [1] = { THRSW, },
+ [2] = { LDUNIF },
+ [3] = { THRSW, LDUNIF },
+ [4] = { LDTMU, },
+ [5] = { THRSW, LDTMU, },
+ [6] = { LDTMU, LDUNIF },
+ [7] = { THRSW, LDTMU, LDUNIF },
+ [8] = { LDVARY, },
+ [9] = { THRSW, LDVARY, },
+ [10] = { LDVARY, LDUNIF },
+ [11] = { THRSW, LDVARY, LDUNIF },
+ [12] = { LDUNIFRF },
+ [13] = { THRSW, LDUNIFRF },
+ [14] = { SMIMM_A, },
+ [15] = { SMIMM_B, },
+ [16] = { LDTLB, },
+ [17] = { LDTLBU, },
+ [18] = { WRTMUC },
+ [19] = { THRSW, WRTMUC },
+ [20] = { LDVARY, WRTMUC },
+ [21] = { THRSW, LDVARY, WRTMUC },
+ [22] = { UCB, },
+ /* 23 reserved */
+ [24] = { LDUNIFA},
+ [25] = { LDUNIFARF },
+ /* 26-29 reserved */
+ [30] = { SMIMM_C, },
+ [31] = { SMIMM_D, },
+};
+
bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
uint32_t packed_sig,
@@ -221,7 +258,9 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
return false;
- if (devinfo->ver >= 41)
+ if (devinfo->ver >= 71)
+ *sig = v71_sig_map[packed_sig];
+ else if (devinfo->ver >= 41)
*sig = v41_sig_map[packed_sig];
else if (devinfo->ver == 40)
*sig = v40_sig_map[packed_sig];
@@ -240,7 +279,9 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
{
static const struct v3d_qpu_sig *map;
- if (devinfo->ver >= 41)
+ if (devinfo->ver >= 71)
+ map = v71_sig_map;
+ else if (devinfo->ver >= 41)
map = v41_sig_map;
else if (devinfo->ver == 40)
map = v40_sig_map;
--
2.39.2

View File

@@ -1,45 +0,0 @@
From 52ea09792ff8a438ccdecac47b8415657be90098 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Fri, 6 Aug 2021 01:33:32 +0200
Subject: [PATCH 012/142] broadcom/qpu: add raddr on v3d_qpu_input
On V3D 7.x muxes are not used; raddr_a/b/c/d are used instead.
This is not perfect: for v71, the raddr_a/b fields defined in
struct v3d_qpu_instr become superfluous. But the alternative would be
to define two different structs, or even to define them based on
version ifdefs, so this is a reasonable compromise.
---
src/broadcom/qpu/qpu_instr.h | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 53a51bfb3e1..9e56e2d6a99 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -295,7 +295,10 @@ enum v3d_qpu_mux {
};
struct v3d_qpu_input {
- enum v3d_qpu_mux mux;
+ union {
+ enum v3d_qpu_mux mux; /* V3D 4.x */
+ uint8_t raddr; /* V3D 7.x */
+ };
enum v3d_qpu_input_unpack unpack;
};
@@ -385,8 +388,8 @@ struct v3d_qpu_instr {
struct v3d_qpu_sig sig;
uint8_t sig_addr;
bool sig_magic; /* If the signal writes to a magic address */
- uint8_t raddr_a;
- uint8_t raddr_b;
+ uint8_t raddr_a; /* V3D 4.x */
+ uint8_t raddr_b; /* V3D 4.x*/
struct v3d_qpu_flags flags;
union {
--
2.39.2

View File

@@ -1,37 +0,0 @@
From 3e5ad0881c2789619cdf65f40a44d5481e28e800 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 12 Aug 2021 02:24:02 +0200
Subject: [PATCH 013/142] broadcom/qpu: defining shift/mask for raddr_c/d
On V3D 7.x raddr_c/d replace mul_a/b and add_a/b.
---
src/broadcom/qpu/qpu_pack.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c
index 6e975793fc0..4f106909729 100644
--- a/src/broadcom/qpu/qpu_pack.c
+++ b/src/broadcom/qpu/qpu_pack.c
@@ -84,6 +84,9 @@
#define V3D_QPU_MUL_A_SHIFT 18
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
+#define V3D_QPU_RADDR_C_SHIFT 18
+#define V3D_QPU_RADDR_C_MASK QPU_MASK(23, 18)
+
#define V3D_QPU_ADD_B_SHIFT 15
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
@@ -98,6 +101,9 @@
#define V3D_QPU_BRANCH_BDI_SHIFT 12
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
+#define V3D_QPU_RADDR_D_SHIFT 12
+#define V3D_QPU_RADDR_D_MASK QPU_MASK(17, 12)
+
#define V3D_QPU_RADDR_A_SHIFT 6
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
--
2.39.2

View File

@@ -1,46 +0,0 @@
From 81febf14fe05ad26e992275b911e8bc1e1416ebc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Fri, 17 Sep 2021 01:04:31 +0200
Subject: [PATCH 014/142] broadcom/common: add has_accumulators field on
v3d_device_info
Even though we could just check the version in the code, checking
this field makes several places more readable. For example, in the
register allocation code we don't assign an accumulator because the
hw has no accumulators, rather than because the hw version is a
particular one.
---
src/broadcom/common/v3d_device_info.c | 2 ++
src/broadcom/common/v3d_device_info.h | 3 +++
2 files changed, 5 insertions(+)
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
index 7512fe3a06b..7bc2b662cfc 100644
--- a/src/broadcom/common/v3d_device_info.c
+++ b/src/broadcom/common/v3d_device_info.c
@@ -65,6 +65,8 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
int qups = (ident1.value >> 8) & 0xf;
devinfo->qpu_count = nslc * qups;
+ devinfo->has_accumulators = devinfo->ver < 71;
+
switch (devinfo->ver) {
case 33:
case 41:
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
index 32cb65cf81f..8dfc7858727 100644
--- a/src/broadcom/common/v3d_device_info.h
+++ b/src/broadcom/common/v3d_device_info.h
@@ -42,6 +42,9 @@ struct v3d_device_info {
/* NSLC * QUPS from the core's IDENT registers. */
int qpu_count;
+
+ /* If the hw has accumulator registers */
+ bool has_accumulators;
};
typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg);
--
2.39.2

View File

@@ -1,52 +0,0 @@
From 7d42eca87b6e144697810405308d99d200dca62a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Wed, 15 Sep 2021 10:56:43 +0200
Subject: [PATCH 015/142] broadcom/qpu: add qpu_writes_rf0_implicitly helper
On v71 rf0 replaces r5 as the register that gets updated implicitly
with uniform loads, and gets the C coefficient with ldvary. This
helper returns whether rf0 gets implicitly updated.
---
src/broadcom/qpu/qpu_instr.c | 12 ++++++++++++
src/broadcom/qpu/qpu_instr.h | 2 ++
2 files changed, 14 insertions(+)
diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c
index 7ece8b5e570..8de99c611d5 100644
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -920,6 +920,18 @@ v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
return false;
}
+bool
+v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *inst)
+{
+ if (devinfo->ver >= 71 &&
+ (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
+ return true;
+ }
+
+ return false;
+}
+
bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
{
diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h
index 9e56e2d6a99..a25be8e0ee6 100644
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -473,6 +473,8 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
--
2.39.2

View File

@@ -1,261 +0,0 @@
From ebba9019461083687f6afd23ff0d4646c1a667cb Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Sun, 29 Jan 2023 00:27:11 +0100
Subject: [PATCH 017/142] broadcom/compiler: update node/temp translation for
v71
The offset applied needs to take into account whether we have
accumulators or not.
---
src/broadcom/compiler/vir_register_allocate.c | 68 +++++++++----------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index b22f915d1df..aa9473d124b 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -39,30 +39,31 @@
CLASS_BITS_R5)
static inline uint32_t
-temp_to_node(uint32_t temp)
+temp_to_node(struct v3d_compile *c, uint32_t temp)
{
- return temp + ACC_COUNT;
+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
}
static inline uint32_t
-node_to_temp(uint32_t node)
+node_to_temp(struct v3d_compile *c, uint32_t node)
{
- assert(node >= ACC_COUNT);
- return node - ACC_COUNT;
+ assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
+ (!c->devinfo->has_accumulators && node >= 0));
+ return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
}
static inline uint8_t
-get_temp_class_bits(struct v3d_ra_node_info *nodes,
+get_temp_class_bits(struct v3d_compile *c,
uint32_t temp)
{
- return nodes->info[temp_to_node(temp)].class_bits;
+ return c->nodes.info[temp_to_node(c, temp)].class_bits;
}
static inline void
-set_temp_class_bits(struct v3d_ra_node_info *nodes,
+set_temp_class_bits(struct v3d_compile *c,
uint32_t temp, uint8_t class_bits)
{
- nodes->info[temp_to_node(temp)].class_bits = class_bits;
+ c->nodes.info[temp_to_node(c, temp)].class_bits = class_bits;
}
static struct ra_class *
@@ -84,7 +85,7 @@ static inline struct ra_class *
choose_reg_class_for_temp(struct v3d_compile *c, uint32_t temp)
{
assert(temp < c->num_temps && temp < c->nodes.alloc_count);
- return choose_reg_class(c, get_temp_class_bits(&c->nodes, temp));
+ return choose_reg_class(c, get_temp_class_bits(c, temp));
}
static inline bool
@@ -313,7 +314,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
for (unsigned i = 0; i < c->num_temps; i++) {
if (BITSET_TEST(c->spillable, i)) {
- ra_set_node_spill_cost(c->g, temp_to_node(i),
+ ra_set_node_spill_cost(c->g, temp_to_node(c, i),
spill_costs[i]);
}
}
@@ -482,7 +483,7 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
c->temp_start[i] < ip && c->temp_end[i] >= ip :
c->temp_start[i] <= ip && c->temp_end[i] > ip;
if (thrsw_cross) {
- ra_set_node_class(c->g, temp_to_node(i),
+ ra_set_node_class(c->g, temp_to_node(c, i),
choose_reg_class(c, CLASS_BITS_PHYS));
}
}
@@ -509,8 +510,7 @@ v3d_emit_tmu_spill(struct v3d_compile *c,
* same register class bits as the original.
*/
if (inst == position) {
- uint8_t class_bits = get_temp_class_bits(&c->nodes,
- inst->dst.index);
+ uint8_t class_bits = get_temp_class_bits(c, inst->dst.index);
inst->dst = vir_get_temp(c);
add_node(c, inst->dst.index, class_bits);
} else {
@@ -574,7 +574,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
reconstruct_op = orig_def->qpu.alu.add.op;
}
- uint32_t spill_node = temp_to_node(spill_temp);
+ uint32_t spill_node = temp_to_node(c, spill_temp);
/* We must disable the ldunif optimization if we are spilling uniforms */
bool had_disable_ldunif_opt = c->disable_ldunif_opt;
@@ -739,12 +739,12 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
* update node priorities based one new liveness data.
*/
uint32_t sb_temp =c->spill_base.index;
- uint32_t sb_node = temp_to_node(sb_temp);
+ uint32_t sb_node = temp_to_node(c, sb_temp);
for (uint32_t i = 0; i < c->num_temps; i++) {
if (c->temp_end[i] == -1)
continue;
- uint32_t node_i = temp_to_node(i);
+ uint32_t node_i = temp_to_node(c, i);
c->nodes.info[node_i].priority =
c->temp_end[i] - c->temp_start[i];
@@ -752,7 +752,7 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
j < c->num_temps; j++) {
if (interferes(c->temp_start[i], c->temp_end[i],
c->temp_start[j], c->temp_end[j])) {
- uint32_t node_j = temp_to_node(j);
+ uint32_t node_j = temp_to_node(c, j);
ra_add_node_interference(c->g, node_i, node_j);
}
}
@@ -958,7 +958,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
ra_add_node_interference(c->g,
- temp_to_node(i),
+ temp_to_node(c, i),
acc_nodes[3]);
}
}
@@ -968,7 +968,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
ra_add_node_interference(c->g,
- temp_to_node(i),
+ temp_to_node(c, i),
acc_nodes[4]);
}
}
@@ -987,7 +987,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
* decides whether the LDVPM is in or out)
*/
assert(inst->dst.file == QFILE_TEMP);
- set_temp_class_bits(&c->nodes, inst->dst.index,
+ set_temp_class_bits(c, inst->dst.index,
CLASS_BITS_PHYS);
break;
}
@@ -1002,7 +1002,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
* phys regfile.
*/
assert(inst->dst.file == QFILE_TEMP);
- set_temp_class_bits(&c->nodes, inst->dst.index,
+ set_temp_class_bits(c, inst->dst.index,
CLASS_BITS_PHYS);
break;
}
@@ -1024,7 +1024,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
*/
assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
assert(inst->dst.file == QFILE_TEMP);
- uint32_t node = temp_to_node(inst->dst.index);
+ uint32_t node = temp_to_node(c, inst->dst.index);
ra_set_node_reg(c->g, node,
PHYS_INDEX + inst->src[0].index);
break;
@@ -1043,9 +1043,9 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
*/
if (!inst->qpu.sig.ldunif) {
uint8_t class_bits =
- get_temp_class_bits(&c->nodes, inst->dst.index) &
+ get_temp_class_bits(c, inst->dst.index) &
~CLASS_BITS_R5;
- set_temp_class_bits(&c->nodes, inst->dst.index,
+ set_temp_class_bits(c, inst->dst.index,
class_bits);
} else {
@@ -1054,7 +1054,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
* loads interfere with each other.
*/
if (c->devinfo->ver < 40) {
- set_temp_class_bits(&c->nodes, inst->dst.index,
+ set_temp_class_bits(c, inst->dst.index,
CLASS_BITS_R5);
}
}
@@ -1064,7 +1064,7 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
if (inst->qpu.sig.thrsw) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
- set_temp_class_bits(&c->nodes, i,
+ set_temp_class_bits(c, i,
CLASS_BITS_PHYS);
}
}
@@ -1125,7 +1125,7 @@ v3d_register_allocate(struct v3d_compile *c)
c->nodes.info[i].priority = 0;
c->nodes.info[i].class_bits = 0;
} else {
- uint32_t t = node_to_temp(i);
+ uint32_t t = node_to_temp(c, i);
c->nodes.info[i].priority =
c->temp_end[t] - c->temp_start[t];
c->nodes.info[i].class_bits = CLASS_BITS_ANY;
@@ -1143,7 +1143,7 @@ v3d_register_allocate(struct v3d_compile *c)
/* Set the register classes for all our temporaries in the graph */
for (uint32_t i = 0; i < c->num_temps; i++) {
- ra_set_node_class(c->g, temp_to_node(i),
+ ra_set_node_class(c->g, temp_to_node(c, i),
choose_reg_class_for_temp(c, i));
}
@@ -1153,8 +1153,8 @@ v3d_register_allocate(struct v3d_compile *c)
if (interferes(c->temp_start[i], c->temp_end[i],
c->temp_start[j], c->temp_end[j])) {
ra_add_node_interference(c->g,
- temp_to_node(i),
- temp_to_node(j));
+ temp_to_node(c, i),
+ temp_to_node(c, j));
}
}
}
@@ -1171,7 +1171,7 @@ v3d_register_allocate(struct v3d_compile *c)
if (c->spill_size <
V3D_CHANNELS * sizeof(uint32_t) * force_register_spills) {
int node = v3d_choose_spill_node(c);
- uint32_t temp = node_to_temp(node);
+ uint32_t temp = node_to_temp(c, node);
if (node != -1) {
v3d_spill_reg(c, acc_nodes, temp);
continue;
@@ -1186,7 +1186,7 @@ v3d_register_allocate(struct v3d_compile *c)
if (node == -1)
goto spill_fail;
- uint32_t temp = node_to_temp(node);
+ uint32_t temp = node_to_temp(c, node);
enum temp_spill_type spill_type =
get_spill_type_for_temp(c, temp);
if (spill_type != SPILL_TYPE_TMU || tmu_spilling_allowed(c)) {
@@ -1201,7 +1201,7 @@ v3d_register_allocate(struct v3d_compile *c)
/* Allocation was successful, build the 'temp -> reg' map */
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
for (uint32_t i = 0; i < c->num_temps; i++) {
- int ra_reg = ra_get_node_reg(c->g, temp_to_node(i));
+ int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
if (ra_reg < PHYS_INDEX) {
temp_registers[i].magic = true;
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
--
2.39.2

View File

@@ -1,144 +0,0 @@
From 9b2dfe0286212aba3687a06023cc5b4ce9944ee0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Mon, 23 Aug 2021 02:18:43 +0200
Subject: [PATCH 018/142] broadcom/compiler: phys index depends on hw version
On 7.1 there are no accumulators, so we replace the PHYS_INDEX macro
with a function call.
---
src/broadcom/compiler/vir_register_allocate.c | 39 ++++++++++++++-----
1 file changed, 29 insertions(+), 10 deletions(-)
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index aa9473d124b..a358b616e13 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -28,9 +28,19 @@
#define ACC_INDEX 0
#define ACC_COUNT 6
-#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
-#define PHYS_COUNT 64
+#define PHYS_COUNT 64
+
+static uint8_t
+get_phys_index(const struct v3d_device_info *devinfo)
+{
+ if (devinfo->has_accumulators)
+ return ACC_INDEX + ACC_COUNT;
+ else
+ return 0;
+}
+
+/* ACC as accumulator */
#define CLASS_BITS_PHYS (1 << 0)
#define CLASS_BITS_ACC (1 << 1)
#define CLASS_BITS_R5 (1 << 4)
@@ -771,9 +781,11 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
}
struct v3d_ra_select_callback_data {
+ uint32_t phys_index;
uint32_t next_acc;
uint32_t next_phys;
struct v3d_ra_node_info *nodes;
+ const struct v3d_device_info *devinfo;
};
/* Choosing accumulators improves chances of merging QPU instructions
@@ -794,7 +806,7 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
static const int available_rf_threshold = 5;
int available_rf = 0 ;
for (int i = 0; i < PHYS_COUNT; i++) {
- if (BITSET_TEST(regs, PHYS_INDEX + i))
+ if (BITSET_TEST(regs, v3d_ra->phys_index + i))
available_rf++;
if (available_rf >= available_rf_threshold)
break;
@@ -854,7 +866,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
{
for (int i = 0; i < PHYS_COUNT; i++) {
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
- int phys = PHYS_INDEX + phys_off;
+ int phys = v3d_ra->phys_index + phys_off;
if (BITSET_TEST(regs, phys)) {
v3d_ra->next_phys = phys_off + 1;
@@ -896,8 +908,9 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
* register file can be divided up for fragment shader threading.
*/
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
+ uint8_t phys_index = get_phys_index(compiler->devinfo);
- compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
+ compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
false);
if (!compiler->regs)
return false;
@@ -912,8 +925,8 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
compiler->reg_class_phys[threads] =
ra_alloc_contig_reg_class(compiler->regs, 1);
- for (int i = PHYS_INDEX;
- i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
+ for (int i = phys_index;
+ i < phys_index + (PHYS_COUNT >> threads); i++) {
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
ra_class_add_reg(compiler->reg_class_phys[threads], i);
ra_class_add_reg(compiler->reg_class_any[threads], i);
@@ -1026,7 +1039,8 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
assert(inst->dst.file == QFILE_TEMP);
uint32_t node = temp_to_node(c, inst->dst.index);
ra_set_node_reg(c->g, node,
- PHYS_INDEX + inst->src[0].index);
+ get_phys_index(c->devinfo) +
+ inst->src[0].index);
break;
}
}
@@ -1086,13 +1100,17 @@ v3d_register_allocate(struct v3d_compile *c)
c->num_temps + ACC_COUNT),
};
+ uint32_t phys_index = get_phys_index(c->devinfo);
+
struct v3d_ra_select_callback_data callback_data = {
+ .phys_index = phys_index,
.next_acc = 0,
/* Start at RF3, to try to keep the TLB writes from using
* RF0-2.
*/
.next_phys = 3,
.nodes = &c->nodes,
+ .devinfo = c->devinfo,
};
vir_calculate_live_intervals(c);
@@ -1139,6 +1157,7 @@ v3d_register_allocate(struct v3d_compile *c)
vir_for_each_inst_inorder(inst, c) {
inst->ip = ip++;
update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
+
}
/* Set the register classes for all our temporaries in the graph */
@@ -1202,13 +1221,13 @@ v3d_register_allocate(struct v3d_compile *c)
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
for (uint32_t i = 0; i < c->num_temps; i++) {
int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
- if (ra_reg < PHYS_INDEX) {
+ if (ra_reg < phys_index) {
temp_registers[i].magic = true;
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
ra_reg - ACC_INDEX);
} else {
temp_registers[i].magic = false;
- temp_registers[i].index = ra_reg - PHYS_INDEX;
+ temp_registers[i].index = ra_reg - phys_index;
}
}
--
2.39.2

View File

@@ -1,40 +0,0 @@
From da0a3deadf86a46c8323267d3f6a49e442835608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Fri, 17 Sep 2021 01:07:06 +0200
Subject: [PATCH 019/142] broadcom/compiler: don't favor/select accum registers
for hw not supporting it
Note that we simply return false from the favor/select accum
functions. We could instead avoid calling them, but as the select is
called more than once, it is just easier this way.
---
src/broadcom/compiler/vir_register_allocate.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index a358b616e13..1f495180784 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -797,6 +797,9 @@ v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
BITSET_WORD *regs,
int priority)
{
+ if (!v3d_ra->devinfo->has_accumulators)
+ return false;
+
/* Favor accumulators if we have less that this number of physical
* registers. Accumulators have more restrictions (like being
* invalidated through thrsw), so running out of physical registers
@@ -832,6 +835,9 @@ v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
BITSET_WORD *regs,
unsigned int *out)
{
+ if (!v3d_ra->devinfo->has_accumulators)
+ return false;
+
/* Choose r5 for our ldunifs if possible (nobody else can load to that
* reg, and it keeps the QPU cond field free from being occupied by
* ldunifrf).
--
2.39.2

View File

@@ -1,105 +0,0 @@
From 6c04d7c917da6b38f8b2b4306ab03ed2ab7e6ce0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 9 Sep 2021 00:28:53 +0200
Subject: [PATCH 020/142] broadcom/vir: implement is_no_op_mov for v71
Did some refactoring/splitting.
---
src/broadcom/compiler/vir_to_qpu.c | 66 ++++++++++++++++++++++++------
1 file changed, 53 insertions(+), 13 deletions(-)
diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c
index c8b6e0a91a0..08970d52954 100644
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -129,19 +129,8 @@ set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
}
static bool
-is_no_op_mov(struct qinst *qinst)
+v3d33_mov_src_and_dst_equal(struct qinst *qinst)
{
- static const struct v3d_qpu_sig no_sig = {0};
-
- /* Make sure it's just a lone MOV. */
- if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
- qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
- qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
- memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
- return false;
- }
-
- /* Check if it's a MOV from a register to itself. */
enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
if (qinst->qpu.alu.mul.magic_write) {
if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
@@ -168,6 +157,57 @@ is_no_op_mov(struct qinst *qinst)
return false;
}
+ return true;
+}
+
+static bool
+v3d71_mov_src_and_dst_equal(struct qinst *qinst)
+{
+ if (qinst->qpu.alu.mul.magic_write)
+ return false;
+
+ enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
+ int raddr;
+
+ raddr = qinst->qpu.alu.mul.a.raddr;
+ if (raddr != waddr)
+ return false;
+
+ return true;
+}
+
+static bool
+mov_src_and_dst_equal(struct qinst *qinst,
+ const struct v3d_device_info *devinfo)
+{
+ if (devinfo->ver < 71)
+ return v3d33_mov_src_and_dst_equal(qinst);
+ else
+ return v3d71_mov_src_and_dst_equal(qinst);
+}
+
+
+static bool
+is_no_op_mov(struct qinst *qinst,
+ const struct v3d_device_info *devinfo)
+{
+ static const struct v3d_qpu_sig no_sig = {0};
+
+ /* Make sure it's just a lone MOV. We only check for M_MOV. Although
+ * for V3D 7.x there is also A_MOV, we don't need to check for it as
+ * we always emit using M_MOV. We could use A_MOV later on the
+ * squedule to improve performance
+ */
+ if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
+ qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
+ qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
+ memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
+ return false;
+ }
+
+ if (!mov_src_and_dst_equal(qinst, devinfo))
+ return false;
+
/* No packing or flags updates, or we need to execute the
* instruction.
*/
@@ -324,7 +364,7 @@ v3d_generate_code_block(struct v3d_compile *c,
qinst->qpu.alu.mul.waddr = dst.index;
qinst->qpu.alu.mul.magic_write = dst.magic;
- if (is_no_op_mov(qinst)) {
+ if (is_no_op_mov(qinst, c->devinfo)) {
vir_remove_instruction(c, qinst);
continue;
}
--
2.39.2

Some files were not shown because too many files have changed in this diff Show More