mirror of
https://github.com/ukui/kernel.git
synced 2026-03-09 10:07:04 -07:00
drm/vc4: Add support for drawing 3D frames.
The user submission is basically a pointer to a command list and a
pointer to uniforms. We copy those in to the kernel, validate and
relocate them, and store the result in a GPU BO which we queue for
execution.
v2: Drop support for NV shader recs (not necessary for GL), simplify
vc4_use_bo(), improve bin flush/semaphore checks, use __u32 style
types.
Signed-off-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
@@ -8,12 +8,19 @@ vc4-y := \
|
||||
vc4_crtc.o \
|
||||
vc4_drv.o \
|
||||
vc4_kms.o \
|
||||
vc4_gem.o \
|
||||
vc4_hdmi.o \
|
||||
vc4_hvs.o \
|
||||
vc4_irq.o \
|
||||
vc4_plane.o \
|
||||
vc4_render_cl.o \
|
||||
vc4_trace_points.o \
|
||||
vc4_v3d.o \
|
||||
vc4_validate.o \
|
||||
vc4_validate_shaders.o
|
||||
|
||||
vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
|
||||
|
||||
obj-$(CONFIG_DRM_VC4) += vc4.o
|
||||
|
||||
CFLAGS_vc4_trace_points.o := -I$(src)
|
||||
|
||||
@@ -74,6 +74,9 @@ static const struct file_operations vc4_drm_fops = {
|
||||
};
|
||||
|
||||
static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
|
||||
@@ -83,10 +86,16 @@ static struct drm_driver vc4_drm_driver = {
|
||||
.driver_features = (DRIVER_MODESET |
|
||||
DRIVER_ATOMIC |
|
||||
DRIVER_GEM |
|
||||
DRIVER_HAVE_IRQ |
|
||||
DRIVER_PRIME),
|
||||
.lastclose = vc4_lastclose,
|
||||
.preclose = vc4_drm_preclose,
|
||||
|
||||
.irq_handler = vc4_irq,
|
||||
.irq_preinstall = vc4_irq_preinstall,
|
||||
.irq_postinstall = vc4_irq_postinstall,
|
||||
.irq_uninstall = vc4_irq_uninstall,
|
||||
|
||||
.enable_vblank = vc4_enable_vblank,
|
||||
.disable_vblank = vc4_disable_vblank,
|
||||
.get_vblank_counter = drm_vblank_count,
|
||||
@@ -181,9 +190,11 @@ static int vc4_drm_bind(struct device *dev)
|
||||
if (ret)
|
||||
goto unref;
|
||||
|
||||
vc4_gem_init(drm);
|
||||
|
||||
ret = component_bind_all(dev, drm);
|
||||
if (ret)
|
||||
goto unref;
|
||||
goto gem_destroy;
|
||||
|
||||
ret = drm_dev_register(drm, 0);
|
||||
if (ret < 0)
|
||||
@@ -207,6 +218,8 @@ unregister:
|
||||
drm_dev_unregister(drm);
|
||||
unbind_all:
|
||||
component_unbind_all(dev, drm);
|
||||
gem_destroy:
|
||||
vc4_gem_destroy(drm);
|
||||
unref:
|
||||
drm_dev_unref(drm);
|
||||
vc4_bo_cache_destroy(drm);
|
||||
|
||||
@@ -49,6 +49,48 @@ struct vc4_dev {
|
||||
|
||||
/* Protects bo_cache and the BO stats. */
|
||||
struct mutex bo_lock;
|
||||
|
||||
/* Sequence number for the last job queued in job_list.
|
||||
* Starts at 0 (no jobs emitted).
|
||||
*/
|
||||
uint64_t emit_seqno;
|
||||
|
||||
/* Sequence number for the last completed job on the GPU.
|
||||
* Starts at 0 (no jobs completed).
|
||||
*/
|
||||
uint64_t finished_seqno;
|
||||
|
||||
/* List of all struct vc4_exec_info for jobs to be executed.
|
||||
* The first job in the list is the one currently programmed
|
||||
* into ct0ca/ct1ca for execution.
|
||||
*/
|
||||
struct list_head job_list;
|
||||
/* List of the finished vc4_exec_infos waiting to be freed by
|
||||
* job_done_work.
|
||||
*/
|
||||
struct list_head job_done_list;
|
||||
/* Spinlock used to synchronize the job_list and seqno
|
||||
* accesses between the IRQ handler and GEM ioctls.
|
||||
*/
|
||||
spinlock_t job_lock;
|
||||
wait_queue_head_t job_wait_queue;
|
||||
struct work_struct job_done_work;
|
||||
|
||||
/* The binner overflow memory that's currently set up in
|
||||
* BPOA/BPOS registers. When overflow occurs and a new one is
|
||||
* allocated, the previous one will be moved to
|
||||
* vc4->current_exec's free list.
|
||||
*/
|
||||
struct vc4_bo *overflow_mem;
|
||||
struct work_struct overflow_mem_work;
|
||||
|
||||
struct {
|
||||
uint32_t last_ct0ca, last_ct1ca;
|
||||
struct timer_list timer;
|
||||
struct work_struct reset_work;
|
||||
} hangcheck;
|
||||
|
||||
struct semaphore async_modeset;
|
||||
};
|
||||
|
||||
static inline struct vc4_dev *
|
||||
@@ -60,6 +102,9 @@ to_vc4_dev(struct drm_device *dev)
|
||||
struct vc4_bo {
|
||||
struct drm_gem_cma_object base;
|
||||
|
||||
/* seqno of the last job to render to this BO. */
|
||||
uint64_t seqno;
|
||||
|
||||
/* List entry for the BO's position in either
|
||||
* vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
|
||||
*/
|
||||
@@ -130,6 +175,101 @@ to_vc4_encoder(struct drm_encoder *encoder)
|
||||
#define HVS_READ(offset) readl(vc4->hvs->regs + offset)
|
||||
#define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
|
||||
|
||||
struct vc4_exec_info {
|
||||
/* Sequence number for this bin/render job. */
|
||||
uint64_t seqno;
|
||||
|
||||
/* Kernel-space copy of the ioctl arguments */
|
||||
struct drm_vc4_submit_cl *args;
|
||||
|
||||
/* This is the array of BOs that were looked up at the start of exec.
|
||||
* Command validation will use indices into this array.
|
||||
*/
|
||||
struct drm_gem_cma_object **bo;
|
||||
uint32_t bo_count;
|
||||
|
||||
/* Pointers for our position in vc4->job_list */
|
||||
struct list_head head;
|
||||
|
||||
/* List of other BOs used in the job that need to be released
|
||||
* once the job is complete.
|
||||
*/
|
||||
struct list_head unref_list;
|
||||
|
||||
/* Current unvalidated indices into @bo loaded by the non-hardware
|
||||
* VC4_PACKET_GEM_HANDLES.
|
||||
*/
|
||||
uint32_t bo_index[2];
|
||||
|
||||
/* This is the BO where we store the validated command lists, shader
|
||||
* records, and uniforms.
|
||||
*/
|
||||
struct drm_gem_cma_object *exec_bo;
|
||||
|
||||
/**
|
||||
* This tracks the per-shader-record state (packet 64) that
|
||||
* determines the length of the shader record and the offset
|
||||
* it's expected to be found at. It gets read in from the
|
||||
* command lists.
|
||||
*/
|
||||
struct vc4_shader_state {
|
||||
uint32_t addr;
|
||||
/* Maximum vertex index referenced by any primitive using this
|
||||
* shader state.
|
||||
*/
|
||||
uint32_t max_index;
|
||||
} *shader_state;
|
||||
|
||||
/** How many shader states the user declared they were using. */
|
||||
uint32_t shader_state_size;
|
||||
/** How many shader state records the validator has seen. */
|
||||
uint32_t shader_state_count;
|
||||
|
||||
bool found_tile_binning_mode_config_packet;
|
||||
bool found_start_tile_binning_packet;
|
||||
bool found_increment_semaphore_packet;
|
||||
bool found_flush;
|
||||
uint8_t bin_tiles_x, bin_tiles_y;
|
||||
struct drm_gem_cma_object *tile_bo;
|
||||
uint32_t tile_alloc_offset;
|
||||
|
||||
/**
|
||||
* Computed addresses pointing into exec_bo where we start the
|
||||
* bin thread (ct0) and render thread (ct1).
|
||||
*/
|
||||
uint32_t ct0ca, ct0ea;
|
||||
uint32_t ct1ca, ct1ea;
|
||||
|
||||
/* Pointer to the unvalidated bin CL (if present). */
|
||||
void *bin_u;
|
||||
|
||||
/* Pointers to the shader recs. The paddr gets incremented as CL
|
||||
* packets are relocated in validate_gl_shader_state, and the vaddrs
|
||||
* (u and v) get incremented and size decremented as the shader recs
|
||||
* themselves are validated.
|
||||
*/
|
||||
void *shader_rec_u;
|
||||
void *shader_rec_v;
|
||||
uint32_t shader_rec_p;
|
||||
uint32_t shader_rec_size;
|
||||
|
||||
/* Pointers to the uniform data. These pointers are incremented, and
|
||||
* size decremented, as each batch of uniforms is uploaded.
|
||||
*/
|
||||
void *uniforms_u;
|
||||
void *uniforms_v;
|
||||
uint32_t uniforms_p;
|
||||
uint32_t uniforms_size;
|
||||
};
|
||||
|
||||
static inline struct vc4_exec_info *
|
||||
vc4_first_job(struct vc4_dev *vc4)
|
||||
{
|
||||
if (list_empty(&vc4->job_list))
|
||||
return NULL;
|
||||
return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
|
||||
}
|
||||
|
||||
/**
|
||||
* struct vc4_texture_sample_info - saves the offsets into the UBO for texture
|
||||
* setup parameters.
|
||||
@@ -231,10 +371,31 @@ void vc4_debugfs_cleanup(struct drm_minor *minor);
|
||||
/* vc4_drv.c */
|
||||
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
|
||||
|
||||
/* vc4_gem.c */
|
||||
void vc4_gem_init(struct drm_device *dev);
|
||||
void vc4_gem_destroy(struct drm_device *dev);
|
||||
int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file_priv);
|
||||
int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file_priv);
|
||||
int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file_priv);
|
||||
void vc4_submit_next_job(struct drm_device *dev);
|
||||
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
|
||||
uint64_t timeout_ns, bool interruptible);
|
||||
void vc4_job_handle_completed(struct vc4_dev *vc4);
|
||||
|
||||
/* vc4_hdmi.c */
|
||||
extern struct platform_driver vc4_hdmi_driver;
|
||||
int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
|
||||
|
||||
/* vc4_irq.c */
|
||||
irqreturn_t vc4_irq(int irq, void *arg);
|
||||
void vc4_irq_preinstall(struct drm_device *dev);
|
||||
int vc4_irq_postinstall(struct drm_device *dev);
|
||||
void vc4_irq_uninstall(struct drm_device *dev);
|
||||
void vc4_irq_reset(struct drm_device *dev);
|
||||
|
||||
/* vc4_hvs.c */
|
||||
extern struct platform_driver vc4_hvs_driver;
|
||||
void vc4_hvs_dump_state(struct drm_device *dev);
|
||||
@@ -253,6 +414,27 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
|
||||
extern struct platform_driver vc4_v3d_driver;
|
||||
int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
|
||||
int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
|
||||
int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
|
||||
|
||||
/* vc4_validate.c */
|
||||
int
|
||||
vc4_validate_bin_cl(struct drm_device *dev,
|
||||
void *validated,
|
||||
void *unvalidated,
|
||||
struct vc4_exec_info *exec);
|
||||
|
||||
int
|
||||
vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
|
||||
|
||||
struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
|
||||
uint32_t hindex);
|
||||
|
||||
int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
|
||||
|
||||
bool vc4_check_tex_size(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object *fbo,
|
||||
uint32_t offset, uint8_t tiling_format,
|
||||
uint32_t width, uint32_t height, uint8_t cpp);
|
||||
|
||||
/* vc4_validate_shader.c */
|
||||
struct vc4_validated_shader_info *
|
||||
|
||||
642
drivers/gpu/drm/vc4/vc4_gem.c
Normal file
642
drivers/gpu/drm/vc4/vc4_gem.c
Normal file
File diff suppressed because it is too large
Load Diff
210
drivers/gpu/drm/vc4/vc4_irq.c
Normal file
210
drivers/gpu/drm/vc4/vc4_irq.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** DOC: Interrupt management for the V3D engine.
|
||||
*
|
||||
* We have an interrupt status register (V3D_INTCTL) which reports
|
||||
* interrupts, and where writing 1 bits clears those interrupts.
|
||||
* There are also a pair of interrupt registers
|
||||
* (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
|
||||
* disables that specific interrupt, and 0s written are ignored
|
||||
* (reading either one returns the set of enabled interrupts).
|
||||
*
|
||||
* When we take a render frame interrupt, we need to wake the
|
||||
* processes waiting for some frame to be done, and get the next frame
|
||||
* submitted ASAP (so the hardware doesn't sit idle when there's work
|
||||
* to do).
|
||||
*
|
||||
* When we take the binner out of memory interrupt, we need to
|
||||
* allocate some new memory and pass it to the binner so that the
|
||||
* current job can make progress.
|
||||
*/
|
||||
|
||||
#include "vc4_drv.h"
|
||||
#include "vc4_regs.h"
|
||||
|
||||
#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
|
||||
V3D_INT_FRDONE)
|
||||
|
||||
DECLARE_WAIT_QUEUE_HEAD(render_wait);
|
||||
|
||||
static void
|
||||
vc4_overflow_mem_work(struct work_struct *work)
|
||||
{
|
||||
struct vc4_dev *vc4 =
|
||||
container_of(work, struct vc4_dev, overflow_mem_work);
|
||||
struct drm_device *dev = vc4->dev;
|
||||
struct vc4_bo *bo;
|
||||
|
||||
bo = vc4_bo_create(dev, 256 * 1024, true);
|
||||
if (!bo) {
|
||||
DRM_ERROR("Couldn't allocate binner overflow mem\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* If there's a job executing currently, then our previous
|
||||
* overflow allocation is getting used in that job and we need
|
||||
* to queue it to be released when the job is done. But if no
|
||||
* job is executing at all, then we can free the old overflow
|
||||
* object direcctly.
|
||||
*
|
||||
* No lock necessary for this pointer since we're the only
|
||||
* ones that update the pointer, and our workqueue won't
|
||||
* reenter.
|
||||
*/
|
||||
if (vc4->overflow_mem) {
|
||||
struct vc4_exec_info *current_exec;
|
||||
unsigned long irqflags;
|
||||
|
||||
spin_lock_irqsave(&vc4->job_lock, irqflags);
|
||||
current_exec = vc4_first_job(vc4);
|
||||
if (current_exec) {
|
||||
vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
|
||||
list_add_tail(&vc4->overflow_mem->unref_head,
|
||||
¤t_exec->unref_list);
|
||||
vc4->overflow_mem = NULL;
|
||||
}
|
||||
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
|
||||
}
|
||||
|
||||
if (vc4->overflow_mem)
|
||||
drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
|
||||
vc4->overflow_mem = bo;
|
||||
|
||||
V3D_WRITE(V3D_BPOA, bo->base.paddr);
|
||||
V3D_WRITE(V3D_BPOS, bo->base.base.size);
|
||||
V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
|
||||
V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_irq_finish_job(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
struct vc4_exec_info *exec = vc4_first_job(vc4);
|
||||
|
||||
if (!exec)
|
||||
return;
|
||||
|
||||
vc4->finished_seqno++;
|
||||
list_move_tail(&exec->head, &vc4->job_done_list);
|
||||
vc4_submit_next_job(dev);
|
||||
|
||||
wake_up_all(&vc4->job_wait_queue);
|
||||
schedule_work(&vc4->job_done_work);
|
||||
}
|
||||
|
||||
irqreturn_t
|
||||
vc4_irq(int irq, void *arg)
|
||||
{
|
||||
struct drm_device *dev = arg;
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
uint32_t intctl;
|
||||
irqreturn_t status = IRQ_NONE;
|
||||
|
||||
barrier();
|
||||
intctl = V3D_READ(V3D_INTCTL);
|
||||
|
||||
/* Acknowledge the interrupts we're handling here. The render
|
||||
* frame done interrupt will be cleared, while OUTOMEM will
|
||||
* stay high until the underlying cause is cleared.
|
||||
*/
|
||||
V3D_WRITE(V3D_INTCTL, intctl);
|
||||
|
||||
if (intctl & V3D_INT_OUTOMEM) {
|
||||
/* Disable OUTOMEM until the work is done. */
|
||||
V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
|
||||
schedule_work(&vc4->overflow_mem_work);
|
||||
status = IRQ_HANDLED;
|
||||
}
|
||||
|
||||
if (intctl & V3D_INT_FRDONE) {
|
||||
spin_lock(&vc4->job_lock);
|
||||
vc4_irq_finish_job(dev);
|
||||
spin_unlock(&vc4->job_lock);
|
||||
status = IRQ_HANDLED;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void
|
||||
vc4_irq_preinstall(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
|
||||
init_waitqueue_head(&vc4->job_wait_queue);
|
||||
INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
|
||||
|
||||
/* Clear any pending interrupts someone might have left around
|
||||
* for us.
|
||||
*/
|
||||
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
|
||||
}
|
||||
|
||||
int
|
||||
vc4_irq_postinstall(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
|
||||
/* Enable both the render done and out of memory interrupts. */
|
||||
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
vc4_irq_uninstall(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
|
||||
/* Disable sending interrupts for our driver's IRQs. */
|
||||
V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
|
||||
|
||||
/* Clear any pending interrupts we might have left. */
|
||||
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
|
||||
|
||||
cancel_work_sync(&vc4->overflow_mem_work);
|
||||
}
|
||||
|
||||
/** Reinitializes interrupt registers when a GPU reset is performed. */
|
||||
void vc4_irq_reset(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
unsigned long irqflags;
|
||||
|
||||
/* Acknowledge any stale IRQs. */
|
||||
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
|
||||
|
||||
/*
|
||||
* Turn all our interrupts on. Binner out of memory is the
|
||||
* only one we expect to trigger at this point, since we've
|
||||
* just come from poweron and haven't supplied any overflow
|
||||
* memory yet.
|
||||
*/
|
||||
V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
|
||||
|
||||
spin_lock_irqsave(&vc4->job_lock, irqflags);
|
||||
vc4_irq_finish_job(dev);
|
||||
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
|
||||
}
|
||||
399
drivers/gpu/drm/vc4/vc4_packet.h
Normal file
399
drivers/gpu/drm/vc4/vc4_packet.h
Normal file
@@ -0,0 +1,399 @@
|
||||
/*
|
||||
* Copyright © 2014 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef VC4_PACKET_H
|
||||
#define VC4_PACKET_H
|
||||
|
||||
#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
|
||||
|
||||
enum vc4_packet {
|
||||
VC4_PACKET_HALT = 0,
|
||||
VC4_PACKET_NOP = 1,
|
||||
|
||||
VC4_PACKET_FLUSH = 4,
|
||||
VC4_PACKET_FLUSH_ALL = 5,
|
||||
VC4_PACKET_START_TILE_BINNING = 6,
|
||||
VC4_PACKET_INCREMENT_SEMAPHORE = 7,
|
||||
VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
|
||||
|
||||
VC4_PACKET_BRANCH = 16,
|
||||
VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
|
||||
|
||||
VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
|
||||
VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
|
||||
VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
|
||||
VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
|
||||
VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
|
||||
VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
|
||||
|
||||
VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
|
||||
VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
|
||||
|
||||
VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
|
||||
VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
|
||||
|
||||
VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
|
||||
|
||||
VC4_PACKET_GL_SHADER_STATE = 64,
|
||||
VC4_PACKET_NV_SHADER_STATE = 65,
|
||||
VC4_PACKET_VG_SHADER_STATE = 66,
|
||||
|
||||
VC4_PACKET_CONFIGURATION_BITS = 96,
|
||||
VC4_PACKET_FLAT_SHADE_FLAGS = 97,
|
||||
VC4_PACKET_POINT_SIZE = 98,
|
||||
VC4_PACKET_LINE_WIDTH = 99,
|
||||
VC4_PACKET_RHT_X_BOUNDARY = 100,
|
||||
VC4_PACKET_DEPTH_OFFSET = 101,
|
||||
VC4_PACKET_CLIP_WINDOW = 102,
|
||||
VC4_PACKET_VIEWPORT_OFFSET = 103,
|
||||
VC4_PACKET_Z_CLIPPING = 104,
|
||||
VC4_PACKET_CLIPPER_XY_SCALING = 105,
|
||||
VC4_PACKET_CLIPPER_Z_SCALING = 106,
|
||||
|
||||
VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
|
||||
VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
|
||||
VC4_PACKET_CLEAR_COLORS = 114,
|
||||
VC4_PACKET_TILE_COORDINATES = 115,
|
||||
|
||||
/* Not an actual hardware packet -- this is what we use to put
|
||||
* references to GEM bos in the command stream, since we need the u32
|
||||
* in the actual address packet in order to store the offset from the
|
||||
* start of the BO.
|
||||
*/
|
||||
VC4_PACKET_GEM_HANDLES = 254,
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define VC4_PACKET_HALT_SIZE 1
|
||||
#define VC4_PACKET_NOP_SIZE 1
|
||||
#define VC4_PACKET_FLUSH_SIZE 1
|
||||
#define VC4_PACKET_FLUSH_ALL_SIZE 1
|
||||
#define VC4_PACKET_START_TILE_BINNING_SIZE 1
|
||||
#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
|
||||
#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
|
||||
#define VC4_PACKET_BRANCH_SIZE 5
|
||||
#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
|
||||
#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
|
||||
#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
|
||||
#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
|
||||
#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
|
||||
#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
|
||||
#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
|
||||
#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
|
||||
#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
|
||||
#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
|
||||
#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
|
||||
#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
|
||||
#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
|
||||
#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
|
||||
#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
|
||||
#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
|
||||
#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
|
||||
#define VC4_PACKET_POINT_SIZE_SIZE 5
|
||||
#define VC4_PACKET_LINE_WIDTH_SIZE 5
|
||||
#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3
|
||||
#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
|
||||
#define VC4_PACKET_CLIP_WINDOW_SIZE 9
|
||||
#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
|
||||
#define VC4_PACKET_Z_CLIPPING_SIZE 9
|
||||
#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
|
||||
#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
|
||||
#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
|
||||
#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11
|
||||
#define VC4_PACKET_CLEAR_COLORS_SIZE 14
|
||||
#define VC4_PACKET_TILE_COORDINATES_SIZE 3
|
||||
#define VC4_PACKET_GEM_HANDLES_SIZE 9
|
||||
|
||||
/* Number of multisamples supported. */
|
||||
#define VC4_MAX_SAMPLES 4
|
||||
/* Size of a full resolution color or Z tile buffer load/store. */
|
||||
#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4)
|
||||
|
||||
/** @{
|
||||
* Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
|
||||
* VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
|
||||
*/
|
||||
#define VC4_TILING_FORMAT_LINEAR 0
|
||||
#define VC4_TILING_FORMAT_T 1
|
||||
#define VC4_TILING_FORMAT_LT 2
|
||||
/** @} */
|
||||
|
||||
/** @{
|
||||
*
|
||||
* low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
|
||||
* VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
|
||||
*/
|
||||
#define VC4_LOADSTORE_FULL_RES_EOF BIT(3)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0)
|
||||
|
||||
/** @{
|
||||
*
|
||||
* low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
|
||||
* VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
|
||||
*/
|
||||
#define VC4_LOADSTORE_FULL_RES_EOF BIT(3)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0)
|
||||
|
||||
/** @{
|
||||
*
|
||||
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
|
||||
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
|
||||
*/
|
||||
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0)
|
||||
|
||||
/** @} */
|
||||
|
||||
/** @{
|
||||
*
|
||||
* byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
|
||||
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
|
||||
*/
|
||||
#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
|
||||
#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14)
|
||||
#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13)
|
||||
#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12)
|
||||
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2
|
||||
/** @} */
|
||||
|
||||
/** @{
|
||||
*
|
||||
* byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
|
||||
* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
|
||||
*/
|
||||
#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6)
|
||||
#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6
|
||||
#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6)
|
||||
#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6)
|
||||
#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6)
|
||||
|
||||
/** The values of the field are VC4_TILING_FORMAT_* */
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4
|
||||
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0)
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_NONE 0
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_ZS 2
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_Z 3
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4
|
||||
#define VC4_LOADSTORE_TILE_BUFFER_FULL 5
|
||||
/** @} */
|
||||
|
||||
#define VC4_INDEX_BUFFER_U8 (0 << 4)
|
||||
#define VC4_INDEX_BUFFER_U16 (1 << 4)
|
||||
|
||||
/* This flag is only present in NV shader state. */
|
||||
#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3)
|
||||
#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2)
|
||||
#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1)
|
||||
#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0)
|
||||
|
||||
/** @{ byte 2 of config bits. */
|
||||
#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1)
|
||||
#define VC4_CONFIG_BITS_EARLY_Z BIT(0)
|
||||
/** @} */
|
||||
|
||||
/** @{ byte 1 of config bits. */
|
||||
#define VC4_CONFIG_BITS_Z_UPDATE BIT(7)
|
||||
/** same values in this 3-bit field as PIPE_FUNC_* */
|
||||
#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
|
||||
#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3)
|
||||
|
||||
#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
|
||||
#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
|
||||
#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
|
||||
#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
|
||||
|
||||
#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0)
|
||||
/** @} */
|
||||
|
||||
/** @{ byte 0 of config bits. */
|
||||
#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
|
||||
#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
|
||||
#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
|
||||
|
||||
#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4)
|
||||
#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3)
|
||||
#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2)
|
||||
#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1)
|
||||
#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0)
|
||||
/** @} */
|
||||
|
||||
/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
|
||||
#define VC4_BIN_CONFIG_DB_NON_MS BIT(7)
|
||||
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2
|
||||
#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3
|
||||
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3)
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
|
||||
#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
|
||||
|
||||
#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2)
|
||||
#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1)
|
||||
#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0)
|
||||
/** @} */
|
||||
|
||||
/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
|
||||
#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12)
|
||||
#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
|
||||
#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10)
|
||||
#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9)
|
||||
#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8)
|
||||
|
||||
/** The values of the field are VC4_TILING_FORMAT_* */
|
||||
#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
|
||||
#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6
|
||||
|
||||
#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4)
|
||||
#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4)
|
||||
#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4)
|
||||
|
||||
#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2)
|
||||
#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2
|
||||
#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0
|
||||
#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
|
||||
#define VC4_RENDER_CONFIG_FORMAT_BGR565 2
|
||||
|
||||
#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1)
|
||||
#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0)
|
||||
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0)
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0)
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0)
|
||||
#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
|
||||
|
||||
/*
 * Texture data types. The values are assigned explicitly and run from
 * 0 to 17, so encoding one takes five bits.
 *
 * NOTE(review): presumably the low four bits are stored in the
 * VC4_TEX_P0_TYPE field and the fifth bit in VC4_TEX_P1_TYPE4 -- confirm
 * against the hardware documentation.
 */
enum vc4_texture_data_type {
|
||||
VC4_TEXTURE_TYPE_RGBA8888 = 0,
|
||||
VC4_TEXTURE_TYPE_RGBX8888 = 1,
|
||||
VC4_TEXTURE_TYPE_RGBA4444 = 2,
|
||||
VC4_TEXTURE_TYPE_RGBA5551 = 3,
|
||||
VC4_TEXTURE_TYPE_RGB565 = 4,
|
||||
VC4_TEXTURE_TYPE_LUMINANCE = 5,
|
||||
VC4_TEXTURE_TYPE_ALPHA = 6,
|
||||
VC4_TEXTURE_TYPE_LUMALPHA = 7,
|
||||
VC4_TEXTURE_TYPE_ETC1 = 8,
|
||||
VC4_TEXTURE_TYPE_S16F = 9,
|
||||
VC4_TEXTURE_TYPE_S8 = 10,
|
||||
VC4_TEXTURE_TYPE_S16 = 11,
|
||||
VC4_TEXTURE_TYPE_BW1 = 12,
|
||||
VC4_TEXTURE_TYPE_A4 = 13,
|
||||
VC4_TEXTURE_TYPE_A1 = 14,
|
||||
VC4_TEXTURE_TYPE_RGBA64 = 15,
|
||||
VC4_TEXTURE_TYPE_RGBA32R = 16,
|
||||
VC4_TEXTURE_TYPE_YUV422R = 17,
|
||||
};
|
||||
|
||||
/*
 * Bit fields of texture parameter word 0 (P0). Each field comes as a
 * _MASK/_SHIFT pair; the _SHIFT is the field's lowest bit position.
 * NOTE(review): VC4_MASK is defined outside this view; from its use here
 * it presumably takes (high bit, low bit) -- confirm.
 */
#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
|
||||
#define VC4_TEX_P0_OFFSET_SHIFT 12
|
||||
#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10)
|
||||
#define VC4_TEX_P0_CSWIZ_SHIFT 10
|
||||
#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9)
|
||||
#define VC4_TEX_P0_CMMODE_SHIFT 9
|
||||
#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8)
|
||||
#define VC4_TEX_P0_FLIPY_SHIFT 8
|
||||
#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4)
|
||||
#define VC4_TEX_P0_TYPE_SHIFT 4
|
||||
#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0)
|
||||
#define VC4_TEX_P0_MIPLVLS_SHIFT 0
|
||||
|
||||
/*
 * Bit fields of texture parameter word 1 (P1), as _MASK/_SHIFT pairs.
 * The "# define" entries listed under a field are the unshifted values
 * that field can hold.
 */
#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31)
|
||||
#define VC4_TEX_P1_TYPE4_SHIFT 31
|
||||
#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20)
|
||||
#define VC4_TEX_P1_HEIGHT_SHIFT 20
|
||||
#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19)
|
||||
#define VC4_TEX_P1_ETCFLIP_SHIFT 19
|
||||
#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8)
|
||||
#define VC4_TEX_P1_WIDTH_SHIFT 8
|
||||
|
||||
/* Magnification filter (1 bit). */
#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7)
|
||||
#define VC4_TEX_P1_MAGFILT_SHIFT 7
|
||||
# define VC4_TEX_P1_MAGFILT_LINEAR 0
|
||||
# define VC4_TEX_P1_MAGFILT_NEAREST 1
|
||||
|
||||
/* Minification filter (3 bits); values 2-5 are the mipmapped modes. */
#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4)
|
||||
#define VC4_TEX_P1_MINFILT_SHIFT 4
|
||||
# define VC4_TEX_P1_MINFILT_LINEAR 0
|
||||
# define VC4_TEX_P1_MINFILT_NEAREST 1
|
||||
# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2
|
||||
# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3
|
||||
# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4
|
||||
# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5
|
||||
|
||||
/*
 * Wrap modes, two bits each for T and S.
 * NOTE(review): the WRAP_* values carry no S/T suffix, so presumably they
 * apply to both fields -- confirm.
 */
#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2)
|
||||
#define VC4_TEX_P1_WRAP_T_SHIFT 2
|
||||
#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0)
|
||||
#define VC4_TEX_P1_WRAP_S_SHIFT 0
|
||||
# define VC4_TEX_P1_WRAP_REPEAT 0
|
||||
# define VC4_TEX_P1_WRAP_CLAMP 1
|
||||
# define VC4_TEX_P1_WRAP_MIRROR 2
|
||||
# define VC4_TEX_P1_WRAP_BORDER 3
|
||||
|
||||
/*
 * Bit fields of texture parameter word 2 (P2). The PTYPE field in the top
 * two bits selects which of the layouts below applies to the remaining
 * bits; the per-layout fields overlap each other.
 */
#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30)
|
||||
#define VC4_TEX_P2_PTYPE_SHIFT 30
|
||||
# define VC4_TEX_P2_PTYPE_IGNORED 0
|
||||
# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1
|
||||
# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2
|
||||
# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3
|
||||
|
||||
/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
|
||||
#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12)
|
||||
#define VC4_TEX_P2_CMST_SHIFT 12
|
||||
#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0)
|
||||
#define VC4_TEX_P2_BSLOD_SHIFT 0
|
||||
|
||||
/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
|
||||
#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12)
|
||||
#define VC4_TEX_P2_CHEIGHT_SHIFT 12
|
||||
#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0)
|
||||
#define VC4_TEX_P2_CWIDTH_SHIFT 0
|
||||
|
||||
/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
|
||||
#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12)
|
||||
#define VC4_TEX_P2_CYOFF_SHIFT 12
|
||||
#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0)
|
||||
#define VC4_TEX_P2_CXOFF_SHIFT 0
|
||||
|
||||
#endif /* VC4_PACKET_H */
|
||||
634
drivers/gpu/drm/vc4/vc4_render_cl.c
Normal file
634
drivers/gpu/drm/vc4/vc4_render_cl.c
Normal file
File diff suppressed because it is too large
Load Diff
63
drivers/gpu/drm/vc4/vc4_trace.h
Normal file
63
drivers/gpu/drm/vc4/vc4_trace.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Broadcom
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _VC4_TRACE_H_
|
||||
|
||||
#include <linux/stringify.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM vc4
|
||||
#define TRACE_INCLUDE_FILE vc4_trace
|
||||
|
||||
/*
 * Tracepoint vc4_wait_for_seqno_begin.
 *
 * Records dev->primary->index (as "dev"), the seqno and the timeout, and
 * prints them as unsigned decimals.
 * NOTE(review): presumably emitted when a wait on a seqno starts; the
 * call site and the unit of "timeout" are not visible here -- confirm.
 */
TRACE_EVENT(vc4_wait_for_seqno_begin,
|
||||
TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
|
||||
TP_ARGS(dev, seqno, timeout),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, dev)
|
||||
__field(u64, seqno)
|
||||
__field(u64, timeout)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev->primary->index;
|
||||
__entry->seqno = seqno;
|
||||
__entry->timeout = timeout;
|
||||
),
|
||||
|
||||
TP_printk("dev=%u, seqno=%llu, timeout=%llu",
|
||||
__entry->dev, __entry->seqno, __entry->timeout)
|
||||
);
|
||||
|
||||
/*
 * Tracepoint vc4_wait_for_seqno_end.
 *
 * Records dev->primary->index (as "dev") and the seqno, and prints them
 * as unsigned decimals.
 * NOTE(review): presumably emitted when a wait on a seqno finishes; the
 * call site is not visible here -- confirm.
 */
TRACE_EVENT(vc4_wait_for_seqno_end,
|
||||
TP_PROTO(struct drm_device *dev, uint64_t seqno),
|
||||
TP_ARGS(dev, seqno),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, dev)
|
||||
__field(u64, seqno)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = dev->primary->index;
|
||||
__entry->seqno = seqno;
|
||||
),
|
||||
|
||||
TP_printk("dev=%u, seqno=%llu",
|
||||
__entry->dev, __entry->seqno)
|
||||
);
|
||||
|
||||
#endif /* _VC4_TRACE_H_ */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
#include <trace/define_trace.h>
|
||||
14
drivers/gpu/drm/vc4/vc4_trace_points.c
Normal file
14
drivers/gpu/drm/vc4/vc4_trace_points.c
Normal file
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (C) 2015 Broadcom
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include "vc4_drv.h"
|
||||
|
||||
#ifndef __CHECKER__
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "vc4_trace.h"
|
||||
#endif
|
||||
@@ -144,6 +144,21 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
/*
|
||||
* Asks the firmware to turn on power to the V3D engine.
|
||||
*
|
||||
* This may be doable with just the clocks interface, though this
|
||||
* packet does some other register setup from the firmware, too.
|
||||
*/
|
||||
int
|
||||
vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
|
||||
{
|
||||
if (on)
|
||||
return pm_generic_poweroff(&vc4->v3d->pdev->dev);
|
||||
else
|
||||
return pm_generic_resume(&vc4->v3d->pdev->dev);
|
||||
}
|
||||
|
||||
static void vc4_v3d_init_hw(struct drm_device *dev)
|
||||
{
|
||||
struct vc4_dev *vc4 = to_vc4_dev(dev);
|
||||
@@ -161,6 +176,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
|
||||
struct drm_device *drm = dev_get_drvdata(master);
|
||||
struct vc4_dev *vc4 = to_vc4_dev(drm);
|
||||
struct vc4_v3d *v3d = NULL;
|
||||
int ret;
|
||||
|
||||
v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
|
||||
if (!v3d)
|
||||
@@ -180,8 +196,20 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Reset the binner overflow address/size at setup, to be sure
|
||||
* we don't reuse an old one.
|
||||
*/
|
||||
V3D_WRITE(V3D_BPOA, 0);
|
||||
V3D_WRITE(V3D_BPOS, 0);
|
||||
|
||||
vc4_v3d_init_hw(drm);
|
||||
|
||||
ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to install IRQ handler\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -191,6 +219,15 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master,
|
||||
struct drm_device *drm = dev_get_drvdata(master);
|
||||
struct vc4_dev *vc4 = to_vc4_dev(drm);
|
||||
|
||||
drm_irq_uninstall(drm);
|
||||
|
||||
/* Disable the binner's overflow memory address, so the next
|
||||
* driver probe (if any) doesn't try to reuse our old
|
||||
* allocation.
|
||||
*/
|
||||
V3D_WRITE(V3D_BPOA, 0);
|
||||
V3D_WRITE(V3D_BPOS, 0);
|
||||
|
||||
vc4->v3d = NULL;
|
||||
}
|
||||
|
||||
|
||||
900
drivers/gpu/drm/vc4/vc4_validate.c
Normal file
900
drivers/gpu/drm/vc4/vc4_validate.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -26,14 +26,155 @@
|
||||
|
||||
#include "drm.h"
|
||||
|
||||
#define DRM_VC4_SUBMIT_CL 0x00
|
||||
#define DRM_VC4_WAIT_SEQNO 0x01
|
||||
#define DRM_VC4_WAIT_BO 0x02
|
||||
#define DRM_VC4_CREATE_BO 0x03
|
||||
#define DRM_VC4_MMAP_BO 0x04
|
||||
#define DRM_VC4_CREATE_SHADER_BO 0x05
|
||||
|
||||
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
|
||||
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
|
||||
#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
|
||||
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
|
||||
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
|
||||
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
|
||||
|
||||
/*
 * Describes one surface used by the RCL setup of struct drm_vc4_submit_cl,
 * which embeds one of these for each of its read/write surfaces.
 */
struct drm_vc4_submit_rcl_surface {
|
||||
__u32 hindex; /* Handle index, or ~0 if not present. */
|
||||
__u32 offset; /* Offset to start of buffer. */
|
||||
/*
|
||||
* Bits for either render config (color_write) or load/store packet.
|
||||
* Bits should all be 0 for MSAA load/stores.
|
||||
*/
|
||||
__u16 bits;
|
||||
|
||||
/* Values for the flags field below. */
#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0)
|
||||
__u16 flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
|
||||
* engine.
|
||||
*
|
||||
* Drivers typically use GPU BOs to store batchbuffers / command lists and
|
||||
* their associated state. However, because the VC4 lacks an MMU, we have to
|
||||
* do validation of memory accesses by the GPU commands. If we were to store
|
||||
* our commands in BOs, we'd need to do uncached readback from them to do the
|
||||
* validation process, which is too expensive. Instead, userspace accumulates
|
||||
* commands and associated state in plain memory, then the kernel copies the
|
||||
* data to its own address space, and then validates and stores it in a GPU
|
||||
* BO.
|
||||
*/
|
||||
struct drm_vc4_submit_cl {
|
||||
/* Pointer to the binner command list.
|
||||
*
|
||||
* This is the first set of commands executed, which runs the
|
||||
* coordinate shader to determine where primitives land on the screen,
|
||||
* then writes out the state updates and draw calls necessary per tile
|
||||
* to the tile allocation BO.
|
||||
*/
|
||||
__u64 bin_cl;
|
||||
|
||||
/* Pointer to the shader records.
|
||||
*
|
||||
* Shader records are the structures read by the hardware that contain
|
||||
* pointers to uniforms, shaders, and vertex attributes. The
|
||||
* reference to the shader record has enough information to determine
|
||||
* how many pointers are necessary (fixed number for shaders/uniforms,
|
||||
* and an attribute count), so those BO indices into bo_handles are
|
||||
* just stored as __u32s before each shader record passed in.
|
||||
*/
|
||||
__u64 shader_rec;
|
||||
|
||||
/* Pointer to uniform data and texture handles for the textures
|
||||
* referenced by the shader.
|
||||
*
|
||||
* For each shader state record, there is a set of uniform data in the
|
||||
* order referenced by the record (FS, VS, then CS). Each set of
|
||||
* uniform data has a __u32 index into bo_handles per texture
|
||||
* sample operation, in the order the QPU_W_TMUn_S writes appear in
|
||||
* the program. Following the texture BO handle indices is the actual
|
||||
* uniform data.
|
||||
*
|
||||
* The individual uniform state blocks don't have sizes passed in,
|
||||
* because the kernel has to determine the sizes anyway during shader
|
||||
* code validation.
|
||||
*/
|
||||
__u64 uniforms;
|
||||
/* Pointer to the array of BO handles that the indices above refer to;
 * bo_handle_count gives the number of entries (4 bytes each).
 */
__u64 bo_handles;
|
||||
|
||||
/* Size in bytes of the binner command list. */
|
||||
__u32 bin_cl_size;
|
||||
/* Size in bytes of the set of shader records. */
|
||||
__u32 shader_rec_size;
|
||||
/* Number of shader records.
|
||||
*
|
||||
* This could just be computed from the contents of shader_rec and
|
||||
* the address bits of references to them from the bin CL, but it
|
||||
* keeps the kernel from having to resize some allocations it makes.
|
||||
*/
|
||||
__u32 shader_rec_count;
|
||||
/* Size in bytes of the uniform state. */
|
||||
__u32 uniforms_size;
|
||||
|
||||
/* Number of BO handles passed in (size is that times 4). */
|
||||
__u32 bo_handle_count;
|
||||
|
||||
/* RCL setup: */
|
||||
__u16 width;
|
||||
__u16 height;
|
||||
__u8 min_x_tile;
|
||||
__u8 min_y_tile;
|
||||
__u8 max_x_tile;
|
||||
__u8 max_y_tile;
|
||||
struct drm_vc4_submit_rcl_surface color_read;
|
||||
struct drm_vc4_submit_rcl_surface color_write;
|
||||
struct drm_vc4_submit_rcl_surface zs_read;
|
||||
struct drm_vc4_submit_rcl_surface zs_write;
|
||||
struct drm_vc4_submit_rcl_surface msaa_color_write;
|
||||
struct drm_vc4_submit_rcl_surface msaa_zs_write;
|
||||
/* Clear values for color, Z and stencil.
 * NOTE(review): presumably only used when VC4_SUBMIT_CL_USE_CLEAR_COLOR
 * is set in flags -- confirm against the ioctl implementation.
 */
__u32 clear_color[2];
|
||||
__u32 clear_z;
|
||||
__u8 clear_s;
|
||||
|
||||
/* 24-bit padding bitfield following the 8-bit clear_s. */
__u32 pad:24;
|
||||
|
||||
/* Values for the flags field below. */
#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
|
||||
__u32 flags;
|
||||
|
||||
/* Returned value of the seqno of this render job (for the
|
||||
* wait ioctl).
|
||||
*/
|
||||
__u64 seqno;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_vc4_wait_seqno - ioctl argument for waiting for
|
||||
* DRM_VC4_SUBMIT_CL completion using its returned seqno.
|
||||
*
|
||||
* timeout_ns is the timeout in nanoseconds, where "0" means "don't
|
||||
* block, just return the status."
|
||||
*/
|
||||
struct drm_vc4_wait_seqno {
|
||||
/* Seqno to wait for, as returned in drm_vc4_submit_cl.seqno. */
__u64 seqno;
|
||||
/* Timeout in nanoseconds; 0 means don't block, just return the status. */
__u64 timeout_ns;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_vc4_wait_bo - ioctl argument for waiting for
|
||||
* completion of the last DRM_VC4_SUBMIT_CL on a BO.
|
||||
*
|
||||
* This is useful for cases where multiple processes might be
|
||||
* rendering to a BO and you want to wait for all rendering to be
|
||||
* completed.
|
||||
*/
|
||||
struct drm_vc4_wait_bo {
|
||||
/* Handle of the BO to wait on. */
__u32 handle;
|
||||
/* Explicit padding so that timeout_ns sits at an 8-byte offset. */
__u32 pad;
|
||||
/* NOTE(review): presumably nanoseconds with the same semantics as
 * drm_vc4_wait_seqno.timeout_ns -- confirm.
 */
__u64 timeout_ns;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user