Files
kernel/drivers/nvdla/include/dla_interface.h
Farzad Farshchi a479b1f19d nvdla: add NVDLA driver
Additional update from Prashant Gaikwad <pgaikwad@nvidia.com>
Adapted for Linux 5.13 and the BeagleV Starlight board by
<cybergaszcz@gmail.com>

kernel test robot: fix platform_no_drv_owner.cocci warnings
Geert: Use div_u64() in dla_get_time_us()

Signed-off-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20220119060057.GA1143@7f39e361da8f
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2203090905560.780932@ramsan.of.borg
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
2022-10-21 16:50:50 +02:00

887 lines
20 KiB
C

/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FIRMWARE_DLA_INTERFACE_H_
#define __FIRMWARE_DLA_INTERFACE_H_
#include <nvdla_interface.h>
/**
* @ingroup Processors
* @name DLA Processors
* Processor modules in DLA engine. Each processor has its
* own operation a.k.a. HW layer. Network is formed using
* graph of these operations.
*
* The identifiers are consecutive and start at 0.
* NOTE(review): presumably they double as indices into the arrays sized
* by DLA_OP_NUM (e.g. dla_network_desc::op_head) - confirm with the users.
* @{
*/
#define DLA_OP_BDMA 0
#define DLA_OP_CONV 1
#define DLA_OP_SDP 2
#define DLA_OP_PDP 3
#define DLA_OP_CDP 4
#define DLA_OP_RUBIK 5
/** @} */
/**
* @ingroup Processors
* @name Maximum number of processors
* @brief DLA has 6 processors
*
* Must stay equal to the number of DLA_OP_* identifiers. It sizes
* dla_network_desc::op_head and dla_common_op_desc::consumers.
* @{
*/
#define DLA_OP_NUM 6
/** @} */
/**
* @ingroup Processors
* @name Number of groups
* @brief Each processor has 2 groups of registers
* @{
*/
#define DLA_NUM_GROUPS 2
/** @} */
/**
* Network descriptor
*
* Contains all information to execute a network
*
* @op_head: Index of first operation of each type in operations list
* (one entry per processor, DLA_OP_NUM entries in total)
* @num_rois: Number of ROIs
* @num_operations: Number of operations in one list
* @num_luts: Number of LUTs
*
* The other members are not described in this header.
* NOTE(review): the *_index members look like indices into the task
* address list (compare dla_data_cube::address) - confirm with the
* code that loads the network.
*
* The structure is declared __packed __aligned(4); the member order and
* widths define its binary layout.
*/
struct dla_network_desc {
int16_t operation_desc_index;
int16_t surface_desc_index;
int16_t dependency_graph_index;
int16_t lut_data_index;
int16_t roi_array_index;
int16_t surface_index;
int16_t stat_list_index;
int16_t reserved1;
int16_t op_head[DLA_OP_NUM];
uint16_t num_rois;
uint16_t num_operations;
uint16_t num_luts;
uint16_t num_addresses;
int16_t input_layer;
uint8_t dynamic_roi;
uint8_t reserved0;
} __packed __aligned(4);
/**
* @name Memory types
* @brief DLA engine can read/write to/from 3 memory types
*
* NOTE(review): presumably the values stored in dla_data_cube::type,
* which is annotated "dla_mem_type" - confirm.
* @{
*/
#define DLA_MEM_MC 0 /* External DRAM */
#define DLA_MEM_CV 1 /* CV-SRAM */
#define DLA_MEM_HW 2 /* DLA sub-module */
/** @} */
/**
* @ingroup Events
* @name Operation events
* @brief Different events triggered by an operation
*
* Numbering starts at 1; the value 0 is not assigned to any event here.
* NOTE(review): presumably the values carried in dla_consumer::event -
* confirm.
* @{
*/
#define DLA_EVENT_OP_COMPLETED 1
#define DLA_EVENT_OP_PROGRAMMED 2
#define DLA_EVENT_OP_ENABLED 3
#define DLA_EVENT_CDMA_WT_DONE 4
#define DLA_EVENT_CDMA_DT_DONE 5
/** @} */
/**
* Reference from one operation to another operation in the dependency
* graph.
*
* @index: index of the referenced dla_common_op_desc in dep_graph_addr
* @event: NOTE(review): presumably one of DLA_EVENT_* - confirm
* @res: not described in this header; the name suggests "reserved"
*
* Used by dla_common_op_desc for both its consumers[] array and its
* fused_parent member.
*/
struct dla_consumer {
int16_t index; /* the index of dla_common_op_desc in dep_graph_addr */
uint8_t event;
uint8_t res;
} __packed __aligned(4);
/**
* Descriptor fields common to every operation, independent of the
* processor that executes it.
*
* @index: set by ucode
* @roi_index: not described in this header
* @op_type: NOTE(review): presumably one of DLA_OP_* - confirm
* @dependency_count: not described in this header
* @reserved0: 3 reserved bytes
* @consumers: one dla_consumer slot per processor type (DLA_OP_NUM)
* @fused_parent: a single additional dla_consumer reference
*/
struct dla_common_op_desc {
int16_t index; /* set by ucode */
int8_t roi_index;
uint8_t op_type;
uint8_t dependency_count;
uint8_t reserved0[3];
struct dla_consumer consumers[DLA_OP_NUM];
struct dla_consumer fused_parent;
} __packed __aligned(4);
/**
* Header of a ROI array: two 32-bit words.
*
* @array_length: NOTE(review): presumably the number of dla_roi_desc
* entries that follow - confirm
* @array_reserved: not described in this header
*/
struct dla_roi_array_desc {
uint32_t array_length;
uint32_t array_reserved;
} __packed __aligned(4);
/**
* One region of interest, given by its four edges as unsigned 32-bit
* values.
*
* NOTE(review): units and whether right/bottom are inclusive are not
* stated in this header - confirm before relying on them.
*/
struct dla_roi_desc {
uint32_t left;
uint32_t top;
uint32_t right;
uint32_t bottom;
} __packed __aligned(4);
/**
* @ingroup BDMA
* @name Maximum BDMA transfers
* @brief BDMA supports multiple transfers in one operation. This is the
* maximum number of transfers possible in one operation.
*
* It sizes dla_bdma_surface_desc::transfers.
* @{
*/
#define NUM_MAX_BDMA_OPS 20
/** @} */
/**
* Parameters of a single BDMA transfer.
*
* @source_address / @destination_address are 16-bit signed values.
* NOTE(review): presumably indices into the task address list, like
* dla_data_cube::address - confirm.
*
* The remaining members are 32-bit line/surface sizes, repeat counts and
* per-side line/surface values; their units are not stated in this
* header.
*/
struct dla_bdma_transfer_desc {
int16_t source_address;
int16_t destination_address;
uint32_t line_size;
uint32_t line_repeat;
uint32_t source_line;
uint32_t destination_line;
uint32_t surface_repeat;
uint32_t source_surface;
uint32_t destination_surface;
} __packed __aligned(4);
/**
* Surface descriptor of a BDMA operation.
*
* @source_type / @destination_type: NOTE(review): presumably DLA_MEM_*
* values - confirm
* @num_transfers: NOTE(review): presumably the number of valid entries
* in @transfers - confirm
* @transfers: fixed-size table with room for NUM_MAX_BDMA_OPS transfers
*/
struct dla_bdma_surface_desc {
uint8_t source_type;
uint8_t destination_type;
uint16_t num_transfers;
struct dla_bdma_transfer_desc transfers[NUM_MAX_BDMA_OPS];
} __packed __aligned(4);
/**
* Operation descriptor of a BDMA operation: a 16-bit transfer count
* followed by 16 reserved bits.
*
* Note that dla_bdma_surface_desc carries a num_transfers member too.
*/
struct dla_bdma_op_desc {
uint16_t num_transfers;
uint16_t reserved0;
} __packed __aligned(4);
/**
* Statistics of a BDMA operation: three 32-bit counters.
*
* NOTE(review): units (cycles, microseconds, ...) are not stated in this
* header - confirm against the code that fills them in.
*/
struct dla_bdma_stat_desc {
uint32_t read_stall;
uint32_t write_stall;
uint32_t runtime;
} __packed __aligned(4);
/**
* @ingroup Convolution
* @name Convolution mode
* @brief Convolution modes supported by DLA
*
* Members annotated "dla_conv_mode" (dla_conv_op_desc::conv_mode,
* dla_sdp_op_desc::conv_mode) refer to these values.
* @{
*/
#define CONV_MODE_DIRECT 0
#define CONV_MODE_WINOGRAD 1
/** @} */
/**
* @ingroup Processors
* @name Precision BPE mapping
* @brief Precision formats and Bytes Per Element mapping
*
* The values match the byte width of each format (INT8 = 1,
* INT16 = FP16 = 2); INT16 and FP16 intentionally share the value 2.
* @{
*/
#define BPE_PRECISION_INT8 1
#define BPE_PRECISION_INT16 2
#define BPE_PRECISION_FP16 2
/** @} */
/**
* @ingroup Processors
* @name Precision types
* @brief Precision formats supported by DLA engine
*
* Members annotated "dla_precision" in the operation descriptors refer
* to these values. Unlike BPE_PRECISION_*, every format has a distinct
* identifier here.
* @{
*/
#define PRECISION_INT8 0
#define PRECISION_INT16 1
#define PRECISION_FP16 2
/** @} */
/**
* @ingroup Processors
* @name Data formats
* @brief Data formats supported by DLA engine
*
* Identifiers 0..35 (FORMAT_T_*) are consecutive; FORMAT_FEATURE (36)
* follows them as the only identifier without the "_T_" infix.
* NOTE(review): presumably FORMAT_T_* are pixel (image) formats and
* FORMAT_FEATURE denotes feature data - confirm. The member
* dla_conv_op_desc::data_format is annotated "dla_data_format".
* @{
*/
#define FORMAT_T_R8 0
#define FORMAT_T_R10 1
#define FORMAT_T_R12 2
#define FORMAT_T_R16 3
#define FORMAT_T_R16_I 4
#define FORMAT_T_R16_F 5
#define FORMAT_T_A16B16G16R16 6
#define FORMAT_T_X16B16G16R16 7
#define FORMAT_T_A16B16G16R16_F 8
#define FORMAT_T_A16Y16U16V16 9
#define FORMAT_T_V16U16Y16A16 10
#define FORMAT_T_A16Y16U16V16_F 11
#define FORMAT_T_A8B8G8R8 12
#define FORMAT_T_A8R8G8B8 13
#define FORMAT_T_B8G8R8A8 14
#define FORMAT_T_R8G8B8A8 15
#define FORMAT_T_X8B8G8R8 16
#define FORMAT_T_X8R8G8B8 17
#define FORMAT_T_B8G8R8X8 18
#define FORMAT_T_R8G8B8X8 19
#define FORMAT_T_A2B10G10R10 20
#define FORMAT_T_A2R10G10B10 21
#define FORMAT_T_B10G10R10A2 22
#define FORMAT_T_R10G10B10A2 23
#define FORMAT_T_A2Y10U10V10 24
#define FORMAT_T_V10U10Y10A2 25
#define FORMAT_T_A8Y8U8V8 26
#define FORMAT_T_V8U8Y8A8 27
#define FORMAT_T_Y8___U8V8_N444 28
#define FORMAT_T_Y8___V8U8_N444 29
#define FORMAT_T_Y10___U10V10_N444 30
#define FORMAT_T_Y10___V10U10_N444 31
#define FORMAT_T_Y12___U12V12_N444 32
#define FORMAT_T_Y12___V12U12_N444 33
#define FORMAT_T_Y16___U16V16_N444 34
#define FORMAT_T_Y16___V16U16_N444 35
#define FORMAT_FEATURE 36
/** @} */
/**
* @ingroup Convolution
* @name Pixel mapping
* @brief Pixel mapping formats supported for image input in Convolution
*
* Only pitch-linear mapping is defined in this header. The member
* dla_conv_op_desc::pixel_mapping is annotated "dla_pixel_mapping".
* @{
*/
#define MAP_PITCH_LINEAR 0
/** @} */
/**
* @ingroup Convolution
* @name Weight formats
* @brief Weight data formats supported in Convolution
*
* The member dla_conv_op_desc::weight_format is annotated
* "dla_weight_format".
* @{
*/
#define WEIGHT_FORMAT_UNCOMPRESSED 0
#define WEIGHT_FORMAT_COMPRESSED 1
/** @} */
/**
* @ingroup Convolution
* @name Mean data format
* @brief Mean data formats supported in Convolution
*
* The member dla_conv_op_desc::mean_format is annotated
* "dla_mean_format".
* @{
*/
#define MEAN_FORMAT_DISABLE 0
#define MEAN_FORMAT_ENABLE 1
/** @} */
/**
* Converter parameters, reused for the input/output converters of
* several processors (see the in_cvt/out_cvt/alu_cvt/mul_cvt members
* elsewhere in this header).
*
* @scale: signed 16-bit scale
* @truncate: 8-bit truncate value
* @enable: 8-bit enable flag
* @offset: signed 32-bit offset
*
* NOTE(review): the order in which offset, scale and truncate are
* applied is not stated in this header - confirm against the hardware
* documentation.
*/
struct dla_cvt_param {
int16_t scale;
uint8_t truncate;
uint8_t enable;
int32_t offset;
} __packed __aligned(4);
/**
* Describes one data cube: the memory it lives in, where it starts, its
* dimensions and its strides. Every per-processor surface descriptor in
* this header is built from these.
*
* NOTE(review): the units of size and of the stride members are not
* stated here (bytes are likely) - confirm.
*/
struct dla_data_cube {
uint16_t type; /* dla_mem_type */
int16_t address; /* offset to the actual IOVA in task.address_list */
uint32_t offset; /* offset within address */
uint32_t size;
/* cube dimensions */
uint16_t width;
uint16_t height;
uint16_t channel;
uint16_t reserved0;
/* stride information */
uint32_t line_stride;
uint32_t surf_stride;
/* For Rubik only */
uint32_t plane_stride;
} __packed __aligned(4);
/*
 * Pixel override selectors.
 * NOTE(review): presumably the values of dla_conv_op_desc::pixel_override
 * (treat pixel input as unsigned or signed integer) - confirm.
 */
#define PIXEL_OVERRIDE_UINT 0
#define PIXEL_OVERRIDE_INT 1
/**
* Surface descriptor of a convolution operation: five data cubes
* followed by the parameters of the second (UV) plane.
*
* NOTE(review): wmb_data and wgs_data are not explained in this header;
* presumably they are auxiliary cubes for compressed weights - confirm.
*/
struct dla_conv_surface_desc {
/* Data cube */
struct dla_data_cube weight_data;
struct dla_data_cube wmb_data;
struct dla_data_cube wgs_data;
struct dla_data_cube src_data;
struct dla_data_cube dst_data;
/**
* u_addr = input_data.source_addr + offset_u
* this field should be set when the YUV input is not in an
* interleaved format
*/
int64_t offset_u;
/* line stride for 2nd plane, must be 32bytes aligned */
uint32_t in_line_uv_stride;
} __packed __aligned(4);
/**
* Operation descriptor of a convolution operation.
*
* The members are grouped, in order, into performance parameters,
* CSC/CMAC cube dimensions, algorithm parameters (mean, stride, padding,
* dilation) and precision parameters. Comments of the form
* "dla_xxx" name the constant family a member takes its value from.
*
* The structure is __packed, so members such as pad_val are not
* naturally aligned; reserved members only hold the layout in place.
*/
struct dla_conv_op_desc {
/* Performance parameters */
/* dla_conv_mode */
uint8_t conv_mode;
uint8_t data_reuse;
uint8_t weight_reuse;
uint8_t skip_data_rls;
uint8_t skip_weight_rls;
uint8_t reserved0;
uint16_t entry_per_slice;
/* dla_data_format */
uint8_t data_format;
/* dla_pixel_mapping */
uint8_t pixel_mapping;
/* number of free slices before fetch */
uint16_t fetch_grain;
uint8_t reserved_b[8];
/* batch_num */
uint8_t batch;
/* dla_weight_format */
uint8_t weight_format;
uint8_t data_bank;
uint8_t weight_bank;
/* the offset in bytes of each data cube in a batch */
uint32_t batch_stride;
uint8_t post_extension;
uint8_t pixel_override;
/* number of slices that need to be released */
uint16_t release;
/* The input cube dimension for CSC */
uint16_t input_width_csc;
uint16_t input_height_csc;
uint16_t input_channel_csc;
uint16_t kernel_width_csc;
uint16_t kernel_height_csc;
uint16_t kernel_channel_csc;
/* The input cube dimension for CMAC */
uint16_t input_width_cmac;
uint16_t input_height_cmac;
/* actual size in bytes */
uint32_t bytes_per_kernel;
/* Algorithm parameters */
int16_t mean_ry; /* mean value for red in RGB or Y in YUV */
int16_t mean_gu; /* mean value for green in RGB or U in YUV */
int16_t mean_bv; /* mean value for blue in RGB or V in YUV */
int16_t mean_ax;
uint8_t mean_format; /* dla_mean_format */
uint8_t conv_stride_x;
uint8_t conv_stride_y;
uint8_t pad_x_left;
uint8_t pad_x_right;
uint8_t pad_y_top;
uint8_t pad_y_bottom;
uint8_t dilation_x;
uint8_t dilation_y;
uint8_t reserved2[2];
/* Precision parameters */
uint8_t pra_truncate;
uint8_t in_precision;
/* The output precision from CONV, it's the MAC processing precision */
uint8_t out_precision;
int16_t pad_val;
/* input converter parameters */
struct dla_cvt_param in_cvt;
/* output converter parameters, support truncate only */
struct dla_cvt_param out_cvt;
} __packed __aligned(4);
/**
* Statistics of a convolution operation: ten 32-bit counters covering
* read stalls and latencies for data and weights, saturation, NaN/Inf
* occurrences in data and weights, and runtime.
*
* NOTE(review): units are not stated in this header - confirm against
* the code that fills them in.
*/
struct dla_conv_stat_desc {
uint32_t data_read_stall;
uint32_t weight_read_stall;
uint32_t data_read_latency;
uint32_t weight_read_latency;
uint32_t saturation_count;
uint32_t nan_data_num;
uint32_t nan_weight_num;
uint32_t inf_data_num;
uint32_t inf_weight_num;
uint32_t runtime;
} __packed __aligned(4);
/**
* @ingroup SDP
* @name Activation functions
* @brief Activation functions supported in SDP
*
* The member dla_sdp_op::act is annotated "dla_act_type".
* @{
*/
#define ACTIVATION_NONE 0
#define ACTIVATION_RELU 1
#define ACTIVATION_LUT 2
#define ACTIVATION_PRELU 3
/** @} */
/**
* @ingroup LUT
* @name LUT size
* @brief LUT sizes for linear and exponential LUT
*
* Both values are base-2 logarithms. dla_lut_param allocates
* (1 << LOG2) + 1 entries per table, i.e. 65 and 257 entries.
* @{
*/
#define LUT_LINEAR_EXP_TABLE_ENTRY_LOG2 6
#define LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2 8
/** @} */
/**
* @ingroup LUT
* @name LUT types
* @brief DLA supports two types of LUT, linear and exponential
* @{
*/
#define LUT_LINEAR_EXP_TABLE 0
#define LUT_LINEAR_ONLY_TABLE 1
/** @} */
/**
* @ingroup LUT
* @name LUT methods
* @brief DLA supports two LUT methods, exponential and linear
*
* The member dla_lut_param::method is annotated "dla_lut_method".
* @{
*/
#define LUT_METHOD_EXPONENTIAL 0
#define LUT_METHOD_LINEAR 1
/** @} */
/**
* @ingroup LUT
* @name LUT priority
* @brief Selects which of the two LUTs takes priority
*
* See the "dla_lut_priority" comment on the *_priority members of
* dla_lut_param: they decide which table provides the output when both
* are hit, or when one overflows and the other underflows.
* @{
*/
#define LUT_PRI_LINEAR_EXP 0
#define LUT_PRI_LINEAR_ONLY 1
/** @} */
/**
 * Offset applied to a value before it is used to look up a LUT.
 *
 * All members overlay the same storage; reserved0 widens the union to
 * 16 bits.
 */
union dla_lut_offset {
	/**
	 * Amount to subtract in the log domain before looking up the
	 * exponential table. It is defined exactly as in hardware, so
	 * the input scaling has to be taken into account when this
	 * field is set.
	 */
	int8_t exp_offset;

	/**
	 * Number of bits to shift right before looking up the linear
	 * table.
	 */
	int8_t frac_bits;

	uint16_t reserved0;
};
/**
* This struct is used to represent floating point values with integers.
* Suppose we have a floating point number fp_x; it is represented as:
*
* fp_x = scale_int_x>>(shifter_x)
*
* where scale_int_x is stored in @scale and shifter_x in @shifter.
* This is very useful for the INT pipeline.
*
* @reserved0 pads the structure to 4 bytes.
*/
struct dla_float_data {
int16_t scale;
int8_t shifter;
uint8_t reserved0;
} __packed __aligned(4);
/**
* Slope value whose encoding depends on the pipeline in use.
*
* For the INT pipeline, struct dla_float_data (@data_i) represents the
* floating point number; for the FP16 pipeline, the FP16 encoded value
* is stored in a uint16_t container (@data_f).
*
* Both members overlay the same storage; nothing in the union itself
* records which one is valid.
*/
union dla_slope {
struct dla_float_data data_i;
uint16_t data_f;
};
/**
* Complete description of one LUT pair: the two tables, their offsets,
* their input ranges, the slopes used outside those ranges and the
* priority/method selectors.
*/
struct dla_lut_param {
/**
* The value of ((1<<LUT_LINEAR_EXP_TABLE_ENTRY_LOG2)+1) is 65 and of
* ((1<<LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2)+1) is 257; int16_t is 2 bytes
* wide, so the two tables together occupy (65 + 257) * 2 = 644 bytes.
*
* NOTE: the combined size of the two declarations below should always
* be a multiple of 4.
*/
int16_t linear_exp_table[(1<<LUT_LINEAR_EXP_TABLE_ENTRY_LOG2)+1];
int16_t linear_only_table[(1<<LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2)+1];
union dla_lut_offset linear_exp_offset;
union dla_lut_offset linear_only_offset;
/**
* The start and end point of raw table,
* valid when raw_method=LINEAR only
*/
uint64_t linear_exp_start;
uint64_t linear_exp_end;
uint64_t linear_only_start;
uint64_t linear_only_end;
/* slopes, one per table and per direction (underflow/overflow) */
union dla_slope linear_exp_underflow_slope;
union dla_slope linear_exp_overflow_slope;
union dla_slope linear_only_underflow_slope;
union dla_slope linear_only_overflow_slope;
/**
* dla_lut_priority: when both LUTs are hit (or one overflows and
* the other underflows), which one should be selected as output
*/
uint8_t hybrid_priority;
uint8_t underflow_priority;
uint8_t overflow_priority;
uint8_t method; /* dla_lut_method */
} __packed __aligned(4);
/**
* Surface descriptor of an SDP operation: one source cube, three
* operand cubes (X1, X2, Y) and one destination cube.
*/
struct dla_sdp_surface_desc {
/* Data cube */
/* source input cube, available when SDP is working in offline mode */
struct dla_data_cube src_data;
/* X1 input cube */
struct dla_data_cube x1_data;
/* X2 input cube */
struct dla_data_cube x2_data;
/* Y input cube */
struct dla_data_cube y_data;
/* Output cube */
struct dla_data_cube dst_data;
} __packed __aligned(4);
#define SDP_OP_NONE 0
#define SDP_OP_MUL 1
#define SDP_OP_ADD 2
#define SDP_OP_BOTH 3
#define SDP_ALU_OP_MAX 0
#define SDP_ALU_OP_MIN 1
#define SDP_ALU_OP_SUM 2
#define SDP_ALU_OP_EQL 3
#define SDP_OP_PER_LAYER 0
#define SDP_OP_PER_KERNEL 1
#define SDP_OP_PER_POINT 2
/**
* Pair of converter settings used by one SDP sub-operation: one for the
* ALU path and one for the multiplier path.
*/
struct dla_sdp_cvt {
struct dla_cvt_param alu_cvt;
struct dla_cvt_param mul_cvt;
} __packed __aligned(4);
/**
* Configuration of one SDP sub-operation. dla_sdp_op_desc holds three of
* these (x1_op, x2_op, y_op).
*
* The trailing comments name the constant family each selector uses.
* NOTE(review): alu_operand/mul_operand are presumably the constant
* operands used in per-layer mode - confirm.
*/
struct dla_sdp_op {
uint8_t enable;
uint8_t alu_type; /* dla_sdp_alu_op_type */
uint8_t type; /* dla_sdp_op_type */
uint8_t mode; /* dla_sdp_op_mode */
uint8_t act; /* dla_act_type */
uint8_t shift_value; /* left shift */
uint8_t truncate;
uint8_t precision;
int32_t alu_operand;
int32_t mul_operand;
struct dla_sdp_cvt cvt;
} __packed __aligned(4);
/**
* Operation descriptor of an SDP operation: precision and output
* converter settings, batching parameters and the three sub-operations
* X1, X2 and Y.
*
* NOTE(review): lut_index presumably selects a dla_lut_param entry;
* the meaning of negative values is not stated here - confirm.
*/
struct dla_sdp_op_desc {
/* Precision parameters */
/* dla_precision */
uint8_t src_precision;
uint8_t dst_precision;
int16_t lut_index;
struct dla_cvt_param out_cvt;
/* Performance parameters */
/* dla_conv_mode */
uint8_t conv_mode;
uint8_t batch_num;
uint16_t reserved0;
uint32_t batch_stride; /* will be used when batch_num > 1 */
/* Algorithm parameters */
struct dla_sdp_op x1_op;
struct dla_sdp_op x2_op;
struct dla_sdp_op y_op;
} __packed __aligned(4);
/**
* Statistics of an SDP operation: eleven 32-bit counters covering
* NaN/Inf occurrences, write stalls, LUT hit/underflow/overflow counts,
* saturation and runtime.
*
* NOTE(review): lut_le_hit / lut_lo_hit presumably count hits in the
* linear-exp and linear-only tables; units are not stated - confirm.
*/
struct dla_sdp_stat_desc {
uint32_t nan_input_num;
uint32_t inf_input_num;
uint32_t nan_output_num;
uint32_t wdma_write_stall;
uint32_t lut_underflow;
uint32_t lut_overflow;
uint32_t lut_hybrid;
uint32_t lut_le_hit;
uint32_t lut_lo_hit;
uint32_t saturation_count;
uint32_t runtime;
} __packed __aligned(4);
#define POOL_MODE_AVG 0
#define POOL_MODE_MAX 1
#define POOL_MODE_MIN 2
#define POOL_SIZE_1 0
#define POOL_SIZE_2 1
#define POOL_SIZE_3 2
#define POOL_SIZE_4 3
#define POOL_SIZE_5 4
#define POOL_SIZE_6 5
#define POOL_SIZE_7 6
#define POOL_SIZE_8 7
#define PDP_PAD_VAL_NUM 7
/**
* Surface descriptor of a PDP operation: one source and one destination
* data cube.
*/
struct dla_pdp_surface_desc {
/* Data cube */
struct dla_data_cube src_data;
struct dla_data_cube dst_data;
} __packed __aligned(4);
/**
* Operation descriptor of a PDP operation: width-split parameters,
* pooling mode/size/stride, padding sizes, precision and the table of
* precomputed padding values.
*
* NOTE(review): the partial_* members presumably describe the
* first/middle/last slice when the width is split into split_num parts -
* confirm.
*/
struct dla_pdp_op_desc {
/* Performance parameters */
uint16_t partial_in_width_first;
uint16_t partial_in_width_mid;
uint16_t partial_in_width_last;
uint16_t partial_width_first;
uint16_t partial_width_mid;
uint16_t partial_width_last;
uint8_t split_num;
/* Algorithm parameters */
uint8_t pool_mode; /* dla_pool_mode */
uint8_t pool_width; /* dla_pool_width */
uint8_t pool_height; /* dla_pool_height */
uint8_t stride_x;
uint8_t stride_y;
/**
* The left/right padding size,
* pad_right might be less than pad_left
*/
uint8_t pad_left;
uint8_t pad_right;
/* The top/bottom padding size */
uint8_t pad_top;
uint8_t pad_bottom;
/* Precision parameters */
uint8_t precision; /* dla_precision */
uint8_t reserved0;
/**
* If the input has a non-zero "offset", this value should be set.
* There are 7 different padding values; the relationship between
* them is:
* padding_value[0] = -offset*scaling;
* padding_value[1] = 2*padding_value[0]
* padding_value[2] = 3*padding_value[0]
* ...
* The purpose is to avoid having ucode implement an FP16
* multiplier (for FP16 mode)
*/
int32_t padding_value[PDP_PAD_VAL_NUM];
} __packed __aligned(4);
/**
* Statistics of a PDP operation: five 32-bit counters.
*
* Note that inf_input_num comes before nan_input_num here, the reverse
* of the order used by the SDP and CDP statistics.
* NOTE(review): units are not stated in this header - confirm.
*/
struct dla_pdp_stat_desc {
uint32_t inf_input_num;
uint32_t nan_input_num;
uint32_t nan_output_num;
uint32_t write_stall;
uint32_t runtime;
} __packed __aligned(4);
/**
* Surface descriptor of a CDP operation: one source and one destination
* data cube.
*/
struct dla_cdp_surface_desc {
/* Data cube */
struct dla_data_cube src_data;
struct dla_data_cube dst_data;
} __packed __aligned(4);
/**
* Operation descriptor of a CDP operation: precisions, LUT selection,
* input/output converters and the algorithm switches.
*
* NOTE(review): lut_index presumably selects a dla_lut_param entry and
* local_size the normalization window - confirm.
*/
struct dla_cdp_op_desc {
/* Precision parameters */
/* dla_precision */
uint8_t in_precision;
uint8_t out_precision;
int16_t lut_index;
struct dla_cvt_param in_cvt;
struct dla_cvt_param out_cvt;
/* Performance parameters */
/* Algorithm parameters */
uint8_t local_size;
uint8_t bypass_sqsum;
uint8_t bypass_out_mul;
uint8_t reserved0;
} __packed __aligned(4);
/**
* Statistics of a CDP operation: eleven 32-bit counters.
*
* The set mirrors dla_sdp_stat_desc, but the member names differ
* (write_stall, lut_uflow, lut_oflow instead of wdma_write_stall,
* lut_underflow, lut_overflow).
* NOTE(review): units are not stated in this header - confirm.
*/
struct dla_cdp_stat_desc {
uint32_t nan_input_num;
uint32_t inf_input_num;
uint32_t nan_output_num;
uint32_t write_stall;
uint32_t lut_uflow;
uint32_t lut_oflow;
uint32_t lut_hybrid;
uint32_t lut_le_hit;
uint32_t lut_lo_hit;
uint32_t saturation_count;
uint32_t runtime;
} __packed __aligned(4);
/**
* Surface descriptor of a Rubik operation: one source and one
* destination data cube. dla_data_cube::plane_stride is documented as
* being used for Rubik only.
*/
struct dla_rubik_surface_desc {
/* Data cube */
struct dla_data_cube src_data;
struct dla_data_cube dst_data;
} __packed __aligned(4);
/*
 * rubik mode
 * NOTE(review): presumably the values of dla_rubik_op_desc::mode -
 * confirm.
 */
#define RUBIK_MODE_CONTRACT 0
#define RUBIK_MODE_SPLIT 1
#define RUBIK_MODE_MERGE 2
/**
* Operation descriptor of a Rubik operation: four 8-bit members, exactly
* 4 bytes in total.
*
* NOTE(review): mode is presumably one of RUBIK_MODE_* and precision one
* of PRECISION_* - confirm.
*/
struct dla_rubik_op_desc {
/* Precision parameters */
uint8_t mode;
uint8_t precision;
uint8_t stride_x;
uint8_t stride_y;
} __packed __aligned(4);
/**
* Statistics of a Rubik operation: three 32-bit counters, with the same
* members as dla_bdma_stat_desc.
*
* NOTE(review): units are not stated in this header - confirm.
*/
struct dla_rubik_stat_desc {
uint32_t read_stall;
uint32_t write_stall;
uint32_t runtime;
} __packed __aligned(4);
/**
* Storage for the surface descriptor of any one processor type. All
* members overlay each other, so the union is as large as its largest
* member.
*
* NOTE(review): the valid member is presumably selected by
* dla_common_op_desc::op_type (DLA_OP_*) - confirm.
*/
union dla_surface_container {
struct dla_bdma_surface_desc bdma_surface;
struct dla_conv_surface_desc conv_surface;
struct dla_sdp_surface_desc sdp_surface;
struct dla_pdp_surface_desc pdp_surface;
struct dla_cdp_surface_desc cdp_surface;
struct dla_rubik_surface_desc rubik_surface;
};
/**
* Storage for the operation descriptor of any one processor type. All
* members overlay each other, so the union is as large as its largest
* member.
*
* NOTE(review): the valid member is presumably selected by
* dla_common_op_desc::op_type (DLA_OP_*) - confirm.
*/
union dla_operation_container {
struct dla_bdma_op_desc bdma_op;
struct dla_conv_op_desc conv_op;
struct dla_sdp_op_desc sdp_op;
struct dla_pdp_op_desc pdp_op;
struct dla_cdp_op_desc cdp_op;
struct dla_rubik_op_desc rubik_op;
};
/**
* Storage for the statistics of any one processor type. All members
* overlay each other, so the union is as large as its largest member.
*
* NOTE(review): the valid member is presumably selected by
* dla_common_op_desc::op_type (DLA_OP_*) - confirm.
*/
union dla_stat_container {
struct dla_bdma_stat_desc bdma_stat;
struct dla_conv_stat_desc conv_stat;
struct dla_sdp_stat_desc sdp_stat;
struct dla_pdp_stat_desc pdp_stat;
struct dla_cdp_stat_desc cdp_stat;
struct dla_rubik_stat_desc rubik_stat;
};
/**
* status notifier structure
*
* @timestamp: 64-bit timestamp representing the time at which
* the notifier was written
* @status_engine: status word captured from HW engine
* @subframe: NA
* @status_task: status word as configured from an action list
*
* The structure is __packed __aligned(4), so the 64-bit @timestamp is
* only guaranteed 4-byte alignment.
*/
struct dla_task_status {
uint64_t timestamp;
uint32_t status_engine;
uint16_t subframe;
uint16_t status_task;
} __packed __aligned(4);
#endif