RK3368 GPU version: Rogue M 1.31+

1. Add kernel 4.4 support to Rogue M.
2. Disable OPEN_GPU_PD temporarily.

Change-Id: Ia4eefdd4929b7e694ea11907db700a84993b864d
Signed-off-by: zxl <zhuangxl@rock-chips.com>
Author: zxl <zhuangxl@rock-chips.com>
Date: 2016-01-06 14:34:57 +08:00
parent 1da242fea4, commit f4a9cd3d1e
9 changed files with 243 additions and 18 deletions
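Every file below applies the same compile-time strategy: each 4.4-incompatible call site is wrapped in a LINUX_VERSION_CODE guard so one source tree keeps building on both older kernels and 4.4. A minimal sketch of the pattern (the helper name is hypothetical, not from this commit):

#include <linux/version.h>

/* Illustrates the guard used throughout the hunks below. */
static inline const char *rogue_m_api_era(void)
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
	return "pre-4.4 APIs: get_unused_fd(), __GFP_WAIT, <linux/clk-private.h>";
#else
	return "4.4 APIs: get_unused_fd_flags(0), __GFP_RECLAIM, <linux/clk-provider.h>";
#endif
}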


@@ -1,6 +1,8 @@
 config POWERVR_ROGUE_M
 	tristate "PowerVR Rogue M"
+	default n
 	help
-	  Driver for PowerVR Rogue M hardware. Say Y here if your SoC contains
-	  a PowerVR Rogue GPU. For more information, see
-	  <http://www.imgtec.com/powervr/>.
+	  Driver for PowerVR Rogue hardware.
+	  Say Y here if your SoC contains a PowerVR Rogue GPU. For more
+	  information, see <http://www.imgtec.com/powervr/>.


@@ -67,6 +67,9 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * 4. Get raw ion_device by IonDevAcquire.
  * M 1.31
  *    1. Merge 1.5_ED3830101 DDK code.
+ * M 1.31+
+ *    1. Add kernel 4.4 support to Rogue M.
+ *    2. Disable OPEN_GPU_PD temporarily.
  */
 #define PVR_STR(X) #X
@@ -94,5 +97,5 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define PVRVERSION_UNPACK_MIN(VERSION) (((VERSION) >> 0) & 0xFFFF)
 //chenli:define rockchip version
-#define RKVERSION "Rogue M 1.31"
+#define RKVERSION "Rogue M 1.31+"
 #endif /* _PVRVERSION_H_ */


@@ -1653,7 +1653,7 @@ static long pvr_sync_ioctl_create_fence(struct pvr_sync_timeline *timeline,
 {
 	struct pvr_sync_create_fence_ioctl_data data;
 	struct pvr_sync_alloc_data *alloc_sync_data;
-	int err = -EFAULT, fd = get_unused_fd();
+	int err = -EFAULT, fd = get_unused_fd_flags(0);
 	struct pvr_sync_data *sync_data;
 	struct pvr_sync_pt *pvr_pt;
 	struct sync_fence *fence;
@@ -1742,7 +1742,7 @@ static long pvr_sync_ioctl_alloc_fence(struct pvr_sync_timeline *timeline,
 {
 	struct pvr_sync_alloc_fence_ioctl_data data;
 	struct pvr_sync_alloc_data *alloc_sync_data;
-	int err = -EFAULT, fd = get_unused_fd();
+	int err = -EFAULT, fd = get_unused_fd_flags(0);
 	struct pvr_sync_data *sync_data;
 	struct file *file;
@@ -1894,7 +1894,7 @@ static long pvr_sync_ioctl_sw_create_fence(struct sw_sync_timeline *timeline,
 {
 	struct sw_sync_create_fence_data data;
 	struct sync_fence *fence;
-	int fd = get_unused_fd();
+	int fd = get_unused_fd_flags(0);
 	struct sync_pt *sync_pt;
 	int err = -EFAULT;
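All three hunks are the same mechanical substitution: get_unused_fd() was dropped from the kernel, and get_unused_fd_flags(0) is the equivalent replacement. For context, a minimal sketch of the reserve-then-install idiom these ioctls rely on (the helper name is hypothetical):

#include <linux/file.h>
#include <linux/fs.h>

static int example_publish_file(struct file *filp)
{
	/* reserve a descriptor number in the caller's fd table */
	int fd = get_unused_fd_flags(0);

	if (fd < 0)
		return fd; /* e.g. -EMFILE */

	/* atomically bind the struct file to the reserved descriptor */
	fd_install(fd, filp);
	return fd;
}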


@@ -41,6 +41,7 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */ /**************************************************************************/
+#include <linux/version.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <asm/cacheflush.h>
@@ -57,11 +58,14 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #error "CONFIG_OUTER_CACHE not supported on arm64."
 #endif
+extern struct platform_device *gpsPVRLDMDev;
 static void per_cpu_cache_flush(void *arg)
 {
 	PVR_UNREFERENCED_PARAMETER(arg);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
 	flush_cache_all();
+#endif
 }
 void OSCPUOperation(PVRSRV_CACHE_OP uiCacheOp)
@@ -90,6 +94,7 @@ void OSCPUOperation(PVRSRV_CACHE_OP uiCacheOp)
 	}
 }
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
 void OSFlushCPUCacheRangeKM(IMG_PVOID pvVirtStart,
                             IMG_PVOID pvVirtEnd,
                             IMG_CPU_PHYADDR sCPUPhysStart,
                             IMG_CPU_PHYADDR sCPUPhysEnd)
@@ -114,3 +119,53 @@ void OSInvalidateCPUCacheRangeKM(IMG_PVOID pvVirtStart,
 {
 	dma_ops->sync_single_for_cpu(NULL, sCPUPhysStart.uiAddr, sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr, DMA_FROM_DEVICE);
 }
+#else
+void OSFlushCPUCacheRangeKM(IMG_PVOID pvVirtStart,
+                            IMG_PVOID pvVirtEnd,
+                            IMG_CPU_PHYADDR sCPUPhysStart,
+                            IMG_CPU_PHYADDR sCPUPhysEnd)
+{
+	const struct dma_map_ops *dma_ops = get_dma_ops(&gpsPVRLDMDev->dev);
+	dma_ops->sync_single_for_device(&gpsPVRLDMDev->dev,
+					sCPUPhysStart.uiAddr,
+					sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr,
+					DMA_TO_DEVICE);
+	dma_ops->sync_single_for_cpu(&gpsPVRLDMDev->dev,
+				     sCPUPhysStart.uiAddr,
+				     sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr,
+				     DMA_FROM_DEVICE);
+}
+void OSCleanCPUCacheRangeKM(IMG_PVOID pvVirtStart,
+                            IMG_PVOID pvVirtEnd,
+                            IMG_CPU_PHYADDR sCPUPhysStart,
+                            IMG_CPU_PHYADDR sCPUPhysEnd)
+{
+	const struct dma_map_ops *dma_ops = get_dma_ops(&gpsPVRLDMDev->dev);
+	dma_ops->sync_single_for_device(&gpsPVRLDMDev->dev,
+					sCPUPhysStart.uiAddr,
+					sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr,
+					DMA_TO_DEVICE);
+}
+void OSInvalidateCPUCacheRangeKM(IMG_PVOID pvVirtStart,
+                                 IMG_PVOID pvVirtEnd,
+                                 IMG_CPU_PHYADDR sCPUPhysStart,
+                                 IMG_CPU_PHYADDR sCPUPhysEnd)
+{
+	const struct dma_map_ops *dma_ops = get_dma_ops(&gpsPVRLDMDev->dev);
+	dma_ops->sync_single_for_cpu(&gpsPVRLDMDev->dev,
+				     sCPUPhysStart.uiAddr,
+				     sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr,
+				     DMA_FROM_DEVICE);
+}
+#endif
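On 4.4 the driver can no longer pass a NULL device to the streaming-DMA sync hooks, so the new branch resolves the dma_map_ops from the GPU's own platform device (gpsPVRLDMDev). A hedged sketch of the same clean-then-invalidate sequence against an arbitrary device (function and parameter names hypothetical):

#include <linux/device.h>
#include <linux/dma-mapping.h>

static void example_flush_phys_range(struct device *dev, dma_addr_t start,
				     size_t len)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	/* clean: write dirty CPU cache lines back to memory */
	ops->sync_single_for_device(dev, start, len, DMA_TO_DEVICE);
	/* invalidate: drop stale CPU cache lines for the range */
	ops->sync_single_for_cpu(dev, start, len, DMA_FROM_DEVICE);
}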


@@ -67,7 +67,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include <linux/version.h>
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,0,0))
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0))
 #include <linux/mm.h>
 #define PHYSMEM_SUPPORTS_SHRINKER
 #endif
@@ -92,13 +92,13 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #endif
 /* Provide SHRINK_STOP definition for kernel older than 3.12 */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0))
 #define SHRINK_STOP (~0UL)
 #endif
 #include "physmem_osmem_linux.h"
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
 /* split_pages() not available on older-kernels */
 #if (PVR_LINUX_PHYSMEM_MAX_ALLOC_ORDER > 0)
 /* This includes bin (i.e. bucket) for order-0 */
@@ -443,14 +443,14 @@ _ScanObjectsInPagePool(struct shrinker *psShrinker, struct shrink_control *psShrinkControl)
 	remain = g_ui32PagePoolEntryCount;
 	_PagePoolUnlock();
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0))
 	return remain;
 #else
 	return psShrinkControl->nr_to_scan - uNumToScan;
 #endif
 }
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0))
 static int
 _ShrinkPagePool(struct shrinker *psShrinker, struct shrink_control *psShrinkControl)
 {
@@ -1000,7 +1000,11 @@ _AllocOSHigherOrderPages(struct _PMR_OSPAGEARRAY_DATA_ *psPageArrayData,
 		/* Disable retry/wait at order > 0 */
 		PVR_ASSERT(psPageArrayData->uiNumPages > 1);
 		gfp_flags |= __GFP_NORETRY;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
 		gfp_flags &= ~__GFP_WAIT;
+#else
+		gfp_flags &= ~__GFP_RECLAIM;
+#endif
 	}
 	/* Re-express uiNumPages in multi-order up to cut-off order */
@@ -1079,7 +1083,11 @@ _AllocOSHigherOrderPages(struct _PMR_OSPAGEARRAY_DATA_ *psPageArrayData,
 	{
 		/* Enable retry/wait at order-0 */
 		gfp_flags &= ~__GFP_NORETRY;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
 		gfp_flags |= __GFP_WAIT;
+#else
+		gfp_flags |= __GFP_RECLAIM;
+#endif
 	}
 	/* Accumulate remaining failed order into lower order */
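The last two hunks track a 4.4 rename in the page-allocator flags: __GFP_WAIT no longer exists, and __GFP_RECLAIM now carries the "may sleep and enter reclaim" meaning. A version-portable sketch (helper name hypothetical):

#include <linux/gfp.h>
#include <linux/version.h>

static inline gfp_t example_strip_wait(gfp_t flags)
{
	/* clear the may-sleep/may-reclaim bit, whatever this kernel calls it */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
	return flags & ~__GFP_WAIT;
#else
	return flags & ~__GFP_RECLAIM;
#endif
}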


@@ -1,10 +1,9 @@
 #if defined(SUPPORT_ION)
 #include "ion_sys.h"
 #endif /* defined(SUPPORT_ION) */
 #include <linux/version.h>
 #include <linux/hardirq.h>
 #include <linux/clk.h>
-#include <linux/clk-private.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
@@ -21,16 +20,22 @@
 #include <linux/module.h>
 #include <linux/freezer.h>
 #include <linux/sched/rt.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
+#include <linux/clk-private.h>
+#else
+#include <linux/clk-provider.h>
+#endif
 #include "power.h"
 #include "rk_init.h"
 #include <asm/compiler.h>
 #if RK33_DVFS_SUPPORT
 static IMG_HANDLE ghGpuUtilDvfs = NULL;
 #endif
 #if RK_TF_VERSION
 #define PSCI_RKSIP_TF_VERSION (0x82000001)
 static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,
 					 u64 arg2)
 {
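<linux/clk-private.h> was removed for 4.4, when struct clk internals became private to the clock framework; providers include <linux/clk-provider.h>, and plain consumers need only <linux/clk.h>. A hedged consumer-side sketch (the function and the "clk_gpu" lookup name are assumptions, not taken from this driver):

#include <linux/clk.h>
#include <linux/device.h>

static int example_enable_gpu_clock(struct device *dev)
{
	/* "clk_gpu" is an assumed device-tree clock name */
	struct clk *gpu_clk = devm_clk_get(dev, "clk_gpu");

	if (IS_ERR(gpu_clk))
		return PTR_ERR(gpu_clk);

	/* prepare (may sleep) and enable (atomic) in one call */
	return clk_prepare_enable(gpu_clk);
}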


@@ -17,7 +17,7 @@
 #define RK33_USE_CUSTOMER_GET_GPU_UTIL 0
 #define RK33_USE_CL_COUNT_UTILS 0
-#define OPEN_GPU_PD 1
+#define OPEN_GPU_PD 0
 //USE_KTHREAD and USE_HRTIMER are mutually exclusive
 #define USE_KTHREAD 0


@@ -40,7 +40,7 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */ /**************************************************************************/
+#include <linux/version.h>
 #include "pvrsrv_device.h"
 #include "syscommon.h"
 #include "sysconfig.h"
@@ -50,6 +50,11 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #endif
 #include "rk_init.h"
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+#include <linux/platform_device.h>
+extern struct platform_device *gpsPVRLDMDev;
+#endif
 static RGX_TIMING_INFORMATION gsRGXTimingInfo;
 static RGX_DATA gsRGXData;
 static PVRSRV_DEVICE_CONFIG gsDevices[1];
@@ -178,7 +183,11 @@ PVRSRV_ERROR SysCreateConfigData(PVRSRV_SYSTEM_CONFIG **ppsSysConfig, void *hDev
 	/* Device setup information */
 	gsDevices[0].sRegsCpuPBase.uiAddr = RK_GPU_PBASE;
 	gsDevices[0].ui32RegsSize = RK_GPU_SIZE;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0))
 	gsDevices[0].ui32IRQ = RK_IRQ_GPU;
+#else
+	gsDevices[0].ui32IRQ = platform_get_irq(gpsPVRLDMDev, 0);
+#endif
 	gsDevices[0].bIRQIsShared = IMG_FALSE;
 	/* Device's physical heap IDs */
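Once the device is probed from the device tree on 4.4, the interrupt number is assigned dynamically, so the hard-coded RK_IRQ_GPU constant gives way to platform_get_irq() on the bound platform device. A short sketch (function name hypothetical):

#include <linux/platform_device.h>

static int example_gpu_irq(struct platform_device *pdev)
{
	/* first interrupt resource from the node; negative errno on failure */
	return platform_get_irq(pdev, 0);
}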

include/trace/events/gpu.h (new file, 143 additions)

@@ -0,0 +1,143 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM gpu
#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_GPU_H
#include <linux/tracepoint.h>
#include <linux/time.h>
#define show_secs_from_ns(ns) \
({ \
u64 t = ns + (NSEC_PER_USEC / 2); \
do_div(t, NSEC_PER_SEC); \
t; \
})
#define show_usecs_from_ns(ns) \
({ \
u64 t = ns + (NSEC_PER_USEC / 2); \
u32 rem; \
do_div(t, NSEC_PER_USEC); \
rem = do_div(t, USEC_PER_SEC); \
})
/*
* The gpu_sched_switch event indicates that a switch from one GPU context to
* another occurred on one of the GPU hardware blocks.
*
* The gpu_name argument identifies the GPU hardware block. Each independently
* scheduled GPU hardware block should have a different name. This may be used
* in different ways for different GPUs. For example, if a GPU includes
* multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU
* includes a separately scheduled 2D and 3D hardware block, it might use the
* names "2D" and "3D".
*
* The timestamp argument is the timestamp at which the switch occurred on the
* GPU. These timestamps are in units of nanoseconds and must use
* approximately the same time as sched_clock, though they need not come from
* any CPU clock. The timestamps for a single hardware block must be
* monotonically nondecreasing. This means that if a variable compensation
* offset is used to translate from some other clock to the sched_clock, then
* care must be taken when increasing that offset, and doing so may result in
* multiple events with the same timestamp.
*
* The next_ctx_id argument identifies the next context that was running on
* the GPU hardware block. A value of 0 indicates that the hardware block
* will be idle.
*
* The next_prio argument indicates the priority of the next context at the
* time of the event. The exact numeric values may mean different things for
* different GPUs, but they should follow the rule that lower values indicate a
* higher priority.
*
* The next_job_id argument identifies the batch of work that the GPU will be
* working on. This should correspond to a job_id that was previously traced
* as a gpu_job_enqueue event when the batch of work was created.
*/
TRACE_EVENT(gpu_sched_switch,
TP_PROTO(const char *gpu_name, u64 timestamp,
u32 next_ctx_id, s32 next_prio, u32 next_job_id),
TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id),
TP_STRUCT__entry(
__string( gpu_name, gpu_name )
__field( u64, timestamp )
__field( u32, next_ctx_id )
__field( s32, next_prio )
__field( u32, next_job_id )
),
TP_fast_assign(
__assign_str(gpu_name, gpu_name);
__entry->timestamp = timestamp;
__entry->next_ctx_id = next_ctx_id;
__entry->next_prio = next_prio;
__entry->next_job_id = next_job_id;
),
TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld "
"next_job_id=%lu",
__get_str(gpu_name),
(unsigned long long)show_secs_from_ns(__entry->timestamp),
(unsigned long)show_usecs_from_ns(__entry->timestamp),
(unsigned long)__entry->next_ctx_id,
(long)__entry->next_prio,
(unsigned long)__entry->next_job_id)
);
/*
* The gpu_job_enqueue event indicates that a batch of work has been queued up
* to be processed by the GPU. This event is not intended to indicate that
* the batch of work has been submitted to the GPU hardware, but rather that
* it has been submitted to the GPU kernel driver.
*
* This event should be traced on the thread that initiated the work being
* queued. For example, if a batch of work is submitted to the kernel by a
* userland thread, the event should be traced on that thread.
*
* The ctx_id field identifies the GPU context in which the batch of work
* being queued is to be run.
*
* The job_id field identifies the batch of work being queued within the given
* GPU context. The first batch of work submitted for a given GPU context
* should have a job_id of 0, and each subsequent batch of work should
* increment the job_id by 1.
*
* The type field identifies the type of the job being enqueued. The job
* types may be different for different GPU hardware. For example, a GPU may
* differentiate between "2D", "3D", and "compute" jobs.
*/
TRACE_EVENT(gpu_job_enqueue,
TP_PROTO(u32 ctx_id, u32 job_id, const char *type),
TP_ARGS(ctx_id, job_id, type),
TP_STRUCT__entry(
__field( u32, ctx_id )
__field( u32, job_id )
__string( type, type )
),
TP_fast_assign(
__entry->ctx_id = ctx_id;
__entry->job_id = job_id;
__assign_str(type, type);
),
TP_printk("ctx_id=%lu job_id=%lu type=%s",
(unsigned long)__entry->ctx_id,
(unsigned long)__entry->job_id,
__get_str(type))
);
#undef show_secs_from_ns
#undef show_usecs_from_ns
#endif /* _TRACE_GPU_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
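For reference, drivers emit these events through the generated trace_*() stubs, and exactly one translation unit must define CREATE_TRACE_POINTS before including the header so the tracepoint bodies are instantiated. A hedged usage sketch (the calling function and its argument values are illustrative only):

#define CREATE_TRACE_POINTS
#include <trace/events/gpu.h>

static void example_trace_submit(u64 now_ns)
{
	/* job 0 of context 1 is queued as a "3D" workload... */
	trace_gpu_job_enqueue(1, 0, "3D");
	/* ...and the hardware block switches to it at time now_ns */
	trace_gpu_sched_switch("GPU 0", now_ns, 1, 0, 0);
}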