diff --git a/Documentation/ABI/testing/sysfs-device-mali b/Documentation/ABI/testing/sysfs-device-mali index b7be50338f6e..cd011dae9b50 100644 --- a/Documentation/ABI/testing/sysfs-device-mali +++ b/Documentation/ABI/testing/sysfs-device-mali @@ -236,6 +236,7 @@ Description: device-driver that supports a CSF GPU. The duration value unit is in milliseconds and is used for configuring csf scheduling tick duration. + What: /sys/class/misc/mali%u/device/reset_timeout Description: This attribute is used to set the number of milliseconds to diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 04e1bd1a5a39..2b3b1d028ccd 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -129,7 +129,7 @@ for details. set and the setting coresponding to the SYSC_ALLOC register. -Example for a Mali GPU with 1 clock and no regulators: +Example for a Mali GPU with 1 clock and 1 regulator: gpu@0xfc010000 { compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard"; diff --git a/drivers/base/arm/Kbuild b/drivers/base/arm/Kbuild index 01de13bef37c..e5ded4cf7395 100644 --- a/drivers/base/arm/Kbuild +++ b/drivers/base/arm/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -28,7 +28,6 @@ subdir-ccflags-y += $(ccflags-y) # # Kernel modules # -obj-$(CONFIG_DMA_BUF_LOCK) += dma_buf_lock/src/ obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma_buf_test_exporter/ obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) += memory_group_manager/ obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) += protected_memory_allocator/ diff --git a/drivers/base/arm/Kconfig b/drivers/base/arm/Kconfig index e5fca3a39ccb..7f9f1d4c418a 100644 --- a/drivers/base/arm/Kconfig +++ b/drivers/base/arm/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES Those modules provide extra features or debug interfaces and, are optional for the use of the Mali GPU modules. -config DMA_BUF_LOCK - bool "Build dma-buf lock module" - depends on MALI_BASE_MODULES && MALI_DMA_FENCE - default y - help - This option will build the dma_buf_lock module. 
- - Modules: - - dma_buf_lock.ko - config DMA_SHARED_BUFFER_TEST_EXPORTER bool "Build dma-buf framework test exporter module" depends on MALI_BASE_MODULES && DMA_SHARED_BUFFER diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index ed5c118a5ef4..c1a61a1106d0 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -38,11 +38,9 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y) CONFIG_MALI_CSF_SUPPORT ?= n ifneq ($(CONFIG_DMA_SHARED_BUFFER),n) - CONFIG_DMA_BUF_LOCK ?= y CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y else # Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n - CONFIG_DMA_BUF_LOCK = n CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n endif @@ -54,7 +52,6 @@ ifeq ($(CONFIG_MALI_BASE_MODULES),y) else # Prevent misuse when CONFIG_MALI_BASE_MODULES=n - CONFIG_DMA_BUF_LOCK = n CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n CONFIG_MALI_MEMORY_GROUP_MANAGER = n CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n @@ -64,10 +61,9 @@ endif CONFIGS := \ CONFIG_MALI_BASE_MODULES \ CONFIG_MALI_CSF_SUPPORT \ - CONFIG_DMA_BUF_LOCK \ CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \ CONFIG_MALI_MEMORY_GROUP_MANAGER \ - CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR \ # @@ -92,26 +88,47 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) -# The following were added to align with W=1 in scripts/Makefile.extrawarn -# from the Linux source tree KBUILD_CFLAGS += -Wall -Werror + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree (v5.18.14) KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter KBUILD_CFLAGS += -Wmissing-declarations KBUILD_CFLAGS += -Wmissing-format-attribute KBUILD_CFLAGS += -Wmissing-prototypes KBUILD_CFLAGS += -Wold-style-definition -KBUILD_CFLAGS += -Wmissing-include-dirs +# The -Wmissing-include-dirs flag cannot be enabled as the paths to some of the +# included directories change depending on whether it is an in-tree or +# out-of-tree build. KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-type-limits +KBUILD_CFLAGS += -Wno-shift-negative-value +# This flag is needed to avoid build errors on older kernels +KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 +# The following were added to align with W=2 in scripts/Makefile.extrawarn +# from the Linux source tree (v5.18.14) +KBUILD_CFLAGS += -Wdisabled-optimization +# The -Wshadow flag cannot be enabled unless upstream kernels are +# patched to fix redefinitions of certain built-in functions and +# global variables.
+KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) +KBUILD_CFLAGS += -Wmissing-field-initializers +KBUILD_CFLAGS += -Wtype-limits +KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) +KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 + +# This warning is disabled to avoid build failures in some kernel versions +KBUILD_CFLAGS += -Wno-ignored-qualifiers + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/drivers/base/arm/Mconfig b/drivers/base/arm/Mconfig index a48df6d8b090..f7787f0ccd34 100644 --- a/drivers/base/arm/Mconfig +++ b/drivers/base/arm/Mconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -26,16 +26,6 @@ menuconfig MALI_BASE_MODULES Those modules provide extra features or debug interfaces and, are optional for the use of the Mali GPU modules. -config DMA_BUF_LOCK - bool "Build dma-buf lock module" - depends on MALI_BASE_MODULES && MALI_DMA_FENCE - default y - help - This option will build the dma_buf_lock module. - - Modules: - - dma_buf_lock.ko - config DMA_SHARED_BUFFER_TEST_EXPORTER bool "Build dma-buf framework test exporter module" depends on MALI_BASE_MODULES @@ -45,7 +35,7 @@ config DMA_SHARED_BUFFER_TEST_EXPORTER Usable to help test importers. Modules: - - dma-buf-test-exporter.ko + - dma-buf-test-exporter.ko config MALI_MEMORY_GROUP_MANAGER bool "Build Mali Memory Group Manager module" @@ -57,7 +47,7 @@ config MALI_MEMORY_GROUP_MANAGER for memory pools managed by Mali GPU device drivers. Modules: - - memory_group_manager.ko + - memory_group_manager.ko config MALI_PROTECTED_MEMORY_ALLOCATOR bool "Build Mali Protected Memory Allocator module" @@ -70,5 +60,5 @@ config MALI_PROTECTED_MEMORY_ALLOCATOR of Mali GPU device drivers. Modules: - - protected_memory_allocator.ko + - protected_memory_allocator.ko diff --git a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c b/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c deleted file mode 100644 index 43333ca8e5e2..000000000000 --- a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c +++ /dev/null @@ -1,908 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -#include -#else -#include -#endif -#include -#include -#include -#include -#include -#include - -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - -#include - -#define dma_fence_context_alloc(a) fence_context_alloc(a) -#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) -#define dma_fence_get(a) fence_get(a) -#define dma_fence_put(a) fence_put(a) -#define dma_fence_signal(a) fence_signal(a) -#define dma_fence_is_signaled(a) fence_is_signaled(a) -#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) -#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) - -#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE) -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) -#else -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) -#endif - -#else - -#include - -#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) -#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ - (a)->status ?: 1 \ - : 0) -#endif - -#endif /* < 4.10.0 */ - -#include "dma_buf_lock.h" - -/* Maximum number of buffers that a single handle can address */ -#define DMA_BUF_LOCK_BUF_MAX 32 - -#define DMA_BUF_LOCK_DEBUG 1 - -#define DMA_BUF_LOCK_INIT_BIAS 0xFF - -static dev_t dma_buf_lock_dev; -static struct cdev dma_buf_lock_cdev; -static struct class *dma_buf_lock_class; -static const char dma_buf_lock_dev_name[] = "dma_buf_lock"; - -#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE)) -static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -#else -static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); -#endif - -static const struct file_operations dma_buf_lock_fops = { - .owner = THIS_MODULE, -#if defined(HAVE_UNLOCKED_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE)) - .unlocked_ioctl = dma_buf_lock_ioctl, -#endif -#if defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE)) - .compat_ioctl = dma_buf_lock_ioctl, -#endif -}; - -struct dma_buf_lock_resource { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence fence; -#else - struct dma_fence fence; -#endif - int *list_of_dma_buf_fds; /* List of buffers copied from userspace */ - atomic_t locked; /* Status of lock */ - struct dma_buf **dma_bufs; - unsigned long exclusive; /* Exclusive access bitmap */ - atomic_t fence_dep_count; /* Number of dma-fence dependencies */ - struct list_head dma_fence_callbacks; /* list of all callbacks set up to wait on other fences */ - wait_queue_head_t wait; - struct kref refcount; - struct list_head link; - struct work_struct work; - int count; -}; - -/** - * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence - * @fence_cb: Callback function - * @fence: Pointer to the fence object on which this callback is waiting - * @res: Pointer to dma_buf_lock_resource that is waiting on this callback - * @node: List head for linking this callback to the lock resource - */ -struct dma_buf_lock_fence_cb { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence_cb fence_cb; - struct fence *fence; -#else - struct dma_fence_cb fence_cb; - struct dma_fence *fence; -#endif - struct dma_buf_lock_resource *res; - struct list_head node; -}; - -static LIST_HEAD(dma_buf_lock_resource_list); 
-static DEFINE_MUTEX(dma_buf_lock_mutex); - -static inline int is_dma_buf_lock_file(struct file *); -static void dma_buf_lock_dounlock(struct kref *ref); - - -/*** dma_buf_lock fence part ***/ - -/* Spin lock protecting all Mali fences as fence->lock. */ -static DEFINE_SPINLOCK(dma_buf_lock_fence_lock); - -static const char * -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -dma_buf_lock_fence_get_driver_name(struct fence *fence) -#else -dma_buf_lock_fence_get_driver_name(struct dma_fence *fence) -#endif -{ - return "dma_buf_lock"; -} - -static const char * -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -dma_buf_lock_fence_get_timeline_name(struct fence *fence) -#else -dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence) -#endif -{ - return "dma_buf_lock.timeline"; -} - -static bool -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -dma_buf_lock_fence_enable_signaling(struct fence *fence) -#else -dma_buf_lock_fence_enable_signaling(struct dma_fence *fence) -#endif -{ - return true; -} - -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -const struct fence_ops dma_buf_lock_fence_ops = { - .wait = fence_default_wait, -#else -const struct dma_fence_ops dma_buf_lock_fence_ops = { - .wait = dma_fence_default_wait, -#endif - .get_driver_name = dma_buf_lock_fence_get_driver_name, - .get_timeline_name = dma_buf_lock_fence_get_timeline_name, - .enable_signaling = dma_buf_lock_fence_enable_signaling, -}; - -static void -dma_buf_lock_fence_init(struct dma_buf_lock_resource *resource) -{ - dma_fence_init(&resource->fence, - &dma_buf_lock_fence_ops, - &dma_buf_lock_fence_lock, - 0, - 0); -} - -static void -dma_buf_lock_fence_free_callbacks(struct dma_buf_lock_resource *resource) -{ - struct dma_buf_lock_fence_cb *cb, *tmp; - - /* Clean up and free callbacks. */ - list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) { - /* Cancel callbacks that hasn't been called yet and release the - * reference taken in dma_buf_lock_fence_add_callback(). - */ - dma_fence_remove_callback(cb->fence, &cb->fence_cb); - dma_fence_put(cb->fence); - list_del(&cb->node); - kfree(cb); - } -} - -static void -dma_buf_lock_fence_work(struct work_struct *pwork) -{ - struct dma_buf_lock_resource *resource = - container_of(pwork, struct dma_buf_lock_resource, work); - - WARN_ON(atomic_read(&resource->fence_dep_count)); - WARN_ON(!atomic_read(&resource->locked)); - WARN_ON(!resource->exclusive); - - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); -} - -static void -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb) -#else -dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) -#endif -{ - struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb, - struct dma_buf_lock_fence_cb, - fence_cb); - struct dma_buf_lock_resource *resource = dma_buf_lock_cb->res; - -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s\n", __func__); -#endif - - /* Callback function will be invoked in atomic context. 
*/ - - if (atomic_dec_and_test(&resource->fence_dep_count)) { - atomic_set(&resource->locked, 1); - wake_up(&resource->wait); - - if (resource->exclusive) - /* Warn if the work was already queued */ - WARN_ON(!schedule_work(&resource->work)); - } -} - -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -static int -dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource, - struct fence *fence, - fence_func_t callback) -#else -static int -dma_buf_lock_fence_add_callback(struct dma_buf_lock_resource *resource, - struct dma_fence *fence, - dma_fence_func_t callback) -#endif -{ - int err = 0; - struct dma_buf_lock_fence_cb *fence_cb; - - if (!fence) - return -EINVAL; - - fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL); - if (!fence_cb) - return -ENOMEM; - - fence_cb->fence = fence; - fence_cb->res = resource; - INIT_LIST_HEAD(&fence_cb->node); - - err = dma_fence_add_callback(fence, &fence_cb->fence_cb, - callback); - - if (err == -ENOENT) { - /* Fence signaled, get the completion result */ - err = dma_fence_get_status(fence); - - /* remap success completion to err code */ - if (err == 1) - err = 0; - - kfree(fence_cb); - } else if (err) { - kfree(fence_cb); - } else { - /* - * Get reference to fence that will be kept until callback gets - * cleaned up in dma_buf_lock_fence_free_callbacks(). - */ - dma_fence_get(fence); - atomic_inc(&resource->fence_dep_count); - /* Add callback to resource's list of callbacks */ - list_add(&fence_cb->node, &resource->dma_fence_callbacks); - } - - return err; -} - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -static int -dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource, - struct reservation_object *resv, - bool exclusive) -#else -static int -dma_buf_lock_add_fence_reservation_callback(struct dma_buf_lock_resource *resource, - struct dma_resv *resv, - bool exclusive) -#endif -{ -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *excl_fence = NULL; - struct fence **shared_fences = NULL; -#else - struct dma_fence *excl_fence = NULL; - struct dma_fence **shared_fences = NULL; -#endif - unsigned int shared_count = 0; - int err, i; - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - err = reservation_object_get_fences_rcu( -#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE) - err = dma_resv_get_fences_rcu( -#else - err = dma_resv_get_fences( -#endif - resv, - &excl_fence, - &shared_count, - &shared_fences); - if (err) - return err; - - if (excl_fence) { - err = dma_buf_lock_fence_add_callback(resource, - excl_fence, - dma_buf_lock_fence_callback); - - /* Release our reference, taken by reservation_object_get_fences_rcu(), - * to the fence. We have set up our callback (if that was possible), - * and it's the fence's owner is responsible for singling the fence - * before allowing it to disappear. - */ - dma_fence_put(excl_fence); - - if (err) - goto out; - } - - if (exclusive) { - for (i = 0; i < shared_count; i++) { - err = dma_buf_lock_fence_add_callback(resource, - shared_fences[i], - dma_buf_lock_fence_callback); - if (err) - goto out; - } - } - - /* Release all our references to the shared fences, taken by - * reservation_object_get_fences_rcu(). We have set up our callback (if - * that was possible), and it's the fence's owner is responsible for - * signaling the fence before allowing it to disappear. 
- */ -out: - for (i = 0; i < shared_count; i++) - dma_fence_put(shared_fences[i]); - kfree(shared_fences); - - return err; -} - -static void -dma_buf_lock_release_fence_reservation(struct dma_buf_lock_resource *resource, - struct ww_acquire_ctx *ctx) -{ - unsigned int r; - - for (r = 0; r < resource->count; r++) - ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock); - ww_acquire_fini(ctx); -} - -static int -dma_buf_lock_acquire_fence_reservation(struct dma_buf_lock_resource *resource, - struct ww_acquire_ctx *ctx) -{ -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object *content_resv = NULL; -#else - struct dma_resv *content_resv = NULL; -#endif - unsigned int content_resv_idx = 0; - unsigned int r; - int err = 0; - - ww_acquire_init(ctx, &reservation_ww_class); - -retry: - for (r = 0; r < resource->count; r++) { - if (resource->dma_bufs[r]->resv == content_resv) { - content_resv = NULL; - continue; - } - - err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx); - if (err) - goto error; - } - - ww_acquire_done(ctx); - return err; - -error: - content_resv_idx = r; - - /* Unlock the locked one ones */ - while (r--) - ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock); - - if (content_resv) - ww_mutex_unlock(&content_resv->lock); - - /* If we deadlock try with lock_slow and retry */ - if (err == -EDEADLK) { -#if DMA_BUF_LOCK_DEBUG - pr_debug("deadlock at dma_buf fd %i\n", - resource->list_of_dma_buf_fds[content_resv_idx]); -#endif - content_resv = resource->dma_bufs[content_resv_idx]->resv; - ww_mutex_lock_slow(&content_resv->lock, ctx); - goto retry; - } - - /* If we are here the function failed */ - ww_acquire_fini(ctx); - return err; -} - -static int dma_buf_lock_handle_release(struct inode *inode, struct file *file) -{ - struct dma_buf_lock_resource *resource; - - if (!is_dma_buf_lock_file(file)) - return -EINVAL; - - resource = file->private_data; -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s\n", __func__); -#endif - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - - return 0; -} - -static __poll_t dma_buf_lock_handle_poll(struct file *file, poll_table *wait) -{ - struct dma_buf_lock_resource *resource; - unsigned int ret = 0; - - if (!is_dma_buf_lock_file(file)) { -#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLERR; -#else - return EPOLLERR; -#endif - } - - resource = file->private_data; -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s\n", __func__); -#endif - if (atomic_read(&resource->locked) == 1) { - /* Resources have been locked */ -#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - ret = POLLIN | POLLRDNORM; - if (resource->exclusive) - ret |= POLLOUT | POLLWRNORM; -#else - ret = EPOLLIN | EPOLLRDNORM; - if (resource->exclusive) - ret |= EPOLLOUT | EPOLLWRNORM; -#endif - } else { - if (!poll_does_not_wait(wait)) - poll_wait(file, &resource->wait, wait); - } -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : return %i\n", __func__, ret); -#endif - return ret; -} - -static const struct file_operations dma_buf_lock_handle_fops = { - .owner = THIS_MODULE, - .release = dma_buf_lock_handle_release, - .poll = dma_buf_lock_handle_poll, -}; - -/* - * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock - */ -static inline int is_dma_buf_lock_file(struct file *file) -{ - return file->f_op == &dma_buf_lock_handle_fops; -} - -/* - * Start requested lock. 
- * - * Allocates required memory, copies dma_buf_fd list from userspace, - * acquires related reservation objects, and starts the lock. - */ -static int dma_buf_lock_dolock(struct dma_buf_lock_k_request *request) -{ - struct dma_buf_lock_resource *resource; - struct ww_acquire_ctx ww_ctx; - struct file *file; - int size; - int fd; - int i; - int ret; - int error; - - if (request->list_of_dma_buf_fds == NULL) - return -EINVAL; - if (request->count <= 0) - return -EINVAL; - if (request->count > DMA_BUF_LOCK_BUF_MAX) - return -EINVAL; - if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE && - request->exclusive != DMA_BUF_LOCK_EXCLUSIVE) - return -EINVAL; - - resource = kzalloc(sizeof(*resource), GFP_KERNEL); - if (resource == NULL) - return -ENOMEM; - - atomic_set(&resource->locked, 0); - kref_init(&resource->refcount); - INIT_LIST_HEAD(&resource->link); - INIT_WORK(&resource->work, dma_buf_lock_fence_work); - resource->count = request->count; - - /* Allocate space to store dma_buf_fds received from user space */ - size = request->count * sizeof(int); - resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL); - - if (resource->list_of_dma_buf_fds == NULL) { - kfree(resource); - return -ENOMEM; - } - - /* Allocate space to store dma_buf pointers associated with dma_buf_fds */ - size = sizeof(struct dma_buf *) * request->count; - resource->dma_bufs = kmalloc(size, GFP_KERNEL); - - if (resource->dma_bufs == NULL) { - kfree(resource->list_of_dma_buf_fds); - kfree(resource); - return -ENOMEM; - } - - /* Copy requested list of dma_buf_fds from user space */ - size = request->count * sizeof(int); - if (copy_from_user(resource->list_of_dma_buf_fds, - (void __user *)request->list_of_dma_buf_fds, - size) != 0) { - kfree(resource->list_of_dma_buf_fds); - kfree(resource->dma_bufs); - kfree(resource); - return -ENOMEM; - } -#if DMA_BUF_LOCK_DEBUG - for (i = 0; i < request->count; i++) - pr_debug("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]); -#endif - - /* Initialize the fence associated with dma_buf_lock resource */ - dma_buf_lock_fence_init(resource); - - INIT_LIST_HEAD(&resource->dma_fence_callbacks); - - atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS); - - /* Add resource to global list */ - mutex_lock(&dma_buf_lock_mutex); - - list_add(&resource->link, &dma_buf_lock_resource_list); - - mutex_unlock(&dma_buf_lock_mutex); - - for (i = 0; i < request->count; i++) { - /* Convert fd into dma_buf structure */ - resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]); - - if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) { - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - return -EINVAL; - } - - /*Check the reservation object associated with dma_buf */ - if (resource->dma_bufs[i]->resv == NULL) { - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - return -EINVAL; - } -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n", - __func__, resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv); -#endif - } - - init_waitqueue_head(&resource->wait); - - kref_get(&resource->refcount); - - error = get_unused_fd_flags(0); - if (error < 0) - return error; - - fd = error; - - file = anon_inode_getfile("dma_buf_lock", &dma_buf_lock_handle_fops, (void *)resource, 0); - - if (IS_ERR(file)) { - put_unused_fd(fd); - mutex_lock(&dma_buf_lock_mutex); - 
kref_put(&resource->refcount, dma_buf_lock_dounlock); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - return PTR_ERR(file); - } - - resource->exclusive = request->exclusive; - - /* Start locking process */ - ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx); - if (ret) { -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : Error %d locking reservations.\n", __func__, ret); -#endif - put_unused_fd(fd); - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - return ret; - } - - /* Take an extra reference for exclusive access, which will be dropped - * once the pre-existing fences attached to dma-buf resources, for which - * we have commited for exclusive access, are signaled. - * At a given time there can be only one exclusive fence attached to a - * reservation object, so the new exclusive fence replaces the original - * fence and the future sync is done against the new fence which is - * supposed to be signaled only after the original fence was signaled. - * If the new exclusive fence is signaled prematurely then the resources - * would become available for new access while they are already being - * written to by the original owner. - */ - if (resource->exclusive) - kref_get(&resource->refcount); - - for (i = 0; i < request->count; i++) { -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object *resv = resource->dma_bufs[i]->resv; -#else - struct dma_resv *resv = resource->dma_bufs[i]->resv; -#endif - if (!test_bit(i, &resource->exclusive)) { - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - ret = reservation_object_reserve_shared(resv); -#else - ret = dma_resv_reserve_shared(resv, 0); -#endif - if (ret) { -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : Error %d reserving space for shared fence.\n", __func__, ret); -#endif - break; - } - - ret = dma_buf_lock_add_fence_reservation_callback(resource, - resv, - false); - if (ret) { -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret); -#endif - break; - } - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - reservation_object_add_shared_fence(resv, &resource->fence); -#else - dma_resv_add_shared_fence(resv, &resource->fence); -#endif - } else { - ret = dma_buf_lock_add_fence_reservation_callback(resource, resv, true); - if (ret) { -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : Error %d adding reservation to callback.\n", __func__, ret); -#endif - break; - } - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - reservation_object_add_excl_fence(resv, &resource->fence); -#else - dma_resv_add_excl_fence(resv, &resource->fence); -#endif - } - } - - dma_buf_lock_release_fence_reservation(resource, &ww_ctx); - - /* Test if the callbacks were already triggered */ - if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) { - atomic_set(&resource->locked, 1); - - /* Drop the extra reference taken for exclusive access */ - if (resource->exclusive) - dma_buf_lock_fence_work(&resource->work); - } - - if (IS_ERR_VALUE((unsigned long)ret)) { - put_unused_fd(fd); - - mutex_lock(&dma_buf_lock_mutex); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - - return ret; - } - -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s : complete\n", __func__); -#endif - mutex_lock(&dma_buf_lock_mutex); - 
kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - - /* Installing the fd is deferred to the very last operation before return - * to avoid allowing userspace to close it during the setup. - */ - fd_install(fd, file); - return fd; -} - -static void dma_buf_lock_dounlock(struct kref *ref) -{ - int i; - struct dma_buf_lock_resource *resource = container_of(ref, struct dma_buf_lock_resource, refcount); - - atomic_set(&resource->locked, 0); - - /* Signal the resource's fence. */ - dma_fence_signal(&resource->fence); - - dma_buf_lock_fence_free_callbacks(resource); - - list_del(&resource->link); - - for (i = 0; i < resource->count; i++) { - if (resource->dma_bufs[i]) - dma_buf_put(resource->dma_bufs[i]); - } - - kfree(resource->dma_bufs); - kfree(resource->list_of_dma_buf_fds); - dma_fence_put(&resource->fence); -} - -static int __init dma_buf_lock_init(void) -{ - int err; -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s\n", __func__); -#endif - err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name); - - if (err == 0) { - cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops); - - err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1); - - if (err == 0) { - dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name); - if (IS_ERR(dma_buf_lock_class)) - err = PTR_ERR(dma_buf_lock_class); - else { - struct device *mdev = device_create( - dma_buf_lock_class, NULL, dma_buf_lock_dev, - NULL, "%s", dma_buf_lock_dev_name); - if (!IS_ERR(mdev)) - return 0; - - err = PTR_ERR(mdev); - class_destroy(dma_buf_lock_class); - } - cdev_del(&dma_buf_lock_cdev); - } - - unregister_chrdev_region(dma_buf_lock_dev, 1); - } -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s failed\n", __func__); -#endif - return err; -} - -static void __exit dma_buf_lock_exit(void) -{ -#if DMA_BUF_LOCK_DEBUG - pr_debug("%s\n", __func__); -#endif - - /* Unlock all outstanding references */ - while (1) { - struct dma_buf_lock_resource *resource; - - mutex_lock(&dma_buf_lock_mutex); - if (list_empty(&dma_buf_lock_resource_list)) { - mutex_unlock(&dma_buf_lock_mutex); - break; - } - - resource = list_entry(dma_buf_lock_resource_list.next, - struct dma_buf_lock_resource, link); - - kref_put(&resource->refcount, dma_buf_lock_dounlock); - mutex_unlock(&dma_buf_lock_mutex); - } - - device_destroy(dma_buf_lock_class, dma_buf_lock_dev); - - class_destroy(dma_buf_lock_class); - - cdev_del(&dma_buf_lock_cdev); - - unregister_chrdev_region(dma_buf_lock_dev, 1); -} - -#if defined(HAVE_UNLOCKED_IOCTL) || defined(HAVE_COMPAT_IOCTL) || ((KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE)) -static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -#else -static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) -#endif -{ - struct dma_buf_lock_k_request request; - int size = _IOC_SIZE(cmd); - - if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC) - return -ENOTTY; - if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR)) - return -ENOTTY; - - switch (cmd) { - case DMA_BUF_LOCK_FUNC_LOCK_ASYNC: - if (size != sizeof(request)) - return -ENOTTY; - if (copy_from_user(&request, (void __user *)arg, size)) - return -EFAULT; -#if DMA_BUF_LOCK_DEBUG - pr_debug("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count); -#endif - return dma_buf_lock_dolock(&request); - } - - return -ENOTTY; -} - -module_init(dma_buf_lock_init); -module_exit(dma_buf_lock_exit); - -MODULE_LICENSE("GPL"); -MODULE_INFO(import_ns, "DMA_BUF"); 
diff --git a/drivers/base/arm/dma_buf_test_exporter/build.bp b/drivers/base/arm/dma_buf_test_exporter/build.bp index a49fb81d6665..aabd32aa5103 100644 --- a/drivers/base/arm/dma_buf_test_exporter/build.bp +++ b/drivers/base/arm/dma_buf_test_exporter/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ bob_kernel_module { name: "dma-buf-test-exporter", defaults: [ - "kernel_defaults" + "kernel_defaults", ], srcs: [ "Kbuild", diff --git a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c index 6b9a4d70483a..5f033a60026c 100644 --- a/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c +++ b/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c @@ -19,7 +19,7 @@ * */ -#include +#include #include #include #include @@ -32,6 +32,9 @@ #include #include +#define DMA_BUF_TE_VER_MAJOR 1 +#define DMA_BUF_TE_VER_MINOR 0 + /* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */ #define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ diff --git a/drivers/base/arm/memory_group_manager/build.bp b/drivers/base/arm/memory_group_manager/build.bp index 23db183e4f1b..f4b809e774d2 100644 --- a/drivers/base/arm/memory_group_manager/build.bp +++ b/drivers/base/arm/memory_group_manager/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ bob_kernel_module { name: "memory_group_manager", defaults: [ - "kernel_defaults" + "kernel_defaults", ], srcs: [ "Kbuild", diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 7729492e0c80..825893e3cf8e 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -265,8 +265,8 @@ static struct page *example_mgm_alloc_page( struct mgm_groups *const data = mgm_dev->data; struct page *p; - dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n", - __func__, (void *)mgm_dev, group_id, gfp_mask, order); + dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d gfp_mask=0x%x order=%u\n", __func__, + (void *)mgm_dev, group_id, gfp_mask, order); if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) @@ -291,8 +291,8 @@ static void example_mgm_free_page( { struct mgm_groups *const data = mgm_dev->data; - dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n", - __func__, (void *)mgm_dev, group_id, (void *)page, order); + dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d page=%pK order=%u\n", __func__, + (void *)mgm_dev, group_id, (void *)page, order); if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) @@ -309,9 +309,8 @@ static int example_mgm_get_import_memory_id( { struct mgm_groups *const data = mgm_dev->data; - dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n", - __func__, (void *)mgm_dev, (void *)import_data, - (int)import_data->type); + dev_dbg(data->dev, "%s(mgm_dev=%pK, import_data=%pK (type=%d)\n", __func__, (void *)mgm_dev, + (void *)import_data, (int)import_data->type); if (!WARN_ON(!import_data)) { WARN_ON(!import_data->u.dma_buf); @@ -329,9 +328,8 @@ static u64 example_mgm_update_gpu_pte( { struct mgm_groups *const data = mgm_dev->data; - dev_dbg(data->dev, - "%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n", - __func__, (void *)mgm_dev, group_id, mmu_level, pte); + dev_dbg(data->dev, "%s(mgm_dev=%pK, group_id=%d, mmu_level=%d, pte=0x%llx)\n", __func__, + (void *)mgm_dev, group_id, mmu_level, pte); if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) @@ -367,9 +365,9 @@ static vm_fault_t example_mgm_vmf_insert_pfn_prot( vm_fault_t fault; dev_dbg(data->dev, - "%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n", + "%s(mgm_dev=%pK, group_id=%d, vma=%pK, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n", __func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn, - (unsigned long long) pgprot_val(prot)); + (unsigned long long)pgprot_val(prot)); if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) diff --git a/drivers/base/arm/protected_memory_allocator/build.bp b/drivers/base/arm/protected_memory_allocator/build.bp index 4c56154061e8..aef5344da31c 100644 --- a/drivers/base/arm/protected_memory_allocator/build.bp +++ b/drivers/base/arm/protected_memory_allocator/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ bob_kernel_module { name: "protected_memory_allocator", defaults: [ - "kernel_defaults" + "kernel_defaults", ], srcs: [ "Kbuild", diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index a7f0ba0da1e8..70f3997b2bd3 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -59,10 +59,8 @@ ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y) endif ifeq ($(CONFIG_MALI_BIFROST_FENCE_DEBUG), y) - ifneq ($(CONFIG_SYNC), y) - ifneq ($(CONFIG_SYNC_FILE), y) - $(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration) - endif + ifneq ($(CONFIG_SYNC_FILE), y) + $(error CONFIG_MALI_BIFROST_FENCE_DEBUG depends on CONFIG_SYNC_FILE to be set in Kernel configuration) endif endif @@ -71,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g13p0-01eac0"' +MALI_RELEASE_NAME ?= '"g15p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 @@ -151,6 +149,7 @@ bifrost_kbase-y := \ mali_kbase_cache_policy.o \ mali_kbase_ccswe.o \ mali_kbase_mem.o \ + mali_kbase_mem_migrate.o \ mali_kbase_mem_pool_group.o \ mali_kbase_native_mgm.o \ mali_kbase_ctx_sched.o \ @@ -159,12 +158,6 @@ bifrost_kbase-y := \ mali_kbase_config.o \ mali_kbase_kinstr_prfcnt.o \ mali_kbase_vinstr.o \ - mali_kbase_hwcnt.o \ - mali_kbase_hwcnt_gpu.o \ - mali_kbase_hwcnt_gpu_narrow.o \ - mali_kbase_hwcnt_types.o \ - mali_kbase_hwcnt_virtualizer.o \ - mali_kbase_hwcnt_watchdog_if_timer.o \ mali_kbase_softjobs.o \ mali_kbase_hw.o \ mali_kbase_debug.o \ @@ -175,6 +168,7 @@ bifrost_kbase-y := \ mali_kbase_disjoint_events.o \ mali_kbase_debug_mem_view.o \ mali_kbase_debug_mem_zones.o \ + mali_kbase_debug_mem_allocs.o \ mali_kbase_smc.o \ mali_kbase_mem_pool.o \ mali_kbase_mem_pool_debugfs.o \ @@ -191,24 +185,14 @@ bifrost_kbase-$(CONFIG_DEBUG_FS) += mali_kbase_pbha_debugfs.o bifrost_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o -bifrost_kbase-$(CONFIG_SYNC) += \ - mali_kbase_sync_android.o \ - mali_kbase_sync_common.o - bifrost_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_fence_ops.o \ mali_kbase_sync_file.o \ mali_kbase_sync_common.o -ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) - bifrost_kbase-y += \ - mali_kbase_hwcnt_backend_csf.o \ - mali_kbase_hwcnt_backend_csf_if_fw.o -else +ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) bifrost_kbase-y += \ mali_kbase_jm.o \ - mali_kbase_hwcnt_backend_jm.o \ - mali_kbase_hwcnt_backend_jm_watchdog.o \ mali_kbase_dummy_job_wa.o \ mali_kbase_debug_job_fault.o \ mali_kbase_event.o \ @@ -218,11 +202,6 @@ else mali_kbase_js_ctx_attr.o \ mali_kbase_kinstr_jm.o - bifrost_kbase-$(CONFIG_MALI_BIFROST_DMA_FENCE) += \ - mali_kbase_fence_ops.o \ - mali_kbase_dma_fence.o \ - mali_kbase_fence.o - bifrost_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_fence_ops.o \ mali_kbase_fence.o @@ -236,6 +215,7 @@ INCLUDE_SUBDIR = \ $(src)/backend/gpu/Kbuild \ $(src)/mmu/Kbuild \ $(src)/tl/Kbuild \ + 
$(src)/hwcnt/Kbuild \ $(src)/gpu/Kbuild \ $(src)/thirdparty/Kbuild \ $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 54f083dbad27..1bfb59ca14e2 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -91,16 +91,6 @@ config MALI_BIFROST_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_BIFROST_DMA_FENCE - bool "Enable DMA_BUF fence support for Mali" - depends on MALI_BIFROST - default n - help - Support DMA_BUF fences for Mali. - - This option should only be enabled if the Linux Kernel has built in - support for DMA_BUF fences. - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_BIFROST && !MALI_CSF_SUPPORT @@ -117,7 +107,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND depends on MALI_BIFROST default n help - This option caused kbase to set up the GPU mapping of imported + This option will cause kbase to set up the GPU mapping of imported dma-buf when needed to run atoms. This is the legacy behavior. This is intended for testing and the option will get removed in the @@ -237,7 +227,7 @@ config MALI_BIFROST_DEBUG config MALI_BIFROST_FENCE_DEBUG bool "Enable debug sync fence usage" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && (SYNC || SYNC_FILE) + depends on MALI_BIFROST && MALI_BIFROST_EXPERT && SYNC_FILE default y if MALI_BIFROST_DEBUG help Select this option to enable additional checking and reporting on the @@ -385,9 +375,6 @@ config MALI_ARBITRATION virtualization setup for Mali If unsure, say N. 
-if MALI_ARBITRATION -source "drivers/gpu/arm/bifrost/arbitration/Kconfig" -endif # source "drivers/gpu/arm/bifrost/tests/Kconfig" diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 623177ed26fb..3fb736d7950e 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -65,7 +65,7 @@ ifeq ($(CONFIG_MALI_BIFROST),m) endif ifeq ($(CONFIG_XEN),y) - ifneq ($(CONFIG_MALI_ARBITRATION), n) + ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n) CONFIG_MALI_XEN ?= m endif endif @@ -91,14 +91,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_BIFROST_ENABLE_TRACE ?= y CONFIG_MALI_BIFROST_SYSTEM_TRACE ?= y - ifeq ($(CONFIG_SYNC), y) + ifeq ($(CONFIG_SYNC_FILE), y) CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y else - ifeq ($(CONFIG_SYNC_FILE), y) - CONFIG_MALI_BIFROST_FENCE_DEBUG ?= y - else - CONFIG_MALI_BIFROST_FENCE_DEBUG = n - endif + CONFIG_MALI_BIFROST_FENCE_DEBUG = n endif else # Prevent misuse when CONFIG_MALI_BIFROST_DEBUG=n @@ -160,7 +156,6 @@ CONFIGS := \ CONFIG_MALI_BIFROST \ CONFIG_MALI_CSF_SUPPORT \ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ - CONFIG_MALI_BIFROST_DMA_FENCE \ CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ CONFIG_MALI_ARBITER_MODULES \ @@ -227,26 +222,47 @@ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # -# The following were added to align with W=1 in scripts/Makefile.extrawarn -# from the Linux source tree KBUILD_CFLAGS += -Wall -Werror + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree (v5.18.14) KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter KBUILD_CFLAGS += -Wmissing-declarations KBUILD_CFLAGS += -Wmissing-format-attribute KBUILD_CFLAGS += -Wmissing-prototypes KBUILD_CFLAGS += -Wold-style-definition -KBUILD_CFLAGS += -Wmissing-include-dirs +# The -Wmissing-include-dirs flag cannot be enabled as the paths to some of the +# included directories change depending on whether it is an in-tree or +# out-of-tree build. KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-type-limits +KBUILD_CFLAGS += -Wno-shift-negative-value +# This flag is needed to avoid build errors on older kernels +KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 +# The following were added to align with W=2 in scripts/Makefile.extrawarn +# from the Linux source tree (v5.18.14) +KBUILD_CFLAGS += -Wdisabled-optimization +# The -Wshadow flag cannot be enabled unless upstream kernels are +# patched to fix redefinitions of certain built-in functions and +# global variables.
+KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) +KBUILD_CFLAGS += -Wmissing-field-initializers +KBUILD_CFLAGS += -Wtype-limits +KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) +KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 + +# This warning is disabled to avoid build failures in some kernel versions +KBUILD_CFLAGS += -Wno-ignored-qualifiers + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig index fd81ac44af3d..f812bcad639c 100644 --- a/drivers/gpu/arm/bifrost/Mconfig +++ b/drivers/gpu/arm/bifrost/Mconfig @@ -97,16 +97,6 @@ config MALI_BIFROST_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_BIFROST_DMA_FENCE - bool "Enable DMA_BUF fence support for Mali" - depends on MALI_BIFROST - default n - help - Support DMA_BUF fences for Mali. - - This option should only be enabled if the Linux Kernel has built in - support for DMA_BUF fences. - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_BIFROST && !MALI_CSF_SUPPORT @@ -129,7 +119,7 @@ config MALI_DMA_BUF_MAP_ON_DEMAND default n default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED help - This option caused kbase to set up the GPU mapping of imported + This option will cause kbase to set up the GPU mapping of imported dma-buf when needed to run atoms. This is the legacy behavior. This is intended for testing and the option will get removed in the @@ -157,17 +147,6 @@ menuconfig MALI_BIFROST_EXPERT Enabling this option and modifying the default settings may produce a driver with performance or other limitations. -config MALI_2MB_ALLOC - bool "Attempt to allocate 2MB pages" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. - - If in doubt, say N - config MALI_MEMORY_FULLY_BACKED bool "Enable memory fully physically-backed" depends on MALI_BIFROST && MALI_BIFROST_EXPERT @@ -200,10 +179,10 @@ config MALI_FW_CORE_DUMP Example: * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump + echo 1 >/sys/kernel/debug/mali0/fw_core_dump * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump choice prompt "Error injection level" @@ -343,5 +322,5 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE slowest clock will be selected. -source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" +source "kernel/drivers/gpu/arm/arbitration/Mconfig" source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c index 64e11ce53625..b5d3cd685ba5 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c +++ b/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbif.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,12 +28,12 @@ #include #include #include -#include "mali_kbase_arbiter_interface.h" +#include "linux/mali_arbiter_interface.h" /* Arbiter interface version against which was implemented this module */ #define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 #if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ - MALI_KBASE_ARBITER_INTERFACE_VERSION + MALI_ARBITER_INTERFACE_VERSION #error "Unsupported Mali Arbiter interface version." #endif @@ -205,6 +205,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { dev_err(kbdev->dev, "arbiter_if driver not available\n"); + put_device(&pdev->dev); return -EPROBE_DEFER; } kbdev->arb.arb_dev = &pdev->dev; @@ -212,6 +213,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) if (!arb_if) { dev_err(kbdev->dev, "arbiter_if driver not ready\n"); module_put(pdev->dev.driver->owner); + put_device(&pdev->dev); return -EPROBE_DEFER; } @@ -233,6 +235,7 @@ int kbase_arbif_init(struct kbase_device *kbdev) if (err) { dev_err(&pdev->dev, "Failed to register with arbiter\n"); module_put(pdev->dev.driver->owner); + put_device(&pdev->dev); if (err != -EPROBE_DEFER) err = -EFAULT; return err; @@ -262,8 +265,10 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); } kbdev->arb.arb_if = NULL; - if (kbdev->arb.arb_dev) + if (kbdev->arb.arb_dev) { module_put(kbdev->arb.arb_dev->driver->owner); + put_device(kbdev->arb.arb_dev); + } kbdev->arb.arb_dev = NULL; } diff --git a/drivers/gpu/arm/bifrost/arbitration/Kconfig b/drivers/gpu/arm/bifrost/arbitration/Kconfig deleted file mode 100644 index e991653e8d81..000000000000 --- a/drivers/gpu/arm/bifrost/arbitration/Kconfig +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT -# -# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -config MALI_XEN - tristate "Enable Xen Interface reference code" - depends on MALI_ARBITRATION && XEN - default n - help - Enables the build of xen interface modules used in the reference - virtualization setup for Mali - If unsure, say N. 
- -config MALI_ARBITER_MODULES - tristate "Enable mali arbiter modules" - depends on MALI_ARBITRATION - default y - help - Enables the build of the arbiter modules used in the reference - virtualization setup for Mali - If unsure, say N - -config MALI_GPU_POWER_MODULES - tristate "Enable gpu power modules" - depends on MALI_ARBITRATION - default y - help - Enables the build of the gpu power modules used in the reference - virtualization setup for Mali - If unsure, say N - - -source "drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig" diff --git a/drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig b/drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig deleted file mode 100644 index 074ebd50daa5..000000000000 --- a/drivers/gpu/arm/bifrost/arbitration/ptm/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note OR MIT -# -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -config MALI_PARTITION_MANAGER - tristate "Enable compilation of partition manager modules" - depends on MALI_ARBITRATION - default n - help - This option enables the compilation of the partition manager - modules used to configure the Mali-G78AE GPU. - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index 65f07e23412d..7eec91ff6631 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c index 9587c704ff8a..7c0abbaf860f 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,12 +22,32 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" #include +/** + * kbasep_amba_register_present() - Check AMBA_<> register is present + * in the GPU. + * @kbdev: Device pointer + * + * Note: Only for arch version 12.x.1 onwards. + * + * Return: true if AMBA_FEATURES/ENABLE registers are present. 
+ */ +static bool kbasep_amba_register_present(struct kbase_device *kbdev) +{ + return (ARCH_MAJOR_REV_REG(kbdev->gpu_props.props.raw_props.gpu_id) >= + GPU_ID2_ARCH_MAJOR_REV_MAKE(12, 1)); +} void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) { kbdev->current_gpu_coherency_mode = mode; + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + } else kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); } @@ -35,9 +55,38 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) { u32 coherency_features; + if (kbasep_amba_register_present(kbdev)) + coherency_features = + kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_FEATURES)); + else coherency_features = kbase_reg_read( kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); return coherency_features; } +void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, + bool enable) +{ + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + + } else { + WARN(1, "memory_cache_support not supported"); + } +} + +void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) +{ + if (kbasep_amba_register_present(kbdev)) { + u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + + val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); + kbase_reg_write(kbdev, AMBA_ENABLE, val); + } else { + WARN(1, "invalidate_hint not supported"); + } +} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h index 795dbea40318..758e3be08c16 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,4 +43,23 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, */ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); +/** + * kbase_amba_set_memory_cache_support() - Sets AMBA memory cache support + * in the GPU. + * @kbdev: Device pointer + * @enable: true for enable. + * + * Note: Only for arch version 12.x.1 onwards. + */ +void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, + bool enable); +/** + * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint + * in the GPU. + * @kbdev: Device pointer + * @enable: true for enable. + * + * Note: Only for arch version 12.x.1 onwards. 
+ */ +void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h index 7190f42c2104..bd2eb8a12047 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_instr_defs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #ifndef _KBASE_INSTR_DEFS_H_ #define _KBASE_INSTR_DEFS_H_ -#include +#include /* * Instrumentation State Machine States diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 72def5e7aabb..15999cbc9126 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -163,7 +163,6 @@ static irq_handler_t kbase_handler_table[] = { #ifdef CONFIG_MALI_BIFROST_DEBUG #define JOB_IRQ_HANDLER JOB_IRQ_TAG -#define MMU_IRQ_HANDLER MMU_IRQ_TAG #define GPU_IRQ_HANDLER GPU_IRQ_TAG /** diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index 08de02495a4a..e17014e45f6b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -1440,6 +1440,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) return true; } +bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) +{ + return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING; +} + int kbase_reset_gpu_wait(struct kbase_device *kbdev) { wait_event(kbdev->hwaccess.backend.reset_wait, diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index 9960beb2e9b4..e5af4ca8fc43 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index e4f4b2455925..1a0209f702ac 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -80,31 +80,360 @@ static bool ipa_control_timer_enabled; #endif #define LO_MASK(M) ((M) & 0xFFFFFFFF) +#if !MALI_USE_CSF #define HI_MASK(M) ((M) & 0xFFFFFFFF00000000) +#endif -static u32 get_implementation_register(u32 reg) -{ - switch (reg) { - case 
GPU_CONTROL_REG(SHADER_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT); - case GPU_CONTROL_REG(TILER_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); - case GPU_CONTROL_REG(L2_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); - case GPU_CONTROL_REG(STACK_PRESENT_LO): - return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT); +/* Construct a value for the THREAD_FEATURES register, *except* the two most + * significant bits, which are set to IMPLEMENTATION_MODEL in + * midgard_model_read_reg(). + */ +#if MALI_USE_CSF +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) +#else +#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ + ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) +#endif - case GPU_CONTROL_REG(SHADER_PRESENT_HI): - case GPU_CONTROL_REG(TILER_PRESENT_HI): - case GPU_CONTROL_REG(L2_PRESENT_HI): - case GPU_CONTROL_REG(STACK_PRESENT_HI): - /* *** FALLTHROUGH *** */ - default: - return 0; - } -} +struct error_status_t hw_error_status; -struct { +/** + * struct control_reg_values_t - control register values specific to the GPU being 'emulated' + * @name: GPU name + * @gpu_id: GPU ID to report + * @as_present: Bitmap of address spaces present + * @thread_max_threads: Maximum number of threads per core + * @thread_max_workgroup_size: Maximum number of threads per workgroup + * @thread_max_barrier_size: Maximum number of threads per barrier + * @thread_features: Thread features, NOT INCLUDING the 2 + * most-significant bits, which are always set to + * IMPLEMENTATION_MODEL. + * @core_features: Core features + * @tiler_features: Tiler features + * @mmu_features: MMU features + * @gpu_features_lo: GPU features (low) + * @gpu_features_hi: GPU features (high) + * @shader_present: Available shader bitmap + * @stack_present: Core stack present bitmap + * + */ +struct control_reg_values_t { + const char *name; + u32 gpu_id; + u32 as_present; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 core_features; + u32 tiler_features; + u32 mmu_features; + u32 gpu_features_lo; + u32 gpu_features_hi; + u32 shader_present; + u32 stack_present; +}; + +struct job_slot { + int job_active; + int job_queued; + int job_complete_irq_asserted; + int job_irq_mask; + int job_disabled; +}; + +struct dummy_model_t { + int reset_completed; + int reset_completed_mask; +#if !MALI_USE_CSF + int prfcnt_sample_completed; +#endif /* !MALI_USE_CSF */ + int power_changed_mask; /* 2bits: _ALL,_SINGLE */ + int power_changed; /* 1bit */ + bool clean_caches_completed; + bool clean_caches_completed_irq_enabled; +#if MALI_USE_CSF + bool flush_pa_range_completed; + bool flush_pa_range_completed_irq_enabled; +#endif + int power_on; /* 6bits: SHADER[4],TILER,L2 */ + u32 stack_power_on_lo; + u32 coherency_enable; + unsigned int job_irq_js_state; + struct job_slot slots[NUM_SLOTS]; + const struct control_reg_values_t *control_reg_values; + u32 l2_config; + void *data; +}; + +/* Array associating GPU names with control register values. The first + * one is used in the case of no match. 
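/*
 * Worked example of the THREAD_FEATURES_PARTIAL() macros above, using the
 * (0x6000, 4, 10) tuple that most of the Midgard/Bifrost table entries pass
 * in. The EXAMPLE_* macros below only restate the two variants from this hunk
 * so the arithmetic can be checked standalone; the real register value also
 * gets its two most-significant bits set to IMPLEMENTATION_MODEL later, in
 * midgard_model_read_reg().
 */
#define EXAMPLE_THREAD_FEATURES_JM(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
        ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
#define EXAMPLE_THREAD_FEATURES_CSF(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
        ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))

/* Job Manager GPUs: 0x6000 | (4 << 16) | (10 << 24) */
_Static_assert(EXAMPLE_THREAD_FEATURES_JM(0x6000, 4, 10) == 0x0a046000,
               "JM THREAD_FEATURES example");
/* CSF GPUs: MAX_TG_SPLIT is not encoded, so 0x6000 | (4 << 24) */
_Static_assert(EXAMPLE_THREAD_FEATURES_CSF(0x6000, 4, 10) == 0x04006000,
               "CSF THREAD_FEATURES example");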
+ */ +static const struct control_reg_values_t all_control_reg_values[] = { + { + .name = "tMIx", + .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tHEx", + .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tSIx", + .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), + .as_present = 0xFF, + .thread_max_threads = 0x300, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x209, + .mmu_features = 0x2821, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tDVx", + .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x300, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x209, + .mmu_features = 0x2821, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tNOx", + .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tGOx_r0p0", + .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tGOx_r1p0", + .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), + .core_features = 0x2, + .tiler_features = 0x209, + .mmu_features = 0x2823, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tTRx", + .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 
0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tNAx", + .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tBEx", + .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tBAx", + .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tDUx", + .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tODx", + .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tGRx", + .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .core_features = 0x0, /* core_1e16fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tVAx", + .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x180, + .thread_max_workgroup_size = 0x180, + .thread_max_barrier_size = 0x180, + .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), + .core_features = 0x0, /* 
core_1e16fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0, + .gpu_features_hi = 0, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, + }, + { + .name = "tTUx", + .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), + .core_features = 0x0, /* core_1e32fma2tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + .shader_present = 0xFF, + .stack_present = 0xF, + }, + { + .name = "tTIx", + .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0), + .as_present = 0xFF, + .thread_max_threads = 0x800, + .thread_max_workgroup_size = 0x400, + .thread_max_barrier_size = 0x400, + .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0), + .core_features = 0x1, /* core_1e64fma4tex */ + .tiler_features = 0x809, + .mmu_features = 0x2830, + .gpu_features_lo = 0xf, + .gpu_features_hi = 0, + .shader_present = 0xFF, + .stack_present = 0xF, + }, +}; + +static struct { spinlock_t access_lock; #if !MALI_USE_CSF unsigned long prfcnt_base; @@ -125,74 +454,33 @@ struct { #endif /* !MALI_USE_CSF */ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES * - KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; + KBASE_DUMMY_MODEL_COUNTER_PER_CORE]; +} performance_counters; -} performance_counters = { - .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, -}; +static u32 get_implementation_register(u32 reg, + const struct control_reg_values_t *const control_reg_values) +{ + switch (reg) { + case GPU_CONTROL_REG(SHADER_PRESENT_LO): + return LO_MASK(control_reg_values->shader_present); + case GPU_CONTROL_REG(TILER_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT); + case GPU_CONTROL_REG(L2_PRESENT_LO): + return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT); + case GPU_CONTROL_REG(STACK_PRESENT_LO): + return LO_MASK(control_reg_values->stack_present); -struct job_slot { - int job_active; - int job_queued; - int job_complete_irq_asserted; - int job_irq_mask; - int job_disabled; -}; - -/** - * struct control_reg_values_t - control register values specific to the GPU being 'emulated' - * @name: GPU name - * @gpu_id: GPU ID to report - * @as_present: Bitmap of address spaces present - * @thread_max_threads: Maximum number of threads per core - * @thread_max_workgroup_size: Maximum number of threads per workgroup - * @thread_max_barrier_size: Maximum number of threads per barrier - * @thread_features: Thread features, NOT INCLUDING the 2 - * most-significant bits, which are always set to - * IMPLEMENTATION_MODEL. 
- * @core_features: Core features - * @tiler_features: Tiler features - * @mmu_features: MMU features - * @gpu_features_lo: GPU features (low) - * @gpu_features_hi: GPU features (high) - */ -struct control_reg_values_t { - const char *name; - u32 gpu_id; - u32 as_present; - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; - u32 core_features; - u32 tiler_features; - u32 mmu_features; - u32 gpu_features_lo; - u32 gpu_features_hi; -}; - -struct dummy_model_t { - int reset_completed; - int reset_completed_mask; -#if !MALI_USE_CSF - int prfcnt_sample_completed; -#endif /* !MALI_USE_CSF */ - int power_changed_mask; /* 2bits: _ALL,_SINGLE */ - int power_changed; /* 1bit */ - bool clean_caches_completed; - bool clean_caches_completed_irq_enabled; - int power_on; /* 6bits: SHADER[4],TILER,L2 */ - u32 stack_power_on_lo; - u32 coherency_enable; - unsigned int job_irq_js_state; - struct job_slot slots[NUM_SLOTS]; - const struct control_reg_values_t *control_reg_values; - u32 l2_config; - void *data; -}; + case GPU_CONTROL_REG(SHADER_PRESENT_HI): + case GPU_CONTROL_REG(TILER_PRESENT_HI): + case GPU_CONTROL_REG(L2_PRESENT_HI): + case GPU_CONTROL_REG(STACK_PRESENT_HI): + /* *** FALLTHROUGH *** */ + default: + return 0; + } +} void gpu_device_set_data(void *model, void *data) { @@ -221,238 +509,6 @@ static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); -/* Construct a value for the THREAD_FEATURES register, *except* the two most - * significant bits, which are set to IMPLEMENTATION_MODEL in - * midgard_model_read_reg(). - */ -#if MALI_USE_CSF -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ - ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24)) -#else -#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \ - ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24)) -#endif - -/* Array associating GPU names with control register values. The first - * one is used in the case of no match. 
- */ -static const struct control_reg_values_t all_control_reg_values[] = { - { - .name = "tMIx", - .gpu_id = GPU_ID2_MAKE(6, 0, 10, 0, 0, 1, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tHEx", - .gpu_id = GPU_ID2_MAKE(6, 2, 0, 1, 0, 3, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tSIx", - .gpu_id = GPU_ID2_MAKE(7, 0, 0, 0, 1, 1, 0), - .as_present = 0xFF, - .thread_max_threads = 0x300, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x209, - .mmu_features = 0x2821, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tDVx", - .gpu_id = GPU_ID2_MAKE(7, 0, 0, 3, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x300, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x209, - .mmu_features = 0x2821, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tNOx", - .gpu_id = GPU_ID2_MAKE(7, 2, 1, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tGOx_r0p0", - .gpu_id = GPU_ID2_MAKE(7, 2, 2, 2, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tGOx_r1p0", - .gpu_id = GPU_ID2_MAKE(7, 4, 0, 2, 1, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 10), - .core_features = 0x2, - .tiler_features = 0x209, - .mmu_features = 0x2823, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tTRx", - .gpu_id = GPU_ID2_MAKE(9, 0, 8, 0, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tNAx", - .gpu_id = GPU_ID2_MAKE(9, 0, 8, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tBEx", - .gpu_id = GPU_ID2_MAKE(9, 2, 0, 2, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - 
.thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tBAx", - .gpu_id = GPU_ID2_MAKE(9, 14, 4, 5, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tDUx", - .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tODx", - .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tGRx", - .gpu_id = GPU_ID2_MAKE(10, 10, 0, 3, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .core_features = 0x0, /* core_1e16fma2tex */ - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tVAx", - .gpu_id = GPU_ID2_MAKE(10, 12, 0, 4, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .core_features = 0x0, /* core_1e16fma2tex */ - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - }, - { - .name = "tTUx", - .gpu_id = GPU_ID2_MAKE(11, 8, 5, 2, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x800, - .thread_max_workgroup_size = 0x400, - .thread_max_barrier_size = 0x400, - .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 4, 0), - .core_features = 0x0, /* core_1e32fma2tex */ - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0xf, - .gpu_features_hi = 0, - }, -}; - -struct error_status_t hw_error_status; - #if MALI_USE_CSF static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx, bool is_low_word) @@ -1011,6 +1067,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp size_t i; const struct control_reg_values_t *ret = NULL; + /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1 + * revisions respectively. As none of them are named "tGOx" the name comparison + * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0" + * or "r1p0" and is derived from the DDK's build configuration. In cases + * where it is unavailable, it defaults to tGOx r1p0. 
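/*
 * Sketch of the lookup performed by find_control_reg_values() on the table
 * above: an exact name match wins, otherwise the first entry is used, as the
 * table's "no match" comment states. example_lookup() is a hypothetical,
 * simplified helper that reuses struct control_reg_values_t from this file.
 */
#include <linux/string.h>
#include <linux/types.h>

static const struct control_reg_values_t *
example_lookup(const char *gpu, const struct control_reg_values_t *table, size_t n)
{
        size_t i;

        for (i = 0; i < n; ++i) {
                if (!strcmp(gpu, table[i].name))
                        return &table[i];
        }

        /* No match: fall back to the first entry (tMIx in this table). */
        return &table[0];
}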
+ */ + if (!strcmp(gpu, "tGOx")) { +#ifdef CONFIG_GPU_HWVER + if (!strcmp(CONFIG_GPU_HWVER, "r0p0")) + gpu = "tGOx_r0p0"; + else if (!strcmp(CONFIG_GPU_HWVER, "r1p0")) +#endif /* CONFIG_GPU_HWVER defined */ + gpu = "tGOx_r1p0"; + } + for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) { const struct control_reg_values_t * const fcrv = &all_control_reg_values[i]; @@ -1043,6 +1114,10 @@ void *midgard_model_create(const void *config) dummy->job_irq_js_state = 0; init_register_statuses(dummy); dummy->control_reg_values = find_control_reg_values(no_mali_gpu); + performance_counters.l2_present = get_implementation_register( + GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); + performance_counters.shader_present = get_implementation_register( + GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); } return dummy; } @@ -1066,6 +1141,8 @@ static void midgard_model_get_outputs(void *h) hw_error_status.gpu_error_irq || #if !MALI_USE_CSF dummy->prfcnt_sample_completed || +#else + (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); @@ -1235,6 +1312,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->reset_completed_mask = (value >> 8) & 0x01; dummy->power_changed_mask = (value >> 9) & 0x03; dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u; +#if MALI_USE_CSF + dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u; +#endif } else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) { dummy->coherency_enable = value; } else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) { @@ -1247,10 +1327,17 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) if (value & (1 << 17)) dummy->clean_caches_completed = false; -#if !MALI_USE_CSF - if (value & PRFCNT_SAMPLE_COMPLETED) + +#if MALI_USE_CSF + if (value & (1u << 20)) + dummy->flush_pa_range_completed = false; +#endif /* MALI_USE_CSF */ + +#if !MALI_USE_CSF + if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */ dummy->prfcnt_sample_completed = 0; #endif /* !MALI_USE_CSF */ + /*update error status */ hw_error_status.gpu_error_irq &= ~(value); } else if (addr == GPU_CONTROL_REG(GPU_COMMAND)) { @@ -1274,7 +1361,15 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) pr_debug("clean caches requested"); dummy->clean_caches_completed = true; break; -#if !MALI_USE_CSF +#if MALI_USE_CSF + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2: + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC: + case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL: + pr_debug("pa range flush requested"); + dummy->flush_pa_range_completed = true; + break; +#endif /* MALI_USE_CSF */ +#if !MALI_USE_CSF case GPU_COMMAND_PRFCNT_SAMPLE: midgard_model_dump_prfcnt(); dummy->prfcnt_sample_completed = 1; @@ -1282,6 +1377,11 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) default: break; } +#if MALI_USE_CSF + } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) && + addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) { + /* Writes ignored */ +#endif } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { dummy->l2_config = value; } @@ -1291,6 +1391,12 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; + } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < 
GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + /* Do nothing */ + } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && + (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { + /* Do nothing */ } else if (addr == IPA_CONTROL_REG(COMMAND)) { pr_debug("Received IPA_CONTROL command"); } else if (addr == IPA_CONTROL_REG(TIMER)) { @@ -1315,8 +1421,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) hw_error_status.mmu_irq_mask = value; } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && - (addr <= MMU_AS_REG(15, AS_STATUS))) { + } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) >> 6; @@ -1443,7 +1548,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->power_changed = 1; break; case SHADER_PWRON_LO: - dummy->power_on |= (value & 0xF) << 2; + dummy->power_on |= + (value & dummy->control_reg_values->shader_present) << 2; dummy->power_changed = 1; break; case L2_PWRON_LO: @@ -1459,7 +1565,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->power_changed = 1; break; case SHADER_PWROFF_LO: - dummy->power_on &= ~((value & 0xF) << 2); + dummy->power_on &= + ~((value & dummy->control_reg_values->shader_present) << 2); dummy->power_changed = 1; break; case L2_PWROFF_LO: @@ -1546,6 +1653,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) { *value = (dummy->reset_completed_mask << 8) | ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) | +#if MALI_USE_CSF + ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) | +#endif (dummy->power_changed_mask << 9) | (1 << 7) | 1; pr_debug("GPU_IRQ_MASK read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) { @@ -1555,6 +1665,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) | #endif /* !MALI_USE_CSF */ ((dummy->clean_caches_completed ? 1u : 0u) << 17) | +#if MALI_USE_CSF + ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) | +#endif hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_RAWSTAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) { @@ -1569,6 +1682,13 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) 1u : 0u) << 17) | +#if MALI_USE_CSF + (((dummy->flush_pa_range_completed && + dummy->flush_pa_range_completed_irq_enabled) ? 
+ 1u : + 0u) + << 20) | +#endif hw_error_status.gpu_error_irq; pr_debug("GPU_IRQ_STAT read %x", *value); } else if (addr == GPU_CONTROL_REG(GPU_STATUS)) { @@ -1581,8 +1701,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = hw_error_status.gpu_fault_status; } else if (addr == GPU_CONTROL_REG(L2_CONFIG)) { *value = dummy->l2_config; - } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && - (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { + } +#if MALI_USE_CSF + else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && + (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { + *value = 0; + } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) && + (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) { + *value = 0; + } +#endif + else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) && + (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) { switch (addr) { case GPU_CONTROL_REG(SHADER_PRESENT_LO): case GPU_CONTROL_REG(SHADER_PRESENT_HI): @@ -1592,27 +1722,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) case GPU_CONTROL_REG(L2_PRESENT_HI): case GPU_CONTROL_REG(STACK_PRESENT_LO): case GPU_CONTROL_REG(STACK_PRESENT_HI): - *value = get_implementation_register(addr); + *value = get_implementation_register(addr, dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_LO): *value = (dummy->power_on >> 0x02) & - get_implementation_register( - GPU_CONTROL_REG(SHADER_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(TILER_READY_LO): *value = (dummy->power_on >> 0x01) & - get_implementation_register( - GPU_CONTROL_REG(TILER_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(L2_READY_LO): *value = dummy->power_on & - get_implementation_register( - GPU_CONTROL_REG(L2_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(STACK_READY_LO): *value = dummy->stack_power_on_lo & - get_implementation_register( - GPU_CONTROL_REG(STACK_PRESENT_LO)); + get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO), + dummy->control_reg_values); break; case GPU_CONTROL_REG(SHADER_READY_HI): @@ -1904,6 +2034,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER, counter_index, is_low_word); + } else if (addr == USER_REG(LATEST_FLUSH)) { + *value = 0; } #endif else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) { diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c index 344046089d6e..972d1c87fb1a 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c @@ -23,13 +23,6 @@ #include #include "backend/gpu/mali_kbase_model_dummy.h" -/* all the error conditions supported by the model */ -#define TOTAL_FAULTS 27 -/* maximum number of levels in the MMU translation table tree */ -#define MAX_MMU_TABLE_LEVEL 4 -/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ -#define MAX_CONCURRENT_FAULTS 3 - static struct kbase_error_atom *error_track_list; unsigned int rand_seed; @@ -40,6 +33,14 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */ unsigned int multiple_error_probability = 50; #ifdef 
CONFIG_MALI_ERROR_INJECT_RANDOM + +/* all the error conditions supported by the model */ +#define TOTAL_FAULTS 27 +/* maximum number of levels in the MMU translation table tree */ +#define MAX_MMU_TABLE_LEVEL 4 +/* worst case scenario is <1 MMU fault + 1 job fault + 2 GPU faults> */ +#define MAX_CONCURRENT_FAULTS 3 + /** * gpu_generate_error - Generate GPU error */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c index df735d95de9f..5c71fdf154b9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_backend.c @@ -36,7 +36,7 @@ #include #include #endif /* !MALI_USE_CSF */ -#include +#include #include #include #include diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index d9e3dfcc6994..9e38b904b459 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -92,29 +92,10 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) * for those cores to get powered down */ if ((core_mask & old_core_mask) != old_core_mask) { - bool can_wait; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* This check is ideally not required, the wait function can - * deal with the GPU power down. But it has been added to - * address the scenario where down-scaling request comes from - * the platform specific code soon after the GPU power down - * and at the time same time application thread tries to - * power up the GPU (on the flush of GPU queue). - * The platform specific @ref callback_power_on that gets - * invoked on power up does not return until down-scaling - * request is complete. The check mitigates the race caused by - * the problem in platform specific code. 
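/*
 * The kbase_devfreq_set_core_mask() hunk around this point only waits (now
 * via kbase_pm_wait_for_cores_down_scale()) when the new mask removes cores
 * that were previously available, i.e. when (core_mask & old_core_mask) is no
 * longer equal to old_core_mask. Two worked cases of that test, with
 * hypothetical 4-core masks:
 */
/* Going from cores 0-3 (0xF) down to cores 0-1 (0x3): cores are dropped. */
_Static_assert((0x3u & 0xFu) != 0xFu, "down-scale detected, wait is required");
/* Going from cores 0-1 (0x3) up to cores 0-3 (0xF): nothing is dropped. */
_Static_assert((0xFu & 0x3u) == 0x3u, "pure up-scale, no wait is needed");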
- */ - if (likely(can_wait)) { - if (kbase_pm_wait_for_desired_state(kbdev)) { - dev_warn(kbdev->dev, - "Wait for update of core_mask from %llx to %llx failed", - old_core_mask, core_mask); - } + if (kbase_pm_wait_for_cores_down_scale(kbdev)) { + dev_warn(kbdev->dev, + "Wait for update of core_mask from %llx to %llx failed", + old_core_mask, core_mask); } } #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 94b87ce7166b..8173cf6ba7d7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include #include @@ -538,6 +538,14 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) return; +#if MALI_USE_CSF + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) { + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), + L2_CONFIG_PBHA_HWU_SET(val, kbdev->pbha_propagate_bits)); + } +#endif /* MALI_USE_CSF */ + /* * Skip if size and hash are not given explicitly, * which means default values are used. @@ -599,6 +607,21 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) return strings[state]; } +static +void kbase_ktrace_log_mcu_state(struct kbase_device *kbdev, enum kbase_mcu_state state) +{ +#if KBASE_KTRACE_ENABLE + switch (state) { +#define KBASEP_MCU_STATE(n) \ + case KBASE_MCU_ ## n: \ + KBASE_KTRACE_ADD(kbdev, PM_MCU_ ## n, NULL, state); \ + break; +#include "mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE + } +#endif +} + static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; @@ -689,7 +712,6 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) } #endif - /** * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts * from the firmware @@ -697,10 +719,10 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) * @kbdev: Pointer to the device * @enable: boolean indicating to enable interrupts or not * - * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled - * after L2 has been turned on when FW is controlling the power for the shader - * cores. Correspondingly, the interrupts can be re-enabled after the MCU has - * been disabled before the power down of L2. + * The POWER_CHANGED_ALL interrupt can be disabled after L2 has been turned on + * when FW is controlling the power for the shader cores. Correspondingly, the + * interrupts can be re-enabled after the MCU has been disabled before the + * power down of L2. 
*/ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable) { @@ -710,10 +732,12 @@ static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool en irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - if (enable) - irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE; - else - irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE); + if (enable) { + irq_mask |= POWER_CHANGED_ALL; + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), POWER_CHANGED_ALL); + } else { + irq_mask &= ~POWER_CHANGED_ALL; + } kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask); } @@ -1028,10 +1052,12 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->mcu_state); } - if (backend->mcu_state != prev_state) + if (backend->mcu_state != prev_state) { dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", kbase_mcu_state_to_string(prev_state), kbase_mcu_state_to_string(backend->mcu_state)); + kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state); + } } while (backend->mcu_state != prev_state); @@ -1079,6 +1105,21 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) return strings[state]; } +static +void kbase_ktrace_log_l2_core_state(struct kbase_device *kbdev, enum kbase_l2_core_state state) +{ +#if KBASE_KTRACE_ENABLE + switch (state) { +#define KBASEP_L2_STATE(n) \ + case KBASE_L2_ ## n: \ + KBASE_KTRACE_ADD(kbdev, PM_L2_ ## n, NULL, state); \ + break; +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE + } +#endif +} + #if !MALI_USE_CSF /* On powering on the L2, the tracked kctx becomes stale and can be cleared. * This enables the backend to spare the START_FLUSH.INV_SHADER_OTHER @@ -1136,18 +1177,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); -#ifdef CONFIG_MALI_ARBITER_SUPPORT - u64 tiler_trans = kbase_pm_get_trans_cores( - kbdev, KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_TILER); +#ifdef CONFIG_MALI_ARBITER_SUPPORT /* * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ - if (kbase_is_gpu_removed(kbdev) - || kbase_pm_is_gpu_lost(kbdev)) { + if (kbase_is_gpu_removed(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; backend->hwcnt_desired = false; @@ -1161,16 +1197,19 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) */ backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + KBASE_KTRACE_ADD(kbdev, PM_L2_ON_HWCNT_DISABLE, NULL, + backend->l2_state); kbase_pm_trigger_hwcnt_disable(kbdev); } if (backend->hwcnt_disabled) { backend->l2_state = KBASE_L2_OFF; + KBASE_KTRACE_ADD(kbdev, PM_L2_OFF, NULL, backend->l2_state); dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); } break; } -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#endif /* mask off ready from trans in case transitions finished * between the register reads @@ -1182,6 +1221,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: if (kbase_pm_is_l2_desired(kbdev)) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Enable HW timer of IPA control before + * L2 cache is powered-up. 
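/*
 * kbase_ktrace_log_mcu_state() and kbase_ktrace_log_l2_core_state() above use
 * the "X-macro" pattern: mali_kbase_pm_mcu_states.h / mali_kbase_pm_l2_states.h
 * expand KBASEP_MCU_STATE(n) / KBASEP_L2_STATE(n) once per state, so a single
 * state list can be reused wherever a per-state expansion is needed, such as
 * these ktrace switches. Self-contained sketch of the same idea, using a list
 * macro instead of an included header; all EXAMPLE_* names are hypothetical:
 */
#define EXAMPLE_STATE_LIST(X) \
        X(OFF)                \
        X(PEND_ON)            \
        X(ON)

#define EXAMPLE_ENUM_ENTRY(n) EXAMPLE_##n,
enum example_state { EXAMPLE_STATE_LIST(EXAMPLE_ENUM_ENTRY) };
#undef EXAMPLE_ENUM_ENTRY

static const char *example_state_name(enum example_state state)
{
        switch (state) {
#define EXAMPLE_NAME_CASE(n) case EXAMPLE_##n: return #n;
        EXAMPLE_STATE_LIST(EXAMPLE_NAME_CASE)
#undef EXAMPLE_NAME_CASE
        }
        return "unknown";
}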
+ */ + kbase_ipa_control_handle_gpu_sleep_exit(kbdev); +#endif /* * Set the desired config for L2 before * powering it on @@ -1221,14 +1266,12 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) l2_power_up_done = false; if (!l2_trans && l2_ready == l2_present) { if (need_tiler_control(kbdev)) { -#ifndef CONFIG_MALI_ARBITER_SUPPORT u64 tiler_trans = kbase_pm_get_trans_cores( kbdev, KBASE_PM_CORE_TILER); u64 tiler_ready = kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER); -#endif - tiler_trans &= ~tiler_ready; + if (!tiler_trans && tiler_ready == tiler_present) { KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, @@ -1437,12 +1480,26 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) /* We only need to check the L2 here - if the L2 * is off then the tiler is definitely also off. */ - if (!l2_trans && !l2_ready) + if (!l2_trans && !l2_ready) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif /* L2 is now powered off */ backend->l2_state = KBASE_L2_OFF; + } } else { - if (!kbdev->cache_clean_in_progress) + if (!kbdev->cache_clean_in_progress) { +#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) + /* Allow clock gating within the GPU and prevent it + * from being seen as active during sleep. + */ + kbase_ipa_control_handle_gpu_sleep_enter(kbdev); +#endif backend->l2_state = KBASE_L2_OFF; + } } break; @@ -1457,11 +1514,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) backend->l2_state); } - if (backend->l2_state != prev_state) + if (backend->l2_state != prev_state) { dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", kbase_l2_core_state_to_string(prev_state), kbase_l2_core_state_to_string( backend->l2_state)); + kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state); + } } while (backend->l2_state != prev_state); @@ -1925,7 +1984,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) in_desired_state = false; #else - in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev); + in_desired_state &= kbase_pm_mcu_is_in_desired_state(kbdev); #endif return in_desired_state; @@ -2122,6 +2181,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) backend->in_reset = true; backend->l2_state = KBASE_L2_RESET_WAIT; + KBASE_KTRACE_ADD(kbdev, PM_L2_RESET_WAIT, NULL, backend->l2_state); #if !MALI_USE_CSF backend->shaders_state = KBASE_SHADERS_RESET_WAIT; #else @@ -2130,6 +2190,7 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) */ if (likely(kbdev->csf.firmware_inited)) { backend->mcu_state = KBASE_MCU_RESET_WAIT; + KBASE_KTRACE_ADD(kbdev, PM_MCU_RESET_WAIT, NULL, backend->mcu_state); #ifdef KBASE_PM_RUNTIME backend->exit_gpu_sleep_mode = true; #endif @@ -2328,6 +2389,66 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); +#if MALI_USE_CSF +/** + * core_mask_update_done - Check if downscaling of shader cores is done + * + * @kbdev: The kbase device structure for the device. + * + * This function checks if the downscaling of cores is effectively complete. + * + * Return: true if the downscale is done. 
+ */ +static bool core_mask_update_done(struct kbase_device *kbdev) +{ + bool update_done = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* If MCU is in stable ON state then it implies that the downscale + * request had completed. + * If MCU is not active then it implies all cores are off, so can + * consider the downscale request as complete. + */ + if ((kbdev->pm.backend.mcu_state == KBASE_MCU_ON) || + kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) + update_done = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return update_done; +} + +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) +{ + long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT)); + long remaining; + int err = 0; + + /* Wait for core mask update to complete */ +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#else + remaining = wait_event_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + core_mask_update_done(kbdev), timeout); +#endif + + if (!remaining) { + kbase_pm_timed_out(kbdev); + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info( + kbdev->dev, + "Wait for cores down scaling got interrupted"); + err = (int)remaining; + } + + return err; +} +#endif + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2391,19 +2512,25 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->pm.lock); mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.mali_file_inode) { - /* This would zap the pte corresponding to the mapping of User - * register page for all the Kbase contexts. - */ - unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, - BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, - PAGE_SIZE, 1); + + /* Only if the mappings for USER page exist, update all PTEs associated to it */ + if (kbdev->csf.nr_user_page_mapped > 0) { + if (likely(kbdev->csf.mali_file_inode)) { + /* This would zap the pte corresponding to the mapping of User + * register page for all the Kbase contexts. + */ + unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, + BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1); + } else { + dev_err(kbdev->dev, + "Device file inode not exist even if USER page previously mapped"); + } } + mutex_unlock(&kbdev->csf.reg_lock); } #endif - /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -2541,7 +2668,6 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) backend->gpu_idled = false; } #endif - } KBASE_EXPORT_TEST_API(kbase_pm_clock_on); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index dddc10550306..115cd3c34d90 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -269,6 +269,37 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); */ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); +#if MALI_USE_CSF +/** + * kbase_pm_wait_for_cores_down_scale - Wait for the downscaling of shader cores + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function can be called to ensure that the downscaling of cores is + * effectively complete and it would be safe to lower the voltage. 
+ * The function assumes that caller had exercised the MCU state machine for the + * downscale request through the kbase_pm_update_state() function. + * + * This function needs to be used by the caller to safely wait for the completion + * of downscale request, instead of kbase_pm_wait_for_desired_state(). + * The downscale request would trigger a state change in MCU state machine + * and so when MCU reaches the stable ON state, it can be inferred that + * downscaling is complete. But it has been observed that the wake up of the + * waiting thread can get delayed by few milli seconds and by the time the + * thread wakes up the power down transition could have started (after the + * completion of downscale request). + * On the completion of power down transition another wake up signal would be + * sent, but again by the time thread wakes up the power up transition can begin. + * And the power up transition could then get blocked inside the platform specific + * callback_power_on() function due to the thread that called into Kbase (from the + * platform specific code) to perform the downscaling and then ended up waiting + * for the completion of downscale request. + * + * Return: 0 on success, error code on error or remaining jiffies on timeout. + */ +int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev); +#endif + /** * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state * machines after changing shader core diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 4cc2d50db586..29e945d00fbe 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -38,11 +38,13 @@ #include #include +#if defined(CONFIG_MALI_BIFROST_DEVFREQ) || defined(CONFIG_MALI_BIFROST_DVFS) || !MALI_USE_CSF /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly * under 11s. Exceeding this will cause overflow */ #define KBASE_PM_TIME_SHIFT 8 +#endif #if MALI_USE_CSF /* To get the GPU_ACTIVE value in nano seconds unit */ diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index 977d13961786..a17ff432398c 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -32,6 +32,7 @@ bob_defaults { kbuild_options: [ "CONFIG_MALI_BIFROST_NO_MALI=y", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", + "CONFIG_GPU_HWVER={{.hwver}}", ], }, mali_platform_dt_pin_rst: { @@ -52,9 +53,6 @@ bob_defaults { mali_midgard_enable_trace: { kbuild_options: ["CONFIG_MALI_BIFROST_ENABLE_TRACE=y"], }, - mali_dma_fence: { - kbuild_options: ["CONFIG_MALI_BIFROST_DMA_FENCE=y"], - }, mali_arbiter_support: { kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], }, @@ -64,7 +62,7 @@ bob_defaults { mali_dma_buf_legacy_compat: { kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], }, - mali_2mb_alloc: { + large_page_alloc: { kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], }, mali_memory_fully_backed: { @@ -89,7 +87,7 @@ bob_defaults { kbuild_options: ["CONFIG_MALI_BIFROST_ERROR_INJECT=y"], }, mali_gem5_build: { - kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], }, mali_debug: { kbuild_options: [ @@ -163,9 +161,7 @@ bob_defaults { // (catch-all for experimental CS code without separating it into // different features). 
"MALI_INCREMENTAL_RENDERING_JM={{.incremental_rendering_jm}}", - "MALI_GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}", "MALI_BASE_CSF_PERFORMANCE_TESTS={{.base_csf_performance_tests}}", - "MALI_GPU_TIMESTAMP_INTERPOLATION={{.gpu_timestamp_interpolation}}", ], } @@ -184,6 +180,10 @@ bob_kernel_module { "context/*.c", "context/*.h", "context/Kbuild", + "hwcnt/*.c", + "hwcnt/*.h", + "hwcnt/backend/*.h", + "hwcnt/Kbuild", "ipa/*.c", "ipa/*.h", "ipa/Kbuild", @@ -217,6 +217,10 @@ bob_kernel_module { "device/backend/*_jm.c", "gpu/backend/*_jm.c", "gpu/backend/*_jm.h", + "hwcnt/backend/*_jm.c", + "hwcnt/backend/*_jm.h", + "hwcnt/backend/*_jm_*.c", + "hwcnt/backend/*_jm_*.h", "jm/*.h", "tl/backend/*_jm.c", "mmu/backend/*_jm.c", @@ -238,6 +242,10 @@ bob_kernel_module { "device/backend/*_csf.c", "gpu/backend/*_csf.c", "gpu/backend/*_csf.h", + "hwcnt/backend/*_csf.c", + "hwcnt/backend/*_csf.h", + "hwcnt/backend/*_csf_*.c", + "hwcnt/backend/*_csf_*.h", "tl/backend/*_csf.c", "mmu/backend/*_csf.c", "ipa/backend/*_csf.c", diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 32bf82526aa3..3abc7a2a66f4 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -39,12 +38,14 @@ #include #include #include +#include #include void kbase_context_debugfs_init(struct kbase_context *const kctx) { kbase_debug_mem_view_init(kctx); kbase_debug_mem_zones_init(kctx); + kbase_debug_mem_allocs_init(kctx); kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); kbase_csf_queue_group_debugfs_init(kctx); diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c index 97cd46e0e5b5..995a08e36f43 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_jm.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -37,12 +36,14 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include #include +#include #include void kbase_context_debugfs_init(struct kbase_context *const kctx) { kbase_debug_mem_view_init(kctx); kbase_debug_mem_zones_init(kctx); + kbase_debug_mem_allocs_init(kctx); kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); kbasep_jd_debugfs_ctx_init(kctx); @@ -128,8 +129,6 @@ static const struct kbase_context_init context_init[] = { { NULL, kbase_context_free, NULL }, { kbase_context_common_init, kbase_context_common_term, "Common context initialization failed" }, - { kbase_dma_fence_init, kbase_dma_fence_term, - "DMA fence initialization failed" }, { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, "Memory pool group initialization failed" }, { kbase_mem_evictable_init, kbase_mem_evictable_deinit, diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index f84e01edee93..b6abfc44d212 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -165,7 +165,9 @@ int kbase_context_common_init(struct kbase_context *kctx) atomic64_set(&kctx->num_fixed_allocs, 0); #endif + kbase_gpu_vm_lock(kctx); bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); + 
kbase_gpu_vm_unlock(kctx); kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; @@ -274,10 +276,8 @@ void kbase_context_common_term(struct kbase_context *kctx) int kbase_context_mem_pool_group_init(struct kbase_context *kctx) { - return kbase_mem_pool_group_init(&kctx->mem_pools, - kctx->kbdev, - &kctx->kbdev->mem_pool_defaults, - &kctx->kbdev->mem_pools); + return kbase_mem_pool_group_init(&kctx->mem_pools, kctx->kbdev, + &kctx->kbdev->mem_pool_defaults, &kctx->kbdev->mem_pools); } void kbase_context_mem_pool_group_term(struct kbase_context *kctx) diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index c5d9154a2e35..1474bdaacb0d 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,16 @@ bifrost_kbase-y += \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ - csf/mali_kbase_csf_event.o + csf/mali_kbase_csf_event.o \ + csf/mali_kbase_csf_firmware_log.o \ + csf/mali_kbase_csf_tiler_heap_reclaim.o bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o +bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o + ifeq ($(KBUILD_EXTMOD),) # in-tree diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c index e503b08d13b9..c81d0a5a7236 100644 --- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -28,8 +28,6 @@ * Status flags from the STATUS register of the IPA Control interface. */ #define STATUS_COMMAND_ACTIVE ((u32)1 << 0) -#define STATUS_TIMER_ACTIVE ((u32)1 << 1) -#define STATUS_AUTO_ACTIVE ((u32)1 << 2) #define STATUS_PROTECTED_MODE ((u32)1 << 8) #define STATUS_RESET ((u32)1 << 9) #define STATUS_TIMER_ENABLED ((u32)1 << 31) @@ -37,9 +35,7 @@ /* * Commands for the COMMAND register of the IPA Control interface. */ -#define COMMAND_NOP ((u32)0) #define COMMAND_APPLY ((u32)1) -#define COMMAND_CLEAR ((u32)2) #define COMMAND_SAMPLE ((u32)3) #define COMMAND_PROTECTED_ACK ((u32)4) #define COMMAND_RESET_ACK ((u32)5) @@ -965,6 +961,43 @@ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post); +#ifdef KBASE_PM_RUNTIME +void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { + /* GPU Sleep is treated as a power down */ + kbase_ipa_control_handle_gpu_power_off(kbdev); + + /* SELECT_CSHW register needs to be cleared to prevent any + * IPA control message to be sent to the top level GPU HWCNT. 
+ */ + kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), 0); + kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), 0); + + /* No need to issue the APPLY command here */ + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_enter); + +void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.mcu_state == KBASE_MCU_IN_SLEEP) { + /* To keep things simple, currently exit from + * GPU Sleep is treated as a power on event where + * all 4 SELECT registers are reconfigured. + * On exit from sleep, reconfiguration is needed + * only for the SELECT_CSHW register. + */ + kbase_ipa_control_handle_gpu_power_on(kbdev); + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_sleep_exit); +#endif + #if MALI_UNIT_TEST void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, u32 clk_rate_hz) diff --git a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h index 0469c482dfff..69ff8973bac4 100644 --- a/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h +++ b/drivers/gpu/arm/bifrost/csf/ipa_control/mali_kbase_csf_ipa_control.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -198,6 +198,33 @@ void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); */ void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); +#ifdef KBASE_PM_RUNTIME +/** + * kbase_ipa_control_handle_gpu_sleep_enter - Handle the pre GPU Sleep event + * + * @kbdev: Pointer to kbase device. + * + * This function is called after MCU has been put to sleep state & L2 cache has + * been powered down. The top level part of GPU is still powered up when this + * function is called. + */ +void kbase_ipa_control_handle_gpu_sleep_enter(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_handle_gpu_sleep_exit - Handle the post GPU Sleep event + * + * @kbdev: Pointer to kbase device. + * + * This function is called when L2 needs to be powered up and MCU can exit the + * sleep state. The top level part of GPU is powered up when this function is + * called. + * + * This function must be called only if kbase_ipa_control_handle_gpu_sleep_enter() + * was called previously. 
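The sleep-enter path above clears SELECT_CSHW by writing the LO/HI register pair separately. A hypothetical wrapper for that pattern (not a driver API, shown only to make the 64-bit split explicit):

/* Program a 64-bit SELECT_CSHW value as two 32-bit writes; passing 0
 * reproduces the clear done on GPU sleep entry.
 */
static void ipa_control_write_select_cshw(struct kbase_device *kbdev, u64 value)
{
	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), (u32)(value & U32_MAX));
	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), (u32)(value >> 32));
}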
+ */ +void kbase_ipa_control_handle_gpu_sleep_exit(struct kbase_device *kbdev); +#endif + #if MALI_UNIT_TEST /** * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index 80e37a36ca76..b77007300c5c 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -348,9 +348,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, if (!reg) return -ENOMEM; - ret = kbase_mem_pool_alloc_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, false); + ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, + queue->phys, false); if (ret != num_pages) goto phys_alloc_failed; @@ -374,8 +373,11 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, queue->db_file_offset = kbdev->csf.db_file_offsets; kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; - +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); +#else + WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); +#endif /* This is the second reference taken on the queue object and * would be dropped only when the IO mapping is removed either * explicitly by userspace or implicitly by kernel on process exit. @@ -444,25 +446,34 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr) static void get_queue(struct kbase_queue *queue) { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) WARN_ON(!atomic_inc_not_zero(&queue->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&queue->refcount)); +#endif } static void release_queue(struct kbase_queue *queue) { lockdep_assert_held(&queue->kctx->csf.lock); - - WARN_ON(atomic_read(&queue->refcount) <= 0); - +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) if (atomic_dec_and_test(&queue->refcount)) { +#else + if (refcount_dec_and_test(&queue->refcount)) { +#endif /* The queue can't still be on the per context list. 
*/ WARN_ON(!list_empty(&queue->link)); WARN_ON(queue->group); + dev_dbg(queue->kctx->kbdev->dev, + "Remove any pending command queue fatal from ctx %d_%d", + queue->kctx->tgid, queue->kctx->id); + kbase_csf_event_remove_error(queue->kctx, &queue->error); kfree(queue); } } static void oom_event_worker(struct work_struct *data); -static void fatal_event_worker(struct work_struct *data); +static void cs_error_worker(struct work_struct *data); /* Between reg and reg_ex, one and only one must be null */ static int csf_queue_register_internal(struct kbase_context *kctx, @@ -565,7 +576,11 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->enabled = false; queue->priority = reg->priority; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) atomic_set(&queue->refcount, 1); +#else + refcount_set(&queue->refcount, 1); +#endif queue->group = NULL; queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; @@ -588,7 +603,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, INIT_LIST_HEAD(&queue->link); INIT_LIST_HEAD(&queue->error.link); INIT_WORK(&queue->oom_event_work, oom_event_worker); - INIT_WORK(&queue->fatal_event_work, fatal_event_worker); + INIT_WORK(&queue->cs_error_work, cs_error_worker); list_add(&queue->link, &kctx->csf.queue_list); queue->extract_ofs = 0; @@ -699,11 +714,6 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, } kbase_gpu_vm_unlock(kctx); - dev_dbg(kctx->kbdev->dev, - "Remove any pending command queue fatal from context %pK\n", - (void *)kctx); - kbase_csf_event_remove_error(kctx, &queue->error); - release_queue(queue); } @@ -784,6 +794,11 @@ static struct kbase_queue_group *get_bound_queue_group( return group; } +static void enqueue_gpu_submission_work(struct kbase_context *const kctx) +{ + queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); +} + /** * pending_submission_worker() - Work item to process pending kicked GPU command queues. 
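The queue lifetime hunks above repeat the same KERNEL_VERSION(4, 11, 0) check around every reference-count operation, since refcount_t was only introduced in 4.11. A sketch of hypothetical compatibility wrappers that would keep the check in one place (kbase_refcount_t and the kbase_refcount_* names are invented for illustration):

#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, v)	atomic_set(r, v)
#define kbase_refcount_read(r)		atomic_read(r)
#define kbase_refcount_inc_not_zero(r)	atomic_inc_not_zero(r)
#define kbase_refcount_dec_and_test(r)	atomic_dec_and_test(r)
#else
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, v)	refcount_set(r, v)
#define kbase_refcount_read(r)		refcount_read(r)
#define kbase_refcount_inc_not_zero(r)	refcount_inc_not_zero(r)
#define kbase_refcount_dec_and_test(r)	refcount_dec_and_test(r)
#endif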
* @@ -813,11 +828,21 @@ static void pending_submission_worker(struct work_struct *work) list_for_each_entry(queue, &kctx->csf.queue_list, link) { if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { struct kbase_queue_group *group = get_bound_queue_group(queue); + int ret; - if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) + if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { dev_dbg(kbdev->dev, "queue is not bound to a group"); - else - WARN_ON(kbase_csf_scheduler_queue_start(queue)); + continue; + } + + ret = kbase_csf_scheduler_queue_start(queue); + if (unlikely(ret)) { + dev_dbg(kbdev->dev, "Failed to start queue"); + if (ret == -EBUSY) { + atomic_cmpxchg(&queue->pending, 0, 1); + enqueue_gpu_submission_work(kctx); + } + } } } @@ -831,6 +856,8 @@ void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) if (WARN_ON(slot < 0)) return; + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); } @@ -843,6 +870,8 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); u32 value; + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + if (WARN_ON(slot_bitmap > allowed_bitmap)) return; @@ -872,6 +901,8 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, struct kbase_csf_cmd_stream_group_info *ginfo; u32 value; + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + if (WARN_ON(csg_nr < 0) || WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; @@ -891,11 +922,6 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, kbase_csf_ring_csg_doorbell(kbdev, csg_nr); } -static void enqueue_gpu_submission_work(struct kbase_context *const kctx) -{ - queue_work(system_highpri_wq, &kctx->csf.pending_submission_work); -} - int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { @@ -1129,9 +1155,8 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, } /* Get physical page for a normal suspend buffer */ - err = kbase_mem_pool_alloc_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false); + err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, + &s_buf->phy[0], false); if (err < 0) goto phy_pages_alloc_failed; @@ -1362,6 +1387,11 @@ static int create_queue_group(struct kbase_context *const kctx, group->cs_unrecoverable = false; group->reevaluate_idle_status = false; + group->dvs_buf = create->in.dvs_buf; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + group->deschedule_deferred_cnt = 0; +#endif group->group_uid = generate_group_uid(); create->out.group_uid = group->group_uid; @@ -1377,6 +1407,9 @@ static int create_queue_group(struct kbase_context *const kctx, MAX_SUPPORTED_STREAMS_PER_GROUP); group->run_state = KBASE_CSF_GROUP_INACTIVE; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, + group->run_state); + err = create_suspend_buffers(kctx, group); if (err < 0) { @@ -1396,6 +1429,17 @@ static int create_queue_group(struct kbase_context *const kctx, return group_handle; } +static bool dvs_supported(u32 csf_version) +{ + if (GLB_VERSION_MAJOR_GET(csf_version) < 3) + return false; + + if (GLB_VERSION_MAJOR_GET(csf_version) == 3) + if (GLB_VERSION_MINOR_GET(csf_version) < 2) + return false; + + return true; +} int kbase_csf_queue_group_create(struct kbase_context *const kctx, union kbase_ioctl_cs_queue_group_create *const create) @@ -1434,8 +1478,17 @@ int 
kbase_csf_queue_group_create(struct kbase_context *const kctx, dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); err = -EINVAL; - } else if (create->in.reserved) { - dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0"); + } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && + create->in.dvs_buf) { + dev_warn( + kctx->kbdev->dev, + "GPU does not support DVS but userspace is trying to use it"); + err = -EINVAL; + } else if (dvs_supported(kctx->kbdev->csf.global_iface.version) && + !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) && + CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) { + dev_warn(kctx->kbdev->dev, + "DVS buffer pointer is null but size is not 0"); err = -EINVAL; } else { /* For the CSG which satisfies the condition for having @@ -1555,6 +1608,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) &group->protected_suspend_buf); group->run_state = KBASE_CSF_GROUP_TERMINATED; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state); } /** @@ -1585,6 +1639,34 @@ static void term_queue_group(struct kbase_queue_group *group) kbase_csf_term_descheduled_queue_group(group); } +/** + * wait_group_deferred_deschedule_completion - Wait for refcount of the group to + * become 0 that was taken when the group deschedule had to be deferred. + * + * @group: Pointer to GPU command queue group that is being deleted. + * + * This function is called when Userspace deletes the group and after the group + * has been descheduled. The function synchronizes with the other threads that were + * also trying to deschedule the group whilst the dumping was going on for a fault. + * Please refer the documentation of wait_for_dump_complete_on_group_deschedule() + * for more details. + */ +static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_context *kctx = group->kctx; + + lockdep_assert_held(&kctx->csf.lock); + + if (likely(!group->deschedule_deferred_cnt)) + return; + + mutex_unlock(&kctx->csf.lock); + wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt); + mutex_lock(&kctx->csf.lock); +#endif +} + static void cancel_queue_group_events(struct kbase_queue_group *group) { cancel_work_sync(&group->timer_event_work); @@ -1626,24 +1708,39 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, group = find_queue_group(kctx, group_handle); if (group) { - remove_pending_group_fatal_error(group); - term_queue_group(group); kctx->csf.queue_groups[group_handle] = NULL; + /* Stop the running of the given group */ + term_queue_group(group); + mutex_unlock(&kctx->csf.lock); + + if (reset_prevented) { + /* Allow GPU reset before cancelling the group specific + * work item to avoid potential deadlock. + * Reset prevention isn't needed after group termination. + */ + kbase_reset_gpu_allow(kbdev); + reset_prevented = false; + } + + /* Cancel any pending event callbacks. If one is in progress + * then this thread waits synchronously for it to complete (which + * is why we must unlock the context first). We already ensured + * that no more callbacks can be enqueued by terminating the group. 
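The dvs_supported() helper introduced earlier in this hunk rejects anything below GLB interface version 3.2; an equivalent single-expression form (illustrative only) makes that reading explicit:

/* Equivalent to dvs_supported(): DVS needs GLB version >= 3.2. */
static bool dvs_supported_equiv(u32 csf_version)
{
	return (GLB_VERSION_MAJOR_GET(csf_version) > 3) ||
	       ((GLB_VERSION_MAJOR_GET(csf_version) == 3) &&
		(GLB_VERSION_MINOR_GET(csf_version) >= 2));
}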
+ */ + cancel_queue_group_events(group); + + mutex_lock(&kctx->csf.lock); + + /* Clean up after the termination */ + remove_pending_group_fatal_error(group); + + wait_group_deferred_deschedule_completion(group); } mutex_unlock(&kctx->csf.lock); if (reset_prevented) kbase_reset_gpu_allow(kbdev); - if (!group) - return; - - /* Cancel any pending event callbacks. If one is in progress - * then this thread waits synchronously for it to complete (which - * is why we must unlock the context first). We already ensured - * that no more callbacks can be enqueued by terminating the group. - */ - cancel_queue_group_events(group); kfree(group); } @@ -1738,7 +1835,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, int kbase_csf_ctx_init(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; int err = -ENOMEM; INIT_LIST_HEAD(&kctx->csf.queue_list); @@ -1747,19 +1843,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) kbase_csf_event_init(kctx); kctx->csf.user_reg_vma = NULL; - mutex_lock(&kbdev->pm.lock); - /* The inode information for /dev/malixx file is not available at the - * time of device probe as the inode is created when the device node - * is created by udevd (through mknod). - */ - if (kctx->filp) { - if (!kbdev->csf.mali_file_inode) - kbdev->csf.mali_file_inode = kctx->filp->f_inode; - - /* inode is unique for a file */ - WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode); - } - mutex_unlock(&kbdev->pm.lock); /* Mark all the cookies as 'free' */ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); @@ -1874,8 +1957,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) else reset_prevented = true; - cancel_work_sync(&kctx->csf.pending_submission_work); - mutex_lock(&kctx->csf.lock); /* Iterate through the queue groups that were not terminated by @@ -1894,6 +1975,8 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) if (reset_prevented) kbase_reset_gpu_allow(kbdev); + cancel_work_sync(&kctx->csf.pending_submission_work); + /* Now that all queue groups have been terminated, there can be no * more OoM or timer event interrupts but there can be inflight work * items. Destroying the wq will implicitly flush those work items. @@ -1938,7 +2021,11 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * only one reference left that was taken when queue was * registered. */ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) if (atomic_read(&queue->refcount) != 1) +#else + if (refcount_read(&queue->refcount) != 1) +#endif dev_warn(kctx->kbdev->dev, "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); @@ -2059,6 +2146,36 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) kbase_event_wakeup(group->kctx); } +static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) +{ + int err; + const unsigned int cache_flush_wait_timeout_ms = 2000; + + kbase_pm_lock(kbdev); + /* With the advent of partial cache flush, dirty cache lines could + * be left in the GPU L2 caches by terminating the queue group here + * without waiting for proper cache maintenance. A full cache flush + * here will prevent these dirty cache lines from being arbitrarily + * evicted later and possible causing memory corruption. 
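The reworked termination path above drops the context lock (and re-allows GPU reset) before cancelling the group's work items, then re-takes the lock for the final cleanup. A generic sketch of the rule it follows, with placeholder comments instead of driver specifics:

/* cancel_work_sync() may sleep waiting for a running handler, so it must
 * not be called while holding a lock the handler itself acquires.
 */
static void teardown_without_deadlock(struct mutex *lock, struct work_struct *work)
{
	mutex_lock(lock);
	/* detach the object here so the handler can no longer requeue it */
	mutex_unlock(lock);

	cancel_work_sync(work);	/* may block until the handler completes */

	mutex_lock(lock);
	/* remaining cleanup that still needs the lock */
	mutex_unlock(lock);
}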
+ */ + if (kbdev->pm.backend.gpu_powered) { + kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); + + if (err) { + dev_warn( + kbdev->dev, + "[%llu] Timeout waiting for cache clean to complete after fatal error", + kbase_backend_get_cycle_cnt(kbdev)); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + } + + kbase_pm_unlock(kbdev); +} + /** * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. * @@ -2071,8 +2188,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) * notification to allow the firmware to report out-of-memory again in future. * If the out-of-memory condition was successfully handled then this function * rings the relevant doorbell to notify the firmware; otherwise, it terminates - * the GPU command queue group to which the queue is bound. See - * term_queue_group() for details. + * the GPU command queue group to which the queue is bound and notify a waiting + * user space client of the failure. */ static void kbase_queue_oom_event(struct kbase_queue *const queue) { @@ -2084,6 +2201,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue) struct kbase_csf_cmd_stream_info const *stream; int csi_index = queue->csi_index; u32 cs_oom_ack, cs_oom_req; + unsigned long flags; lockdep_assert_held(&kctx->csf.lock); @@ -2129,20 +2247,23 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue) err = handle_oom_event(group, stream); + kbase_csf_scheduler_spin_lock(kbdev, &flags); kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, CS_REQ_TILER_OOM_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); + kbase_csf_scheduler_spin_unlock(kbdev, flags); - if (err) { + if (unlikely(err)) { dev_warn( kbdev->dev, "Queue group to be terminated, couldn't handle the OoM event\n"); + kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM); kbase_csf_scheduler_unlock(kbdev); term_queue_group(group); + flush_gpu_cache_on_fatal_error(kbdev); report_tiler_oom_error(group); return; } - - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); unlock: kbase_csf_scheduler_unlock(kbdev); } @@ -2164,6 +2285,7 @@ static void oom_event_worker(struct work_struct *data) struct kbase_device *const kbdev = kctx->kbdev; int err = kbase_reset_gpu_try_prevent(kbdev); + /* Regardless of whether reset failed or is currently happening, exit * early */ @@ -2216,12 +2338,13 @@ static void timer_event_worker(struct work_struct *data) struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, timer_event_work); struct kbase_context *const kctx = group->kctx; + struct kbase_device *const kbdev = kctx->kbdev; bool reset_prevented = false; - int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev); + int err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) dev_warn( - kctx->kbdev->dev, + kbdev->dev, "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", group->handle); else @@ -2230,11 +2353,12 @@ static void timer_event_worker(struct work_struct *data) mutex_lock(&kctx->csf.lock); term_queue_group(group); + flush_gpu_cache_on_fatal_error(kbdev); report_group_timeout_error(group); mutex_unlock(&kctx->csf.lock); if (reset_prevented) - kbase_reset_gpu_allow(kctx->kbdev); + kbase_reset_gpu_allow(kbdev); } /** @@ -2242,11 +2366,15 @@ static void timer_event_worker(struct 
work_struct *data) * * @group: Pointer to GPU queue group for which the timeout event is received. * + * Notify a waiting user space client of the timeout. * Enqueue a work item to terminate the group and notify the event notification * thread of progress timeout fault for the GPU command queue group. */ static void handle_progress_timer_event(struct kbase_queue_group *const group) { + kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx, + DF_PROGRESS_TIMER_TIMEOUT); + queue_work(group->kctx->csf.wq, &group->timer_event_work); } @@ -2274,16 +2402,20 @@ static void protm_event_worker(struct work_struct *data) * handle_fault_event - Handler for CS fault. * * @queue: Pointer to queue for which fault event was received. - * @stream: Pointer to the structure containing info provided by the - * firmware about the CSI. - * - * Prints meaningful CS fault information. + * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for + * the queue. * + * Print required information about the CS fault and notify the user space client + * about the fault. */ static void -handle_fault_event(struct kbase_queue *const queue, - struct kbase_csf_cmd_stream_info const *const stream) +handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) { + struct kbase_device *const kbdev = queue->kctx->kbdev; + struct kbase_csf_cmd_stream_group_info const *ginfo = + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; + struct kbase_csf_cmd_stream_info const *stream = + &ginfo->streams[queue->csi_index]; const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); const u64 cs_fault_info = kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | @@ -2295,7 +2427,6 @@ handle_fault_event(struct kbase_queue *const queue, CS_FAULT_EXCEPTION_DATA_GET(cs_fault); const u64 cs_fault_info_exception_data = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); - struct kbase_device *const kbdev = queue->kctx->kbdev; kbase_csf_scheduler_spin_lock_assert_held(kbdev); @@ -2310,6 +2441,36 @@ handle_fault_event(struct kbase_queue *const queue, kbase_gpu_exception_name(cs_fault_exception_type), cs_fault_exception_data, cs_fault_info_exception_data); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + /* CS_RESOURCE_TERMINATED type fault event can be ignored from the + * standpoint of dump on error. It is used to report fault for the CSIs + * that are associated with the same CSG as the CSI for which the actual + * fault was reported by the Iterator. + * Dumping would be triggered when the actual fault is reported. + * + * CS_INHERIT_FAULT can also be ignored. It could happen due to the error + * in other types of queues (cpu/kcpu). If a fault had occurred in some + * other GPU queue then the dump would have been performed anyways when + * that fault was reported. 
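Read as a predicate, the exception-type filter applied in the lines that follow could be written as the helper below (illustrative only; the two ignored types are secondary reports, as the comment above explains):

/* Secondary fault reports never trigger dump-on-fault by themselves. */
static bool cs_fault_triggers_dump(u32 cs_fault_exception_type)
{
	return (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
	       (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED);
}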
+ */ + if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && + (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { + if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { + get_queue(queue); + queue->cs_error = cs_fault; + queue->cs_error_info = cs_fault_info; + queue->cs_error_fatal = false; + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) + release_queue(queue); + return; + } + } +#endif + + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); } static void report_queue_fatal_error(struct kbase_queue *const queue, @@ -2341,16 +2502,16 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, } /** - * fatal_event_worker - Handle the fatal error for the GPU queue + * fatal_event_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue * * @data: Pointer to a work_struct embedded in GPU command queue. * * Terminate the CSG and report the error to userspace. */ -static void fatal_event_worker(struct work_struct *const data) +static void cs_error_worker(struct work_struct *const data) { struct kbase_queue *const queue = - container_of(data, struct kbase_queue, fatal_event_work); + container_of(data, struct kbase_queue, cs_error_work); struct kbase_context *const kctx = queue->kctx; struct kbase_device *const kbdev = kctx->kbdev; struct kbase_queue_group *group; @@ -2365,6 +2526,7 @@ static void fatal_event_worker(struct work_struct *const data) else reset_prevented = true; + kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&kctx->csf.lock); group = get_bound_queue_group(queue); @@ -2373,9 +2535,35 @@ static void fatal_event_worker(struct work_struct *const data) goto unlock; } +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (!queue->cs_error_fatal) { + unsigned long flags; + int slot_num; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + slot_num = kbase_csf_scheduler_group_get_slot_locked(group); + if (slot_num >= 0) { + struct kbase_csf_cmd_stream_group_info const *ginfo = + &kbdev->csf.global_iface.groups[slot_num]; + struct kbase_csf_cmd_stream_info const *stream = + &ginfo->streams[queue->csi_index]; + u32 const cs_ack = + kbase_csf_firmware_cs_output(stream, CS_ACK); + + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, + slot_num, true); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); + goto unlock; + } +#endif + group_handle = group->handle; term_queue_group(group); - report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info, + flush_gpu_cache_on_fatal_error(kbdev); + report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group_handle); unlock: @@ -2391,14 +2579,18 @@ unlock: * @queue: Pointer to queue for which fatal event was received. * @stream: Pointer to the structure containing info provided by the * firmware about the CSI. + * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for + * the queue. * - * Prints meaningful CS fatal information. + * Notify a waiting user space client of the CS fatal and prints meaningful + * information. * Enqueue a work item to terminate the group and report the fatal error * to user space. 
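Throughout these handlers an event is treated as pending while its REQ and ACK bits differ under the event's mask, and acknowledged by copying the ACK value back into REQ under the same mask. A compact statement of that convention (illustrative helper, not driver API):

/* True while the masked REQ and ACK bits disagree, i.e. the event has
 * been raised by one side but not yet acknowledged by the other.
 */
static bool csf_event_pending(u32 req, u32 ack, u32 mask)
{
	return ((req ^ ack) & mask) != 0;
}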
*/ static void handle_fatal_event(struct kbase_queue *const queue, - struct kbase_csf_cmd_stream_info const *const stream) + struct kbase_csf_cmd_stream_info const *const stream, + u32 cs_ack) { const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); const u64 cs_fatal_info = @@ -2428,57 +2620,26 @@ handle_fatal_event(struct kbase_queue *const queue, if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { + kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR); queue_work(system_wq, &kbdev->csf.fw_error_work); } else { + kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL); if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) { queue->group->cs_unrecoverable = true; if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(queue->kctx->kbdev); } get_queue(queue); - queue->cs_fatal = cs_fatal; - queue->cs_fatal_info = cs_fatal_info; - if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work)) + queue->cs_error = cs_fatal; + queue->cs_error_info = cs_fatal_info; + queue->cs_error_fatal = true; + if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) release_queue(queue); } -} + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FATAL_MASK); -/** - * handle_queue_exception_event - Handler for CS fatal/fault exception events. - * - * @queue: Pointer to queue for which fatal/fault event was received. - * @cs_req: Value of the CS_REQ register from the CS's input page. - * @cs_ack: Value of the CS_ACK register from the CS's output page. - */ -static void handle_queue_exception_event(struct kbase_queue *const queue, - const u32 cs_req, const u32 cs_ack) -{ - struct kbase_csf_cmd_stream_group_info const *ginfo; - struct kbase_csf_cmd_stream_info const *stream; - struct kbase_context *const kctx = queue->kctx; - struct kbase_device *const kbdev = kctx->kbdev; - struct kbase_queue_group *group = queue->group; - int csi_index = queue->csi_index; - int slot_num = group->csg_nr; - - kbase_csf_scheduler_spin_lock_assert_held(kbdev); - - ginfo = &kbdev->csf.global_iface.groups[slot_num]; - stream = &ginfo->streams[csi_index]; - - if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { - handle_fatal_event(queue, stream); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FATAL_MASK); - } - - if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { - handle_fault_event(queue, stream); - kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, - CS_REQ_FAULT_MASK); - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); - } } /** @@ -2531,11 +2692,16 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, kbase_csf_firmware_cs_output(stream, CS_ACK); struct workqueue_struct *wq = group->kctx->csf.wq; - if ((cs_req & CS_REQ_EXCEPTION_MASK) ^ - (cs_ack & CS_ACK_EXCEPTION_MASK)) { + if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, group, queue, cs_req ^ cs_ack); - handle_queue_exception_event(queue, cs_req, cs_ack); + handle_fatal_event(queue, stream, cs_ack); + } + + if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT, + group, queue, cs_req ^ cs_ack); + handle_fault_event(queue, cs_ack); } /* PROTM_PEND and TILER_OOM can be safely ignored @@ -2597,6 +2763,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, if 
(test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, + scheduler->csg_slots_idle_mask[0]); dev_dbg(kbdev->dev, "Group-%d on slot %d de-idled by protm request", group->handle, group->csg_nr); @@ -2698,7 +2866,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* If there are non-idle CSGs waiting for a slot, fire * a tock for a replacement. */ - mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS, + group, req ^ ack); + kbase_csf_scheduler_invoke_tock(kbdev); + } else { + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS, + group, req ^ ack); } if (group->scan_seq_num < track->idle_seq) { @@ -2709,14 +2882,15 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, - CSG_REQ_PROGRESS_TIMER_EVENT_MASK); + CSG_REQ_PROGRESS_TIMER_EVENT_MASK); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, - group, req ^ ack); - dev_info(kbdev->dev, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group, + req ^ ack); + dev_info( + kbdev->dev, "[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->kctx->tgid, group->kctx->id, csg_nr); + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, + group->kctx->id, csg_nr); handle_progress_timer_event(group); } @@ -2904,7 +3078,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, * for the scheduler to re-examine the case. */ dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot); - mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); } else if (group) { u32 i, num_groups = kbdev->csf.global_iface.group_num; struct kbase_queue_group *grp; @@ -2927,7 +3101,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, tock_triggered = true; dev_dbg(kbdev->dev, "Attempt new protm from tick/tock idle slot %d\n", i); - mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); break; } } @@ -2940,77 +3114,133 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, } } +static void order_job_irq_clear_with_iface_mem_read(void) +{ + /* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the + * read from interface memory. The ordering is needed considering the way + * FW & Kbase writes to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers + * without any synchronization. Without the barrier there is no guarantee + * about the ordering, the write to IRQ_CLEAR can take effect after the read + * from interface memory and that could cause a problem for the scenario where + * FW sends back to back notifications for the same CSG for events like + * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the + * first event. Similar thing can happen with glb events like CFG_ALLOC_EN + * acknowledgment and GPU idle notification. 
+ * + * MCU CPU + * --------------- ---------------- + * Update interface memory Write to IRQ_CLEAR to clear current IRQ + * + * Write to IRQ_RAWSTAT to raise new IRQ Read interface memory + */ + + /* CPU and GPU would be in the same Outer shareable domain */ + dmb(osh); +} + void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) { - unsigned long flags; - u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; - struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; + bool deferred_handling_glb_idle_irq = false; lockdep_assert_held(&kbdev->hwaccess_lock); KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); - if (csg_interrupts != 0) { - kbase_csf_scheduler_spin_lock(kbdev, &flags); - /* Looping through and track the highest idle and protm groups */ - while (csg_interrupts != 0) { - int const csg_nr = ffs(csg_interrupts) - 1; + do { + unsigned long flags; + u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; + struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; + bool glb_idle_irq_received = false; - process_csg_interrupts(kbdev, csg_nr, &track); - csg_interrupts &= ~(1 << csg_nr); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + order_job_irq_clear_with_iface_mem_read(); + + if (csg_interrupts != 0) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + /* Looping through and track the highest idle and protm groups */ + while (csg_interrupts != 0) { + int const csg_nr = ffs(csg_interrupts) - 1; + + process_csg_interrupts(kbdev, csg_nr, &track); + csg_interrupts &= ~(1 << csg_nr); + } + + /* Handle protm from the tracked information */ + process_tracked_info_for_protm(kbdev, &track); + kbase_csf_scheduler_spin_unlock(kbdev, flags); } - /* Handle protm from the tracked information */ - process_tracked_info_for_protm(kbdev, &track); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - } + if (val & JOB_IRQ_GLOBAL_IF) { + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; - if (val & JOB_IRQ_GLOBAL_IF) { - const struct kbase_csf_global_iface *const global_iface = - &kbdev->csf.global_iface; + kbdev->csf.interrupt_received = true; - kbdev->csf.interrupt_received = true; + if (!kbdev->csf.firmware_reloaded) + kbase_csf_firmware_reload_completed(kbdev); + else if (global_iface->output) { + u32 glb_req, glb_ack; - if (!kbdev->csf.firmware_reloaded) - kbase_csf_firmware_reload_completed(kbdev); - else if (global_iface->output) { - u32 glb_req, glb_ack; + kbase_csf_scheduler_spin_lock(kbdev, &flags); + glb_req = + kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, + glb_req ^ glb_ack); - kbase_csf_scheduler_spin_lock(kbdev, &flags); - glb_req = kbase_csf_firmware_global_input_read( - global_iface, GLB_REQ); - glb_ack = kbase_csf_firmware_global_output( - global_iface, GLB_ACK); - KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL, glb_req ^ glb_ack); + check_protm_enter_req_complete(kbdev, glb_req, glb_ack); - check_protm_enter_req_complete(kbdev, glb_req, glb_ack); + if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) + process_protm_exit(kbdev, glb_ack); - if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) - process_protm_exit(kbdev, glb_ack); - - /* Handle IDLE Hysteresis notification event */ - if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { - dev_dbg(kbdev->dev, "Idle-hysteresis event 
flagged"); - kbase_csf_firmware_global_input_mask( + /* Handle IDLE Hysteresis notification event */ + if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { + dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); + kbase_csf_firmware_global_input_mask( global_iface, GLB_REQ, glb_ack, GLB_REQ_IDLE_EVENT_MASK); - kbase_csf_scheduler_process_gpu_idle_event(kbdev); + glb_idle_irq_received = true; + /* Defer handling this IRQ to account for a race condition + * where the idle worker could be executed before we have + * finished handling all pending IRQs (including CSG IDLE + * IRQs). + */ + deferred_handling_glb_idle_irq = true; + } + + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Invoke the MCU state machine as a state transition + * might have completed. + */ + kbase_pm_update_state(kbdev); } - - process_prfcnt_interrupts(kbdev, glb_req, glb_ack); - - kbase_csf_scheduler_spin_unlock(kbdev, flags); - - /* Invoke the MCU state machine as a state transition - * might have completed. - */ - kbase_pm_update_state(kbdev); } + + if (!glb_idle_irq_received) + break; + /* Attempt to serve potential IRQs that might have occurred + * whilst handling the previous IRQ. In case we have observed + * the GLB IDLE IRQ without all CSGs having been marked as + * idle, the GPU would be treated as no longer idle and left + * powered on. + */ + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + } while (val); + + if (deferred_handling_glb_idle_irq) { + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_scheduler_process_gpu_idle_event(kbdev); + kbase_csf_scheduler_spin_unlock(kbdev, flags); } wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); } @@ -3037,9 +3267,8 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) if (IS_ERR(filp)) return PTR_ERR(filp); - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - 1, &phys, false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, + false); if (ret <= 0) { fput(filp); @@ -3073,9 +3302,8 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) kbdev->csf.dummy_user_reg_page = as_tagged(0); - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, - false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, + false); if (ret <= 0) return ret; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index 92a511d79a05..3afbe6d4005e 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -23,12 +23,135 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_DEBUG_FS) #include "mali_kbase_csf_tl_reader.h" +/* Wait time to be used cumulatively for all the CSG slots. + * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be + * any other Host request pending on the FW side and usually FW would be responsive + * to the Doorbell IRQs as it won't do any polling for a long time and also it won't + * have to wait for any HW state transition to complete for publishing the status. + * So it is reasonable to expect that handling of STATUS_UPDATE request would be + * relatively very quick. 
+ */ +#define STATUS_UPDATE_WAIT_TIMEOUT 500 + +/* The bitmask of CSG slots for which the STATUS_UPDATE request completed. + * The access to it is serialized with scheduler lock, so at a time it would + * get used either for "active_groups" or per context "groups" debugfs file. + */ +static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS); + +static +bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr) +{ + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[csg_nr]; + + return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ + kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & + CSG_REQ_STATUS_UPDATE_MASK); +} + +static +bool csg_slots_status_update_finish(struct kbase_device *kbdev, + const unsigned long *slots_mask) +{ + const u32 max_csg_slots = kbdev->csf.global_iface.group_num; + bool changed = false; + u32 csg_nr; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + for_each_set_bit(csg_nr, slots_mask, max_csg_slots) { + if (csg_slot_status_update_finish(kbdev, csg_nr)) { + set_bit(csg_nr, csg_slots_status_updated); + changed = true; + } + } + + return changed; +} + +static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, + unsigned long *slots_mask) +{ + const u32 max_csg_slots = kbdev->csf.global_iface.group_num; + long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + bitmap_zero(csg_slots_status_updated, max_csg_slots); + + while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) { + remaining = wait_event_timeout(kbdev->csf.event_wait, + csg_slots_status_update_finish(kbdev, slots_mask), + remaining); + if (likely(remaining)) { + bitmap_andnot(slots_mask, slots_mask, + csg_slots_status_updated, max_csg_slots); + } else { + dev_warn(kbdev->dev, + "STATUS_UPDATE request timed out for slots 0x%lx", + slots_mask[0]); + } + } +} + +static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file) +{ + u32 max_csg_slots = kbdev->csf.global_iface.group_num; + DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; + u32 csg_nr; + unsigned long flags; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell + * ring for Extract offset update, shall not be made when MCU has been + * put to sleep otherwise it will undesirably make MCU exit the sleep + * state. Also it isn't really needed as FW will implicitly update the + * status of all on-slot groups when MCU sleep request is sent to it. 
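The status-update request in the next hunk is raised by writing the complement of CSG_ACK into CSG_REQ under CSG_REQ_STATUS_UPDATE_MASK, which guarantees the masked bits differ until the firmware acknowledges. A small illustrative helper naming that idiom (an assumption for clarity, not driver API):

/* Value to feed the *_input_mask() helpers so that REQ toggles relative
 * to ACK under @mask, marking a fresh request for the firmware.
 */
static u32 csf_event_request_val(u32 ack, u32 mask)
{
	return ~ack & mask;
}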
+ */ + if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { + bitmap_copy(csg_slots_status_updated, + kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); + return; + } + + for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + if (!group) + continue; + /* Ring the User doorbell for FW to update the Extract offset */ + kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); + set_bit(csg_nr, used_csgs); + } + + /* Return early if there are no on-slot groups */ + if (bitmap_empty(used_csgs, max_csg_slots)) + return; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + for_each_set_bit(csg_nr, used_csgs, max_csg_slots) { + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[csg_nr]; + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, + ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), + CSG_REQ_STATUS_UPDATE_MASK); + } + + BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE)); + kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + wait_csg_slots_status_update_finish(kbdev, used_csgs); + /* Wait for the User doobell ring to take effect */ + msleep(100); +} + #define MAX_SCHED_STATE_STRING_LEN (16) static const char *scheduler_state_to_string(struct kbase_device *kbdev, enum kbase_csf_scheduler_state sched_state) @@ -77,16 +200,32 @@ static const char *blocked_reason_to_string(u32 reason_id) return cs_blocked_reason[reason_id]; } +static bool sb_source_supported(u32 glb_version) +{ + bool supported = false; + + if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) && + (GLB_VERSION_MINOR_GET(glb_version) >= 5)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 2) && + (GLB_VERSION_MINOR_GET(glb_version) >= 6)) || + ((GLB_VERSION_MAJOR_GET(glb_version) == 1) && + (GLB_VERSION_MINOR_GET(glb_version) >= 3))) + supported = true; + + return supported; +} + static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - struct seq_file *file, u32 wait_status, u32 wait_sync_value, - u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, - u32 blocked_reason) + struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value, + u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason) { #define WAITING "Waiting" #define NOT_WAITING "Not waiting" seq_printf(file, "SB_MASK: %d\n", CS_STATUS_WAIT_SB_MASK_GET(wait_status)); + if (sb_source_supported(glb_version)) + seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status)); seq_printf(file, "PROGRESS_WAIT: %s\n", CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? 
WAITING : NOT_WAITING); @@ -156,10 +295,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, struct kbase_vmap_struct *mapping; u64 *evt; u64 wait_sync_live_value; + u32 glb_version; if (!queue) return; + glb_version = queue->kctx->kbdev->csf.global_iface.version; + if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || !queue->group)) return; @@ -200,9 +342,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, } kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, wait_status, wait_sync_value, - wait_sync_live_value, wait_sync_pointer, - sb_status, blocked_reason); + file, glb_version, wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason); } } else { struct kbase_device const *const kbdev = @@ -257,9 +398,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, } kbasep_csf_scheduler_dump_active_queue_cs_status_wait( - file, wait_status, wait_sync_value, - wait_sync_live_value, wait_sync_pointer, sb_status, - blocked_reason); + file, glb_version, wait_status, wait_sync_value, wait_sync_live_value, + wait_sync_pointer, sb_status, blocked_reason); /* Dealing with cs_trace */ if (kbase_csf_scheduler_queue_has_trace(queue)) kbasep_csf_scheduler_dump_active_cs_trace(file, stream); @@ -270,54 +410,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, seq_puts(file, "\n"); } -static void update_active_group_status(struct seq_file *file, - struct kbase_queue_group *const group) -{ - struct kbase_device *const kbdev = group->kctx->kbdev; - struct kbase_csf_cmd_stream_group_info const *const ginfo = - &kbdev->csf.global_iface.groups[group->csg_nr]; - long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); - unsigned long flags; - - /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell - * ring for Extract offset update, shall not be made when MCU has been - * put to sleep otherwise it will undesirably make MCU exit the sleep - * state. Also it isn't really needed as FW will implicitly update the - * status of all on-slot groups when MCU sleep request is sent to it. - */ - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) - return; - - /* Ring the User doobell shared between the queues bound to this - * group, to have FW update the CS_EXTRACT for all the queues - * bound to the group. Ring early so that FW gets adequate time - * for the handling. 
- */ - kbase_csf_ring_doorbell(kbdev, group->doorbell_nr); - - kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, - ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), - CSG_REQ_STATUS_UPDATE_MASK); - kbase_csf_scheduler_spin_unlock(kbdev, flags); - kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr); - - remaining = wait_event_timeout(kbdev->csf.event_wait, - !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ - kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & - CSG_REQ_STATUS_UPDATE_MASK), remaining); - - if (!remaining) { - dev_err(kbdev->dev, - "Timed out for STATUS_UPDATE on group %d on slot %d", - group->handle, group->csg_nr); - - seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", - group->csg_nr); - seq_puts(file, "*** The following group-record is likely stale\n"); - } -} - static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, struct kbase_queue_group *const group) { @@ -331,8 +423,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, u8 slot_priority = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; - update_active_group_status(file, group); - ep_c = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_CURRENT); ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); @@ -348,6 +438,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, CSG_STATUS_STATE_IDLE_MASK) idle = 'Y'; + if (!test_bit(group->csg_nr, csg_slots_status_updated)) { + seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", + group->csg_nr); + seq_puts(file, "*** The following group-record is likely stale\n"); + } + seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", group->handle, @@ -363,10 +459,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle); - - /* Wait for the User doobell ring to take effect */ - if (kbdev->csf.scheduler.state != SCHED_SLEEPING) - msleep(100); } else { seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); seq_printf(file, "%7d, %6d, %9d, %8d\n", @@ -416,10 +508,11 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, kbase_csf_scheduler_lock(kbdev); if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { /* Wait for the MCU sleep request to complete. Please refer the - * update_active_group_status() function for the explanation. + * update_active_groups_status() function for the explanation. */ kbase_pm_wait_for_desired_state(kbdev); } + update_active_groups_status(kbdev, file); for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; @@ -455,10 +548,11 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, kbase_csf_scheduler_lock(kbdev); if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { /* Wait for the MCU sleep request to complete. Please refer the - * update_active_group_status() function for the explanation. + * update_active_groups_status() function for the explanation. 
*/ kbase_pm_wait_for_desired_state(kbdev); } + update_active_groups_status(kbdev, file); for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; @@ -664,7 +758,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev) &kbasep_csf_debugfs_scheduler_state_fops); kbase_csf_tl_reader_debugfs_init(kbdev); - kbase_csf_firmware_trace_buffer_debugfs_init(kbdev); } #else diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 27aa53de110d..32a1c557e387 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -31,6 +31,7 @@ #include "mali_kbase_csf_firmware.h" #include "mali_kbase_csf_event.h" +#include /* Maximum number of KCPU command queues to be created per GPU address space. */ @@ -355,14 +356,19 @@ struct kbase_csf_notification { * @trace_buffer_size: CS trace buffer size for the queue. * @trace_cfg: CS trace configuration parameters. * @error: GPU command queue fatal information to pass to user space. - * @fatal_event_work: Work item to handle the CS fatal event reported for this - * queue. - * @cs_fatal_info: Records additional information about the CS fatal event. - * @cs_fatal: Records information about the CS fatal event. + * @cs_error_work: Work item to handle the CS fatal event reported for this + * queue or the CS fault event if dump on fault is enabled + * and acknowledgment for CS fault event needs to be done + * after dumping is complete. + * @cs_error_info: Records additional information about the CS fatal event or + * about CS fault event if dump on fault is enabled. + * @cs_error: Records information about the CS fatal event or + * about CS fault event if dump on fault is enabled. + * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. * @pending: Indicating whether the queue has new submitted work. - * @extract_ofs: The current EXTRACT offset, this is updated during certain - * events such as GPU idle IRQ in order to help detect a - * queue's true idle status. + * @extract_ofs: The current EXTRACT offset, this is only updated when handling + * the GLB IDLE IRQ if the idle timeout value is non-0 in order + * to help detect a queue's true idle status. * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the * group to which queue is bound is suspended. * This can be useful in certain cases to know that till which @@ -377,7 +383,11 @@ struct kbase_queue { int doorbell_nr; unsigned long db_file_offset; struct list_head link; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) atomic_t refcount; +#else + refcount_t refcount; +#endif struct kbase_queue_group *group; struct kbase_va_region *queue_reg; struct work_struct oom_event_work; @@ -397,14 +407,15 @@ struct kbase_queue { u32 trace_buffer_size; u32 trace_cfg; struct kbase_csf_notification error; - struct work_struct fatal_event_work; - u64 cs_fatal_info; - u32 cs_fatal; + struct work_struct cs_error_work; + u64 cs_error_info; + u32 cs_error; + bool cs_error_fatal; atomic_t pending; u64 extract_ofs; #if IS_ENABLED(CONFIG_DEBUG_FS) u64 saved_cmd_ptr; -#endif +#endif /* CONFIG_DEBUG_FS */ }; /** @@ -498,6 +509,9 @@ struct kbase_protected_suspend_buffer { * to be returned to userspace if such an error has occurred. * @timer_event_work: Work item to handle the progress timeout fatal event * for the group. 
+ * @deschedule_deferred_cnt: Counter keeping track of the number of threads + * that tried to deschedule the group and had to defer + * the descheduling due to the dump on fault. */ struct kbase_queue_group { struct kbase_context *kctx; @@ -539,6 +553,15 @@ struct kbase_queue_group { struct work_struct timer_event_work; + /** + * @dvs_buf: Address and size of scratch memory. + * + * Used to store intermediate DVS data by the GPU. + */ + u64 dvs_buf; +#if IS_ENABLED(CONFIG_DEBUG_FS) + u32 deschedule_deferred_cnt; +#endif }; /** @@ -548,10 +571,10 @@ struct kbase_queue_group { * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. * @array: Array of pointers to kernel CPU command queues. * @in_use: Bitmap which indicates which kernel CPU command queues are in use. - * @wq: Dedicated workqueue for processing kernel CPU command queues. - * @num_cmds: The number of commands that have been enqueued across - * all the KCPU command queues. This could be used as a - * timestamp to determine the command's enqueueing time. + * @cmd_seq_num: The sequence number assigned to an enqueued command, + * in incrementing order (older commands shall have a + * smaller number). + * @jit_lock: Lock to serialise JIT operations. * @jit_cmds_head: A list of the just-in-time memory commands, both * allocate & free, in submission order, protected * by kbase_csf_kcpu_queue_context.lock. @@ -564,9 +587,9 @@ struct kbase_csf_kcpu_queue_context { struct mutex lock; struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); - struct workqueue_struct *wq; - u64 num_cmds; + atomic64_t cmd_seq_num; + struct mutex jit_lock; struct list_head jit_cmds_head; struct list_head jit_blocked_queues; }; @@ -636,6 +659,28 @@ struct kbase_csf_tiler_heap_context { u64 nr_of_heaps; }; +/** + * struct kbase_csf_ctx_heap_reclaim_info - Object representing the data section of + * a kctx for tiler heap reclaim manager + * @mgr_link: Link for hooking up to the heap reclaim manager's kctx lists + * @nr_freed_pages: Number of freed pages from the kctx, after its attachment + * to the reclaim manager. This is used for tracking reclaim's + * free operation progress. + * @nr_est_unused_pages: Estimated number of pages that could be freed for the kctx + * when all its CSGs are off-slot, on attaching to the reclaim + * manager. + * @on_slot_grps: Number of on-slot groups from this kctx. In principle, if a + * kctx has groups on-slot, the scheduler will detach it from + * the tiler heap reclaim manager, i.e. no tiler heap memory + * reclaiming operations on the kctx. + */ +struct kbase_csf_ctx_heap_reclaim_info { + struct list_head mgr_link; + u32 nr_freed_pages; + u32 nr_est_unused_pages; + u8 on_slot_grps; +}; + /** * struct kbase_csf_scheduler_context - Object representing the scheduler's * context for a GPU address space. @@ -657,6 +702,10 @@ struct kbase_csf_tiler_heap_context { * streams bound to groups of @idle_wait_groups list. * @ngrp_to_schedule: Number of groups added for the context to the * 'groups_to_schedule' list of scheduler instance. + * @heap_info: Heap reclaim information data of the kctx. As the + * reclaim action needs to be coordinated with the scheduler + * operations, any manipulation of the data needs to be done + * while holding the scheduler's mutex lock.
*/ struct kbase_csf_scheduler_context { struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; @@ -666,6 +715,7 @@ struct kbase_csf_scheduler_context { struct workqueue_struct *sync_update_wq; struct work_struct sync_update_work; u32 ngrp_to_schedule; + struct kbase_csf_ctx_heap_reclaim_info heap_info; }; /** @@ -808,6 +858,22 @@ struct kbase_csf_csg_slot { u8 priority; }; +/** + * struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim + * kctx lists inside the CSF device's scheduler. + * + * @heap_reclaim: Tiler heap reclaim shrinker object. + * @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The + * lists track the kctxs attached to the reclaim manager. + * @unused_pages: Estimated number of unused pages from the @ctxlist array. The + * number is indicative for use with reclaim shrinker's count method. + */ +struct kbase_csf_sched_heap_reclaim_mgr { + struct shrinker heap_reclaim; + struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + atomic_t unused_pages; +}; + /** * struct kbase_csf_scheduler - Object representing the scheduler used for * CSF for an instance of GPU platform device. @@ -880,6 +946,8 @@ struct kbase_csf_csg_slot { * operation to implement timeslice-based scheduling. * @tock_work: Work item that would perform the schedule on tock * operation to implement the asynchronous scheduling. + * @pending_tock_work: Indicates that the tock work item should re-execute + * once it's finished instead of going back to sleep. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -889,8 +957,6 @@ struct kbase_csf_csg_slot { * @top_grp. * @top_grp: Pointer to queue group inside @groups_to_schedule * list that was assigned the highest slot priority. - * @tock_pending_request: A "tock" request is pending: a group that is not - * currently on the GPU demands to be scheduled. * @active_protm_grp: Indicates if firmware has been permitted to let GPU * enter protected mode with the given group. On exit * from protected mode the pointer is reset to NULL. @@ -903,6 +969,13 @@ struct kbase_csf_csg_slot { * handler. * @gpu_idle_work: Work item for facilitating the scheduler to bring * the GPU to a low-power mode on becoming idle. + * @fast_gpu_idle_handling: Indicates whether to relax many of the checks + * normally done in the GPU idle worker. This is + * set to true when handling the GLB IDLE IRQ if the + * idle hysteresis timeout is 0, since it makes it + * possible to receive this IRQ before the extract + * offset is published (which would cause more + * extensive GPU idle checks to fail). * @gpu_no_longer_idle: Effective only when the GPU idle worker has been * queued for execution, this indicates whether the * GPU has become non-idle since the last time the @@ -934,6 +1007,7 @@ struct kbase_csf_csg_slot { * groups. It is updated on every tick/tock. * @interrupt_lock is used to serialize the access. * @protm_enter_time: GPU protected mode enter time. + * @reclaim_mgr: CSGs tiler heap manager object. 
*/ struct kbase_csf_scheduler { struct mutex lock; @@ -960,13 +1034,14 @@ struct kbase_csf_scheduler { struct hrtimer tick_timer; struct work_struct tick_work; struct delayed_work tock_work; + atomic_t pending_tock_work; struct delayed_work ping_work; struct kbase_context *top_ctx; struct kbase_queue_group *top_grp; - bool tock_pending_request; struct kbase_queue_group *active_protm_grp; struct workqueue_struct *idle_wq; struct work_struct gpu_idle_work; + bool fast_gpu_idle_handling; atomic_t gpu_no_longer_idle; atomic_t non_idle_offslot_grps; u32 non_idle_scanout_grps; @@ -975,6 +1050,7 @@ struct kbase_csf_scheduler { bool tick_timer_active; u32 tick_protm_pending_seq; ktime_t protm_enter_time; + struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; }; /* @@ -1161,6 +1237,7 @@ struct kbase_ipa_control { * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes * @data_start: Offset into firmware image at which the interface data starts * @data_end: Offset into firmware image at which the interface data ends + * @virtual_exe_start: Starting GPU execution virtual address of this interface * @kernel_map: A kernel mapping of the memory or NULL if not required to be * mapped in the kernel * @pma: Array of pointers to protected memory allocations. @@ -1177,6 +1254,7 @@ struct kbase_csf_firmware_interface { u32 flags; u32 data_start; u32 data_end; + u32 virtual_exe_start; void *kernel_map; struct protected_memory_allocation **pma; }; @@ -1208,6 +1286,74 @@ struct kbase_csf_mcu_fw { u8 *data; }; +/* + * Firmware log polling period. + */ +#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 + +/** + * enum kbase_csf_firmware_log_mode - Firmware log operating mode + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: Manual mode, firmware log can be read + * manually by the userspace (and it will also be dumped automatically into + * dmesg on GPU reset). + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log + * will be periodically emptied into dmesg, manual reading through debugfs is + * disabled. + */ +enum kbase_csf_firmware_log_mode { + KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT +}; + +/** + * struct kbase_csf_firmware_log - Object containing members for handling firmware log. + * + * @mode: Firmware log operating mode. + * @busy: Indicating whether a firmware log operation is in progress. + * @poll_work: Work item that would poll firmware log buffer + * at regular intervals to perform any periodic + * activities required by current log mode. + * @dump_buf: Buffer used for dumping the log. + * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. + * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. + */ +struct kbase_csf_firmware_log { + enum kbase_csf_firmware_log_mode mode; + atomic_t busy; + struct delayed_work poll_work; + u8 *dump_buf; + u32 func_call_list_va_start; + u32 func_call_list_va_end; +}; + +#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon + * + * @error_code: Error code. + * @kctx_tgid: tgid value of the Kbase context for which the fault happened. + * @kctx_id: id of the Kbase context for which the fault happened. + * @enabled: Flag to indicate that 'csf_fault' debugfs has been opened + * so dump on fault is enabled. + * @fault_wait_wq: Waitqueue on which user space client is blocked till kbase + * reports a fault. 
+ * @dump_wait_wq: Waitqueue on which kbase threads are blocked till user space client + * completes the dump on fault. + * @lock: Lock to protect this struct members from concurrent access. + */ +struct kbase_csf_dump_on_fault { + enum dumpfault_error_type error_code; + u32 kctx_tgid; + u32 kctx_id; + atomic_t enabled; + wait_queue_head_t fault_wait_wq; + wait_queue_head_t dump_wait_wq; + spinlock_t lock; +}; +#endif /* CONFIG_DEBUG_FS*/ + /** * struct kbase_csf_device - Object representing CSF for an instance of GPU * platform device. @@ -1251,11 +1397,14 @@ struct kbase_csf_mcu_fw { * in the address space of every process, that created * a Base context, to enable the access to LATEST_FLUSH * register from userspace. + * @nr_user_page_mapped: The number of clients using the mapping of USER page. + * This is used to maintain backward compatibility. + * It's protected by @reg_lock. * @mali_file_inode: Pointer to the inode corresponding to mali device * file. This is needed in order to switch to the * @dummy_user_reg_page on GPU power down. * All instances of the mali device file will point to - * the same inode. + * the same inode. It's protected by @reg_lock. * @reg_lock: Lock to serialize the MCU firmware related actions * that affect all contexts such as allocation of * regions from shared interface area, assignment of @@ -1320,6 +1469,8 @@ struct kbase_csf_mcu_fw { * @hwcnt: Contain members required for handling the dump of * HW counters. * @fw: Copy of the loaded MCU firmware image. + * @fw_log: Contain members required for handling firmware log. + * @dof: Structure for dump on fault. */ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1334,6 +1485,7 @@ struct kbase_csf_device { u32 db_file_offsets; struct tagged_addr dummy_db_page; struct tagged_addr dummy_user_reg_page; + u32 nr_user_page_mapped; struct inode *mali_file_inode; struct mutex reg_lock; wait_queue_head_t event_wait; @@ -1360,6 +1512,10 @@ struct kbase_csf_device { unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; + struct kbase_csf_firmware_log fw_log; +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_csf_dump_on_fault dof; +#endif /* CONFIG_DEBUG_FS */ }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c index 170b7ec51af7..49e52938499f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_event.c @@ -169,7 +169,8 @@ void kbase_csf_event_term(struct kbase_context *kctx) kfree(event_cb); } - WARN_ON(!list_empty(&kctx->csf.event.error_list)); + WARN(!list_empty(&kctx->csf.event.error_list), + "Error list not empty for ctx %d_%d\n", kctx->tgid, kctx->id); spin_unlock_irqrestore(&kctx->csf.event.lock, flags); } @@ -244,6 +245,14 @@ bool kbase_csf_event_error_pending(struct kbase_context *kctx) bool error_pending = false; unsigned long flags; + /* Withhold the error event if the dump on fault is ongoing. + * This would prevent the Userspace from taking error recovery actions + * (which can potentially affect the state that is being dumped). + * Event handling thread would eventually notice the error event. 
+ */ + if (unlikely(!kbase_debug_csf_fault_dump_complete(kctx->kbdev))) + return false; + spin_lock_irqsave(&kctx->csf.event.lock, flags); error_pending = !list_empty(&kctx->csf.event.error_list); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 0fb56e0094c5..1f4a4d9b6876 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -21,6 +21,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" +#include "mali_kbase_csf_firmware_log.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -77,9 +78,11 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging firmware code."); #endif -#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) -#define FIRMWARE_HEADER_VERSION (0ul) -#define FIRMWARE_HEADER_LENGTH (0x14ul) + +#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) +#define FIRMWARE_HEADER_VERSION_MAJOR (0ul) +#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ (CSF_FIRMWARE_ENTRY_READ | \ @@ -92,10 +95,10 @@ MODULE_PARM_DESC(fw_debug, #define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) #define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2) #define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) #define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -431,8 +434,8 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, memset(p + copy_len, 0, zi_len); } - kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), - PAGE_SIZE, DMA_TO_DEVICE); + kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), + PAGE_SIZE, DMA_TO_DEVICE); kunmap_atomic(p); } } @@ -525,6 +528,58 @@ static inline bool entry_find_large_page_to_reuse( *pma = NULL; + /* If the section starts at 2MB aligned boundary, + * then use 2MB page(s) for it. + */ + if (!(virtual_start & (SZ_2M - 1))) { + *num_pages_aligned = + round_up(*num_pages_aligned, NUM_4K_PAGES_IN_2MB_PAGE); + *is_small_page = false; + goto out; + } + + /* If the section doesn't lie within the same 2MB aligned boundary, + * then use 4KB pages as it would be complicated to use a 2MB page + * for such section. + */ + if ((virtual_start & ~(SZ_2M - 1)) != (virtual_end & ~(SZ_2M - 1))) + goto out; + + /* Find the nearest 2MB aligned section which comes before the current + * section. 
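+ * If such a section is found and its aligned allocation already covers the page + * range at this section's offset, its physical pages (or protected memory + * allocations) are reused instead of allocating new ones.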
+ */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + const u32 virtual_diff = virtual_start - interface->virtual; + + if (interface->virtual > virtual_end) + continue; + + if (interface->virtual & (SZ_2M - 1)) + continue; + + if (virtual_diff < virtual_diff_min) { + target_interface = interface; + virtual_diff_min = virtual_diff; + } + } + + if (target_interface) { + const u32 page_index = virtual_diff_min >> PAGE_SHIFT; + + if (page_index >= target_interface->num_pages_aligned) + goto out; + + if (target_interface->phys) + *phys = &target_interface->phys[page_index]; + + if (target_interface->pma) + *pma = &target_interface->pma[page_index / NUM_4K_PAGES_IN_2MB_PAGE]; + + *is_small_page = false; + reuse_large_page = true; + } + +out: return reuse_large_page; } @@ -555,6 +610,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, u32 num_pages; u32 num_pages_aligned; char *name; + void *name_entry; + unsigned int name_len; struct tagged_addr *phys = NULL; struct kbase_csf_firmware_interface *interface = NULL; bool allocated_pages = false, protected_mode = false; @@ -625,8 +682,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, } else { if (!reuse_pages) { ret = kbase_mem_pool_alloc_pages( - kbase_mem_pool_group_select( - kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), + kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, + is_small_page), num_pages_aligned, phys, false); } } @@ -643,21 +700,24 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, data_start, data_end); /* Allocate enough memory for the struct kbase_csf_firmware_interface and - * the name of the interface. An extra byte is allocated to place a - * NUL-terminator in. This should already be included according to the - * specification but here we add it anyway to be robust against a - * corrupt firmware image. + * the name of the interface. */ - interface = kmalloc(sizeof(*interface) + - size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL); + name_entry = (void *)entry + INTERFACE_ENTRY_NAME_OFFSET; + name_len = strnlen(name_entry, size - INTERFACE_ENTRY_NAME_OFFSET); + if (size < (INTERFACE_ENTRY_NAME_OFFSET + name_len + 1 + sizeof(u32))) { + dev_err(kbdev->dev, "Memory setup entry too short to contain virtual_exe_start"); + ret = -EINVAL; + goto out; + } + + interface = kmalloc(sizeof(*interface) + name_len + 1, GFP_KERNEL); if (!interface) { ret = -ENOMEM; goto out; } name = (void *)(interface + 1); - memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)), - size - INTERFACE_ENTRY_NAME_OFFSET); - name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0; + memcpy(name, name_entry, name_len); + name[name_len] = 0; interface->name = name; interface->phys = phys; @@ -672,6 +732,11 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, interface->data_end = data_end; interface->pma = pma; + /* Discover the virtual execution address field after the end of the name + * field taking into account the NULL-termination character. 
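+ * The field is a u32 holding the GPU virtual address at which the section is + * expected to execute (its final execution location), which may differ from the + * address it is loaded at.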
+ */ + interface->virtual_exe_start = *((u32 *)(name_entry + name_len + 1)); + mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); if (flags & CSF_FIRMWARE_ENTRY_SHARED) { @@ -956,6 +1021,15 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return -EINVAL; } return parse_build_info_metadata_entry(kbdev, fw, entry, size); + case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: + /* Function call list section */ + if (size < 2 * sizeof(*entry)) { + dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", + size); + return -EINVAL; + } + kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); + break; } if (!optional) { @@ -1179,40 +1253,80 @@ static int parse_capabilities(struct kbase_device *kbdev) return 0; } +static inline void access_firmware_memory_common(struct kbase_device *kbdev, + struct kbase_csf_firmware_interface *interface, u32 offset_bytes, + u32 *value, const bool read) +{ + u32 page_num = offset_bytes >> PAGE_SHIFT; + u32 offset_in_page = offset_bytes & ~PAGE_MASK; + struct page *target_page = as_page(interface->phys[page_num]); + uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); + u32 *addr = (u32 *)(cpu_addr + offset_in_page); + + if (read) { + kbase_sync_single_for_device(kbdev, + kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + *value = *addr; + } else { + *addr = *value; + kbase_sync_single_for_device(kbdev, + kbase_dma_addr_from_tagged(interface->phys[page_num]) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + } + + kunmap_atomic((u32 *)cpu_addr); +} + static inline void access_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 *value, const bool read) { - struct kbase_csf_firmware_interface *interface; + struct kbase_csf_firmware_interface *interface, *access_interface = NULL; + u32 offset_bytes = 0; list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { if ((gpu_addr >= interface->virtual) && (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { - u32 offset_bytes = gpu_addr - interface->virtual; - u32 page_num = offset_bytes >> PAGE_SHIFT; - u32 offset_in_page = offset_bytes & ~PAGE_MASK; - struct page *target_page = as_page( - interface->phys[page_num]); - u32 *cpu_addr = kmap_atomic(target_page); - - if (read) { - kbase_sync_single_for_device(kbdev, - kbase_dma_addr(target_page) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); - - *value = cpu_addr[offset_in_page >> 2]; - } else { - cpu_addr[offset_in_page >> 2] = *value; - - kbase_sync_single_for_device(kbdev, - kbase_dma_addr(target_page) + offset_in_page, - sizeof(u32), DMA_BIDIRECTIONAL); - } - - kunmap_atomic(cpu_addr); - return; + offset_bytes = gpu_addr - interface->virtual; + access_interface = interface; + break; } } - dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr); + + if (access_interface) + access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); + else + dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); +} + +static inline void access_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value, const bool read) +{ + struct kbase_csf_firmware_interface *interface, *access_interface = NULL; + u32 offset_bytes = 0; + + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + if ((gpu_addr >= interface->virtual_exe_start) && + (gpu_addr < interface->virtual_exe_start + + (interface->num_pages << PAGE_SHIFT))) { + offset_bytes = gpu_addr - 
interface->virtual_exe_start; + access_interface = interface; + + /* If there's an overlap in execution address range between a moved and a + * non-moved areas, always prefer the moved one. The idea is that FW may + * move sections around during init time, but after the layout is settled, + * any moved sections are going to override non-moved areas at the same + * location. + */ + if (interface->virtual_exe_start != interface->virtual) + break; + } + } + + if (access_interface) + access_firmware_memory_common(kbdev, access_interface, offset_bytes, value, read); + else + dev_warn(kbdev->dev, "Invalid GPU VA %x passed", gpu_addr); } void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, @@ -1227,6 +1341,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, access_firmware_memory(kbdev, gpu_addr, &value, false); } +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + access_firmware_memory_exe(kbdev, gpu_addr, value, true); +} + +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + access_firmware_memory_exe(kbdev, gpu_addr, &value, false); +} + void kbase_csf_firmware_cs_input( const struct kbase_csf_cmd_stream_info *const info, const u32 offset, const u32 value) @@ -1462,11 +1588,10 @@ static bool global_request_complete(struct kbase_device *const kbdev, return complete; } -static int wait_for_global_request(struct kbase_device *const kbdev, - u32 const req_mask) +static int wait_for_global_request_with_timeout(struct kbase_device *const kbdev, + u32 const req_mask, unsigned int timeout_ms) { - const long wait_timeout = - kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + const long wait_timeout = kbase_csf_timeout_in_jiffies(timeout_ms); long remaining; int err = 0; @@ -1475,10 +1600,9 @@ static int wait_for_global_request(struct kbase_device *const kbdev, wait_timeout); if (!remaining) { - dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for global request %x to complete", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms, - req_mask); + dev_warn(kbdev->dev, + "[%llu] Timeout (%d ms) waiting for global request %x to complete", + kbase_backend_get_cycle_cnt(kbdev), timeout_ms, req_mask); err = -ETIMEDOUT; } @@ -1486,6 +1610,11 @@ static int wait_for_global_request(struct kbase_device *const kbdev, return err; } +static int wait_for_global_request(struct kbase_device *const kbdev, u32 const req_mask) +{ + return wait_for_global_request_with_timeout(kbdev, req_mask, kbdev->csf.fw_timeout_ms); +} + static void set_global_request( const struct kbase_csf_global_iface *const global_iface, u32 const req_mask) @@ -1559,6 +1688,25 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) } +/** + * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core + * + * @kbdev: The kbase device structure of the device + * + * This function needs to be called to enable the Ray Tracing Unit + * by writing SHADER_PWRFEATURES only when host controls shader cores power. 
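+ * The register write is skipped for GPUs with a product ID below + * GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0).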
+ */ +static void kbasep_enable_rtu(struct kbase_device *kbdev) +{ + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + + if (gpu_id < GPU_ID2_PRODUCT_MAKE(12, 8, 3, 0)) + return; + + if (kbdev->csf.firmware_hctl_core_pwr) + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_PWRFEATURES), 1); +} + static void global_init(struct kbase_device *const kbdev, u64 core_mask) { u32 const ack_irq_mask = @@ -1574,6 +1722,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbasep_enable_rtu(kbdev); + /* Update shader core allocation enable mask */ enable_endpoints_global(global_iface, core_mask); enable_shader_poweroff_timer(kbdev, global_iface); @@ -1854,7 +2004,6 @@ end: static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) { -#define PWROFF_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); u64 dur_val = dur_us; @@ -1991,16 +2140,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; -#ifdef KBASE_PM_RUNTIME - if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= - FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; -#endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); - kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, kbdev->csf.gpu_idle_hysteresis_ms); - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); @@ -2020,7 +2159,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +void kbase_csf_firmware_early_term(struct kbase_device *kbdev) +{ + mutex_destroy(&kbdev->csf.reg_lock); +} + +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ + kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_gpu_sleep_allowed(kbdev)) + kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; +#endif + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + kbdev->csf.gpu_idle_dur_count = + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + + return 0; +} + +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) { const struct firmware *firmware = NULL; struct kbase_csf_mcu_fw *const mcu_fw = &kbdev->csf.fw; @@ -2093,7 +2251,8 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) version_minor = mcu_fw->data[4]; version_major = mcu_fw->data[5]; - if (version_major != FIRMWARE_HEADER_VERSION) { + if (version_major != FIRMWARE_HEADER_VERSION_MAJOR || + version_minor != FIRMWARE_HEADER_VERSION_MINOR) { dev_err(kbdev->dev, "Firmware header version %d.%d not understood\n", version_major, version_minor); @@ -2188,6 +2347,12 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + } + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2195,11 +2360,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) return 0; err_out: - kbase_csf_firmware_term(kbdev); + 
kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { unsigned long flags; int ret = 0; @@ -2210,6 +2375,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) WARN(ret, "failed to wait for GPU reset"); + kbase_csf_firmware_log_term(kbdev); + kbase_csf_firmware_cfg_term(kbdev); kbase_csf_timeout_term(kbdev); @@ -2297,8 +2464,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) */ kbase_mcu_shared_interface_region_tracker_term(kbdev); - mutex_destroy(&kbdev->csf.reg_lock); - kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); /* Release the address space */ @@ -2350,10 +2515,11 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev) kbase_csf_scheduler_spin_unlock(kbdev, flags); } -int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) { kbase_csf_firmware_ping(kbdev); - return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); + + return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms); } int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, @@ -2392,7 +2558,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) +int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { int err; @@ -2432,12 +2598,14 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) } } - if (err) { + if (unlikely(err)) { if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); + + return err; } void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) @@ -2651,9 +2819,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!page_list) goto page_list_alloc_error; - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 85caaa7b2ab4..7560a298ac9c 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -364,7 +364,45 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, u32 gpu_addr, u32 value); /** - * kbase_csf_firmware_early_init() - Early initializatin for the firmware. + * kbase_csf_read_firmware_memory_exe - Read a value in a GPU address in the + * region of its final execution location. + * + * @kbdev: Device pointer + * @gpu_addr: GPU address to read + * @value: Output pointer to which the read value will be written + * + * This function read a value in a GPU address that belongs to a private loaded + * firmware memory region based on its final execution location. The function + * assumes that the location is not permanently mapped on the CPU address space, + * therefore it maps it and then unmaps it to access it independently. 
This function + * needs to be used when accessing firmware memory regions which will be moved to + * their final execution location during firmware boot using an address based on the + * final execution location. + */ +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value); + +/** + * kbase_csf_update_firmware_memory_exe - Write a value in a GPU address in the + * region of its final execution location. + * + * @kbdev: Device pointer + * @gpu_addr: GPU address to write + * @value: Value to write + * + * This function writes a value in a GPU address that belongs to a private loaded + * firmware memory region based on its final execution location. The function + * assumes that the location is not permanently mapped on the CPU address space, + * therefore it maps it and then unmaps it to access it independently. This function + * needs to be used when accessing firmware memory regions which will be moved to + * their final execution location during firmware boot using an address based on the + * final execution location. + */ +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 value); + +/** + * kbase_csf_firmware_early_init() - Early initialization for the firmware. * @kbdev: Kbase device * * Initialize resources related to the firmware. Must be called at kbase probe. @@ -374,22 +412,43 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, int kbase_csf_firmware_early_init(struct kbase_device *kbdev); /** - * kbase_csf_firmware_init() - Load the firmware for the CSF MCU + * kbase_csf_firmware_early_term() - Terminate resources related to the firmware + * after the firmware unload has been done. + * + * @kbdev: Device pointer + * + * This should be called only when kbase probe fails or gets rmmoded. + */ +void kbase_csf_firmware_early_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_late_init() - Late initialization for the firmware. + * @kbdev: Kbase device + * + * Initialize resources related to the firmware. But must be called after + * backend late init is done. Must be used at probe time only. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_late_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_load_init() - Load the firmware for the CSF MCU * @kbdev: Kbase device * * Request the firmware from user space and load it into memory. * * Return: 0 if successful, negative error code on failure */ -int kbase_csf_firmware_init(struct kbase_device *kbdev); +int kbase_csf_firmware_load_init(struct kbase_device *kbdev); /** - * kbase_csf_firmware_term() - Unload the firmware + * kbase_csf_firmware_unload_term() - Unload the firmware * @kbdev: Kbase device * - * Frees the memory allocated by kbase_csf_firmware_init() + * Frees the memory allocated by kbase_csf_firmware_load_init() */ -void kbase_csf_firmware_term(struct kbase_device *kbdev); +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); /** * kbase_csf_firmware_ping - Send the ping request to firmware. @@ -404,13 +463,14 @@ void kbase_csf_firmware_ping(struct kbase_device *kbdev); * kbase_csf_firmware_ping_wait - Send the ping request to firmware and waits. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @wait_timeout_ms: Timeout to get the acknowledgment for PING request from FW. * * The function sends the ping request to firmware and waits to confirm it is * alive. * * Return: 0 on success, or negative on failure. 
*/ -int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); +int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev, unsigned int wait_timeout_ms); /** * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. @@ -447,8 +507,10 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); * This function needs to be called after kbase_csf_enter_protected_mode() to * wait for the GPU to actually enter protected mode. GPU reset is triggered if * the wait is unsuccessful. + * + * Return: 0 on success, or negative on failure. */ -void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); +int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev); static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c index ad4ae74c7569..c895b080143a 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_cfg.c @@ -20,13 +20,17 @@ */ #include -#include "mali_kbase_csf_firmware_cfg.h" #include #include +#include "mali_kbase_csf_firmware_cfg.h" +#include "mali_kbase_csf_firmware_log.h" + #if CONFIG_SYSFS #define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" +#define CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME "Log verbosity" + /** * struct firmware_config - Configuration item within the MCU firmware * @@ -125,7 +129,7 @@ static ssize_t store_fw_cfg(struct kobject *kobj, if (attr == &fw_cfg_attr_cur) { unsigned long flags; - u32 val; + u32 val, cur_val; int ret = kstrtouint(buf, 0, &val); if (ret) { @@ -140,7 +144,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj, return -EINVAL; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (config->cur_val == val) { + + cur_val = config->cur_val; + if (cur_val == val) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return count; } @@ -177,6 +183,20 @@ static ssize_t store_fw_cfg(struct kobject *kobj, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /* Enable FW logging only if Log verbosity is non-zero */ + if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && + (!cur_val || !val)) { + ret = kbase_csf_firmware_log_toggle_logging_calls(kbdev, val); + if (ret) { + /* Undo FW configuration changes */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + config->cur_val = cur_val; + kbase_csf_update_firmware_memory(kbdev, config->address, cur_val); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return ret; + } + } + /* If we can update the config without firmware reset then * we need to just trigger FIRMWARE_CONFIG_UPDATE. */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c new file mode 100644 index 000000000000..20d8c0d4fdb1 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "backend/gpu/mali_kbase_pm_internal.h" +#include +#include +#include +#include +#include + +/* + * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address. + */ +#define ARMV7_T1_BL_IMM_INSTR 0xd800f000 + +/* + * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum + * negative jump offset. + */ +#define ARMV7_T1_BL_IMM_RANGE_MIN -16777216 + +/* + * ARMv7 instruction: Branch with Link calls a subroutine at a PC-relative address, maximum + * positive jump offset. + */ +#define ARMV7_T1_BL_IMM_RANGE_MAX 16777214 + +/* + * ARMv7 instruction: Double NOP instructions. + */ +#define ARMV7_DOUBLE_NOP_INSTR 0xbf00bf00 + +#if defined(CONFIG_DEBUG_FS) + +static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + /* The enabled traces are limited to 64 bits here, which is sufficient in practice */ + *val = kbase_csf_firmware_trace_buffer_get_active_mask64(tb); + return 0; +} + +static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + u64 new_mask; + unsigned int enable_bits_count; + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + /* Ignore unsupported types */ + enable_bits_count = kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); + if (enable_bits_count > 64) { + dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); + enable_bits_count = 64; + } + new_mask = (enable_bits_count < 64) ? val & (((u64)1 << enable_bits_count) - 1) : val; + + if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) + return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); + else + return 0; +} + +static int kbasep_csf_firmware_log_debugfs_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + file->private_data = kbdev; + dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); + + return 0; +} + +static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + unsigned int n_read; + unsigned long not_copied; + /* Limit reads to the kernel dump buffer size */ + size_t mem = MIN(size, FIRMWARE_LOG_DUMP_BUF_SIZE); + int ret; + + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return -EBUSY; + + /* Reading from userspace is only
allowed in manual mode */ + if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { + ret = -EINVAL; + goto out; + } + + n_read = kbase_csf_firmware_trace_buffer_read_data(tb, fw_log->dump_buf, mem); + + /* Do the copy, if we have obtained some trace data */ + not_copied = (n_read) ? copy_to_user(buf, fw_log->dump_buf, n_read) : 0; + + if (not_copied) { + dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); + ret = -EFAULT; + goto out; + } + + *ppos += n_read; + ret = n_read; + +out: + atomic_set(&fw_log->busy, 0); + return ret; +} + +static int kbase_csf_firmware_log_mode_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + *val = fw_log->mode; + return 0; +} + +static int kbase_csf_firmware_log_mode_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + int ret = 0; + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return -EBUSY; + + if (val == fw_log->mode) + goto out; + + switch (val) { + case KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL: + cancel_delayed_work_sync(&fw_log->poll_work); + break; + case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + break; + default: + ret = -EINVAL; + goto out; + } + + fw_log->mode = val; + +out: + atomic_set(&fw_log->busy, 0); + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, + kbase_csf_firmware_log_enable_mask_read, + kbase_csf_firmware_log_enable_mask_write, "%llx\n"); + +static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_csf_firmware_log_debugfs_open, + .read = kbasep_csf_firmware_log_debugfs_read, + .llseek = no_llseek, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, + kbase_csf_firmware_log_mode_write, "%llu\n"); + +#endif /* CONFIG_DEBUG_FS */ + +static void kbase_csf_firmware_log_poll(struct work_struct *work) +{ + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.fw_log.poll_work.work); + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + + kbase_csf_firmware_log_dump_buffer(kbdev); +} + +int kbase_csf_firmware_log_init(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + /* Add one byte for null-termination */ + fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); + if (fw_log->dump_buf == NULL) + return -ENOMEM; + + /* Ensure null-termination for all strings */ + fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; + + atomic_set(&fw_log->busy, 0); + INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); + +#if defined(CONFIG_DEBUG_FS) + debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_enable_mask_fops); + debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_log_debugfs_fops); + debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_mode_fops); +#endif /* CONFIG_DEBUG_FS */ + + return 0; +} + +void kbase_csf_firmware_log_term(struct kbase_device 
*kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + if (fw_log->dump_buf) { + cancel_delayed_work_sync(&fw_log->poll_work); + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; + } +} + +void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; + unsigned int read_size, remaining_size; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); + return; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return; + + /* FW should only print complete messages, so there's no need to handle + * partial messages over multiple invocations of this function + */ + + p = buf; + pendbuf = &buf[FIRMWARE_LOG_DUMP_BUF_SIZE]; + + while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, pendbuf - p))) { + pend = p + read_size; + p = buf; + + while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { + /* Null-terminate the string */ + *pnewline = 0; + + dev_err(kbdev->dev, "FW> %s", p); + + p = pnewline + 1; + } + + remaining_size = pend - p; + + if (!remaining_size) { + p = buf; + } else if (remaining_size < FIRMWARE_LOG_DUMP_BUF_SIZE) { + /* Copy unfinished string to the start of the buffer */ + memmove(buf, p, remaining_size); + p = &buf[remaining_size]; + } else { + /* Print abnormally long string without newlines */ + dev_err(kbdev->dev, "FW> %s", buf); + p = buf; + } + } + + if (p != buf) { + /* Null-terminate and print last unfinished string */ + *p = 0; + dev_err(kbdev->dev, "FW> %s", buf); + } + + atomic_set(&fw_log->busy, 0); +} + +void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, + const uint32_t *entry) +{ + kbdev->csf.fw_log.func_call_list_va_start = entry[0]; + kbdev->csf.fw_log.func_call_list_va_end = entry[1]; +} + +/** + * toggle_logging_calls_in_loaded_image - Toggles FW log func calls in loaded FW image. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @enable: Whether to enable or disable the function calls. 
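+ * + * When enabling, each call site recorded in the firmware's function call list is + * patched with a Thumb-2 BL instruction to its logging callee; when disabling, each + * call site is overwritten with two NOP instructions.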
+ */ +static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, bool enable) +{ + uint32_t bl_instruction, diff; + uint32_t imm11, imm10, i1, i2, j1, j2, sign; + uint32_t calling_address = 0, callee_address = 0; + uint32_t list_entry = kbdev->csf.fw_log.func_call_list_va_start; + const uint32_t list_va_end = kbdev->csf.fw_log.func_call_list_va_end; + + if (list_entry == 0 || list_va_end == 0) + return; + + if (enable) { + for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { + /* Read calling address */ + kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); + /* Read callee address */ + kbase_csf_read_firmware_memory(kbdev, list_entry + sizeof(uint32_t), + &callee_address); + + diff = callee_address - calling_address - 4; + sign = !!(diff & 0x80000000); + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || + ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { + dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", + calling_address); + continue; + } + + /* Split the branch offset into the S, J1, J2, imm10 and imm11 fields + * of the Thumb-2 BL (T1) encoding. + */ + i1 = (diff & 0x00800000) >> 23; + j1 = !i1 ^ sign; + i2 = (diff & 0x00400000) >> 22; + j2 = !i2 ^ sign; + imm11 = (diff & 0xffe) >> 1; + imm10 = (diff & 0x3ff000) >> 12; + + /* Compose BL instruction */ + bl_instruction = ARMV7_T1_BL_IMM_INSTR; + bl_instruction |= j1 << 29; + bl_instruction |= j2 << 27; + bl_instruction |= imm11 << 16; + bl_instruction |= sign << 10; + bl_instruction |= imm10; + + /* Patch logging func calls in their load location */ + dev_dbg(kbdev->dev, "FW log patch 0x%x: 0x%x\n", calling_address, + bl_instruction); + kbase_csf_update_firmware_memory_exe(kbdev, calling_address, + bl_instruction); + } + } else { + for (; list_entry < list_va_end; list_entry += 2 * sizeof(uint32_t)) { + /* Read calling address */ + kbase_csf_read_firmware_memory(kbdev, list_entry, &calling_address); + + /* Overwrite logging func calls with 2 NOP instructions */ + kbase_csf_update_firmware_memory_exe(kbdev, calling_address, + ARMV7_DOUBLE_NOP_INSTR); + } + } +} + +int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val) +{ + unsigned long flags; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + bool mcu_inactive; + bool resume_needed = false; + int ret = 0; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return -EBUSY; + + /* Suspend all the active CS groups */ + dev_dbg(kbdev->dev, "Suspend all the active CS groups"); + + kbase_csf_scheduler_lock(kbdev); + while (scheduler->state != SCHED_SUSPENDED) { + kbase_csf_scheduler_unlock(kbdev); + kbase_csf_scheduler_pm_suspend(kbdev); + kbase_csf_scheduler_lock(kbdev); + resume_needed = true; + } + + /* Wait for the MCU to get disabled */ + dev_info(kbdev->dev, "Wait for the MCU to get disabled"); + ret = kbase_pm_wait_for_desired_state(kbdev); + if (ret) { + dev_err(kbdev->dev, + "wait for PM state failed when toggling FW logging calls"); + ret = -EAGAIN; + goto out; + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + mcu_inactive = + kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (!mcu_inactive) { + dev_err(kbdev->dev, + "MCU not inactive after PM state wait when toggling FW logging calls"); + ret = -EAGAIN; + goto out; + } + + /* Toggle FW logging calls in the loaded FW image */ + toggle_logging_calls_in_loaded_image(kbdev, val); + dev_dbg(kbdev->dev, "FW logging: %s", val ?
"enabled" : "disabled"); + +out: + kbase_csf_scheduler_unlock(kbdev); + if (resume_needed) + /* Resume queue groups and start mcu */ + kbase_csf_scheduler_pm_resume(kbdev); + atomic_set(&fw_log->busy, 0); + return ret; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h new file mode 100644 index 000000000000..8d7a2210a457 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_LOG_H_ +#define _KBASE_CSF_FIRMWARE_LOG_H_ + +#include + +/* + * Firmware log dumping buffer size. + */ +#define FIRMWARE_LOG_DUMP_BUF_SIZE PAGE_SIZE + +/** + * kbase_csf_firmware_log_init - Initialize firmware log handling. + * + * @kbdev: Pointer to the Kbase device + * + * Return: The initialization error code. + */ +int kbase_csf_firmware_log_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_log_term - Terminate firmware log handling. + * + * @kbdev: Pointer to the Kbase device + */ +void kbase_csf_firmware_log_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_log_dump_buffer - Read remaining data in the firmware log + * buffer and print it to dmesg. + * + * @kbdev: Pointer to the Kbase device + */ +void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_log_parse_logging_call_list_entry - Parse FW logging function call list entry. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. + */ +void kbase_csf_firmware_log_parse_logging_call_list_entry(struct kbase_device *kbdev, + const uint32_t *entry); +/** + * kbase_csf_firmware_log_toggle_logging_calls - Enables/Disables FW logging function calls. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @val: Configuration option value. 
+ * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_CSF_FIRMWARE_LOG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index 54f1f6b9c199..f414d8894306 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -273,6 +273,18 @@ void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, /* NO_MALI: Nothing to do here */ } +void kbase_csf_read_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + /* NO_MALI: Nothing to do here */ +} + +void kbase_csf_update_firmware_memory_exe(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + /* NO_MALI: Nothing to do here */ +} + void kbase_csf_firmware_cs_input( const struct kbase_csf_cmd_stream_info *const info, const u32 offset, const u32 value) @@ -971,7 +983,6 @@ end: static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) { -#define PWROFF_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); u64 dur_val = dur_us; @@ -1046,16 +1057,6 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; -#ifdef KBASE_PM_RUNTIME - if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= - FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; -#endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); - kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, kbdev->csf.gpu_idle_hysteresis_ms); - INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); @@ -1068,7 +1069,26 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) return 0; } -int kbase_csf_firmware_init(struct kbase_device *kbdev) +void kbase_csf_firmware_early_term(struct kbase_device *kbdev) +{ + mutex_destroy(&kbdev->csf.reg_lock); +} + +int kbase_csf_firmware_late_init(struct kbase_device *kbdev) +{ + kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; +#ifdef KBASE_PM_RUNTIME + if (kbase_pm_gpu_sleep_allowed(kbdev)) + kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; +#endif + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + kbdev->csf.gpu_idle_dur_count = + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + + return 0; +} + +int kbase_csf_firmware_load_init(struct kbase_device *kbdev) { int ret; @@ -1134,11 +1154,11 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev) return 0; error: - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); return ret; } -void kbase_csf_firmware_term(struct kbase_device *kbdev) +void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) { cancel_work_sync(&kbdev->csf.fw_error_work); @@ -1173,8 +1193,6 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev) /* NO_MALI: No trace buffers to terminate */ - mutex_destroy(&kbdev->csf.reg_lock); - /* This will also free up the region allocated for the shared interface * entry parsed from the firmware image. 
*/ @@ -1227,8 +1245,9 @@ void kbase_csf_firmware_ping(struct kbase_device *const kbdev) kbase_csf_scheduler_spin_unlock(kbdev, flags); } -int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int wait_timeout_ms) { + CSTD_UNUSED(wait_timeout_ms); kbase_csf_firmware_ping(kbdev); return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); } @@ -1267,7 +1286,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) +int kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) { int err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); @@ -1275,6 +1294,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev) if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); } + + return err; } void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) @@ -1483,9 +1504,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!page_list) goto page_list_alloc_error; - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - num_pages, phys, false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false); if (ret <= 0) goto phys_mem_pool_alloc_error; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 4b3931f6ccf7..1876d505dd5b 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,8 +154,8 @@ u64 kbase_csf_heap_context_allocator_alloc( struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE; + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | + BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); u64 heap_gpu_va = 0; @@ -164,10 +164,6 @@ u64 kbase_csf_heap_context_allocator_alloc( */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; -#ifdef CONFIG_MALI_VECTOR_DUMP - flags |= BASE_MEM_PROT_CPU_RD; -#endif - mutex_lock(&ctx_alloc->lock); /* If the pool of heap contexts wasn't already allocated then diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 542f04579898..0b3f1334a9e6 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -55,7 +55,7 @@ static int kbase_kcpu_map_import_prepare( long i; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); @@ -114,7 +114,7 @@ static int kbase_kcpu_unmap_import_prepare_internal( struct kbase_va_region *reg; int ret = 0; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); kbase_gpu_vm_lock(kctx); @@ -182,7 +182,8 @@ static void kbase_jit_add_to_pending_alloc_list( &kctx->csf.kcpu_queues.jit_blocked_queues; struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, @@ -227,25 +228,28 @@ static int kbase_kcpu_jit_allocate_process( u32 i; int ret; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); - - if (alloc_info->blocked) { - list_del(&queue->jit_blocked); - alloc_info->blocked = false; - } + lockdep_assert_held(&queue->lock); if (WARN_ON(!info)) return -EINVAL; + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + /* Check if all JIT IDs are not in use */ for (i = 0; i < count; i++, info++) { /* The JIT ID is still in use so fail the allocation */ if (kctx->jit_alloc[info->id]) { dev_dbg(kctx->kbdev->dev, "JIT ID still in use"); - return -EINVAL; + ret = -EINVAL; + goto fail; } } + if (alloc_info->blocked) { + list_del(&queue->jit_blocked); + alloc_info->blocked = false; + } + /* Now start the allocation loop */ for (i = 0, info = alloc_info->info; i < count; i++, info++) { /* Create a JIT allocation */ @@ -280,7 +284,7 @@ static int kbase_kcpu_jit_allocate_process( */ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ret = -ENOMEM; - goto fail; + goto fail_rollback; } /* There are pending frees for an active allocation @@ -298,7 +302,8 @@ static int kbase_kcpu_jit_allocate_process( kctx->jit_alloc[info->id] = NULL; } - return -EAGAIN; + ret = -EAGAIN; + goto fail; } /* Bind it to the user provided ID. 
*/ @@ -314,7 +319,7 @@ static int kbase_kcpu_jit_allocate_process( KBASE_REG_CPU_WR, &mapping); if (!ptr) { ret = -ENOMEM; - goto fail; + goto fail_rollback; } reg = kctx->jit_alloc[info->id]; @@ -323,9 +328,11 @@ static int kbase_kcpu_jit_allocate_process( kbase_vunmap(kctx, &mapping); } + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + return 0; -fail: +fail_rollback: /* Roll back completely */ for (i = 0, info = alloc_info->info; i < count; i++, info++) { /* Free the allocations that were successful. @@ -338,6 +345,8 @@ fail: kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC; } +fail: + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return ret; } @@ -354,7 +363,7 @@ static int kbase_kcpu_jit_allocate_prepare( int ret = 0; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (!data || count > kcpu_queue->kctx->jit_max_allocations || count > ARRAY_SIZE(kctx->jit_alloc)) { @@ -392,11 +401,13 @@ static int kbase_kcpu_jit_allocate_prepare( } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC; - list_add_tail(¤t_command->info.jit_alloc.node, - &kctx->csf.kcpu_queues.jit_cmds_head); current_command->info.jit_alloc.info = info; current_command->info.jit_alloc.count = count; current_command->info.jit_alloc.blocked = false; + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + list_add_tail(¤t_command->info.jit_alloc.node, + &kctx->csf.kcpu_queues.jit_cmds_head); + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; out_free: @@ -415,7 +426,9 @@ static void kbase_kcpu_jit_allocate_finish( struct kbase_kcpu_command_queue *queue, struct kbase_kcpu_command *cmd) { - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + + mutex_lock(&queue->kctx->csf.kcpu_queues.jit_lock); /* Remove this command from the jit_cmds_head list */ list_del(&cmd->info.jit_alloc.node); @@ -429,6 +442,8 @@ static void kbase_kcpu_jit_allocate_finish( cmd->info.jit_alloc.blocked = false; } + mutex_unlock(&queue->kctx->csf.kcpu_queues.jit_lock); + kfree(cmd->info.jit_alloc.info); } @@ -441,18 +456,17 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) { struct kbase_kcpu_command_queue *blocked_queue; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); /* * Reschedule all queues blocked by JIT_ALLOC commands. * NOTE: This code traverses the list of blocked queues directly. It * only works as long as the queued works are not executed at the same * time. This precondition is true since we're holding the - * kbase_csf_kcpu_queue_context.lock . + * kbase_csf_kcpu_queue_context.jit_lock . 
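kbase_kcpu_jit_allocate_process() above now distinguishes two exit paths: fail_rollback undoes allocations that already succeeded before dropping jit_lock, while fail only releases the lock taken at the start. A minimal sketch of that two-stage goto cleanup idiom, with purely illustrative names and stand-in work:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t toy_jit_lock = PTHREAD_MUTEX_INITIALIZER;

/* 'fail_late' forces the path that must undo already-completed work. */
static int toy_jit_allocate(int fail_late)
{
	int *alloc = NULL;
	int ret = 0;

	pthread_mutex_lock(&toy_jit_lock);

	alloc = malloc(sizeof(*alloc));
	if (!alloc) {
		ret = -1;
		goto fail;            /* nothing to undo yet */
	}

	if (fail_late) {
		ret = -1;
		goto fail_rollback;   /* roll back the successful allocation */
	}

	/* Success: in the real code the allocation stays bound to its ID;
	 * the toy frees it only to avoid leaking.
	 */
	pthread_mutex_unlock(&toy_jit_lock);
	free(alloc);
	return 0;

fail_rollback:
	free(alloc);
fail:
	pthread_mutex_unlock(&toy_jit_lock);
	return ret;
}

int main(void)
{
	(void)toy_jit_allocate(1);
	return toy_jit_allocate(0);
}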
*/ - list_for_each_entry(blocked_queue, - &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) - queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work); + list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) + queue_work(blocked_queue->wq, &blocked_queue->work); } static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, @@ -469,7 +483,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, if (WARN_ON(!ids)) return -EINVAL; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev, queue); @@ -501,9 +516,6 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, queue->kctx->kbdev, queue, item_err, pages_used); } - /* Free the list of ids */ - kfree(ids); - /* * Remove this command from the jit_cmds_head list and retry pending * allocations. @@ -511,6 +523,11 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, list_del(&cmd->info.jit_free.node); kbase_kcpu_jit_retry_pending_allocs(kctx); + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); + + /* Free the list of ids */ + kfree(ids); + return rc; } @@ -526,7 +543,7 @@ static int kbase_kcpu_jit_free_prepare( int ret; u32 i; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); /* Sanity checks */ if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) { @@ -572,10 +589,12 @@ static int kbase_kcpu_jit_free_prepare( } current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE; - list_add_tail(¤t_command->info.jit_free.node, - &kctx->csf.kcpu_queues.jit_cmds_head); current_command->info.jit_free.ids = ids; current_command->info.jit_free.count = count; + mutex_lock(&kctx->csf.kcpu_queues.jit_lock); + list_add_tail(¤t_command->info.jit_free.node, + &kctx->csf.kcpu_queues.jit_cmds_head); + mutex_unlock(&kctx->csf.kcpu_queues.jit_lock); return 0; out_free: @@ -601,7 +620,7 @@ static int kbase_csf_queue_group_suspend_prepare( int pinned_pages = 0, ret = 0; struct kbase_va_region *reg; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (suspend_buf->size < csg_suspend_buf_size) return -EINVAL; @@ -652,9 +671,12 @@ static int kbase_csf_queue_group_suspend_prepare( u64 start, end, i; if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || - reg->nr_pages < nr_pages || - kbase_reg_current_backed_size(reg) != - reg->nr_pages) { + (kbase_reg_current_backed_size(reg) < nr_pages) || + !(reg->flags & KBASE_REG_CPU_WR) || + (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || + (reg->flags & KBASE_REG_DONT_NEED) || + (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || + (reg->flags & KBASE_REG_NO_USER_FREE)) { ret = -EINVAL; goto out_clean_pages; } @@ -703,9 +725,8 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param) { struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param; - struct kbase_context *const kctx = kcpu_queue->kctx; - queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); + queue_work(kcpu_queue->wq, &kcpu_queue->work); return KBASE_CSF_EVENT_CALLBACK_KEEP; } @@ -735,7 +756,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait->objs)) return -EINVAL; @@ -803,7 
+824,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -857,7 +878,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set->objs)) return; @@ -898,11 +919,10 @@ static int kbase_kcpu_cqs_set_prepare( struct base_kcpu_command_cqs_set_info *cqs_set_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set *objs; unsigned int nr_objs = cqs_set_info->nr_objs; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -952,7 +972,7 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, { u32 i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_wait_operation->objs)) return -EINVAL; @@ -1039,7 +1059,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1094,7 +1114,7 @@ static void kbase_kcpu_cqs_set_operation_process( { unsigned int i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); if (WARN_ON(!cqs_set_operation->objs)) return; @@ -1161,11 +1181,10 @@ static int kbase_kcpu_cqs_set_operation_prepare( struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) return -EINVAL; @@ -1212,7 +1231,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, fence->context, fence->seqno); /* Resume kcpu command queue processing. 
*/ - queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); + queue_work(kcpu_queue->wq, &kcpu_queue->work); } static void kbase_kcpu_fence_wait_cancel( @@ -1221,7 +1240,7 @@ static void kbase_kcpu_fence_wait_cancel( { struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return; @@ -1293,7 +1312,7 @@ static void fence_timeout_callback(struct timer_list *timer) kbase_sync_fence_info_get(fence, &info); if (info.status == 1) { - queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); + queue_work(kcpu_queue->wq, &kcpu_queue->work); } else if (info.status == 0) { dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums", FENCE_WAIT_TIMEOUT_MS); @@ -1345,7 +1364,7 @@ static int kbase_kcpu_fence_wait_process( #endif struct kbase_context *const kctx = kcpu_queue->kctx; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1401,7 +1420,6 @@ static int kbase_kcpu_fence_wait_prepare( struct base_kcpu_command_fence_info *fence_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; #else @@ -1409,7 +1427,7 @@ static int kbase_kcpu_fence_wait_prepare( #endif struct base_fence fence; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) @@ -1460,7 +1478,6 @@ static int kbase_kcpu_fence_signal_prepare( struct base_kcpu_command_fence_info *fence_info, struct kbase_kcpu_command *current_command) { - struct kbase_context *const kctx = kcpu_queue->kctx; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else @@ -1471,7 +1488,7 @@ static int kbase_kcpu_fence_signal_prepare( int ret = 0; int fd; - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&kcpu_queue->lock); if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) @@ -1549,11 +1566,9 @@ static void kcpu_queue_process_worker(struct work_struct *data) struct kbase_kcpu_command_queue *queue = container_of(data, struct kbase_kcpu_command_queue, work); - mutex_lock(&queue->kctx->csf.kcpu_queues.lock); - + mutex_lock(&queue->lock); kcpu_queue_process(queue, false); - - mutex_unlock(&queue->kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); } static int delete_queue(struct kbase_context *kctx, u32 id) @@ -1569,6 +1584,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id) KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DELETE, queue, queue->num_pending_cmds, queue->cqs_wait_count); + /* Disassociate the queue from the system to prevent further + * submissions. Draining pending commands would be acceptable + * even if a new queue is created using the same ID. + */ + kctx->csf.kcpu_queues.array[id] = NULL; + bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + mutex_lock(&queue->lock); + /* Drain the remaining work for this queue first and go past * all the waits. 
*/ @@ -1580,17 +1606,17 @@ static int delete_queue(struct kbase_context *kctx, u32 id) /* All CQS wait commands should have been cleaned up */ WARN_ON(queue->cqs_wait_count); - kctx->csf.kcpu_queues.array[id] = NULL; - bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); - /* Fire the tracepoint with the mutex held to enforce correct * ordering with the summary stream. */ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); cancel_work_sync(&queue->work); + destroy_workqueue(queue->wq); + + mutex_destroy(&queue->lock); kfree(queue); } else { @@ -1657,7 +1683,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool process_next = true; size_t i; - lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + lockdep_assert_held(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { struct kbase_kcpu_command *cmd = @@ -2058,9 +2084,11 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, /* The offset to the first command that is being processed or yet to * be processed is of u8 type, so the number of commands inside the - * queue cannot be more than 256. + * queue cannot be more than 256. The current implementation expects + * exactly 256, any other size will require the addition of wrapping + * logic. */ - BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256); + BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE != 256); /* Whilst the backend interface allows enqueueing multiple commands in * a single operation, the Base interface does not expose any mechanism @@ -2076,13 +2104,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } mutex_lock(&kctx->csf.kcpu_queues.lock); - - if (!kctx->csf.kcpu_queues.array[enq->id]) { - ret = -EINVAL; - goto out; - } - queue = kctx->csf.kcpu_queues.array[enq->id]; + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + if (queue == NULL) + return -EINVAL; + + mutex_lock(&queue->lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -2097,7 +2125,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, * for the possibility to roll back. 
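delete_queue() above now unpublishes the queue from kcpu_queues.array and the in_use bitmap while holding only the context-wide lock, drops that lock, and only then drains and tears the queue down under its own lock, so per-queue work never has to nest inside the context lock. A small pthread sketch of the same detach-then-drain ordering, with hypothetical names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_QUEUES 4

struct toy_queue {
	pthread_mutex_t lock; /* protects the queue's own state */
	int pending;          /* commands still to be processed */
};

static struct toy_queue *registry[MAX_QUEUES];
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static struct toy_queue *toy_queue_new(int id)
{
	struct toy_queue *q = calloc(1, sizeof(*q));

	if (!q)
		return NULL;
	pthread_mutex_init(&q->lock, NULL);
	q->pending = 3;
	pthread_mutex_lock(&registry_lock);
	registry[id] = q;
	pthread_mutex_unlock(&registry_lock);
	return q;
}

static void toy_queue_delete(int id)
{
	struct toy_queue *q;

	/* 1) Detach so no new submission can find the queue. */
	pthread_mutex_lock(&registry_lock);
	q = registry[id];
	registry[id] = NULL;
	pthread_mutex_unlock(&registry_lock);

	if (!q)
		return;

	/* 2) Drain under the queue's own lock, outside registry_lock. */
	pthread_mutex_lock(&q->lock);
	while (q->pending)
		q->pending--; /* stand-in for processing remaining commands */
	pthread_mutex_unlock(&q->lock);

	/* 3) Only now tear the queue down. */
	pthread_mutex_destroy(&q->lock);
	free(q);
}

int main(void)
{
	toy_queue_new(0);
	toy_queue_delete(0);
	printf("deleted\n");
	return 0;
}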
*/ - for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) { + for (i = 0; (i != enq->nr_commands) && !ret; ++i) { struct kbase_kcpu_command *kcpu_cmd = &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; struct base_kcpu_command command; @@ -2120,7 +2148,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, } } - kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds; + kcpu_cmd->enqueue_ts = atomic64_inc_return(&kctx->csf.kcpu_queues.cmd_seq_num); switch (command.type) { case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: #if IS_ENABLED(CONFIG_SYNC_FILE) @@ -2208,13 +2236,10 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, queue->num_pending_cmds += enq->nr_commands; kcpu_queue_process(queue, false); - } else { - /* Roll back the number of enqueued commands */ - kctx->csf.kcpu_queues.num_cmds -= i; } out: - mutex_unlock(&kctx->csf.kcpu_queues.lock); + mutex_unlock(&queue->lock); return ret; } @@ -2228,14 +2253,9 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) kctx->csf.kcpu_queues.array[idx] = NULL; - kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu", - WQ_UNBOUND | WQ_HIGHPRI, 0); - if (!kctx->csf.kcpu_queues.wq) - return -ENOMEM; - mutex_init(&kctx->csf.kcpu_queues.lock); - kctx->csf.kcpu_queues.num_cmds = 0; + atomic64_set(&kctx->csf.kcpu_queues.cmd_seq_num, 0); return 0; } @@ -2253,7 +2273,6 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) (void)delete_queue(kctx, id); } - destroy_workqueue(kctx->csf.kcpu_queues.wq); mutex_destroy(&kctx->csf.kcpu_queues.lock); } @@ -2297,8 +2316,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, goto out; } + queue->wq = alloc_workqueue("mali_kbase_csf_kcpu_wq_%i", WQ_UNBOUND | WQ_HIGHPRI, 0, idx); + if (queue->wq == NULL) { + kfree(queue); + ret = -ENOMEM; + + goto out; + } + bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); kctx->csf.kcpu_queues.array[idx] = queue; + mutex_init(&queue->lock); queue->kctx = kctx; queue->start_offset = 0; queue->num_pending_cmds = 0; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index a4db86984721..5f9b8e0684bc 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -236,9 +236,11 @@ struct kbase_kcpu_command { /** * struct kbase_kcpu_command_queue - a command queue executed by the kernel * + * @lock: Lock to protect accesses to this queue. * @kctx: The context to which this command queue belongs. * @commands: Array of commands which have been successfully * enqueued to this command queue. + * @wq: Dedicated workqueue for processing commands. * @work: struct work_struct which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; @@ -274,8 +276,10 @@ struct kbase_kcpu_command { * @fence_timeout: Timer used to detect the fence wait timeout. 
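Because each KCPU queue is now serialised by its own lock, the global num_cmds counter that was protected by kcpu_queues.lock is replaced above by an atomic64 sequence (cmd_seq_num) sampled with atomic64_inc_return(). A tiny C11 analogue of that change, a process-wide monotonic timestamp shared by independently locked queues (names illustrative):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* One counter shared by all queues; no lock needed for the timestamp. */
static _Atomic uint64_t cmd_seq_num;

static uint64_t next_enqueue_ts(void)
{
	/* atomic_fetch_add returns the old value, so +1 mirrors inc_return. */
	return atomic_fetch_add(&cmd_seq_num, 1) + 1;
}

int main(void)
{
	printf("%llu %llu %llu\n",
	       (unsigned long long)next_enqueue_ts(),
	       (unsigned long long)next_enqueue_ts(),
	       (unsigned long long)next_enqueue_ts());
	return 0;
}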
*/ struct kbase_kcpu_command_queue { + struct mutex lock; struct kbase_context *kctx; struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; + struct workqueue_struct *wq; struct work_struct work; u8 start_offset; u8 id; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index 177569bfb427..6dde56cb161a 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -163,6 +163,8 @@ #define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ #define CSG_CONFIG 0x0050 /* () CSG configuration options */ #define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ +#define CSG_DVS_BUF_LO 0x0060 /* () Normal mode deferred vertex shading work buffer, low word */ +#define CSG_DVS_BUF_HI 0x0064 /* () Normal mode deferred vertex shading work buffer, high word */ /* CSG_OUTPUT_BLOCK register offsets */ #define CSG_ACK 0x0000 /* () CSG acknowledge flags */ @@ -547,6 +549,13 @@ #define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) +#define CS_STATUS_WAIT_SB_SOURCE_SHIFT 16 +#define CS_STATUS_WAIT_SB_SOURCE_MASK (0xF << CS_STATUS_WAIT_SB_SOURCE_SHIFT) +#define CS_STATUS_WAIT_SB_SOURCE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SB_SOURCE_MASK) >> CS_STATUS_WAIT_SB_SOURCE_SHIFT) +#define CS_STATUS_WAIT_SB_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SB_SOURCE_MASK) | \ + (((value) << CS_STATUS_WAIT_SB_SOURCE_SHIFT) & CS_STATUS_WAIT_SB_SOURCE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ @@ -557,6 +566,7 @@ /* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 #define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 +#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE 0x5 /* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ #define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 #define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) @@ -835,11 +845,6 @@ #define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT) #define CSG_REQ_IDLE_SET(reg_val, value) \ (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK)) -#define CSG_REQ_DOORBELL_SHIFT 30 -#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT) -#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT) -#define CSG_REQ_DOORBELL_SET(reg_val, value) \ - (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK)) #define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31 #define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) #define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \ @@ -956,6 +961,21 @@ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) +/* CSG_DVS_BUF_BUFFER register */ +#define CSG_DVS_BUF_BUFFER_SIZE_SHIFT GPU_U(0) +#define CSG_DVS_BUF_BUFFER_SIZE_MASK (GPU_U(0xFFF) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) +#define CSG_DVS_BUF_BUFFER_SIZE_GET(reg_val) (((reg_val)&CSG_DVS_BUF_BUFFER_SIZE_MASK) >> CSG_DVS_BUF_BUFFER_SIZE_SHIFT) +#define 
CSG_DVS_BUF_BUFFER_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CSG_DVS_BUF_BUFFER_SIZE_MASK) | \ + (((value) << CSG_DVS_BUF_BUFFER_SIZE_SHIFT) & CSG_DVS_BUF_BUFFER_SIZE_MASK)) +#define CSG_DVS_BUF_BUFFER_POINTER_SHIFT GPU_U(12) +#define CSG_DVS_BUF_BUFFER_POINTER_MASK \ + (GPU_ULL(0xFFFFFFFFFFFFF) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) +#define CSG_DVS_BUF_BUFFER_POINTER_GET(reg_val) \ + (((reg_val)&CSG_DVS_BUF_BUFFER_POINTER_MASK) >> CSG_DVS_BUF_BUFFER_POINTER_SHIFT) +#define CSG_DVS_BUF_BUFFER_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_DVS_BUF_BUFFER_POINTER_MASK) | \ + (((value) << CSG_DVS_BUF_BUFFER_POINTER_SHIFT) & CSG_DVS_BUF_BUFFER_POINTER_MASK)) /* End of CSG_INPUT_BLOCK register set definitions */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c index 10de93faee27..fe3b91a4845d 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include @@ -29,7 +29,7 @@ #include #include #include -#include +#include enum kbasep_soft_reset_status { RESET_SUCCESS = 0, @@ -257,68 +257,6 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); } -static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) -{ - u8 *buf, *p, *pnewline, *pend, *pendbuf; - unsigned int read_size, remaining_size; - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); - return; - } - - buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL); - if (buf == NULL) { - dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped"); - return; - } - - buf[PAGE_SIZE] = 0; - - p = buf; - pendbuf = &buf[PAGE_SIZE]; - - dev_err(kbdev->dev, "Firmware trace buffer dump:"); - while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p, - pendbuf - p))) { - pend = p + read_size; - p = buf; - - while (p < pend && (pnewline = memchr(p, '\n', pend - p))) { - /* Null-terminate the string */ - *pnewline = 0; - - dev_err(kbdev->dev, "FW> %s", p); - - p = pnewline + 1; - } - - remaining_size = pend - p; - - if (!remaining_size) { - p = buf; - } else if (remaining_size < PAGE_SIZE) { - /* Copy unfinished string to the start of the buffer */ - memmove(buf, p, remaining_size); - p = &buf[remaining_size]; - } else { - /* Print abnormal page-long string without newlines */ - dev_err(kbdev->dev, "FW> %s", buf); - p = buf; - } - } - - if (p != buf) { - /* Null-terminate and print last unfinished string */ - *p = 0; - dev_err(kbdev->dev, "FW> %s", buf); - } - - kfree(buf); -} - /** * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the * event of an error during GPU reset. 
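The new CSG_DVS_BUF accessors above carve a 64-bit value into a 12-bit SIZE field in bits [11:0] and a 52-bit POINTER field in bits [63:12], and program_csg_slot() later splits that value across CSG_DVS_BUF_LO/HI. The sketch below mirrors the same masks in standalone C; the helper names are illustrative and no meaning beyond the field widths is asserted here.

#include <stdint.h>
#include <stdio.h>

#define DVS_SIZE_SHIFT    0
#define DVS_SIZE_MASK     (0xFFFull << DVS_SIZE_SHIFT)
#define DVS_POINTER_SHIFT 12
#define DVS_POINTER_MASK  (0xFFFFFFFFFFFFFull << DVS_POINTER_SHIFT)

static uint64_t dvs_set_size(uint64_t reg, uint64_t size)
{
	return (reg & ~DVS_SIZE_MASK) | ((size << DVS_SIZE_SHIFT) & DVS_SIZE_MASK);
}

static uint64_t dvs_set_pointer(uint64_t reg, uint64_t ptr)
{
	return (reg & ~DVS_POINTER_MASK) |
	       ((ptr << DVS_POINTER_SHIFT) & DVS_POINTER_MASK);
}

int main(void)
{
	/* Round-trip a pointer field and a size field through one u64,
	 * then show the LO/HI split used when programming the CSG input block.
	 */
	uint64_t reg = dvs_set_pointer(dvs_set_size(0, 0x40), 0xABCDE);

	printf("size=0x%llx pointer=0x%llx lo=0x%08x hi=0x%08x\n",
	       (unsigned long long)((reg & DVS_SIZE_MASK) >> DVS_SIZE_SHIFT),
	       (unsigned long long)((reg & DVS_POINTER_MASK) >> DVS_POINTER_SHIFT),
	       (unsigned int)(reg & 0xFFFFFFFFu), (unsigned int)(reg >> 32));
	return 0;
}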
@@ -378,7 +316,6 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic "The flush has completed so reset the active indicator\n"); kbdev->irq_reset_flush = false; - mutex_lock(&kbdev->pm.lock); if (!silent) dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); @@ -389,7 +326,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic if (!silent) { kbase_csf_debug_dump_registers(kbdev); if (likely(firmware_inited)) - kbase_csf_dump_firmware_trace_buffer(kbdev); + kbase_csf_firmware_log_dump_buffer(kbdev); } spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -403,6 +340,7 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic */ kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface); + mutex_lock(&kbdev->pm.lock); /* Reset the GPU */ err = kbase_pm_init_hw(kbdev, 0); @@ -633,6 +571,11 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) return kbase_csf_reset_state_is_active(reset_state); } +bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev) +{ + return atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_NOT_PENDING; +} + int kbase_reset_gpu_wait(struct kbase_device *kbdev) { const long wait_timeout = diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index af3b6912845d..b3cdef7dae52 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -31,6 +31,7 @@ #include #include #include +#include "mali_kbase_csf_tiler_heap_reclaim.h" /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -50,36 +51,12 @@ /* CSF scheduler time slice value */ #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ -/* - * CSF scheduler time threshold for converting "tock" requests into "tick" if - * they come too close to the end of a tick interval. This avoids scheduling - * twice in a row. - */ -#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ - CSF_SCHEDULER_TIME_TICK_MS - -#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ - msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) - -/* Nanoseconds per millisecond */ -#define NS_PER_MS ((u64)1000 * 1000) - -/* - * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" - * requests are not serviced immediately, but shall wait for a minimum time in - * order to reduce load on the CSF scheduler thread. - */ -#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */ - -/* CS suspended and is idle (empty ring buffer) */ -#define CS_IDLE_FLAG (1 << 0) - -/* CS suspended and is wait for a CQS condition */ -#define CS_WAIT_SYNC_FLAG (1 << 1) - /* A GPU address space slot is reserved for MCU. 
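The trace-buffer dump loop removed from the reset path above (its role is taken over by kbase_csf_firmware_log_dump_buffer()) prints the firmware log one '\n'-terminated line at a time and carries any unfinished line over to the next read. The standalone sketch below reproduces that carry-over technique against an arbitrary chunk-reading callback instead of the firmware trace buffer; all names are illustrative.

#include <stdio.h>
#include <string.h>

#define DUMP_BUF_SIZE 64

/* Reads up to 'max' bytes of log data, returns 0 when drained. Stands in
 * for kbase_csf_firmware_trace_buffer_read_data().
 */
typedef unsigned int (*read_chunk_fn)(char *dst, unsigned int max);

static void dump_log_lines(read_chunk_fn read_chunk)
{
	char buf[DUMP_BUF_SIZE + 1];
	char *p = buf, *pend, *nl;
	unsigned int read_size, remaining;

	buf[DUMP_BUF_SIZE] = '\0';

	while ((read_size = read_chunk(p, (unsigned int)(&buf[DUMP_BUF_SIZE] - p)))) {
		pend = p + read_size;
		p = buf;

		/* Print every complete line in the chunk. */
		while (p < pend && (nl = memchr(p, '\n', (size_t)(pend - p)))) {
			*nl = '\0';
			printf("FW> %s\n", p);
			p = nl + 1;
		}

		remaining = (unsigned int)(pend - p);
		if (!remaining) {
			p = buf;
		} else if (remaining < DUMP_BUF_SIZE) {
			/* Carry the partial line to the front of the buffer. */
			memmove(buf, p, remaining);
			p = &buf[remaining];
		} else {
			/* A buffer-long run with no newline: flush it as-is. */
			printf("FW> %s\n", buf);
			p = buf;
		}
	}

	if (p != buf) {
		*p = '\0';
		printf("FW> %s\n", buf);
	}
}

static const char *src = "first line\nsecond line\npartial";
static unsigned int src_pos;

static unsigned int read_from_string(char *dst, unsigned int max)
{
	unsigned int left = (unsigned int)strlen(src) - src_pos;
	unsigned int n = left < max ? left : max;

	memcpy(dst, src + src_pos, n);
	src_pos += n;
	return n;
}

int main(void)
{
	dump_log_lines(read_from_string);
	return 0;
}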
*/ #define NUM_RESERVED_AS_SLOTS (1) +/* Time to wait for completion of PING req before considering MCU as hung */ +#define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -97,9 +74,105 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend); static void schedule_in_cycle(struct kbase_queue_group *group, bool force); +static bool queue_group_scheduled_locked(struct kbase_queue_group *group); #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) +/** + * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and + * scheduling tick/tock to complete before the group deschedule. + * + * @group: Pointer to the group that is being descheduled. + * + * This function blocks the descheduling of the group until the dump on fault is + * completed and scheduling tick/tock has completed. + * To deschedule an on slot group CSG termination request would be sent and that + * might time out if the fault had occurred and also potentially affect the state + * being dumped. Moreover the scheduler lock would be held, so the access to debugfs + * files would get blocked. + * Scheduler lock and 'kctx->csf.lock' are released before this function starts + * to wait. When a request sent by the Scheduler to the FW times out, Scheduler + * would also wait for the dumping to complete and release the Scheduler lock + * before the wait. Meanwhile Userspace can try to delete the group, this function + * would ensure that the group doesn't exit the Scheduler until scheduling + * tick/tock has completed. Though very unlikely, group deschedule can be triggered + * from multiple threads around the same time and after the wait Userspace thread + * can win the race and get the group descheduled and free the memory for group + * pointer before the other threads wake up and notice that group has already been + * descheduled. To avoid the freeing in such a case, a sort of refcount is used + * for the group which is incremented & decremented across the wait. + */ +static +void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&kctx->csf.lock); + lockdep_assert_held(&scheduler->lock); + + if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) + return; + + while ((!kbase_debug_csf_fault_dump_complete(kbdev) || + (scheduler->state == SCHED_BUSY)) && + queue_group_scheduled_locked(group)) { + group->deschedule_deferred_cnt++; + mutex_unlock(&scheduler->lock); + mutex_unlock(&kctx->csf.lock); + kbase_debug_csf_fault_wait_completion(kbdev); + mutex_lock(&kctx->csf.lock); + mutex_lock(&scheduler->lock); + group->deschedule_deferred_cnt--; + } +#endif +} + +/** + * schedule_actions_trigger_df() - Notify the client about the fault and + * wait for the dumping to complete. + * + * @kbdev: Pointer to the device + * @kctx: Pointer to the context associated with the CSG slot for which + * the timeout was seen. + * @error: Error code indicating the type of timeout that occurred. + * + * This function notifies the Userspace client waiting for the faults and wait + * for the Client to complete the dumping. 
+ * The function is called only from Scheduling tick/tock when a request sent by + * the Scheduler to FW times out or from the protm event work item of the group + * when the protected mode entry request times out. + * In the latter case there is no wait done as scheduler lock would be released + * immediately. In the former case the function waits and releases the scheduler + * lock before the wait. It has been ensured that the Scheduler view of the groups + * won't change meanwhile, so no group can enter/exit the Scheduler, become + * runnable or go off slot. + */ +static void schedule_actions_trigger_df(struct kbase_device *kbdev, + struct kbase_context *kctx, enum dumpfault_error_type error) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (!kbase_debug_csf_fault_notify(kbdev, kctx, error)) + return; + + if (unlikely(scheduler->state != SCHED_BUSY)) { + WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE); + return; + } + + mutex_unlock(&scheduler->lock); + kbase_debug_csf_fault_wait_completion(kbdev); + mutex_lock(&scheduler->lock); + WARN_ON(scheduler->state != SCHED_BUSY); +#endif +} + #ifdef KBASE_PM_RUNTIME /** * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the @@ -207,6 +280,7 @@ static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev) } scheduler->state = SCHED_SUSPENDED; + KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); return 0; @@ -472,6 +546,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) int non_idle_offslot_grps; bool can_suspend_on_idle; + lockdep_assert_held(&kbdev->hwaccess_lock); lockdep_assert_held(&scheduler->interrupt_lock); non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); @@ -481,12 +556,23 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) if (!non_idle_offslot_grps) { if (can_suspend_on_idle) { + /* fast_gpu_idle_handling is protected by the + * interrupt_lock, which would prevent this from being + * updated whilst gpu_idle_worker() is executing. + */ + scheduler->fast_gpu_idle_handling = + (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + !kbase_csf_scheduler_all_csgs_idle(kbdev); + /* The GPU idle worker relies on update_on_slot_queues_offsets() to have * finished. It's queued before to reduce the time it takes till execution * but it'll eventually be blocked by the scheduler->interrupt_lock. */ enqueue_gpu_idle_work(scheduler); - update_on_slot_queues_offsets(kbdev); + + /* The extract offsets are unused in fast GPU idle handling */ + if (!scheduler->fast_gpu_idle_handling) + update_on_slot_queues_offsets(kbdev); } } else { /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ @@ -604,10 +690,14 @@ static bool scheduler_protm_wait_quit(struct kbase_device *kbdev) remaining = wait_event_timeout(kbdev->csf.event_wait, !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); - if (!remaining) { + if (unlikely(!remaining)) { + struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp; + struct kbase_context *kctx = group ? 
group->kctx : NULL; + dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped", kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms); + schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT); success = false; } @@ -728,7 +818,8 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * Scheduler * * @kbdev: Pointer to the device - * @flags: flags containing previous interrupt state + * @flags: Pointer to the flags variable containing the interrupt state + * when hwaccess lock was acquired. * * This function is called when Scheduler needs to be activated from the * sleeping state. @@ -736,14 +827,14 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev, * MCU is initiated. It resets the flag that indicates to the MCU state * machine that MCU needs to be put in sleep state. * - * Note: This function shall be called with hwaccess lock held and it will - * release that lock. + * Note: This function shall be called with hwaccess lock held and it may + * release that lock and reacquire it. * * Return: zero when the PM reference was taken and non-zero when the * system is being suspending/suspended. */ static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, - unsigned long flags) + unsigned long *flags) { u32 prev_count; int ret = 0; @@ -754,20 +845,20 @@ static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev, prev_count = kbdev->csf.scheduler.pm_active_count; if (!WARN_ON(prev_count == U32_MAX)) kbdev->csf.scheduler.pm_active_count++; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* On 0 => 1, make a pm_ctx_active request */ if (!prev_count) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags); + ret = kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, *flags); if (ret) kbdev->csf.scheduler.pm_active_count--; else kbdev->pm.backend.gpu_sleep_mode_active = false; kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } return ret; @@ -871,8 +962,8 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) "Re-activating the Scheduler out of sleep"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = scheduler_pm_active_after_sleep(kbdev, flags); - /* hwaccess_lock is released in the previous function call. 
*/ + ret = scheduler_pm_active_after_sleep(kbdev, &flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif } @@ -886,6 +977,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) } scheduler->state = SCHED_INACTIVE; + KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); if (kick) scheduler_enable_tick_timer_nolock(kbdev); @@ -901,6 +993,7 @@ static void scheduler_suspend(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Suspending the Scheduler"); scheduler_pm_idle(kbdev); scheduler->state = SCHED_SUSPENDED; + KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); } } @@ -931,6 +1024,8 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group) KBASE_CSF_GROUP_SUSPENDED); } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { group->run_state = KBASE_CSF_GROUP_SUSPENDED; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, + group->run_state); /* If scheduler is not suspended and the given group's * static priority (reflected by the scan_seq_num) is inside @@ -1055,6 +1150,7 @@ static int halt_stream_sync(struct kbase_queue *queue) struct kbase_csf_cmd_stream_info *stream; int csi_index = queue->csi_index; long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + unsigned long flags; if (WARN_ON(!group) || WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) @@ -1086,12 +1182,15 @@ static int halt_stream_sync(struct kbase_queue *queue) kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); } + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); /* Set state to STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, CS_REQ_STATE_MASK); - KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u); /* Timed wait */ remaining = wait_event_timeout(kbdev->csf.event_wait, @@ -1362,6 +1461,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) } mutex_unlock(&kbdev->csf.scheduler.lock); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state); return err; } @@ -1439,6 +1539,7 @@ static void program_cs(struct kbase_device *kbdev, struct kbase_csf_cmd_stream_group_info *ginfo; struct kbase_csf_cmd_stream_info *stream; int csi_index = queue->csi_index; + unsigned long flags; u64 user_input; u64 user_output; @@ -1495,6 +1596,20 @@ static void program_cs(struct kbase_device *kbdev, /* Enable all interrupts for now */ kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + + /* The fault bit could be misaligned between CS_REQ and CS_ACK if the + * acknowledgment was deferred due to dump on fault and the group was + * removed from the CSG slot before the fault could be acknowledged. + */ + if (queue->enabled) { + u32 const cs_ack = + kbase_csf_firmware_cs_output(stream, CS_ACK); + + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + } + /* * Enable the CSG idle notification once the CS's ringbuffer * becomes empty or the CS becomes sync_idle, waiting sync update @@ -1508,11 +1623,12 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input_mask(stream, CS_REQ, queue->enabled ? 
CS_REQ_STATE_START : CS_REQ_STATE_STOP, CS_REQ_STATE_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, + ring_csg_doorbell); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); - kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, - ring_csg_doorbell); update_hw_active(queue, true); } @@ -1532,6 +1648,13 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) mutex_lock(&kbdev->csf.scheduler.lock); +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) { + mutex_unlock(&kbdev->csf.scheduler.lock); + return -EBUSY; + } +#endif + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, group->run_state); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, @@ -1716,6 +1839,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) /* Set state to SUSPEND/TERMINATE */ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, CSG_REQ_STATE_MASK); + kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); @@ -1724,7 +1848,6 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); - kbase_csf_ring_csg_doorbell(kbdev, slot); } } @@ -1738,6 +1861,31 @@ static void suspend_csg_slot(struct kbase_queue_group *group) halt_csg_slot(group, true); } +static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev) +{ + const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version); + const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version); + + switch (glb_major) { + case 0: + break; + case 1: + if (glb_minor >= 4) + return true; + break; + case 2: + if (glb_minor >= 6) + return true; + break; + case 3: + if (glb_minor >= 6) + return true; + break; + default: + return true; + } + return false; +} /** * evaluate_sync_update() - Evaluate the sync wait condition the GPU command * queue has been blocked on. 
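evaluate_sync_update() below accepts a third wait condition, GE, but only when csf_wait_ge_condition_supported() above reports a new enough global interface version. A small sketch of the comparison itself, using the CS_STATUS_WAIT_SYNC_WAIT_CONDITION values defined earlier in this patch (LE=0x0, GT=0x1, GE=0x5); the version gating and any CQS semantics beyond a plain 32-bit compare are not modelled here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Condition encodings as in CS_STATUS_WAIT_SYNC_WAIT_CONDITION_* above. */
#define SYNC_WAIT_CONDITION_LE 0x0
#define SYNC_WAIT_CONDITION_GT 0x1
#define SYNC_WAIT_CONDITION_GE 0x5

static bool sync_wait_satisfied(uint32_t cond, uint32_t current_val,
				uint32_t wait_val, bool ge_supported)
{
	switch (cond) {
	case SYNC_WAIT_CONDITION_GT:
		return current_val > wait_val;
	case SYNC_WAIT_CONDITION_LE:
		return current_val <= wait_val;
	case SYNC_WAIT_CONDITION_GE:
		return ge_supported && current_val >= wait_val;
	default:
		return false; /* unknown condition: keep waiting */
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       sync_wait_satisfied(SYNC_WAIT_CONDITION_GT, 5, 5, true),
	       sync_wait_satisfied(SYNC_WAIT_CONDITION_GE, 5, 5, true),
	       sync_wait_satisfied(SYNC_WAIT_CONDITION_GE, 5, 5, false));
	return 0;
}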
@@ -1754,11 +1902,13 @@ static bool evaluate_sync_update(struct kbase_queue *queue) u32 sync_wait_cond; u32 sync_current_val; struct kbase_device *kbdev; + bool sync_wait_cond_valid = false; if (WARN_ON(!queue)) return false; kbdev = queue->kctx->kbdev; + lockdep_assert_held(&kbdev->csf.scheduler.lock); sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, @@ -1777,9 +1927,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue) sync_wait_cond = CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); + sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) || + (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) || + ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && + csf_wait_ge_condition_supported(kbdev)); - WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && - (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); + WARN_ON(!sync_wait_cond_valid); sync_current_val = READ_ONCE(*sync_ptr); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue, @@ -1790,6 +1943,8 @@ static bool evaluate_sync_update(struct kbase_queue *queue) if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && (sync_current_val > queue->sync_value)) || + ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) && + (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) || ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && (sync_current_val <= queue->sync_value))) { /* The sync wait condition is satisfied so the group to which @@ -1892,12 +2047,48 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). */ - if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && - !scheduler->tock_pending_request) { - scheduler->tock_pending_request = true; + if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); - mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); + kbase_csf_scheduler_invoke_tock(kbdev); + } +} + +static void ktrace_log_group_state(struct kbase_queue_group *const group) +{ + switch (group->run_state) { + case KBASE_CSF_GROUP_INACTIVE: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group, + group->run_state); + break; + case KBASE_CSF_GROUP_RUNNABLE: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + break; + case KBASE_CSF_GROUP_IDLE: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group, + group->run_state); + break; + case KBASE_CSF_GROUP_SUSPENDED: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group, + group->run_state); + break; + case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, + group->run_state); + break; + case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, + group, group->run_state); + break; + case KBASE_CSF_GROUP_FAULT_EVICTED: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group, + group->run_state); + break; + case KBASE_CSF_GROUP_TERMINATED: + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, + group->run_state); + break; } } @@ -1918,6 +2109,8 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, group->run_state = 
run_state; + ktrace_log_group_state(group); + if (run_state == KBASE_CSF_GROUP_RUNNABLE) group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; @@ -1969,6 +2162,9 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, WARN_ON(!queue_group_scheduled_locked(group)); group->run_state = run_state; + + ktrace_log_group_state(group); + list_del_init(&group->link); spin_lock_irqsave(&scheduler->interrupt_lock, flags); @@ -2067,6 +2263,8 @@ static void insert_group_to_idle_wait(struct kbase_queue_group *const group) KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group, kctx->csf.sched.num_idle_wait_grps); group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group, + group->run_state); dev_dbg(kctx->kbdev->dev, "Group-%d suspended on sync_wait, total wait_groups: %u\n", group->handle, kctx->csf.sched.num_idle_wait_grps); @@ -2092,6 +2290,7 @@ static void remove_group_from_idle_wait(struct kbase_queue_group *const group) NULL; KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u); group->run_state = KBASE_CSF_GROUP_INACTIVE; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state); } static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, @@ -2270,14 +2469,19 @@ static void save_csg_slot(struct kbase_queue_group *group) else { group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group, + group->run_state); dev_dbg(kbdev->dev, "Group-%d suspended: idle", group->handle); } } else { group->run_state = KBASE_CSF_GROUP_SUSPENDED; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, + group->run_state); } update_offslot_non_idle_cnt_on_grp_suspend(group); + kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group); } } @@ -2400,6 +2604,7 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) csg_req ^= CSG_REQ_EP_CFG_MASK; kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, CSG_REQ_EP_CFG_MASK); + kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); csg_slot->priority = prio; @@ -2410,7 +2615,6 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio); - kbase_csf_ring_csg_doorbell(kbdev, slot); set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); } @@ -2522,6 +2726,12 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, protm_suspend_buf >> 32); } + if (group->dvs_buf) { + kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, + group->dvs_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI, + group->dvs_buf >> 32); + } /* Enable all interrupts for now */ kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); @@ -2542,6 +2752,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, state, CSG_REQ_STATE_MASK); + kbase_csf_ring_csg_doorbell(kbdev, slot); spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); /* Update status before rings the door-bell, marking ready => run */ @@ -2561,7 +2772,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) | (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); - 
kbase_csf_ring_csg_doorbell(kbdev, slot); + /* Update the heap reclaim manager */ + kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group); /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); @@ -2623,8 +2835,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); - if (fault) + if (fault) { group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group, + scheduler->total_runnable_grps); + } KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group, (((u64)scheduler->total_runnable_grps) << 32) | @@ -2634,6 +2849,8 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, /* Notify a group has been evicted */ wake_up_all(&kbdev->csf.event_wait); } + + kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); } static int term_group_sync(struct kbase_queue_group *group) @@ -2648,11 +2865,16 @@ static int term_group_sync(struct kbase_queue_group *group) group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); - if (!remaining) { + if (unlikely(!remaining)) { + enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT; + dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d", kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -2667,6 +2889,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + bool wait_for_termination = true; bool on_slot; kbase_reset_gpu_assert_failed_or_prevented(kbdev); @@ -2674,6 +2897,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); + wait_for_dump_complete_on_group_deschedule(group); if (!queue_group_scheduled_locked(group)) goto unlock; @@ -2681,39 +2905,28 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) #ifdef KBASE_PM_RUNTIME /* If the queue group is on slot and Scheduler is in SLEEPING state, - * then we need to wait here for Scheduler to exit the sleep state - * (i.e. wait for the runtime suspend or power down of GPU). This would - * be better than aborting the power down. The group will be suspended - * anyways on power down, so won't have to send the CSG termination - * request to FW. + * then we need to wake up the Scheduler to exit the sleep state rather + * than waiting for the runtime suspend or power down of GPU. + * The group termination is usually triggered in the context of Application + * thread and it has been seen that certain Apps can destroy groups at + * random points and not necessarily when the App is exiting. 
*/ if (on_slot && (scheduler->state == SCHED_SLEEPING)) { - if (wait_for_scheduler_to_exit_sleep(kbdev)) { + scheduler_wakeup(kbdev, true); + + /* Wait for MCU firmware to start running */ + if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { dev_warn( kbdev->dev, - "Wait for scheduler to exit sleep state timed out when terminating group %d of context %d_%d on slot %d", + "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", + kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, group->kctx->id, group->csg_nr); - - scheduler_wakeup(kbdev, true); - - /* Wait for MCU firmware to start running */ - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) - dev_warn( - kbdev->dev, - "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d", - kbase_backend_get_cycle_cnt(kbdev), - group->handle, group->kctx->tgid, - group->kctx->id, group->csg_nr); + /* No point in waiting for CSG termination if MCU didn't + * become active. + */ + wait_for_termination = false; } - - /* Check the group state again as scheduler lock would have been - * released when waiting for the exit from SLEEPING state. - */ - if (!queue_group_scheduled_locked(group)) - goto unlock; - - on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group); } #endif if (!on_slot) { @@ -2721,7 +2934,11 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) } else { bool as_faulty; - term_group_sync(group); + if (likely(wait_for_termination)) + term_group_sync(group); + else + term_csg_slot(group); + + /* Treat the CSG as having been terminated */ as_faulty = cleanup_csg_slot(group); /* remove from the scheduler list */ @@ -2770,6 +2987,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group) group)); group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); /* A normal mode CSG could be idle onslot during * protected mode. In this case clear the @@ -3124,7 +3343,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) csg_slot_stopped_raw), remaining); - if (remaining) { + if (likely(remaining)) { u32 i; for_each_set_bit(i, changed, num_groups) { @@ -3164,6 +3383,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) for_each_set_bit(i, slot_mask, num_groups) { struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; + enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; struct base_gpu_queue_group_error const err_payload = { .error_type = @@ -3177,10 +3397,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) if (unlikely(group == NULL)) continue; - kbase_csf_add_group_fatal_error(group, - &err_payload); - kbase_event_wakeup(group->kctx); - /* TODO GPUCORE-25328: The CSG can't be * terminated, the GPU will be reset as a * work-around. */ dev_warn(kbdev->dev, "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)", kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid, group->kctx->id, i, kbdev->csf.fw_timeout_ms); + if (kbase_csf_firmware_ping_wait(kbdev, + FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, group->kctx, error_type); + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); /* The group has failed suspension, stop * further examination. 
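
The timeout paths touched in this patch (CSG termination, suspend, slot start, EP_CFG update, and the LRU idle-group eviction added further below) share one recovery shape: on timeout, ping the firmware for FW_PING_AFTER_ERROR_TIMEOUT_MS via kbase_csf_firmware_ping_wait() to decide whether to report the CSG-specific dump-on-fault code or DF_PING_REQUEST_TIMEOUT, notify the dump-on-fault machinery (schedule_actions_trigger_df() or kbase_debug_csf_fault_notify()), and then escalate to a GPU reset where appropriate. The stand-alone C sketch below is only an illustration of that decision flow; fw_ping_wait(), trigger_dump_on_fault() and request_gpu_reset() are hypothetical stubs standing in for the kbase helpers, not real APIs, and the sketch is not driver code.

/*
 * Minimal, self-contained sketch of the "classify timeout via firmware ping,
 * then escalate" pattern used by the scheduler timeout paths in this patch.
 * All helpers here are hypothetical stubs.
 */
#include <stdio.h>

enum df_error_type {
	DF_CSG_SUSPEND_TIMEOUT,  /* the timeout that was actually hit */
	DF_PING_REQUEST_TIMEOUT, /* firmware itself is unresponsive */
};

/* Stub for the firmware ping: 0 on ping ack, non-zero on timeout. */
static int fw_ping_wait(unsigned int timeout_ms)
{
	(void)timeout_ms;
	return -1; /* simulate an unresponsive firmware */
}

/* Stub for handing the classified error to the dump-on-fault machinery. */
static void trigger_dump_on_fault(enum df_error_type type)
{
	printf("dump-on-fault: %s\n",
	       type == DF_PING_REQUEST_TIMEOUT ? "PING_REQUEST_TIMEOUT" :
						 "CSG_SUSPEND_TIMEOUT");
}

/* Stub for requesting a GPU reset. */
static void request_gpu_reset(void)
{
	printf("GPU reset requested\n");
}

static void handle_csg_timeout(void)
{
	enum df_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;

	/* If even a plain ping times out, report firmware unresponsiveness
	 * rather than the CSG-specific timeout that was hit first.
	 */
	if (fw_ping_wait(10 /* stand-in for FW_PING_AFTER_ERROR_TIMEOUT_MS */))
		error_type = DF_PING_REQUEST_TIMEOUT;

	trigger_dump_on_fault(error_type);
	request_gpu_reset();
}

int main(void)
{
	handle_csg_timeout();
	return 0;
}

Classifying the failure this way keeps the fault report from blaming an individual group when the MCU as a whole has stopped responding, which is the case the subsequent GPU reset is there to recover.
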
@@ -3279,7 +3502,7 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) slots_state_changed(kbdev, changed, csg_slot_running), remaining); - if (remaining) { + if (likely(remaining)) { for_each_set_bit(i, changed, num_groups) { struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; @@ -3287,12 +3510,22 @@ static void wait_csg_slots_start(struct kbase_device *kbdev) /* The on slot csg is now running */ clear_bit(i, slot_mask); group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } else { - dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n", - kbase_backend_get_cycle_cnt(kbdev), - kbdev->csf.fw_timeout_ms, - num_groups, slot_mask); + const int csg_nr = ffs(slot_mask[0]) - 1; + struct kbase_queue_group *group = + scheduler->csg_slots[csg_nr].resident_group; + enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT; + + dev_err(kbdev->dev, + "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n", + kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, + num_groups, slot_mask); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, group->kctx, error_type); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -3409,11 +3642,10 @@ static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev, slot_mask, dones), remaining); - if (remaining) + if (likely(remaining)) bitmap_andnot(slot_mask, slot_mask, dones, num_groups); else { - /* Timed-out on the wait */ return -ETIMEDOUT; } @@ -3432,17 +3664,25 @@ static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - if (ret != 0) { - /* The update timeout is not regarded as a serious - * issue, no major consequences are expected as a - * result, so just warn the case. - */ + if (unlikely(ret != 0)) { + const int csg_nr = ffs(slot_mask[0]) - 1; + struct kbase_queue_group *group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT; + dev_warn( kbdev->dev, "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx", kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, slot_mask[0]); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, group->kctx, error_type); + + /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. 
*/ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); } } @@ -3508,6 +3748,7 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, kbase_event_wakeup(kctx); mutex_unlock(&scheduler->lock); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups); } /** @@ -3690,6 +3931,8 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, CSG_SLOT_RUNNING) { if (kctx_as_enabled(input_grp->kctx) && scheduler_slot_protm_ack(kbdev, input_grp, slot)) { + int err; + /* Option of acknowledging to multiple * CSGs from the same kctx is dropped, * after consulting with the @@ -3708,9 +3951,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - kbase_csf_wait_protected_mode_enter(kbdev); + err = kbase_csf_wait_protected_mode_enter(kbdev); mutex_unlock(&kbdev->mmu_hw_mutex); + if (err) + schedule_actions_trigger_df(kbdev, input_grp->kctx, + DF_PROTECTED_MODE_ENTRY_FAILURE); + scheduler->protm_enter_time = ktime_get_raw(); return; @@ -4093,8 +4340,6 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, } } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - /* The groups are aggregated into a single kernel doorbell request */ if (!bitmap_empty(csg_bitmap, num_groups)) { @@ -4103,15 +4348,22 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, u32 db_slots = (u32)csg_bitmap[0]; kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); if (wait_csg_slots_handshake_ack(kbdev, CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { + const int csg_nr = ffs(csg_bitmap[0]) - 1; + struct kbase_queue_group *group = + scheduler->csg_slots[csg_nr].resident_group; + dev_warn( kbdev->dev, "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, csg_bitmap[0]); + schedule_actions_trigger_df(kbdev, group->kctx, + DF_CSG_STATUS_UPDATE_TIMEOUT); /* Store the bitmap of timed out slots */ bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); @@ -4131,6 +4383,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots); csg_bitmap[0] = db_slots; } + } else { + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } } @@ -4185,17 +4439,21 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) if (group_on_slot_is_idle(kbdev, i)) { group->run_state = KBASE_CSF_GROUP_IDLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); set_bit(i, scheduler->csg_slots_idle_mask); KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, scheduler->csg_slots_idle_mask[0]); - } else + } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } } bitmap_or(scheduler->csg_slots_idle_mask, scheduler->csg_slots_idle_mask, failed_csg_bitmap, num_groups); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL, + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL, scheduler->csg_slots_idle_mask[0]); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } @@ -4281,7 +4539,12 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, int ret = suspend_active_queue_groups(kbdev, slot_mask); - if 
(ret) { + if (unlikely(ret)) { + const int csg_nr = ffs(slot_mask[0]) - 1; + struct kbase_queue_group *group = + scheduler->csg_slots[csg_nr].resident_group; + enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + /* The suspend of CSGs failed, * trigger the GPU reset to be in a deterministic state. */ @@ -4289,6 +4552,9 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms, kbdev->csf.global_iface.group_num, slot_mask); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, group->kctx, error_type); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); @@ -4372,6 +4638,21 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&scheduler->interrupt_lock); + + if (scheduler->fast_gpu_idle_handling) { + scheduler->fast_gpu_idle_handling = false; + + if (scheduler->total_runnable_grps) { + suspend = !atomic_read(&scheduler->non_idle_offslot_grps) && + kbase_pm_idle_groups_sched_suspendable(kbdev); + } else + suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); + spin_unlock(&scheduler->interrupt_lock); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return suspend; + } + if (scheduler->total_runnable_grps) { /* Check both on-slots and off-slots groups idle status */ @@ -4418,6 +4699,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) cancel_tick_timer(kbdev); scheduler_pm_idle_before_sleep(kbdev); scheduler->state = SCHED_SLEEPING; + KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); } #endif @@ -4471,8 +4753,17 @@ static void gpu_idle_worker(struct work_struct *work) __ENCODE_KTRACE_INFO(true, false, false)); return; } + kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(scheduler->state == SCHED_BUSY)) { + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); + return; + } +#endif + scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); if (scheduler_is_idle_suspendable) { KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL, @@ -4484,6 +4775,8 @@ static void gpu_idle_worker(struct work_struct *work) else #endif all_groups_suspended = scheduler_suspend_on_idle(kbdev); + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u); } mutex_unlock(&scheduler->lock); @@ -4641,14 +4934,162 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev) struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; struct kbase_queue_group *group = csg_slot->resident_group; - if (!queue_group_idle_locked(group)) + if (!queue_group_idle_locked(group)) { group->run_state = KBASE_CSF_GROUP_IDLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state); + } } /* Return the number of idle slots for potential replacement */ return bitmap_weight(csg_bitmap, num_groups); } +static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, + unsigned int timeout_ms) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); + u32 num_groups = kbdev->csf.global_iface.group_num; + int err = 0; + DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); + + lockdep_assert_held(&scheduler->lock); + + bitmap_copy(slot_mask_local, 
slot_mask, MAX_SUPPORTED_CSGS); + + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { + DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + + bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); + + remaining = wait_event_timeout( + kbdev->csf.event_wait, + slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); + + if (likely(remaining)) { + u32 i; + + for_each_set_bit(i, changed, num_groups) { + struct kbase_queue_group *group; + + if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) + continue; + + /* The on slot csg is now stopped */ + clear_bit(i, slot_mask_local); + + group = scheduler->csg_slots[i].resident_group; + if (likely(group)) { + /* Only do save/cleanup if the + * group is not terminated during + * the sleep. + */ + save_csg_slot(group); + if (cleanup_csg_slot(group)) + sched_evict_group(group, true, true); + } + } + } else { + dev_warn( + kbdev->dev, + "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx", + kbase_backend_get_cycle_cnt(kbdev), slot_mask[0], + slot_mask_local[0]); + /* Return the bitmask of the timed out slots to the caller */ + bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); + + err = -ETIMEDOUT; + } + } + + return err; +} + +/** + * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG + * + * @kbdev: Pointer to the device + * + * Used to allow for speedier starting/resumption of another CSG. The worst-case + * scenario of the evicted CSG being scheduled next is expected to be rare. + * Also, the eviction will not be applied if the GPU is running in protected mode. + * Otherwise the eviction attempt would force the MCU to quit the execution of + * the protected mode, and likely re-request to enter it again. + */ +static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + size_t i; + struct kbase_queue_group *lru_idle_group = NULL; + const u32 total_csg_slots = kbdev->csf.global_iface.group_num; + const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >= + (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS)); + u8 as_usage[BASE_MAX_NR_AS] = { 0 }; + + lockdep_assert_held(&scheduler->lock); + if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) + return; + + BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE)); + if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots) + return; /* Some CSG slots remain unused */ + + if (all_addr_spaces_used) { + for (i = 0; i != total_csg_slots; ++i) { + if (scheduler->csg_slots[i].resident_group != NULL) + as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++; + } + } + + for (i = 0; i != total_csg_slots; ++i) { + struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group; + + /* We expect that by this point all groups would normally be + * assigned a physical CSG slot, but if circumstances have + * changed then bail out of this optimisation. + */ + if (group == NULL) + return; + + /* Real-time priority CSGs must be kept on-slot even when + * idle. + */ + if ((group->run_state == KBASE_CSF_GROUP_IDLE) && + (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && + ((lru_idle_group == NULL) || + (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { + /* If all address spaces are used, we need to ensure the group does not + * share the AS with other active CSGs. Otherwise the CSG would be freed without an AS + * and this optimization would not work. 
+ */ + if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1)) + lru_idle_group = group; + } + } + + if (lru_idle_group != NULL) { + unsigned long slot_mask = 1 << lru_idle_group->csg_nr; + + dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d", + lru_idle_group->handle, lru_idle_group->kctx->tgid, + lru_idle_group->kctx->id, lru_idle_group->csg_nr); + suspend_queue_group(lru_idle_group); + if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { + enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; + + dev_warn( + kbdev->dev, + "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)", + kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle, + lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, + lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms); + if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS)) + error_type = DF_PING_REQUEST_TIMEOUT; + schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type); + } + } +} + static void schedule_actions(struct kbase_device *kbdev, bool is_tick) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -4796,6 +5237,8 @@ redo_local_tock: } else { spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } + + evict_lru_or_blocked_csg(kbdev); } /** @@ -4817,6 +5260,9 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) lockdep_assert_held(&scheduler->lock); + if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev))) + return true; + if (scheduler->state == SCHED_SUSPENDED) return true; @@ -4826,12 +5272,12 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.exit_gpu_sleep_mode) { - int ret = scheduler_pm_active_after_sleep(kbdev, flags); - /* hwaccess_lock is released in the previous function - * call. 
- */ + int ret = scheduler_pm_active_after_sleep(kbdev, &flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (!ret) { scheduler->state = SCHED_INACTIVE; + KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); return false; } @@ -4849,14 +5295,11 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) static void schedule_on_tock(struct work_struct *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.scheduler.tock_work.work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; - /* Tock work item is serviced */ - scheduler->tock_pending_request = false; - err = kbase_reset_gpu_try_prevent(kbdev); /* Regardless of whether reset failed or is currently happening, exit * early @@ -4864,21 +5307,28 @@ static void schedule_on_tock(struct work_struct *work) if (err) return; + kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); if (can_skip_scheduling(kbdev)) + { + atomic_set(&scheduler->pending_tock_work, false); goto exit_no_schedule_unlock; + } WARN_ON(!(scheduler->state == SCHED_INACTIVE)); scheduler->state = SCHED_BUSY; + KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); /* Undertaking schedule action steps */ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u); - schedule_actions(kbdev, false); + while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true) + schedule_actions(kbdev, false); /* Record time information on a non-skipped tock */ scheduler->last_schedule = jiffies; scheduler->state = SCHED_INACTIVE; + KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); if (!scheduler->total_runnable_grps) enqueue_gpu_idle_work(scheduler); mutex_unlock(&scheduler->lock); @@ -4897,8 +5347,8 @@ exit_no_schedule_unlock: static void schedule_on_tick(struct work_struct *work) { - struct kbase_device *kbdev = container_of(work, struct kbase_device, - csf.scheduler.tick_work); + struct kbase_device *kbdev = + container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -4908,6 +5358,7 @@ static void schedule_on_tick(struct work_struct *work) if (err) return; + kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); WARN_ON(scheduler->tick_timer_active); @@ -4915,6 +5366,7 @@ static void schedule_on_tick(struct work_struct *work) goto exit_no_schedule_unlock; scheduler->state = SCHED_BUSY; + KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state); /* Undertaking schedule action steps */ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps); @@ -4936,6 +5388,7 @@ static void schedule_on_tick(struct work_struct *work) scheduler->state = SCHED_INACTIVE; mutex_unlock(&scheduler->lock); + KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); kbase_reset_gpu_allow(kbdev); dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); @@ -4949,67 +5402,6 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static int wait_csg_slots_suspend(struct kbase_device *kbdev, - const unsigned long *slot_mask, - unsigned int timeout_ms) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); - u32 num_groups = kbdev->csf.global_iface.group_num; - int err = 0; - 
DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); - - lockdep_assert_held(&scheduler->lock); - - bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); - - while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) - && remaining) { - DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); - - bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); - - remaining = wait_event_timeout(kbdev->csf.event_wait, - slots_state_changed(kbdev, changed, - csg_slot_stopped_locked), - remaining); - - if (remaining) { - u32 i; - - for_each_set_bit(i, changed, num_groups) { - struct kbase_queue_group *group; - - if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) - continue; - - /* The on slot csg is now stopped */ - clear_bit(i, slot_mask_local); - - group = scheduler->csg_slots[i].resident_group; - if (likely(group)) { - /* Only do save/cleanup if the - * group is not terminated during - * the sleep. - */ - save_csg_slot(group); - if (cleanup_csg_slot(group)) - sched_evict_group(group, true, true); - } - } - } else { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for CSG slots to suspend, slot_mask: 0x%*pb\n", - kbase_backend_get_cycle_cnt(kbdev), - num_groups, slot_mask_local); - - - err = -ETIMEDOUT; - } - } - - return err; -} - static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask) { @@ -5172,6 +5564,7 @@ static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) cleanup_csg_slot(group); group->run_state = KBASE_CSF_GROUP_SUSPENDED; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state); /* Simply treat the normal mode groups as non-idle. The tick * scheduled after the reset will re-initialize the counter @@ -5186,10 +5579,15 @@ unlock: return suspend_on_slot_groups; } +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + cancel_work_sync(&scheduler->tick_work); +} + static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) { + atomic_set(&scheduler->pending_tock_work, false); cancel_delayed_work_sync(&scheduler->tock_work); - scheduler->tock_pending_request = false; } static void scheduler_inner_reset(struct kbase_device *kbdev) @@ -5203,7 +5601,7 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) /* Cancel any potential queued delayed work(s) */ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); cancel_tick_timer(kbdev); - cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -5238,6 +5636,8 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u); + kbase_debug_csf_fault_wait_completion(kbdev); + if (scheduler_handle_reset_in_protected_mode(kbdev) && !suspend_active_queue_groups_on_reset(kbdev)) { /* As all groups have been successfully evicted from the CSG @@ -5274,6 +5674,8 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev) mutex_unlock(&kbdev->kctx_list_lock); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u); + /* After queue groups reset, the scheduler data fields clear out */ scheduler_inner_reset(kbdev); } @@ -5328,7 +5730,7 @@ static void firmware_aliveness_monitor(struct work_struct *work) kbase_csf_scheduler_wait_mcu_active(kbdev); - err = kbase_csf_firmware_ping_wait(kbdev); + err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms); if (err) { /* It is acceptable to enqueue a reset whilst we've prevented @@ -5687,6 +6089,8 @@ static bool 
check_sync_update_for_on_slot_group( */ group->reevaluate_idle_status = true; group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); @@ -5796,6 +6200,15 @@ static void check_group_sync_update_worker(struct work_struct *work) mutex_lock(&scheduler->lock); +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(scheduler->state == SCHED_BUSY)) { + queue_work(kctx->csf.sched.sync_update_wq, + &kctx->csf.sched.sync_update_work); + mutex_unlock(&scheduler->lock); + return; + } +#endif + KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u); if (kctx->csf.sched.num_idle_wait_grps != 0) { struct kbase_queue_group *group, *temp; @@ -5871,6 +6284,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) INIT_WORK(&kctx->csf.sched.sync_update_work, check_group_sync_update_worker); + kbase_csf_tiler_heap_reclaim_ctx_init(kctx); + err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); if (err) { @@ -5930,6 +6345,7 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) INIT_WORK(&scheduler->tick_work, schedule_on_tick); INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); + atomic_set(&scheduler->pending_tock_work, false); INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); @@ -5945,18 +6361,19 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); scheduler->state = SCHED_SUSPENDED; + KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); scheduler->pm_active_count = 0; scheduler->ngrp_to_schedule = 0; scheduler->total_runnable_grps = 0; scheduler->top_ctx = NULL; scheduler->top_grp = NULL; scheduler->last_schedule = 0; - scheduler->tock_pending_request = false; scheduler->active_protm_grp = NULL; scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; scheduler_doorbell_init(kbdev); INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); + scheduler->fast_gpu_idle_handling = false; atomic_set(&scheduler->gpu_no_longer_idle, false); atomic_set(&scheduler->non_idle_offslot_grps, 0); @@ -5964,6 +6381,8 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) scheduler->tick_timer.function = tick_timer_callback; scheduler->tick_timer_active = false; + kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); + return 0; } @@ -5981,22 +6400,26 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) mutex_lock(&kbdev->csf.scheduler.lock); if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) { + unsigned long flags; /* The power policy could prevent the Scheduler from * getting suspended when GPU becomes idle. 
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev)); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); scheduler_suspend(kbdev); } mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); cancel_tick_timer(kbdev); - cancel_work_sync(&kbdev->csf.scheduler.tick_work); + cancel_tick_work(&kbdev->csf.scheduler); cancel_tock_work(&kbdev->csf.scheduler); - mutex_destroy(&kbdev->csf.scheduler.lock); kfree(kbdev->csf.scheduler.csg_slots); kbdev->csf.scheduler.csg_slots = NULL; } + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, + kbase_csf_scheduler_get_nr_active_csgs(kbdev)); } void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) @@ -6005,6 +6428,9 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->csf.scheduler.idle_wq); if (kbdev->csf.scheduler.wq) destroy_workqueue(kbdev->csf.scheduler.wq); + + kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); + mutex_destroy(&kbdev->csf.scheduler.lock); } /** @@ -6069,13 +6495,12 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, if (currently_enabled && !enable) { scheduler->timer_enabled = false; cancel_tick_timer(kbdev); - cancel_delayed_work(&scheduler->tock_work); - scheduler->tock_pending_request = false; mutex_unlock(&scheduler->lock); /* The non-sync version to cancel the normal work item is not * available, so need to drop the lock before cancellation. */ - cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); + cancel_tock_work(scheduler); return; } @@ -6112,6 +6537,12 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) int result = 0; lockdep_assert_held(&scheduler->lock); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(scheduler->state == SCHED_BUSY)) + return -EBUSY; +#endif + #ifdef KBASE_PM_RUNTIME /* If scheduler is in sleeping state, then MCU needs to be activated * to suspend CSGs. @@ -6147,7 +6578,7 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; /* Cancel any potential queued delayed work(s) */ - cancel_work_sync(&scheduler->tick_work); + cancel_tick_work(scheduler); cancel_tock_work(scheduler); result = kbase_reset_gpu_prevent_and_wait(kbdev); @@ -6271,6 +6702,7 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev) } scheduler->state = SCHED_SUSPENDED; + KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.gpu_sleep_mode_active = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h index 12df5054e573..d22d7c8b9dce 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.h @@ -36,7 +36,9 @@ * If the CSG is already scheduled and resident, the CSI will be started * right away, otherwise once the group is made resident. * - * Return: 0 on success, or negative on failure. + * Return: 0 on success, or negative on failure. -EBUSY is returned to + * indicate to the caller that queue could not be enabled due to Scheduler + * state and the caller can try to enable the queue after sometime. 
*/ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue); @@ -530,12 +532,30 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; unsigned long flags; + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); spin_lock_irqsave(&scheduler->interrupt_lock, flags); if (!scheduler->tick_timer_active) queue_work(scheduler->wq, &scheduler->tick_work); spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } +/** + * kbase_csf_scheduler_invoke_tock() - Invoke the scheduling tock + * + * @kbdev: Pointer to the device + * + * This function will queue the scheduling tock work item for immediate + * execution. + */ +static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); + if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) + mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0); +} + /** * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been * configured to operate with the diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 769369150687..909362da0047 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,26 @@ #include "mali_kbase_csf_tiler_heap_def.h" #include "mali_kbase_csf_heap_context_alloc.h" +/* Tiler heap shrink stop limit for maintaining a minimum number of chunks */ +#define HEAP_SHRINK_STOP_LIMIT (1) + +/** + * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap + * + * @cdsbp_0: Descriptor_type and buffer_type + * @size: The size of the current heap chunk + * @pointer: Pointer to the current heap chunk + * @low_pointer: Pointer to low end of current heap chunk + * @high_pointer: Pointer to high end of current heap chunk + */ +struct kbase_csf_gpu_buffer_heap { + u32 cdsbp_0; + u32 size; + u64 pointer; + u64 low_pointer; + u64 high_pointer; +} __packed; + /** * encode_chunk_ptr - Encode the address and size of a chunk as an integer. * @@ -73,6 +93,35 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk( struct kbase_csf_tiler_heap_chunk, link); } +/** + * remove_external_chunk_mappings - Remove external mappings from a chunk that + * is being transitioned to the tiler heap + * memory system. + * + * @kctx: kbase context the chunk belongs to. + * @chunk: The chunk whose external mappings are going to be removed. + * + * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other + * parts of kbase outside of tiler heap management should not take references on its physical + * pages, and should not modify them. 
+ */ +static void remove_external_chunk_mappings(struct kbase_context *const kctx, + struct kbase_csf_tiler_heap_chunk *chunk) +{ + lockdep_assert_held(&kctx->reg_lock); + + if (chunk->region->cpu_alloc != NULL) { + kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0, + chunk->region->cpu_alloc->nents); + } +#if !defined(CONFIG_MALI_VECTOR_DUMP) + chunk->region->flags |= KBASE_REG_DONT_NEED; +#endif + + dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va); +} + /** * link_chunk - Link a chunk into a tiler heap * @@ -93,19 +142,12 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap, if (prev) { struct kbase_context *const kctx = heap->kctx; - struct kbase_vmap_struct map; - u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va, - sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map); + u64 *prev_hdr = prev->map.addr; - if (unlikely(!prev_hdr)) { - dev_err(kctx->kbdev->dev, - "Failed to map tiler heap chunk 0x%llX\n", - prev->gpu_va); - return -ENOMEM; - } + WARN((prev->region->flags & KBASE_REG_CPU_CACHED), + "Cannot support CPU cached chunks without sync operations"); *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); - kbase_vunmap(kctx, &map); dev_dbg(kctx->kbdev->dev, "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", @@ -132,152 +174,264 @@ static int link_chunk(struct kbase_csf_tiler_heap *const heap, static int init_chunk(struct kbase_csf_tiler_heap *const heap, struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) { - struct kbase_vmap_struct map; - struct u64 *chunk_hdr = NULL; + int err = 0; + u64 *chunk_hdr; struct kbase_context *const kctx = heap->kctx; + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { dev_err(kctx->kbdev->dev, "Tiler heap chunk address is unusable\n"); return -EINVAL; } - chunk_hdr = kbase_vmap_prot(kctx, - chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map); - - if (unlikely(!chunk_hdr)) { - dev_err(kctx->kbdev->dev, - "Failed to map a tiler heap chunk header\n"); - return -ENOMEM; + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), + "Cannot support CPU cached chunks without sync operations"); + chunk_hdr = chunk->map.addr; + if (WARN(chunk->map.size < CHUNK_HDR_SIZE, + "Tiler chunk kernel mapping was not large enough for zero-init")) { + return -EINVAL; } memset(chunk_hdr, 0, CHUNK_HDR_SIZE); - kbase_vunmap(kctx, &map); + INIT_LIST_HEAD(&chunk->link); if (link_with_prev) - return link_chunk(heap, chunk); - else - return 0; + err = link_chunk(heap, chunk); + + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n"); + return -EINVAL; + } + + list_add_tail(&chunk->link, &heap->chunks_list); + heap->chunk_count++; + + return err; +} + +/** + * remove_unlinked_chunk - Remove a chunk that is not currently linked into a + * heap. + * + * @kctx: Kbase context that was used to allocate the memory. + * @chunk: Chunk that has been allocated, but not linked into a heap. 
+ */ +static void remove_unlinked_chunk(struct kbase_context *kctx, + struct kbase_csf_tiler_heap_chunk *chunk) +{ + if (WARN_ON(!list_empty(&chunk->link))) + return; + + kbase_gpu_vm_lock(kctx); + kbase_vunmap(kctx, &chunk->map); + /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT + * regions), and so we must clear that flag too before freeing + */ +#if !defined(CONFIG_MALI_VECTOR_DUMP) + chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); +#else + chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; +#endif + kbase_mem_free_region(kctx, chunk->region); + kbase_gpu_vm_unlock(kctx); + + kfree(chunk); +} + +/** + * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA + * region for the chunk, and provide a kernel mapping. + * @kctx: kbase context with which the chunk will be linked + * @chunk_size: the size of the chunk from the corresponding heap + * + * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the + * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so + * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap + * lock). + * + * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we + * ensure that it is protected from being mapped by other parts of kbase. + * + * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the + * shrinker path, as it may be otherwise shrunk at any time. + * + * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer + * on failure + */ +static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx, + u64 chunk_size) +{ + u64 nr_pages = PFN_UP(chunk_size); + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | + BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD; + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + /* The chunk kernel mapping needs to be large enough to: + * - initially zero the CHUNK_HDR_SIZE area + * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area + */ + const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE); + + /* Calls to this function are inherently synchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; + flags |= kbase_mem_group_id_set(kctx->jit_group_id); + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (unlikely(!chunk)) { + dev_err(kctx->kbdev->dev, + "No kernel memory for a new tiler heap chunk\n"); + return NULL; + } + + /* Allocate GPU memory for the new chunk. 
*/ + chunk->region = + kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info); + + if (unlikely(!chunk->region)) { + dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n"); + goto unroll_chunk; + } + + kbase_gpu_vm_lock(kctx); + + /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + * whilst we had dropped the region lock + */ + if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { + dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n"); + goto unroll_region; + } + + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE + * being requested, it's useful to document in code what those restrictions are, and ensure + * they remain in place in future. + */ + if (WARN(!chunk->region->gpu_alloc, + "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + goto unroll_region; + } + + if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, + "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + goto unroll_region; + } + + if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), + "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + goto unroll_region; + } + + if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), + "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + goto unroll_region; + } + + if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, + "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + goto unroll_region; + } + + if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size, + (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map, + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) { + dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n"); + goto unroll_region; + } + + remove_external_chunk_mappings(kctx, chunk); + kbase_gpu_vm_unlock(kctx); + + return chunk; + +unroll_region: + /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT + * regions), and so we must clear that flag too before freeing. + */ +#if !defined(CONFIG_MALI_VECTOR_DUMP) + chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); +#else + chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; +#endif + kbase_mem_free_region(kctx, chunk->region); + kbase_gpu_vm_unlock(kctx); +unroll_chunk: + kfree(chunk); + return NULL; } /** * create_chunk - Create a tiler heap chunk * * @heap: Pointer to the tiler heap for which to allocate memory. - * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be - * linked with the previously allocated chunk. * - * This function allocates a chunk of memory for a tiler heap and adds it to - * the end of the list of chunks associated with that heap. The size of the - * chunk is not a parameter because it is configured per-heap not per-chunk. + * This function allocates a chunk of memory for a tiler heap, adds it to the + * the list of chunks associated with that heap both on the host side and in GPU + * memory. * * Return: 0 if successful or a negative error code on failure. 
*/ -static int create_chunk(struct kbase_csf_tiler_heap *const heap, - bool link_with_prev) +static int create_chunk(struct kbase_csf_tiler_heap *const heap) { int err = 0; - struct kbase_context *const kctx = heap->kctx; - u64 nr_pages = PFN_UP(heap->chunk_size); - u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | - BASE_MEM_COHERENT_LOCAL; struct kbase_csf_tiler_heap_chunk *chunk = NULL; - /* Calls to this function are inherently synchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; - - flags |= kbase_mem_group_id_set(kctx->jit_group_id); - -#if defined(CONFIG_MALI_BIFROST_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP) - flags |= BASE_MEM_PROT_CPU_RD; -#endif - - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + chunk = alloc_new_chunk(heap->kctx, heap->chunk_size); if (unlikely(!chunk)) { - dev_err(kctx->kbdev->dev, - "No kernel memory for a new tiler heap chunk\n"); - return -ENOMEM; - } - - /* Allocate GPU memory for the new chunk. */ - INIT_LIST_HEAD(&chunk->link); - chunk->region = - kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info); - - if (unlikely(!chunk->region)) { - dev_err(kctx->kbdev->dev, - "Failed to allocate a tiler heap chunk\n"); err = -ENOMEM; - } else { - err = init_chunk(heap, chunk, link_with_prev); - if (unlikely(err)) { - kbase_gpu_vm_lock(kctx); - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; - kbase_mem_free_region(kctx, chunk->region); - kbase_gpu_vm_unlock(kctx); - } + goto allocation_failure; } - if (unlikely(err)) { - kfree(chunk); - } else { - list_add_tail(&chunk->link, &heap->chunks_list); - heap->chunk_count++; + mutex_lock(&heap->kctx->csf.tiler_heaps.lock); + err = init_chunk(heap, chunk, true); + mutex_unlock(&heap->kctx->csf.tiler_heaps.lock); - dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", - chunk->gpu_va); - } + if (unlikely(err)) + goto initialization_failure; + dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va); + + return 0; +initialization_failure: + remove_unlinked_chunk(heap->kctx, chunk); +allocation_failure: return err; } /** - * delete_chunk - Delete a tiler heap chunk - * - * @heap: Pointer to the tiler heap for which @chunk was allocated. - * @chunk: Pointer to a chunk to be deleted. - * - * This function frees a tiler heap chunk previously allocated by @create_chunk - * and removes it from the list of chunks associated with the heap. - * - * WARNING: The deleted chunk is not unlinked from the list of chunks used by - * the GPU, therefore it is only safe to use this function when - * deleting a heap. - */ -static void delete_chunk(struct kbase_csf_tiler_heap *const heap, - struct kbase_csf_tiler_heap_chunk *const chunk) -{ - struct kbase_context *const kctx = heap->kctx; - - kbase_gpu_vm_lock(kctx); - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; - kbase_mem_free_region(kctx, chunk->region); - kbase_gpu_vm_unlock(kctx); - list_del(&chunk->link); - heap->chunk_count--; - kfree(chunk); -} - -/** - * delete_all_chunks - Delete all chunks belonging to a tiler heap + * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap * * @heap: Pointer to a tiler heap. * - * This function empties the list of chunks associated with a tiler heap by - * freeing all chunks previously allocated by @create_chunk. 
+ * This function empties the list of chunks associated with a tiler heap by freeing all chunks + * previously allocated by @create_chunk. + * + * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the + * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct + * kbase_context.region_lock. + * + * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the + * list of chunks used by the GPU, therefore it is only safe to use this function when + * deleting a heap. */ static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) { + struct kbase_context *const kctx = heap->kctx; struct list_head *entry = NULL, *tmp = NULL; + WARN(!list_empty(&heap->link), + "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller"); + list_for_each_safe(entry, tmp, &heap->chunks_list) { struct kbase_csf_tiler_heap_chunk *chunk = list_entry( entry, struct kbase_csf_tiler_heap_chunk, link); - delete_chunk(heap, chunk); + list_del_init(&chunk->link); + heap->chunk_count--; + + remove_unlinked_chunk(kctx, chunk); } } @@ -299,7 +453,7 @@ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, u32 i; for (i = 0; (i < nchunks) && likely(!err); i++) - err = create_chunk(heap, true); + err = create_chunk(heap); if (unlikely(err)) delete_all_chunks(heap); @@ -308,14 +462,17 @@ static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, } /** - * delete_heap - Delete a tiler heap + * delete_heap - Delete an unlinked tiler heap * * @heap: Pointer to a tiler heap to be deleted. * * This function frees any chunks allocated for a tiler heap previously - * initialized by @kbase_csf_tiler_heap_init and removes it from the list of - * heaps associated with the kbase context. The heap context structure used by + * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by * the firmware is also freed. + * + * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the + * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct + * kbase_context.region_lock. */ static void delete_heap(struct kbase_csf_tiler_heap *heap) { @@ -323,23 +480,41 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va); - lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + WARN(!list_empty(&heap->link), + "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller"); + /* Make sure that all of the VA regions corresponding to the chunks are + * freed at this time and that the work queue is not trying to access freed + * memory. + * + * Note: since the heap is unlinked, and that no references are made to chunks other + * than from their heap, there is no need to separately move the chunks out of the + * heap->chunks_list to delete them. + */ delete_all_chunks(heap); + kbase_vunmap(kctx, &heap->gpu_va_map); /* We could optimize context destruction by not freeing leaked heap - * contexts but it doesn't seem worth the extra complexity. + * contexts but it doesn't seem worth the extra complexity. After this + * point, the suballocation is returned to the heap context allocator and + * may be overwritten with new data, meaning heap->gpu_va should not + * be used past this point. 
*/ kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, heap->gpu_va); - list_del(&heap->link); - WARN_ON(heap->chunk_count); KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, heap->target_in_flight, 0); + if (heap->buf_desc_reg) { + kbase_vunmap(kctx, &heap->buf_desc_map); + kbase_gpu_vm_lock(kctx); + heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_gpu_vm_unlock(kctx); + } + kfree(heap); } @@ -375,6 +550,23 @@ static struct kbase_csf_tiler_heap *find_tiler_heap( return NULL; } +static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap, + u64 const chunk_gpu_va) +{ + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + list_for_each_entry(chunk, &heap->chunks_list, link) { + if (chunk->gpu_va == chunk_gpu_va) + return chunk; + } + + dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va); + + return NULL; +} + int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) { int err = kbase_csf_heap_context_allocator_init( @@ -393,37 +585,88 @@ int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) { + LIST_HEAD(local_heaps_list); struct list_head *entry = NULL, *tmp = NULL; dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n"); mutex_lock(&kctx->csf.tiler_heaps.lock); + list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list); + mutex_unlock(&kctx->csf.tiler_heaps.lock); - list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) { + list_for_each_safe(entry, tmp, &local_heaps_list) { struct kbase_csf_tiler_heap *heap = list_entry( entry, struct kbase_csf_tiler_heap, link); + + list_del_init(&heap->link); delete_heap(heap); } - mutex_unlock(&kctx->csf.tiler_heaps.lock); mutex_destroy(&kctx->csf.tiler_heaps.lock); kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); } -int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, - u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks, - u16 const target_in_flight, u64 *const heap_gpu_va, - u64 *const first_chunk_va) +/** + * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house + * the tiler heap buffer descriptor + * is suitable for the purpose. + * @kctx: kbase context of the tiler heap + * @reg: VA region being checked for suitability + * + * The tiler heap buffer descriptor memory does not admit page faults according + * to its design, so it must have the entirety of the backing upon allocation, + * and it has to remain alive as long as the tiler heap is alive, meaning it + * cannot be allocated from JIT/Ephemeral, or user freeable memory. + * + * Return: true on suitability, false otherwise. 
+ */ +static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + if (kbase_is_region_invalid_or_free(reg)) { + dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n"); + return false; + } + + if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || + (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); + return false; + } + + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + dev_err(kctx->kbdev->dev, "Region has invalid type!\n"); + return false; + } + + if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) || + (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) { + dev_err(kctx->kbdev->dev, "Region has invalid backing!\n"); + return false; + } + + return true; +} + +#define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap)) + +int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size, + u32 const initial_chunks, u32 const max_chunks, + u16 const target_in_flight, u64 const buf_desc_va, + u64 *const heap_gpu_va, u64 *const first_chunk_va) { int err = 0; struct kbase_csf_tiler_heap *heap = NULL; struct kbase_csf_heap_context_allocator *const ctx_alloc = &kctx->csf.tiler_heaps.ctx_alloc; + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + struct kbase_va_region *gpu_va_reg = NULL; + void *vmap_ptr = NULL; dev_dbg(kctx->kbdev->dev, - "Creating a tiler heap with %u chunks (limit: %u) of size %u\n", - initial_chunks, max_chunks, chunk_size); + "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n", + initial_chunks, max_chunks, chunk_size, buf_desc_va); if (!kbase_mem_allow_alloc(kctx)) return -EINVAL; @@ -445,8 +688,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (unlikely(!heap)) { - dev_err(kctx->kbdev->dev, - "No kernel memory for a new tiler heap\n"); + dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap"); return -ENOMEM; } @@ -454,57 +696,126 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, heap->chunk_size = chunk_size; heap->max_chunks = max_chunks; heap->target_in_flight = target_in_flight; + heap->buf_desc_checked = false; INIT_LIST_HEAD(&heap->chunks_list); + INIT_LIST_HEAD(&heap->link); + + /* Check on the buffer descriptor virtual Address */ + if (buf_desc_va) { + struct kbase_va_region *buf_desc_reg; + + kbase_gpu_vm_lock(kctx); + buf_desc_reg = + kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va); + + if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, + "Could not find a suitable VA region for the tiler heap buf desc!\n"); + err = -EINVAL; + goto buf_desc_not_suitable; + } + + /* If we don't prevent userspace from unmapping this, we may run into + * use-after-free, as we don't check for the existence of the region throughout. 
+ */ + buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; + + heap->buf_desc_va = buf_desc_va; + heap->buf_desc_reg = buf_desc_reg; + + vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, + KBASE_REG_CPU_RD, &heap->buf_desc_map, + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + kbase_gpu_vm_unlock(kctx); + + if (unlikely(!vmap_ptr)) { + dev_err(kctx->kbdev->dev, + "Could not vmap buffer descriptor into kernel memory (err %d)\n", + err); + err = -ENOMEM; + goto buf_desc_vmap_failed; + } + } heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); - if (unlikely(!heap->gpu_va)) { - dev_dbg(kctx->kbdev->dev, - "Failed to allocate a tiler heap context"); + dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n"); err = -ENOMEM; - } else { - err = create_initial_chunks(heap, initial_chunks); - if (unlikely(err)) - kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); + goto heap_context_alloc_failed; } + gpu_va_reg = ctx_alloc->region; + + kbase_gpu_vm_lock(kctx); + /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens + * on kctx termination (after all syscalls on kctx have finished), and so it is safe to + * assume that gpu_va_reg is still present. + */ + vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE, + (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map, + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + kbase_gpu_vm_unlock(kctx); + if (unlikely(!vmap_ptr)) { + dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n"); + err = -ENOMEM; + goto heap_context_vmap_failed; + } + + err = create_initial_chunks(heap, initial_chunks); if (unlikely(err)) { - kfree(heap); - } else { - struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry( - &heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); + dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n"); + goto create_chunks_failed; + } + chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link); - *heap_gpu_va = heap->gpu_va; - *first_chunk_va = chunk->gpu_va; + *heap_gpu_va = heap->gpu_va; + *first_chunk_va = chunk->gpu_va; - mutex_lock(&kctx->csf.tiler_heaps.lock); - kctx->csf.tiler_heaps.nr_of_heaps++; - heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; - list_add(&heap->link, &kctx->csf.tiler_heaps.list); + mutex_lock(&kctx->csf.tiler_heaps.lock); + kctx->csf.tiler_heaps.nr_of_heaps++; + heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; + list_add(&heap->link, &kctx->csf.tiler_heaps.list); - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( - kctx->kbdev, kctx->id, heap->heap_id, - PFN_UP(heap->chunk_size * heap->max_chunks), - PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks, - heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0); + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, 0); #if defined(CONFIG_MALI_VECTOR_DUMP) - list_for_each_entry(chunk, &heap->chunks_list, link) { - KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC( - kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va); - } -#endif - - dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va); - mutex_unlock(&kctx->csf.tiler_heaps.lock); - kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; - kctx->running_total_tiler_heap_memory += - 
heap->chunk_size * heap->chunk_count; - if (kctx->running_total_tiler_heap_memory > - kctx->peak_total_tiler_heap_memory) - kctx->peak_total_tiler_heap_memory = - kctx->running_total_tiler_heap_memory; + list_for_each_entry(chunk, &heap->chunks_list, link) { + KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id, + chunk->gpu_va); } +#endif + kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count; + kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count; + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; + + dev_dbg(kctx->kbdev->dev, + "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va, + buf_desc_va, kctx->tgid, kctx->id); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + return 0; + +create_chunks_failed: + kbase_vunmap(kctx, &heap->gpu_va_map); +heap_context_vmap_failed: + kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va); +heap_context_alloc_failed: + if (heap->buf_desc_reg) + kbase_vunmap(kctx, &heap->buf_desc_map); +buf_desc_vmap_failed: + if (heap->buf_desc_reg) { + kbase_gpu_vm_lock(kctx); + heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_gpu_vm_unlock(kctx); + } +buf_desc_not_suitable: + kfree(heap); return err; } @@ -517,16 +828,19 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, u64 heap_size = 0; mutex_lock(&kctx->csf.tiler_heaps.lock); - heap = find_tiler_heap(kctx, heap_gpu_va); if (likely(heap)) { chunk_count = heap->chunk_count; heap_size = heap->chunk_size * chunk_count; - delete_heap(heap); - } else - err = -EINVAL; - mutex_unlock(&kctx->csf.tiler_heaps.lock); + list_del_init(&heap->link); + } else { + err = -EINVAL; + } + + /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list + * at all times + */ if (likely(kctx->running_total_tiler_heap_memory >= heap_size)) kctx->running_total_tiler_heap_memory -= heap_size; else @@ -537,36 +851,27 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, else dev_warn(kctx->kbdev->dev, "Running total tiler chunk count lower than expected!"); + if (!err) + dev_dbg(kctx->kbdev->dev, + "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", + heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from + * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock + */ + if (likely(heap)) + delete_heap(heap); + return err; } -/** - * alloc_new_chunk - Allocate a new chunk for the tiler heap. - * - * @heap: Pointer to the tiler heap. - * @nr_in_flight: Number of render passes that are in-flight, must not be zero. - * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage. - * The minimum value is zero but it must be less or equal to - * the total number of render passes in flight - * @new_chunk_ptr: Where to store the GPU virtual address & size of the new - * chunk allocated for the heap. - * - * This function will allocate a new chunk for the chunked tiler heap depending - * on the settings provided by userspace when the heap was created and the - * heap's statistics (like number of render passes in-flight). - * - * Return: 0 if a new chunk was allocated otherwise an appropriate negative - * error code. 
- */ -static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap, - u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) +static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, + u32 pending_frag_count) { - int err = -ENOMEM; - lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); - if (WARN_ON(!nr_in_flight) || - WARN_ON(pending_frag_count > nr_in_flight)) + if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight)) return -EINVAL; if (nr_in_flight <= heap->target_in_flight) { @@ -574,66 +879,446 @@ static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap, /* Not exceeded the target number of render passes yet so be * generous with memory. */ - err = create_chunk(heap, false); - - if (likely(!err)) { - struct kbase_csf_tiler_heap_chunk *new_chunk = - get_last_chunk(heap); - if (!WARN_ON(!new_chunk)) { - *new_chunk_ptr = - encode_chunk_ptr(heap->chunk_size, - new_chunk->gpu_va); - return 0; - } - } + return 0; } else if (pending_frag_count > 0) { - err = -EBUSY; + return -EBUSY; } else { - err = -ENOMEM; + return -ENOMEM; } } else { /* Reached target number of render passes in flight. * Wait for some of them to finish */ - err = -EBUSY; + return -EBUSY; } - - return err; + return -ENOMEM; } int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) { struct kbase_csf_tiler_heap *heap; + struct kbase_csf_tiler_heap_chunk *chunk; int err = -EINVAL; + u64 chunk_size = 0; + u64 heap_id = 0; + + /* To avoid potential locking issues during allocation, this is handled + * in three phases: + * 1. Take the lock, find the corresponding heap, and find its chunk size + * (this is always 2 MB, but may change down the line). + * 2. Allocate memory for the chunk and its region. + * 3. If the heap still exists, link it to the end of the list. If it + * doesn't, roll back the allocation. + */ mutex_lock(&kctx->csf.tiler_heaps.lock); + heap = find_tiler_heap(kctx, gpu_heap_va); + if (likely(heap)) { + chunk_size = heap->chunk_size; + heap_id = heap->heap_id; + } else { + dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto prelink_failure; + } + err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, + "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)", + gpu_heap_va, err); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto prelink_failure; + } + mutex_unlock(&kctx->csf.tiler_heaps.lock); + /* this heap must not be used whilst we have dropped the lock */ + heap = NULL; + + chunk = alloc_new_chunk(kctx, chunk_size); + if (unlikely(!chunk)) { + dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d", + chunk_size, kctx->tgid, kctx->id); + goto prelink_failure; + } + + /* After this point, the heap that we were targeting could already have had the needed + * chunks allocated, if we were handling multiple OoM events on multiple threads, so + * we need to revalidate the need for the allocation. 
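As a rough standalone illustration of this three-phase, drop-the-lock-then-revalidate pattern (hypothetical names, plain pthreads rather than the kbase mutex API):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct demo_heap {
	bool alive;          // cleared when the heap is torn down
	size_t chunk_size;
	void *last_chunk;
};

static pthread_mutex_t demo_heaps_lock = PTHREAD_MUTEX_INITIALIZER;

static int demo_add_chunk(struct demo_heap *h)
{
	size_t chunk_size;
	void *chunk;

	// Phase 1: read what is needed for the allocation under the lock.
	pthread_mutex_lock(&demo_heaps_lock);
	if (!h->alive) {
		pthread_mutex_unlock(&demo_heaps_lock);
		return -1;
	}
	chunk_size = h->chunk_size;
	pthread_mutex_unlock(&demo_heaps_lock);

	// Phase 2: the potentially slow allocation, done with the lock dropped.
	chunk = malloc(chunk_size);
	if (!chunk)
		return -1;

	// Phase 3: retake the lock and revalidate before linking the result.
	pthread_mutex_lock(&demo_heaps_lock);
	if (!h->alive) {
		pthread_mutex_unlock(&demo_heaps_lock);
		free(chunk);    // roll back: the heap disappeared meanwhile
		return -1;
	}
	h->last_chunk = chunk;
	pthread_mutex_unlock(&demo_heaps_lock);

	return 0;
}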
+ */ + mutex_lock(&kctx->csf.tiler_heaps.lock); heap = find_tiler_heap(kctx, gpu_heap_va); - if (likely(heap)) { - err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count, - new_chunk_ptr); - if (likely(!err)) { - /* update total and peak tiler heap memory record */ - kctx->running_total_tiler_heap_nr_chunks++; - kctx->running_total_tiler_heap_memory += heap->chunk_size; - - if (kctx->running_total_tiler_heap_memory > - kctx->peak_total_tiler_heap_memory) - kctx->peak_total_tiler_heap_memory = - kctx->running_total_tiler_heap_memory; - } - - KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( - kctx->kbdev, kctx->id, heap->heap_id, - PFN_UP(heap->chunk_size * heap->max_chunks), - PFN_UP(heap->chunk_size * heap->chunk_count), - heap->max_chunks, heap->chunk_size, heap->chunk_count, - heap->target_in_flight, nr_in_flight); + if (unlikely(!heap)) { + dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto unroll_chunk; } + if (heap_id != heap->heap_id) { + dev_err(kctx->kbdev->dev, + "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!", + gpu_heap_va, kctx->tgid, kctx->id, chunk_size); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto unroll_chunk; + } + + if (WARN_ON(chunk_size != heap->chunk_size)) { + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto unroll_chunk; + } + + err = validate_allocation_request(heap, nr_in_flight, pending_frag_count); + if (unlikely(err)) { + dev_warn( + kctx->kbdev->dev, + "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)", + gpu_heap_va, err); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto unroll_chunk; + } + + err = init_chunk(heap, chunk, false); + + /* On error, the chunk would not be linked, so we can still treat it as an unlinked + * chunk for error handling. 
+ */ + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, + "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d", + chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + goto unroll_chunk; + } + + *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); + + /* update total and peak tiler heap memory record */ + kctx->running_total_tiler_heap_nr_chunks++; + kctx->running_total_tiler_heap_memory += heap->chunk_size; + + if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory) + kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory; + + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, nr_in_flight); + mutex_unlock(&kctx->csf.tiler_heaps.lock); + return err; +unroll_chunk: + remove_unlinked_chunk(kctx, chunk); +prelink_failure: return err; } + +static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va, + u64 *hdr_val) +{ + int err; + u64 *chunk_hdr; + struct kbase_context *kctx = heap->kctx; + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + chunk = find_chunk(heap, chunk_gpu_va); + if (unlikely(!chunk)) { + dev_warn(kctx->kbdev->dev, + "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n", + heap->gpu_va, chunk_gpu_va); + return false; + } + + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), + "Cannot support CPU cached chunks without sync operations"); + chunk_hdr = chunk->map.addr; + *hdr_val = *chunk_hdr; + + dev_dbg(kctx->kbdev->dev, + "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", + chunk_gpu_va, heap->gpu_va, *hdr_val); + + err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents); + if (unlikely(err)) { + dev_warn( + kctx->kbdev->dev, + "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n", + chunk_gpu_va, heap->gpu_va, err); + + /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on + * the heap's chunk list and try a different heap. + */ + + return false; + } + /* Destroy the mapping before the physical pages which are mapped are destroyed. */ + kbase_vunmap(kctx, &chunk->map); + + err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc, + chunk->region->gpu_alloc->nents); + if (unlikely(err)) { + dev_warn( + kctx->kbdev->dev, + "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n", + chunk_gpu_va, heap->gpu_va, err); + + /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs + * anyway, so continue instead of returning early. + * + * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has + * its mapping removed, as that could lead to problems. It's safest to instead + * continue with deferred destruction of the chunk. 
+ */ + } + + dev_dbg(kctx->kbdev->dev, + "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n", + chunk_gpu_va, heap->gpu_va, *hdr_val); + + mutex_lock(&heap->kctx->jit_evict_lock); + list_move(&chunk->region->jit_node, &kctx->jit_destroy_head); + mutex_unlock(&heap->kctx->jit_evict_lock); + + list_del(&chunk->link); + heap->chunk_count--; + kfree(chunk); + + return true; +} + +static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, + struct kbase_csf_gpu_buffer_heap *desc) +{ + u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + if (first_hoarded_chunk_gpu_va) { + struct kbase_csf_tiler_heap_chunk *chunk = + find_chunk(heap, first_hoarded_chunk_gpu_va); + + if (likely(chunk)) { + dev_dbg(heap->kctx->kbdev->dev, + "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n", + heap->buf_desc_va); + + heap->buf_desc_checked = true; + return; + } + } + /* If there is no match, defer the check to next time */ + dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n", + heap->buf_desc_va); +} + +static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr) +{ + struct kbase_context *kctx = heap->kctx; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + /* Initialize the descriptor pointer value to 0 */ + *chunk_gpu_va_ptr = 0; + + /* The BufferDescriptor on heap is a hint on creation, do a sanity check at runtime */ + if (heap->buf_desc_reg && !heap->buf_desc_checked) { + struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; + + /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ + if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) + kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU); + + sanity_check_gpu_buffer_heap(heap, desc); + if (heap->buf_desc_checked) + *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK; + } + + return heap->buf_desc_checked; +} + +static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap) +{ + u32 freed = 0; + u64 chunk_gpu_va = 0; + struct kbase_context *kctx = heap->kctx; + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) { + u64 chunk_hdr_val; + u64 *hw_hdr; + + if (!chunk_gpu_va) { + struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr; + + /* BufferDescriptor is supplied by userspace, so could be CPU-cached */ + if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED) + kbase_sync_mem_regions(kctx, &heap->buf_desc_map, + KBASE_SYNC_TO_CPU); + chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK; + + if (!chunk_gpu_va) { + dev_dbg(kctx->kbdev->dev, + "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n", + heap->buf_desc_va); + goto out; + } + } + + chunk = find_chunk(heap, chunk_gpu_va); + if (unlikely(!chunk)) + goto out; + + WARN((chunk->region->flags & KBASE_REG_CPU_CACHED), + "Cannot support CPU cached chunks without sync operations"); + hw_hdr = chunk->map.addr; + + /* Move onto the next chunk relevant information */ + chunk_hdr_val = *hw_hdr; + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + + while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { + bool success = + delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val); + + if (!success) + break; + + freed++; + /* On success, chunk_hdr_val is updated, extract the next chunk address */ + 
chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + } + + /* Update the existing hardware chunk header, after reclaim deletion of chunks */ + *hw_hdr = chunk_hdr_val; + + dev_dbg(heap->kctx->kbdev->dev, + "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed, + chunk_hdr_val); + } else { + dev_dbg(kctx->kbdev->dev, + "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n", + heap->buf_desc_va); + } +out: + return freed; +} + +static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap) +{ + u32 freed_chunks = 0; + u64 freed_pages = 0; + u64 chunk_gpu_va; + u64 chunk_hdr_val; + struct kbase_context *kctx = heap->kctx; + u64 *ctx_ptr; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED, + "Cannot support CPU cached heap context without sync operations"); + + ctx_ptr = heap->gpu_va_map.addr; + + /* Extract the first chunk address from the context's free_list_head */ + chunk_hdr_val = *ctx_ptr; + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + + while (chunk_gpu_va) { + u64 hdr_val; + bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val); + + if (!success) + break; + + freed_chunks++; + chunk_hdr_val = hdr_val; + /* extract the next chunk address */ + chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK; + } + + /* Update the post-scan deletion to context header */ + *ctx_ptr = chunk_hdr_val; + + /* Try to scan the HW hoarded list of unused chunks */ + freed_chunks += delete_hoarded_chunks(heap); + freed_pages = freed_chunks * PFN_UP(heap->chunk_size); + dev_dbg(heap->kctx->kbdev->dev, + "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n", + freed_chunks, freed_pages, chunk_hdr_val); + + /* Update context tiler heaps memory usage */ + kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT; + kctx->running_total_tiler_heap_nr_chunks -= freed_chunks; + return freed_pages; +} + +u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free) +{ + u64 freed = 0; + struct kbase_csf_tiler_heap *heap; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { + freed += delete_unused_chunk_pages(heap); + + /* If freed enough, then stop here */ + if (freed >= to_free) + break; + } + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + /* The scan is surely not more than 4-G pages, but for logic flow limit it */ + if (WARN_ON(unlikely(freed > U32_MAX))) + return U32_MAX; + else + return (u32)freed; +} + +static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap) +{ + u32 chunk_cnt = 0; + u64 page_cnt = 0; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + /* Here the count is basically an informed estimate, avoiding the costly mapping/unmaping + * in the chunk list walk. The downside is that the number is a less reliable guide for + * later on scan (free) calls on this heap for what actually is freeable. 
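For intuition, a self-contained sketch of the arithmetic behind this estimate; the real HEAP_SHRINK_STOP_LIMIT is defined elsewhere in the driver, so the limit and page size below are assumptions for illustration only:

#include <stdio.h>

#define EXAMPLE_STOP_LIMIT 1u      // stand-in for HEAP_SHRINK_STOP_LIMIT
#define EXAMPLE_PAGE_SIZE 4096u    // 4 KiB pages assumed

static unsigned long estimate_reclaimable_pages(unsigned int chunk_count,
						unsigned int chunk_size)
{
	unsigned int spare_chunks =
		chunk_count > EXAMPLE_STOP_LIMIT ? chunk_count - EXAMPLE_STOP_LIMIT : 0;
	// PFN_UP() equivalent: round the chunk size up to whole pages
	unsigned long pages_per_chunk =
		(chunk_size + EXAMPLE_PAGE_SIZE - 1) / EXAMPLE_PAGE_SIZE;

	return (unsigned long)spare_chunks * pages_per_chunk;
}

int main(void)
{
	// 5 chunks of 2 MiB: (5 - 1) * 512 = 2048 pages estimated as reclaimable
	printf("%lu\n", estimate_reclaimable_pages(5, 2u << 20));
	return 0;
}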
+ */ + if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) { + chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT; + page_cnt = chunk_cnt * PFN_UP(heap->chunk_size); + } + + dev_dbg(heap->kctx->kbdev->dev, + "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt, + page_cnt, heap->gpu_va); + + return page_cnt; +} + +u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx) +{ + u64 page_cnt = 0; + struct kbase_csf_tiler_heap *heap; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) + page_cnt += count_unused_heap_pages(heap); + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + /* The count is surely not more than 4-G pages, but for logic flow limit it */ + if (WARN_ON(unlikely(page_cnt > U32_MAX))) + return U32_MAX; + else + return (u32)page_cnt; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h index f4b80da68fe5..1b5cb560894f 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.h @@ -23,7 +23,6 @@ #define _KBASE_CSF_TILER_HEAP_H_ #include - /** * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a * GPU address space @@ -58,6 +57,12 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); * @target_in_flight: Number of render-passes that the driver should attempt to * keep in flight for which allocation of new chunks is * allowed. Must not be zero. + * @buf_desc_va: Buffer descriptor GPU virtual address. This is a hint for + * indicating that the caller is intending to perform tiler heap + * chunks reclaim for those that are hoarded with hardware while + * the associated shader activites are suspended and the CSGs are + * off slots. If the referred reclaiming is not desired, can + * set it to 0. * @gpu_heap_va: Where to store the GPU virtual address of the context that was * set up for the tiler heap. * @first_chunk_va: Where to store the GPU virtual address of the first chunk @@ -66,10 +71,9 @@ void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); * * Return: 0 if successful or a negative error code on failure. */ -int kbase_csf_tiler_heap_init(struct kbase_context *kctx, - u32 chunk_size, u32 initial_chunks, u32 max_chunks, - u16 target_in_flight, u64 *gpu_heap_va, - u64 *first_chunk_va); +int kbase_csf_tiler_heap_init(struct kbase_context *kctx, u32 chunk_size, u32 initial_chunks, + u32 max_chunks, u16 target_in_flight, u64 const buf_desc_va, + u64 *gpu_heap_va, u64 *first_chunk_va); /** * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap. @@ -112,4 +116,27 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); */ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); + +/** + * kbase_csf_tiler_heap_scan_kctx_unused_pages - Performs the tiler heap shrinker calim's scan + * functionality. + * + * @kctx: Pointer to the kbase context for which the tiler heap recalim is to be + * operated with. + * @to_free: Number of pages suggested for the reclaim scan (free) method to reach. + * + * Return: the actual number of pages the scan method has freed from the call. 
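A hedged usage sketch pairing the count and scan helpers declared here; this is a kernel-context fragment rather than a standalone program, and the 256-page cap is an arbitrary example value:

u32 avail = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx);

if (avail) {
	u32 freed = kbase_csf_tiler_heap_scan_kctx_unused_pages(kctx, MIN(avail, 256u));

	dev_dbg(kctx->kbdev->dev, "Reclaimed %u of an estimated %u unused tiler heap pages",
		freed, avail);
}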
+ */ +u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free); + +/** + * kbase_csf_tiler_heap_count_kctx_unused_pages - Performs the tiler heap shrinker calim's count + * functionality. + * + * @kctx: Pointer to the kbase context for which the tiler heap recalim is to be + * operated with. + * + * Return: a number of pages that could likely be freed on the subsequent scan method call. + */ +u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx); #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h index 2c006d9dc9e4..96f2b03d2d31 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_def.h @@ -56,12 +56,20 @@ ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) +/* The size of the area needed to be vmapped prior to handing the tiler heap + * over to the tiler, so that the shrinker could be invoked. + */ +#define NEXT_CHUNK_ADDR_SIZE (sizeof(u64)) + /** * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel * * @link: Link to this chunk in a list of chunks belonging to a * @kbase_csf_tiler_heap. * @region: Pointer to the GPU memory region allocated for the chunk. + * @map: Kernel VA mapping so that we would not need to use vmap in the + * shrinker callback, which can allocate. This maps only the header + * of the chunk, so it could be traversed. * @gpu_va: GPU virtual address of the start of the memory region. * This points to the header of the chunk and not to the low address * of free memory within it. @@ -75,9 +83,12 @@ struct kbase_csf_tiler_heap_chunk { struct list_head link; struct kbase_va_region *region; + struct kbase_vmap_struct map; u64 gpu_va; }; +#define HEAP_BUF_DESCRIPTOR_CHECKED (1 << 0) + /** * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel * @@ -85,6 +96,20 @@ struct kbase_csf_tiler_heap_chunk { * associated. * @link: Link to this heap in a list of tiler heaps belonging to * the @kbase_csf_tiler_heap_context. + * @chunks_list: Linked list of allocated chunks. + * @gpu_va: The GPU virtual address of the heap context structure that + * was allocated for the firmware. This is also used to + * uniquely identify the heap. + * @heap_id: Unique id representing the heap, assigned during heap + * initialization. + * @buf_desc_va: Buffer descriptor GPU VA. Can be 0 for backward compatible + * to earlier version base interfaces. + * @buf_desc_reg: Pointer to the VA region that covers the provided buffer + * descriptor memory object pointed to by buf_desc_va. + * @gpu_va_map: Kernel VA mapping of the GPU VA region. + * @buf_desc_map: Kernel VA mapping of the buffer descriptor, read from + * during the tiler heap shrinker. Sync operations may need + * to be done before each read. * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. * @chunk_count: The number of chunks currently allocated. Must not be * zero or greater than @max_chunks. @@ -93,22 +118,23 @@ struct kbase_csf_tiler_heap_chunk { * @target_in_flight: Number of render-passes that the driver should attempt * to keep in flight for which allocation of new chunks is * allowed. Must not be zero. - * @gpu_va: The GPU virtual address of the heap context structure that - * was allocated for the firmware. This is also used to - * uniquely identify the heap. 
- * @heap_id: Unique id representing the heap, assigned during heap - * initialization. - * @chunks_list: Linked list of allocated chunks. + * @buf_desc_checked: Indicates if runtime check on buffer descriptor has been done. */ struct kbase_csf_tiler_heap { struct kbase_context *kctx; struct list_head link; + struct list_head chunks_list; + u64 gpu_va; + u64 heap_id; + u64 buf_desc_va; + struct kbase_va_region *buf_desc_reg; + struct kbase_vmap_struct buf_desc_map; + struct kbase_vmap_struct gpu_va_map; u32 chunk_size; u32 chunk_count; u32 max_chunks; u16 target_in_flight; - u64 gpu_va; - u64 heap_id; - struct list_head chunks_list; + bool buf_desc_checked; }; + #endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c new file mode 100644 index 000000000000..bcab31d27945 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "mali_kbase_csf.h" +#include "mali_kbase_csf_tiler_heap.h" +#include "mali_kbase_csf_tiler_heap_reclaim.h" + +/* Tiler heap shrinker seek value, needs to be higher than jit and memory pools */ +#define HEAP_SHRINKER_SEEKS (DEFAULT_SEEKS + 2) + +/* Tiler heap shrinker batch value */ +#define HEAP_SHRINKER_BATCH (512) + +/* Tiler heap reclaim scan (free) method size for limiting a scan run length */ +#define HEAP_RECLAIM_SCAN_BATCH_SIZE (HEAP_SHRINKER_BATCH << 7) + +static u8 get_kctx_highest_csg_priority(struct kbase_context *kctx) +{ + u8 prio; + + for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_LOW; + prio++) + if (!list_empty(&kctx->csf.sched.runnable_groups[prio])) + break; + + if (prio != KBASE_QUEUE_GROUP_PRIORITY_REALTIME && kctx->csf.sched.num_idle_wait_grps) { + struct kbase_queue_group *group; + + list_for_each_entry(group, &kctx->csf.sched.idle_wait_groups, link) { + if (group->priority < prio) + prio = group->priority; + } + } + + return prio; +} + +static void detach_ctx_from_heap_reclaim_mgr(struct kbase_context *kctx) +{ + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; + + lockdep_assert_held(&scheduler->lock); + + if (!list_empty(&info->mgr_link)) { + u32 remaining = (info->nr_est_unused_pages > info->nr_freed_pages) ? 
+ info->nr_est_unused_pages - info->nr_freed_pages : + 0; + + list_del_init(&info->mgr_link); + if (remaining) + WARN_ON(atomic_sub_return(remaining, &scheduler->reclaim_mgr.unused_pages) < + 0); + + dev_dbg(kctx->kbdev->dev, + "Reclaim_mgr_detach: ctx_%d_%d, est_pages=0%u, freed_pages=%u", kctx->tgid, + kctx->id, info->nr_est_unused_pages, info->nr_freed_pages); + } +} + +static void attach_ctx_to_heap_reclaim_mgr(struct kbase_context *kctx) +{ + struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + u8 const prio = get_kctx_highest_csg_priority(kctx); + + lockdep_assert_held(&scheduler->lock); + + if (WARN_ON(!list_empty(&info->mgr_link))) + list_del_init(&info->mgr_link); + + /* Count the pages that could be freed */ + info->nr_est_unused_pages = kbase_csf_tiler_heap_count_kctx_unused_pages(kctx); + /* Initialize the scan operation tracking pages */ + info->nr_freed_pages = 0; + + list_add_tail(&info->mgr_link, &scheduler->reclaim_mgr.ctx_lists[prio]); + /* Accumulate the estimated pages to the manager total field */ + atomic_add(info->nr_est_unused_pages, &scheduler->reclaim_mgr.unused_pages); + + dev_dbg(kctx->kbdev->dev, "Reclaim_mgr_attach: ctx_%d_%d, est_count_pages=%u", kctx->tgid, + kctx->id, info->nr_est_unused_pages); +} + +void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info; + + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + + info->on_slot_grps++; + /* If the kctx has an on-slot change from 0 => 1, detach it from reclaim_mgr */ + if (info->on_slot_grps == 1) { + dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d on-slot, remove kctx from reclaim manager", + group->kctx->tgid, group->kctx->id, group->handle); + + detach_ctx_from_heap_reclaim_mgr(kctx); + } +} + +void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_csf_ctx_heap_reclaim_info *const info = &kctx->csf.sched.heap_info; + struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler; + const u32 num_groups = kctx->kbdev->csf.global_iface.group_num; + u32 on_slot_grps = 0; + u32 i; + + lockdep_assert_held(&scheduler->lock); + + /* Group eviction from the scheduler is a bit more complex, but fairly less + * frequent in operations. Taking the opportunity to actually count the + * on-slot CSGs from the given kctx, for robustness and clearer code logic. 
+ */
+	for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
+		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
+		struct kbase_queue_group *grp = csg_slot->resident_group;
+
+		if (unlikely(!grp))
+			continue;
+
+		if (grp->kctx == kctx)
+			on_slot_grps++;
+	}
+
+	info->on_slot_grps = on_slot_grps;
+
+	/* If the kctx has no other CSGs on-slot, handle the heap reclaim related actions */
+	if (!info->on_slot_grps) {
+		if (kctx->csf.sched.num_runnable_grps || kctx->csf.sched.num_idle_wait_grps) {
+			/* The kctx has other operational CSGs, attach it if not yet done */
+			if (list_empty(&info->mgr_link)) {
+				dev_dbg(kctx->kbdev->dev,
+					"CSG_%d_%d_%d evict, add kctx to reclaim manager",
+					group->kctx->tgid, group->kctx->id, group->handle);
+
+				attach_ctx_to_heap_reclaim_mgr(kctx);
+			}
+		} else {
+			/* The kctx is a zombie after the group eviction, drop it out */
+			dev_dbg(kctx->kbdev->dev,
+				"CSG_%d_%d_%d evict leading to zombie kctx, detach from reclaim manager",
+				group->kctx->tgid, group->kctx->id, group->handle);
+
+			detach_ctx_from_heap_reclaim_mgr(kctx);
+		}
+	}
+}
+
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group)
+{
+	struct kbase_context *kctx = group->kctx;
+	struct kbase_csf_ctx_heap_reclaim_info *info = &kctx->csf.sched.heap_info;
+
+	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
+
+	if (!WARN_ON(info->on_slot_grps == 0))
+		info->on_slot_grps--;
+	/* If the kctx has no CSGs on-slot, attach it to scheduler's reclaim manager */
+	if (info->on_slot_grps == 0) {
+		dev_dbg(kctx->kbdev->dev, "CSG_%d_%d_%d off-slot, add kctx to reclaim manager",
+			group->kctx->tgid, group->kctx->id, group->handle);
+
+		attach_ctx_to_heap_reclaim_mgr(kctx);
+	}
+}
+
+static unsigned long reclaim_unused_heap_pages(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+	struct kbase_csf_sched_heap_reclaim_mgr *const mgr = &scheduler->reclaim_mgr;
+	unsigned long total_freed_pages = 0;
+	int prio;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	for (prio = KBASE_QUEUE_GROUP_PRIORITY_LOW;
+	     total_freed_pages < HEAP_RECLAIM_SCAN_BATCH_SIZE &&
+	     prio >= KBASE_QUEUE_GROUP_PRIORITY_REALTIME;
+	     prio--) {
+		struct kbase_csf_ctx_heap_reclaim_info *info, *tmp;
+		u32 cnt_ctxs = 0;
+
+		list_for_each_entry_safe(info, tmp, &scheduler->reclaim_mgr.ctx_lists[prio],
+					 mgr_link) {
+			struct kbase_context *kctx =
+				container_of(info, struct kbase_context, csf.sched.heap_info);
+			u32 freed_pages = kbase_csf_tiler_heap_scan_kctx_unused_pages(
+				kctx, info->nr_est_unused_pages);
+
+			if (freed_pages) {
+				/* Remove the freed pages from the manager retained estimate. The
+				 * accumulated removals from the kctx should not exceed the kctx
+				 * initially notified contribution amount:
+				 * info->nr_est_unused_pages.
+				 */
+				u32 rm_cnt = MIN(info->nr_est_unused_pages - info->nr_freed_pages,
+						 freed_pages);
+
+				WARN_ON(atomic_sub_return(rm_cnt, &mgr->unused_pages) < 0);
+
+				/* tracking the freed pages, before a potential detach call */
+				info->nr_freed_pages += freed_pages;
+				total_freed_pages += freed_pages;
+
+				schedule_work(&kctx->jit_work);
+			}
+
+			/* If the kctx can't offer any more, drop it from the reclaim manager,
+			 * otherwise leave it in. If the kctx changes its state (i.e.
+			 * some CSGs becoming on-slot), the scheduler will pull it out.
+ */ + if (info->nr_freed_pages >= info->nr_est_unused_pages || freed_pages == 0) + detach_ctx_from_heap_reclaim_mgr(kctx); + + cnt_ctxs++; + + /* Enough has been freed, break to avoid holding the lock too long */ + if (total_freed_pages >= HEAP_RECLAIM_SCAN_BATCH_SIZE) + break; + } + + dev_dbg(kbdev->dev, "Reclaim free heap pages: %lu (cnt_ctxs: %u, prio: %d)", + total_freed_pages, cnt_ctxs, prio); + } + + dev_dbg(kbdev->dev, "Reclaim free total heap pages: %lu (across all CSG priority)", + total_freed_pages); + + return total_freed_pages; +} + +static unsigned long kbase_csf_tiler_heap_reclaim_count_free_pages(struct kbase_device *kbdev, + struct shrink_control *sc) +{ + struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; + unsigned long page_cnt = atomic_read(&mgr->unused_pages); + + dev_dbg(kbdev->dev, "Reclaim count unused pages (estimate): %lu", page_cnt); + + return page_cnt; +} + +static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_device *kbdev, + struct shrink_control *sc) +{ + struct kbase_csf_sched_heap_reclaim_mgr *mgr = &kbdev->csf.scheduler.reclaim_mgr; + unsigned long freed = 0; + unsigned long avail = 0; + + /* If Scheduler is busy in action, return 0 */ + if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + /* Wait for roughly 2-ms */ + wait_event_timeout(kbdev->csf.event_wait, (scheduler->state != SCHED_BUSY), + msecs_to_jiffies(2)); + if (!mutex_trylock(&kbdev->csf.scheduler.lock)) { + dev_dbg(kbdev->dev, "Tiler heap reclaim scan see device busy (freed: 0)"); + return 0; + } + } + + avail = atomic_read(&mgr->unused_pages); + if (avail) + freed = reclaim_unused_heap_pages(kbdev); + + mutex_unlock(&kbdev->csf.scheduler.lock); + +#if (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) + if (freed > sc->nr_to_scan) + sc->nr_scanned = freed; +#endif /* (KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE) */ + + dev_info(kbdev->dev, "Tiler heap reclaim scan freed pages: %lu (unused: %lu)", freed, + avail); + + /* On estimate suggesting available, yet actual free failed, return STOP */ + if (avail && !freed) + return SHRINK_STOP; + else + return freed; +} + +static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_device *kbdev = + container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + + return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc); +} + +static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_device *kbdev = + container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim); + + return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc); +} + +void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx) +{ + /* Per-kctx heap_info object initialization */ + memset(&kctx->csf.sched.heap_info, 0, sizeof(struct kbase_csf_ctx_heap_reclaim_info)); + INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link); +} + +void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim; + u8 prio; + + for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT; + prio++) + INIT_LIST_HEAD(&scheduler->reclaim_mgr.ctx_lists[prio]); + + atomic_set(&scheduler->reclaim_mgr.unused_pages, 0); + + 
reclaim->count_objects = kbase_csf_tiler_heap_reclaim_count_objects;
+	reclaim->scan_objects = kbase_csf_tiler_heap_reclaim_scan_objects;
+	reclaim->seeks = HEAP_SHRINKER_SEEKS;
+	reclaim->batch = HEAP_SHRINKER_BATCH;
+
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+	register_shrinker(reclaim);
+#endif
+}
+
+void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	u8 prio;
+
+#if !defined(CONFIG_MALI_VECTOR_DUMP)
+	unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim);
+#endif
+
+	for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
+	     prio++)
+		WARN_ON(!list_empty(&scheduler->reclaim_mgr.ctx_lists[prio]));
+
+	WARN_ON(atomic_read(&scheduler->reclaim_mgr.unused_pages));
+}
diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h
new file mode 100644
index 000000000000..b6e580e48df6
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_TILER_HEAP_RECLAIM_H_
+#define _KBASE_CSF_TILER_HEAP_RECLAIM_H_
+
+#include
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_active - Notifier function for the scheduler
+ * to use when a group is put on-slot.
+ *
+ * @group: Pointer to the group object that has been placed on-slot for running.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict - Notifier function for the scheduler
+ * to use when a group is evicted out of the scheduler's scope, i.e. no run of
+ * the group is possible afterwards.
+ *
+ * @group: Pointer to the group object that has been evicted.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend - Notifier function for the scheduler
+ * to use when a group is suspended from running, but could resume in future.
+ *
+ * @group: Pointer to the group object that is in suspended state.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_ctx_init - Initializer for the per-context data fields used
+ * with the tiler heap reclaim manager.
+ *
+ * @kctx: Pointer to the kbase_context.
+ *
+ */
+void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_tiler_heap_reclaim_mgr_init - Initializer for the tiler heap reclaim manager.
+ *
+ * @kbdev: Pointer to the device.
+ * + */ +void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev); + +/** + * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manger. + * + * @kbdev: Pointer to the device. + * + */ +void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev); + +#endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c index 6ae1029ab123..71ec91e3de03 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tl_reader.c @@ -88,13 +88,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, kbase_csf_tl_debugfs_poll_interval_read, kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); - void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) { debugfs_create_file("csf_tl_poll_interval_in_ms", 0644, kbdev->debugfs_instr_directory, kbdev, &kbase_csf_tl_poll_interval_fops); - } #endif @@ -166,11 +164,10 @@ static int kbase_ts_converter_init( * * Return: The CPU timestamp. */ -static void __maybe_unused -kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 *gpu_ts) +static u64 __maybe_unused +kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts) { - u64 old_gpu_ts = *gpu_ts; - *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, self->divisor) + + return div64_u64(gpu_ts * self->multiplier, self->divisor) + self->offset; } @@ -250,7 +247,6 @@ static void tl_reader_reset(struct kbase_csf_tl_reader *self) self->tl_header.btc = 0; } - int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) { int ret = 0; @@ -275,7 +271,6 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) return -EBUSY; } - /* Copying the whole buffer in a single shot. We assume * that the buffer will not contain partially written messages. */ @@ -326,8 +321,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) { struct kbase_csffw_tl_message *msg = (struct kbase_csffw_tl_message *) csffw_data_it; - kbase_ts_converter_convert(&self->ts_converter, - &msg->timestamp); + msg->timestamp = kbase_ts_converter_convert(&self->ts_converter, + msg->timestamp); } /* Copy the message out to the tl_stream. 
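For intuition, the GPU-to-CPU timestamp conversion above is a linear rescale plus an offset; a standalone sketch with made-up calibration values:

#include <stdint.h>
#include <stdio.h>

// Same shape as the div64_u64() expression used by kbase_ts_converter_convert()
static uint64_t demo_convert(uint64_t gpu_ts, uint64_t multiplier, uint64_t divisor,
			     uint64_t offset)
{
	return (gpu_ts * multiplier) / divisor + offset;
}

int main(void)
{
	// e.g. a 50 MHz GPU counter mapped onto a nanosecond CPU timeline: x20, no offset
	printf("%llu\n", (unsigned long long)demo_convert(1000000, 20, 1, 0));
	return 0;
}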
*/ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c index 23202c87a404..46872f937dbf 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.c @@ -119,7 +119,7 @@ static const struct firmware_trace_buffer_data trace_buffer_data[] = { #if MALI_UNIT_TEST { "fwutf", { 0 }, 1 }, #endif - { FW_TRACE_BUF_NAME, { 0 }, 4 }, + { FIRMWARE_LOG_BUF_NAME, { 0 }, 4 }, { "benchmark", { 0 }, 2 }, { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, }; @@ -506,10 +506,16 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); -#if IS_ENABLED(CONFIG_DEBUG_FS) +static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) +{ + unsigned int i; + + for (i = 0; i < tb->trace_enable_entry_count; i++) + kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, i, (mask >> i) & 1); +} #define U32_BITS 32 -static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) +u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb) { u64 active_mask = tb->trace_enable_init_mask[0]; @@ -519,18 +525,7 @@ static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) return active_mask; } -static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, - u64 mask) -{ - unsigned int i; - - for (i = 0; i < tb->trace_enable_entry_count; i++) - kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( - tb, i, (mask >> i) & 1); -} - -static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, - u64 mask) +int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { struct kbase_device *kbdev = tb->kbdev; unsigned long flags; @@ -558,123 +553,3 @@ static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, return err; } - -static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - /* The enabled traces limited to u64 here, regarded practical */ - *val = get_trace_buffer_active_mask64(tb); - return 0; -} - -static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - u64 new_mask; - unsigned int enable_bits_count; - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - - /* Ignore unsupported types */ - enable_bits_count = - kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); - if (enable_bits_count > 64) { - dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", - enable_bits_count); - enable_bits_count = 64; - } - new_mask = val & ((1 << enable_bits_count) - 1); - - if (new_mask != get_trace_buffer_active_mask64(tb)) - return set_trace_buffer_active_mask64(tb, new_mask); - else - return 0; -} - -static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in, - struct file *file) -{ - struct kbase_device *kbdev = in->i_private; - - file->private_data = kbdev; - dev_dbg(kbdev->dev, "Opened firmware trace 
buffer dump debugfs file"); - - return 0; -} - -static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file, - char __user *buf, size_t size, loff_t *ppos) -{ - struct kbase_device *kbdev = file->private_data; - u8 *pbyte; - unsigned int n_read; - unsigned long not_copied; - /* Limit the kernel buffer to no more than two pages */ - size_t mem = MIN(size, 2 * PAGE_SIZE); - unsigned long flags; - - struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); - - if (tb == NULL) { - dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); - return -EIO; - } - - pbyte = kmalloc(mem, GFP_KERNEL); - if (pbyte == NULL) { - dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump"); - return -ENOMEM; - } - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Do the copy, if we have obtained some trace data */ - not_copied = (n_read) ? copy_to_user(buf, pbyte, n_read) : 0; - kfree(pbyte); - - if (!not_copied) { - *ppos += n_read; - return n_read; - } - - dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); - return -EFAULT; -} - -DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops, - kbase_csf_firmware_trace_enable_mask_read, - kbase_csf_firmware_trace_enable_mask_write, "%llx\n"); - -static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = { - .owner = THIS_MODULE, - .open = kbasep_csf_firmware_trace_debugfs_open, - .read = kbasep_csf_firmware_trace_debugfs_read, - .llseek = no_llseek, -}; - -void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("fw_trace_enable_mask", 0644, - kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_trace_enable_mask_fops); - - debugfs_create_file("fw_traces", 0444, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_firmware_trace_debugfs_fops); -} -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h index a28d0f057700..0389d093a904 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_trace_buffer.h @@ -25,7 +25,7 @@ #include #define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -#define FW_TRACE_BUF_NAME "fwlog" +#define FIRMWARE_LOG_BUF_NAME "fwlog" /* Forward declarations */ struct firmware_trace_buffer; @@ -165,14 +165,23 @@ bool kbase_csf_firmware_trace_buffer_is_empty( unsigned int kbase_csf_firmware_trace_buffer_read_data( struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); -#if IS_ENABLED(CONFIG_DEBUG_FS) /** - * kbase_csf_firmware_trace_buffer_debugfs_init() - Add debugfs entries for - * setting enable mask and dumping the binary firmware trace buffer + * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * - * @kbdev: Pointer to the device + * @tb: Trace buffer handle + * + * Return: Trace buffer active mask. */ -void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev); -#endif /* CONFIG_DEBUG_FS */ +u64 kbase_csf_firmware_trace_buffer_get_active_mask64(struct firmware_trace_buffer *tb); + +/** + * kbase_csf_firmware_trace_buffer_set_active_mask64 - Set trace buffer active mask + * + * @tb: Trace buffer handle + * @mask: New active mask + * + * Return: 0 if successful, negative error code on failure. 
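A brief, hedged usage sketch for the two newly exported helpers (kernel-context fragment; OR-ing in bit 0 is just an example):

struct firmware_trace_buffer *tb =
	kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);

if (tb) {
	u64 mask = kbase_csf_firmware_trace_buffer_get_active_mask64(tb);

	if (kbase_csf_firmware_trace_buffer_set_active_mask64(tb, mask | 0x1))
		dev_warn(kbdev->dev, "Failed to update the firmware trace enable mask");
}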
+ */ +int kbase_csf_firmware_trace_buffer_set_active_mask64(struct firmware_trace_buffer *tb, u64 mask); #endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c new file mode 100644 index 000000000000..185779c16815 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** + * kbasep_fault_occurred - Check if fault occurred. + * + * @kbdev: Device pointer + * + * Return: true if a fault occurred. + */ +static bool kbasep_fault_occurred(struct kbase_device *kbdev) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + ret = (kbdev->csf.dof.error_code != DF_NO_ERROR); + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + + return ret; +} + +void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) +{ + if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) { + dev_dbg(kbdev->dev, "No userspace client for dumping exists"); + return; + } + + wait_event(kbdev->csf.dof.dump_wait_wq, kbase_debug_csf_fault_dump_complete(kbdev)); +} +KBASE_EXPORT_TEST_API(kbase_debug_csf_fault_wait_completion); + +/** + * kbase_debug_csf_fault_wakeup - Wake up a waiting user space client. + * + * @kbdev: Kbase device + */ +static void kbase_debug_csf_fault_wakeup(struct kbase_device *kbdev) +{ + wake_up_interruptible(&kbdev->csf.dof.fault_wait_wq); +} + +bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, + struct kbase_context *kctx, enum dumpfault_error_type error) +{ + unsigned long flags; + + if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) + return false; + + if (WARN_ON(error == DF_NO_ERROR)) + return false; + + if (kctx && kbase_ctx_flag(kctx, KCTX_DYING)) { + dev_info(kbdev->dev, "kctx %d_%d is dying when error %d is reported", + kctx->tgid, kctx->id, error); + kctx = NULL; + } + + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + + /* Only one fault at a time can be processed */ + if (kbdev->csf.dof.error_code) { + dev_info(kbdev->dev, "skip this fault as there's a pending fault"); + goto unlock; + } + + kbdev->csf.dof.kctx_tgid = kctx ? kctx->tgid : 0; + kbdev->csf.dof.kctx_id = kctx ? 
kctx->id : 0; + kbdev->csf.dof.error_code = error; + kbase_debug_csf_fault_wakeup(kbdev); + +unlock: + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + return true; +} + +static ssize_t debug_csf_fault_read(struct file *file, char __user *buffer, size_t size, + loff_t *f_pos) +{ +#define BUF_SIZE 64 + struct kbase_device *kbdev; + unsigned long flags; + int count; + char buf[BUF_SIZE]; + u32 tgid, ctx_id; + enum dumpfault_error_type error_code; + + if (unlikely(!file)) { + pr_warn("%s: file is NULL", __func__); + return -EINVAL; + } + + kbdev = file->private_data; + if (unlikely(!buffer)) { + dev_warn(kbdev->dev, "%s: buffer is NULL", __func__); + return -EINVAL; + } + + if (unlikely(*f_pos < 0)) { + dev_warn(kbdev->dev, "%s: f_pos is negative", __func__); + return -EINVAL; + } + + if (size < sizeof(buf)) { + dev_warn(kbdev->dev, "%s: buffer is too small", __func__); + return -EINVAL; + } + + if (wait_event_interruptible(kbdev->csf.dof.fault_wait_wq, kbasep_fault_occurred(kbdev))) + return -ERESTARTSYS; + + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + tgid = kbdev->csf.dof.kctx_tgid; + ctx_id = kbdev->csf.dof.kctx_id; + error_code = kbdev->csf.dof.error_code; + BUILD_BUG_ON(sizeof(buf) < (sizeof(tgid) + sizeof(ctx_id) + sizeof(error_code))); + count = scnprintf(buf, sizeof(buf), "%u_%u_%u\n", tgid, ctx_id, error_code); + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + + dev_info(kbdev->dev, "debug csf fault info read"); + return simple_read_from_buffer(buffer, size, f_pos, buf, count); +} + +static int debug_csf_fault_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev; + + if (unlikely(!in)) { + pr_warn("%s: inode is NULL", __func__); + return -EINVAL; + } + + kbdev = in->i_private; + if (unlikely(!file)) { + dev_warn(kbdev->dev, "%s: file is NULL", __func__); + return -EINVAL; + } + + if (atomic_cmpxchg(&kbdev->csf.dof.enabled, 0, 1) == 1) { + dev_warn(kbdev->dev, "Only one client is allowed for dump on fault"); + return -EBUSY; + } + + dev_info(kbdev->dev, "debug csf fault file open"); + + return simple_open(in, file); +} + +static ssize_t debug_csf_fault_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + struct kbase_device *kbdev; + unsigned long flags; + + if (unlikely(!file)) { + pr_warn("%s: file is NULL", __func__); + return -EINVAL; + } + + kbdev = file->private_data; + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + kbdev->csf.dof.error_code = DF_NO_ERROR; + kbdev->csf.dof.kctx_tgid = 0; + kbdev->csf.dof.kctx_id = 0; + dev_info(kbdev->dev, "debug csf fault dump complete"); + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + + /* User space finished the dump. + * Wake up blocked kernel threads to proceed. + */ + wake_up(&kbdev->csf.dof.dump_wait_wq); + + return count; +} + +static int debug_csf_fault_release(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev; + unsigned long flags; + + if (unlikely(!in)) { + pr_warn("%s: inode is NULL", __func__); + return -EINVAL; + } + + kbdev = in->i_private; + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + kbdev->csf.dof.kctx_tgid = 0; + kbdev->csf.dof.kctx_id = 0; + kbdev->csf.dof.error_code = DF_NO_ERROR; + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + + atomic_set(&kbdev->csf.dof.enabled, 0); + dev_info(kbdev->dev, "debug csf fault file close"); + + /* User space closed the debugfs file. + * Wake up blocked kernel threads to resume. 
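Taken together, the open/read/write/release handlers in this new file define a single-client handshake: read() blocks until a fault is recorded and returns a "tgid_ctxid_errorcode" line, and a later write() of any data acknowledges that the dump has been collected, clearing the fault and waking blocked kernel threads. A rough user-space sketch of that flow follows; the debugfs path is an assumption and is not taken from this patch.

/* Illustrative user-space client for the csf_fault handshake described above.
 * The debugfs path below is assumed; the read buffer must be at least 64
 * bytes because debug_csf_fault_read() rejects shorter reads with -EINVAL.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char info[128];
	ssize_t n;
	int fd = open("/sys/kernel/debug/mali0/csf_fault", O_RDWR);

	if (fd < 0)
		return 1;

	n = read(fd, info, sizeof(info) - 1);	/* blocks until a fault is reported */
	if (n > 0) {
		info[n] = '\0';
		printf("fault: %s", info);	/* "tgid_ctxid_errorcode\n" */

		/* ...collect whatever dump state is needed here... */

		write(fd, "done", 4);	/* acknowledge: clears the fault */
	}

	close(fd);
	return 0;
}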
+ */ + wake_up(&kbdev->csf.dof.dump_wait_wq); + + return 0; +} + +static const struct file_operations kbasep_debug_csf_fault_fops = { + .owner = THIS_MODULE, + .open = debug_csf_fault_open, + .read = debug_csf_fault_read, + .write = debug_csf_fault_write, + .llseek = default_llseek, + .release = debug_csf_fault_release, +}; + +void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev) +{ + const char *fname = "csf_fault"; + + if (unlikely(!kbdev)) { + pr_warn("%s: kbdev is NULL", __func__); + return; + } + + debugfs_create_file(fname, 0600, kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_csf_fault_fops); +} + +int kbase_debug_csf_fault_init(struct kbase_device *kbdev) +{ + if (unlikely(!kbdev)) { + pr_warn("%s: kbdev is NULL", __func__); + return -EINVAL; + } + + init_waitqueue_head(&(kbdev->csf.dof.fault_wait_wq)); + init_waitqueue_head(&(kbdev->csf.dof.dump_wait_wq)); + spin_lock_init(&kbdev->csf.dof.lock); + kbdev->csf.dof.kctx_tgid = 0; + kbdev->csf.dof.kctx_id = 0; + kbdev->csf.dof.error_code = DF_NO_ERROR; + atomic_set(&kbdev->csf.dof.enabled, 0); + + return 0; +} + +void kbase_debug_csf_fault_term(struct kbase_device *kbdev) +{ +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h new file mode 100644 index 000000000000..6e9b1a9d51de --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_debug_csf_fault.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CSF_FAULT_H +#define _KBASE_DEBUG_CSF_FAULT_H + +#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * kbase_debug_csf_fault_debugfs_init - Initialize CSF fault debugfs + * @kbdev: Device pointer + */ +void kbase_debug_csf_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_debug_csf_fault_init - Create the fault event wait queue per device + * and initialize the required resources. + * @kbdev: Device pointer + * + * Return: Zero on success or a negative error code. + */ +int kbase_debug_csf_fault_init(struct kbase_device *kbdev); + +/** + * kbase_debug_csf_fault_term - Clean up resources created by + * @kbase_debug_csf_fault_init. + * @kbdev: Device pointer + */ +void kbase_debug_csf_fault_term(struct kbase_device *kbdev); + +/** + * kbase_debug_csf_fault_wait_completion - Wait for the client to complete. + * + * @kbdev: Device Pointer + * + * Wait for the user space client to finish reading the fault information. + * This function must be called in thread context. + */ +void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev); + +/** + * kbase_debug_csf_fault_notify - Notify client of a fault. 
+ * + * @kbdev: Device pointer + * @kctx: Faulty context (can be NULL) + * @error: Error code. + * + * Store fault information and wake up the user space client. + * + * Return: true if a dump on fault was initiated or is already in progress and + * so the caller can opt to wait for the dumping to complete. + */ +bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, + struct kbase_context *kctx, enum dumpfault_error_type error); + +/** + * kbase_debug_csf_fault_dump_enabled - Check if dump on fault is enabled. + * + * @kbdev: Device pointer + * + * Return: true if debugfs file is opened so dump on fault is enabled. + */ +static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) +{ + return atomic_read(&kbdev->csf.dof.enabled); +} + +/** + * kbase_debug_csf_fault_dump_complete - Check if dump on fault is completed. + * + * @kbdev: Device pointer + * + * Return: true if dump on fault completes or file is closed. + */ +static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) +{ + unsigned long flags; + bool ret; + + if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev))) + return true; + + spin_lock_irqsave(&kbdev->csf.dof.lock, flags); + ret = (kbdev->csf.dof.error_code == DF_NO_ERROR); + spin_unlock_irqrestore(&kbdev->csf.dof.lock, flags); + + return ret; +} +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_debug_csf_fault_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_debug_csf_fault_term(struct kbase_device *kbdev) +{ +} + +static inline void kbase_debug_csf_fault_wait_completion(struct kbase_device *kbdev) +{ +} + +static inline bool kbase_debug_csf_fault_notify(struct kbase_device *kbdev, + struct kbase_context *kctx, enum dumpfault_error_type error) +{ + return false; +} + +static inline bool kbase_debug_csf_fault_dump_enabled(struct kbase_device *kbdev) +{ + return false; +} + +static inline bool kbase_debug_csf_fault_dump_complete(struct kbase_device *kbdev) +{ + return true; +} +#endif /* CONFIG_DEBUG_FS */ + +#endif /*_KBASE_DEBUG_CSF_FAULT_H*/ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h index 9e4da9f11787..41b2b00f18c8 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -42,19 +42,25 @@ int dummy_array[] = { /* * Generic CSF events */ + /* info_val = 0 */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_START), + /* info_val == number of CSGs supported */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EVICT_CTX_SLOTS_END), /* info_val[0:7] == fw version_minor * info_val[15:8] == fw version_major * info_val[63:32] == fw version_hash */ KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_BOOT), KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_REBOOT), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_INVOKE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_INVOKE), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_START), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), /* info_val == total number of runnable groups across all kctxs */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_START), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_START), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET_END), /* info_val = timeout in ms */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_PROTM_WAIT_QUIT_START), /* info_val = remaining ms timeout, or 0 if timedout */ @@ -101,6 +107,8 @@
int dummy_array[] = { * purpose. */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_HALTED), KBASE_KTRACE_CODE_MAKE_CODE(CSF_FIRMWARE_MCU_SLEEP), @@ -126,6 +134,8 @@ int dummy_array[] = { * group->csg_nr indicates which bit was set */ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NO_NON_IDLE_GROUPS), + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_NON_IDLE_GROUPS), /* info_val = scheduler's new csg_slots_idle_mask[0] * group->csg_nr indicates which bit was cleared * @@ -190,10 +200,37 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC), /* info_val == new count of off-slot non-idle groups */ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC), + /* info_val = scheduler's new csg_slots_idle_mask[0] + * group->csg_nr indicates which bit was set + */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HANDLE_IDLE_SLOTS), KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_START), KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), + /* info_val = scheduler state */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_BUSY), + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_INACTIVE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SUSPENDED), + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_SLEEPING), + + /* info_val = mcu state */ +#define KBASEP_MCU_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_MCU_ ## n), +#include "backend/gpu/mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE + + /* info_val = number of runnable groups */ + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_INACTIVE), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC), + /* info_val = new run state of the evicted group */ + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_FAULT_EVICTED), + /* info_val = get the number of active CSGs */ + KBASE_KTRACE_CODE_MAKE_CODE(CSF_GROUP_TERMINATED), + /* * Group + Queue events */ diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h index 86e81e510b47..ddcac906c492 100644 --- a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -31,13 +31,17 @@ * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT */ DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_EVICT_CTX_SLOTS_END); DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_BOOT); DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_REBOOT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_INVOKE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_INVOKE); DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET_END); DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_PROTM_WAIT_QUIT_END); DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_EVENT); @@ -58,8 +62,16 @@ DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START); DEFINE_MALI_ADD_EVENT(SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END); DEFINE_MALI_ADD_EVENT(SCHEDULER_UPDATE_IDLE_SLOTS_ACK); 
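The PM_MCU_* trace codes added in the hunk above are generated with an X-macro: the list of states lives in a separate header, and each site defines KBASEP_MCU_STATE to a different expansion before including it (the next hunk expands the very same list again with DEFINE_MALI_ADD_EVENT). A minimal self-contained sketch of the same pattern is shown below; the state names are invented for illustration and are not the contents of mali_kbase_pm_mcu_states.h.

/* Self-contained illustration of the X-macro pattern used above.
 * The list would normally live in its own header, one STATE(x) per line.
 */
#define EXAMPLE_STATE_LIST \
	EXAMPLE_STATE(OFF)  \
	EXAMPLE_STATE(ON)   \
	EXAMPLE_STATE(SLEEP)

/* First expansion: build an enum of states. */
#define EXAMPLE_STATE(n) EXAMPLE_STATE_##n,
enum example_state { EXAMPLE_STATE_LIST };
#undef EXAMPLE_STATE

/* Second expansion: build a matching name table from the same list. */
#define EXAMPLE_STATE(n) #n,
static const char *const example_state_names[] = { EXAMPLE_STATE_LIST };
#undef EXAMPLE_STATE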
DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_START); +DEFINE_MALI_ADD_EVENT(SCHEDULER_GPU_IDLE_WORKER_HANDLING_END); DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_HALTED); DEFINE_MALI_ADD_EVENT(CSF_FIRMWARE_MCU_SLEEP); +DEFINE_MALI_ADD_EVENT(SCHED_BUSY); +DEFINE_MALI_ADD_EVENT(SCHED_INACTIVE); +DEFINE_MALI_ADD_EVENT(SCHED_SUSPENDED); +DEFINE_MALI_ADD_EVENT(SCHED_SLEEPING); +#define KBASEP_MCU_STATE(n) DEFINE_MALI_ADD_EVENT(PM_MCU_ ## n); +#include "backend/gpu/mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE DECLARE_EVENT_CLASS(mali_csf_grp_q_template, TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, @@ -136,6 +148,8 @@ DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); DEFINE_MALI_CSF_GRP_EVENT(CSG_UPDATE_IDLE_SLOT_REQ); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NO_NON_IDLE_GROUPS); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_NON_IDLE_GROUPS); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_PRIO_UPDATE); DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_SYNC_UPDATE); @@ -160,8 +174,17 @@ DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_PROTM_EXIT); DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_INC); DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_HANDLE_IDLE_SLOTS); DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_START); DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_INACTIVE); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_RUNNABLE); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_IDLE); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_IDLE); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_FAULT_EVICTED); +DEFINE_MALI_CSF_GRP_EVENT(CSF_GROUP_TERMINATED); #undef DEFINE_MALI_CSF_GRP_EVENT diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h index 1c6b4cd26fe0..6103c3ee04a8 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_ktrace_codes.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2018-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -142,6 +142,11 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_SUSPEND_CALLBACK), KBASE_KTRACE_CODE_MAKE_CODE(PM_RUNTIME_RESUME_CALLBACK), + /* info_val = l2 state */ +#define KBASEP_L2_STATE(n) KBASE_KTRACE_CODE_MAKE_CODE(PM_L2_ ## n), +#include "backend/gpu/mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE + /* * Context Scheduler events */ diff --git a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h index 5fac763d1916..6d96647161b4 100644 --- a/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h +++ b/drivers/gpu/arm/bifrost/debug/mali_kbase_debug_linux_ktrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018, 2020-2022 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,6 +98,9 @@ DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); DEFINE_MALI_ADD_EVENT(PM_POWEROFF_WAIT_WQ); DEFINE_MALI_ADD_EVENT(PM_RUNTIME_SUSPEND_CALLBACK); DEFINE_MALI_ADD_EVENT(PM_RUNTIME_RESUME_CALLBACK); +#define KBASEP_L2_STATE(n) DEFINE_MALI_ADD_EVENT(PM_L2_ ## n); +#include "backend/gpu/mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); #ifdef CONFIG_MALI_ARBITER_SUPPORT diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 1e84f6b2644d..277569381292 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -23,8 +23,8 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -40,9 +40,10 @@ #include #include #include -#include +#include #include #include +#include /** * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC @@ -60,7 +61,7 @@ static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) kbase_vinstr_term(kbdev->vinstr_ctx); kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); } } @@ -197,6 +198,20 @@ static int kbase_csf_early_init(struct kbase_device *kbdev) static void kbase_csf_early_term(struct kbase_device *kbdev) { kbase_csf_scheduler_early_term(kbdev); + kbase_csf_firmware_early_term(kbdev); +} + +/** + * kbase_csf_late_init - late initialization for firmware. + * @kbdev: Device pointer + * + * Return: 0 on success, error code otherwise. 
+ */ +static int kbase_csf_late_init(struct kbase_device *kbdev) +{ + int err = kbase_csf_firmware_late_init(kbdev); + + return err; } /** @@ -269,59 +284,48 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) static const struct kbase_device_init dev_init[] = { #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, #endif - { power_control_init, power_control_term, - "Power control initialization failed" }, + { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, - { kbase_device_early_init, kbase_device_early_term, - "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, - "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, - "Lowest freq initialization failed" }, + { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, + { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, + { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, "Priority control manager initialization failed" }, - { kbase_ctx_sched_init, kbase_ctx_sched_term, - "Context scheduler initialization failed" }, - { kbase_mem_init, kbase_mem_term, - "Memory subsystem initialization failed" }, + { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" }, + { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" }, { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, "Protected memory allocator initialization failed" }, { kbase_device_coherency_init, NULL, "Device coherency init failed" }, { kbase_protected_mode_init, kbase_protected_mode_term, "Protected mode subsystem initialization failed" }, - { kbase_device_list_init, kbase_device_list_term, - "Device list setup failed" }, + { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" }, { kbase_device_timeline_init, kbase_device_timeline_term, "Timeline stream initialization failed" }, { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, "Clock rate trace manager initialization failed" }, - { kbase_device_hwcnt_watchdog_if_init, - kbase_device_hwcnt_watchdog_if_term, + { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term, "GPU hwcnt backend watchdog interface creation failed" }, - { kbase_device_hwcnt_backend_csf_if_init, - kbase_device_hwcnt_backend_csf_if_term, + { kbase_device_hwcnt_backend_csf_if_init, kbase_device_hwcnt_backend_csf_if_term, "GPU hwcnt backend CSF interface creation failed" }, - { kbase_device_hwcnt_backend_csf_init, - kbase_device_hwcnt_backend_csf_term, + { kbase_device_hwcnt_backend_csf_init, kbase_device_hwcnt_backend_csf_term, "GPU hwcnt backend creation failed" }, { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, "GPU hwcnt context initialization failed" }, - { kbase_csf_early_init, kbase_csf_early_term, - "Early CSF initialization failed" }, - { kbase_backend_late_init, 
kbase_backend_late_term, - "Late backend initialization failed" }, + { kbase_csf_early_init, kbase_csf_early_term, "Early CSF initialization failed" }, + { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" }, + { kbase_csf_late_init, NULL, "Late CSF initialization failed" }, { NULL, kbase_device_firmware_hwcnt_term, NULL }, - { kbase_device_debugfs_init, kbase_device_debugfs_term, - "DebugFS initialization failed" }, + { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term, + "CSF fault debug initialization failed" }, + { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -339,8 +343,7 @@ static const struct kbase_device_init dev_init[] = { "Misc device registration failed" }, { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, - { kbase_device_late_init, kbase_device_late_term, - "Late device initialization failed" }, + { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, }; static void kbase_device_term_partial(struct kbase_device *kbdev, @@ -468,7 +471,7 @@ static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->fw_load_lock); - err = kbase_csf_firmware_init(kbdev); + err = kbase_csf_firmware_load_init(kbdev); if (!err) { unsigned long flags; @@ -498,11 +501,12 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev) ret = kbase_device_hwcnt_csf_deferred_init(kbdev); if (ret) { - kbase_csf_firmware_term(kbdev); + kbase_csf_firmware_unload_term(kbdev); goto out; } kbase_csf_debugfs_init(kbdev); + kbase_timeline_io_debugfs_init(kbdev); out: kbase_pm_context_idle(kbdev); } diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c index 7939bfd8e74c..3b792968a7d7 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -115,6 +115,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) GPU_EXCEPTION_TYPE_SW_FAULT_0, } } }; + kbase_debug_csf_fault_notify(kbdev, scheduler->active_protm_grp->kctx, + DF_GPU_PROTECTED_FAULT); + scheduler->active_protm_grp->faulted = true; kbase_csf_add_group_fatal_error( scheduler->active_protm_grp, &err_payload); @@ -201,8 +204,11 @@ static bool kbase_is_register_accessible(u32 offset) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + return; + + if (WARN_ON(kbdev->dev == NULL)) + return; if (!kbase_is_register_accessible(offset)) return; @@ -222,8 +228,11 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { u32 val; - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + return 0; + + if (WARN_ON(kbdev->dev == NULL)) + return 0; if (!kbase_is_register_accessible(offset)) return 0; diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index ed7512ef2e39..129b4e430c52 100644 --- 
a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -27,9 +27,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index 7004e347fa1b..fa3669a409e2 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -42,8 +42,8 @@ #include #include "mali_kbase_kinstr_prfcnt.h" #include "mali_kbase_vinstr.h" -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_context.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_device.h" #include "mali_kbase_device_internal.h" @@ -56,17 +56,15 @@ #include "arbiter/mali_kbase_arbiter_pm.h" #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -/* NOTE: Magic - 0x45435254 (TRCE in ASCII). - * Supports tracing feature provided in the base module. - * Please keep it in sync with the value of base module. - */ -#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 +#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* Number of register accesses for the buffer that we allocate during * initialization time. The buffer size can be changed later via debugfs. */ #define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) +#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + static DEFINE_MUTEX(kbase_dev_list_lock); static LIST_HEAD(kbase_dev_list); static int kbase_dev_nr; diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h index 6706a61d5baa..f025011009d5 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.h @@ -130,7 +130,11 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev); * * Return: 0 if successful or a negative error code on failure. */ -#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0) +#if MALI_USE_CSF +int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, + size_t nr_bytes, u32 flush_op); +#endif /* MALI_USE_CSF */ + /** * kbase_gpu_cache_flush_and_busy_wait - Start a cache flush and busy wait * @kbdev: Kbase device diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c index 4bd545a82299..d55495045892 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_hw.c @@ -27,9 +27,6 @@ #include #include -#define U64_LO_MASK ((1ULL << 32) - 1) -#define U64_HI_MASK (~U64_LO_MASK) - #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) bool kbase_is_gpu_removed(struct kbase_device *kbdev) { @@ -86,7 +83,38 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit) return 0; } -#define kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op) (0) +#if MALI_USE_CSF +#define U64_LO_MASK ((1ULL << 32) - 1) +#define U64_HI_MASK (~U64_LO_MASK) + +int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phys_addr_t phys, + size_t nr_bytes, u32 flush_op) +{ + u64 start_pa, end_pa; + int ret = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* 1. Clear the interrupt FLUSH_PA_RANGE_COMPLETED bit. 
*/ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), FLUSH_PA_RANGE_COMPLETED); + + /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_PA_RANGE operation. */ + start_pa = phys; + end_pa = start_pa + nr_bytes - 1; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO), start_pa & U64_LO_MASK); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG0_HI), + (start_pa & U64_HI_MASK) >> 32); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_LO), end_pa & U64_LO_MASK); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI), (end_pa & U64_HI_MASK) >> 32); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op); + + /* 3. Busy-wait irq status to be enabled. */ + ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED); + + return ret; +} +#endif /* MALI_USE_CSF */ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev, u32 flush_op) diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c index 37015ccacd7c..7f3743ca6432 100644 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c +++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_fault_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -170,7 +170,7 @@ const char *kbase_gpu_exception_name(u32 const exception_code) default: e = "UNKNOWN"; break; - }; + } return e; } diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h index 06c725c0e757..e7457ddb5534 100644 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -35,10 +35,7 @@ #define MCU_SUBSYSTEM_BASE 0x20000 /* IPA control registers */ -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) #define COMMAND 0x000 /* (WO) Command register */ -#define STATUS 0x004 /* (RO) Status register */ #define TIMER 0x008 /* (RW) Timer control register */ #define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ @@ -127,8 +124,16 @@ #define MCU_STATUS_HALTED (1 << 1) +#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) +#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ + (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ + (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ + (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) + /* JOB IRQ flags */ -#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ +#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ /* GPU_COMMAND codes */ #define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ diff --git a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h index c349f4b058cd..380ec30d607f 100644 --- a/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/drivers/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -127,29 +127,12 @@ #define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) 
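Stepping back to the kbase_gpu_cache_flush_pa_range_and_busy_wait() implementation added just above (before the register map changes), a caller is expected to hold hwaccess_lock, per the lockdep assertion, and to pass one of the FLUSH_PA_RANGE GPU command encodings. A hedged sketch of such a caller follows; the helper name is invented and flush_op is left as a parameter because the command encodings sit outside this hunk.

/* Illustration only, CSF GPUs (MALI_USE_CSF): flush one page of physical
 * memory and busy-wait for completion. flush_op must be a FLUSH_PA_RANGE
 * GPU command value, which is defined elsewhere in the driver.
 */
static int example_flush_one_page(struct kbase_device *kbdev, phys_addr_t pa, u32 flush_op)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	err = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, pa, PAGE_SIZE, flush_op);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return err;
}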
-#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ -/* (RO) Extended affinity mask for job slot n*/ -#define JS_XAFFINITY 0x1C +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ #define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ #define JS_STATUS 0x24 /* (RO) Status register for job slot n */ -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ - -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -/* (RW) Next extended affinity mask for job slot n */ -#define JS_XAFFINITY_NEXT 0x5C - -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ #define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ diff --git a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h index 396ebd5e21c9..282f566c0746 100644 --- a/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h +++ b/drivers/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h @@ -45,9 +45,6 @@ /* Begin Register Offsets */ /* GPU control registers */ -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ #define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ #define TILER_FEATURES 0x00C /* (RO) Tiler Features */ #define MEM_FEATURES 0x010 /* (RO) Memory system features */ @@ -100,6 +97,10 @@ #define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) +#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ +#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ +#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ +#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ #define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ @@ -113,26 +114,10 @@ #define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define 
L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - #define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ #define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ +#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ #define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ #define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ @@ -181,6 +166,8 @@ #define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ #define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ +#define AMBA_FEATURES 0x300 /* (RO) AMBA bus supported features */ +#define AMBA_ENABLE 0x304 /* (RW) AMBA features enable */ #define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ #define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ @@ -188,13 +175,7 @@ /* Job control registers */ -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - #define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ #define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ /* MMU control registers */ @@ -203,7 +184,6 @@ #define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ #define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ #define MMU_AS1 0x440 /* Configuration registers for address space 1 */ #define MMU_AS2 0x480 /* Configuration registers for address space 2 */ #define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ @@ -221,25 +201,13 @@ #define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ /* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ #define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ #define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ #define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ #define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ #define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ #define AS_STATUS 0x28 /* (RO) Status flags for address space n */ -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 /* (RO) Secondary fault address for address space n, low word */ #define AS_FAULTEXTRA_LO 0x38 /* (RO) Secondary fault address for address space n, high word */ @@ -464,6 +432,80 @@ #define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) /* End L2_CONFIG register */ +/* AMBA_FEATURES register */ +#define AMBA_FEATURES_ACE_LITE_SHIFT GPU_U(0) +#define AMBA_FEATURES_ACE_LITE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_LITE_SHIFT) +#define AMBA_FEATURES_ACE_LITE_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_ACE_LITE_MASK) >> \ + AMBA_FEATURES_ACE_LITE_SHIFT) +#define AMBA_FEATURES_ACE_LITE_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_ACE_LITE_MASK) | \ + (((value) << AMBA_FEATURES_ACE_LITE_SHIFT) & \ + AMBA_FEATURES_ACE_LITE_MASK)) +#define AMBA_FEATURES_ACE_SHIFT GPU_U(1) +#define AMBA_FEATURES_ACE_MASK (GPU_U(0x1) << AMBA_FEATURES_ACE_SHIFT) +#define AMBA_FEATURES_ACE_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_ACE_MASK) >> AMBA_FEATURES_ACE_SHIFT) +#define AMBA_FEATURES_ACE_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_ACE_MASK) | \ + (((value) << AMBA_FEATURES_ACE_SHIFT) & AMBA_FEATURES_ACE_MASK)) +#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) +#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK \ + (GPU_U(0x1) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) +#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) >> \ + AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) +#define AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ + (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ + AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) +#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) +#define AMBA_FEATURES_INVALIDATE_HINT_MASK \ + (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) +#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ + (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ + AMBA_FEATURES_INVALIDATE_HINT_SHIFT) +#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ + (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ + AMBA_FEATURES_INVALIDATE_HINT_MASK)) + +/* AMBA_ENABLE register */ +#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_MASK \ + (GPU_U(0x1F) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) >> \ + AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) +#define AMBA_ENABLE_COHERENCY_PROTOCOL_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_COHERENCY_PROTOCOL_MASK) | \ + (((value) << AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT) & \ + 
AMBA_ENABLE_COHERENCY_PROTOCOL_MASK)) +/* AMBA_ENABLE_coherency_protocol values */ +#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE_LITE 0x0 +#define AMBA_ENABLE_COHERENCY_PROTOCOL_ACE 0x1 +#define AMBA_ENABLE_COHERENCY_PROTOCOL_NO_COHERENCY 0x1F +/* End of AMBA_ENABLE_coherency_protocol values */ +#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT GPU_U(5) +#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK \ + (GPU_U(0x1) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) +#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) >> \ + AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) +#define AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ + (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ + AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) +#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) +#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ + (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) +#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ + (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ + AMBA_ENABLE_INVALIDATE_HINT_SHIFT) +#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ + (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ + (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ + AMBA_ENABLE_INVALIDATE_HINT_MASK)) /* IDVS_GROUP register */ #define IDVS_GROUP_SIZE_SHIFT (16) diff --git a/drivers/base/arm/dma_buf_lock/src/Kbuild b/drivers/gpu/arm/bifrost/hwcnt/Kbuild similarity index 53% rename from drivers/base/arm/dma_buf_lock/src/Kbuild rename to drivers/gpu/arm/bifrost/hwcnt/Kbuild index b6b741b39119..c1a381b24593 100644 --- a/drivers/base/arm/dma_buf_lock/src/Kbuild +++ b/drivers/gpu/arm/bifrost/hwcnt/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,20 @@ # # -ifeq ($(CONFIG_DMA_BUF_LOCK), y) -obj-m := dma_buf_lock.o +bifrost_kbase-y += \ + hwcnt/mali_kbase_hwcnt.o \ + hwcnt/mali_kbase_hwcnt_gpu.o \ + hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ + hwcnt/mali_kbase_hwcnt_types.o \ + hwcnt/mali_kbase_hwcnt_virtualizer.o \ + hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + bifrost_kbase-y += \ + hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \ + hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o +else + bifrost_kbase-y += \ + hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \ + hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h similarity index 85% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h index b069fc12be69..6cfa6f5ee6f4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,8 +56,8 @@ struct kbase_hwcnt_backend; * * Return: Non-NULL pointer to immutable hardware counter metadata. */ -typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn( - const struct kbase_hwcnt_backend_info *info); +typedef const struct kbase_hwcnt_metadata * +kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); /** * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. @@ -69,9 +69,8 @@ typedef const struct kbase_hwcnt_metadata *kbase_hwcnt_backend_metadata_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_init_fn( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend); +typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); /** * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. @@ -86,8 +85,7 @@ typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); * * Return: Backend timestamp in nanoseconds. */ -typedef u64 kbase_hwcnt_backend_timestamp_ns_fn( - struct kbase_hwcnt_backend *backend); +typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the @@ -102,9 +100,8 @@ typedef u64 kbase_hwcnt_backend_timestamp_ns_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_dump_enable_fn( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map); +typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); /** * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping @@ -118,9 +115,9 @@ typedef int kbase_hwcnt_backend_dump_enable_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_dump_enable_nolock_fn( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map); +typedef int +kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); /** * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with @@ -130,8 +127,7 @@ typedef int kbase_hwcnt_backend_dump_enable_nolock_fn( * If the backend is already disabled, does nothing. * Any undumped counter values since the last dump get will be lost. */ -typedef void kbase_hwcnt_backend_dump_disable_fn( - struct kbase_hwcnt_backend *backend); +typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped @@ -142,8 +138,7 @@ typedef void kbase_hwcnt_backend_dump_disable_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_dump_clear_fn( - struct kbase_hwcnt_backend *backend); +typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter @@ -157,9 +152,8 @@ typedef int kbase_hwcnt_backend_dump_clear_fn( * * Return: 0 on success, else error code. 
*/ -typedef int kbase_hwcnt_backend_dump_request_fn( - struct kbase_hwcnt_backend *backend, - u64 *dump_time_ns); +typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); /** * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested @@ -170,8 +164,7 @@ typedef int kbase_hwcnt_backend_dump_request_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_dump_wait_fn( - struct kbase_hwcnt_backend *backend); +typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the @@ -189,11 +182,10 @@ typedef int kbase_hwcnt_backend_dump_wait_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_dump_get_fn( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dump_buffer, - const struct kbase_hwcnt_enable_map *enable_map, - bool accumulate); +typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); /** * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c similarity index 76% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index 8afc990662da..10d40bedc0f8 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -19,9 +19,9 @@ * */ -#include "mali_kbase_hwcnt_backend_csf.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include @@ -267,8 +267,7 @@ struct kbase_hwcnt_backend_csf { struct work_struct hwc_threshold_work; }; -static bool kbasep_hwcnt_backend_csf_backend_exists( - struct kbase_hwcnt_backend_csf_info *csf_info) +static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) { WARN_ON(!csf_info); csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); @@ -282,19 +281,20 @@ static bool kbasep_hwcnt_backend_csf_backend_exists( * @backend_csf: Non-NULL pointer to backend. * @enable_map: Non-NULL pointer to enable map specifying enabled counters. */ -static void kbasep_hwcnt_backend_csf_cc_initial_sample( - struct kbase_hwcnt_backend_csf *backend_csf, - const struct kbase_hwcnt_enable_map *enable_map) +static void +kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map) { u64 clk_enable_map = enable_map->clk_enable_map; u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; /* Read cycle count from CSF interface for both clock domains. 
*/ - backend_csf->info->csf_if->get_gpu_cycle_count( - backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map); + backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, + clk_enable_map); - kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) { + kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) + { if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; } @@ -303,42 +303,35 @@ static void kbasep_hwcnt_backend_csf_cc_initial_sample( backend_csf->clk_enable_map = clk_enable_map; } -static void -kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) +static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) { u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; size_t clk; - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); - backend_csf->info->csf_if->get_gpu_cycle_count( - backend_csf->info->csf_if->ctx, cycle_counts, - backend_csf->clk_enable_map); + backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, + backend_csf->clk_enable_map); - kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) { - if (kbase_hwcnt_clk_enable_map_enabled( - backend_csf->clk_enable_map, clk)) { + kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) { backend_csf->cycle_count_elapsed[clk] = - cycle_counts[clk] - - backend_csf->prev_cycle_count[clk]; + cycle_counts[clk] - backend_csf->prev_cycle_count[clk]; backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; } } } /* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -static u64 -kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) +static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) { - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) return 0; - return backend_csf->info->csf_if->timestamp_ns( - backend_csf->info->csf_if->ctx); + return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx); } /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to @@ -347,8 +340,8 @@ kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) * required. *@phys_enable_map: HWC physical enable map to be processed. 
*/ -static void kbasep_hwcnt_backend_csf_process_enable_map( - struct kbase_hwcnt_physical_enable_map *phys_enable_map) +static void +kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map) { WARN_ON(!phys_enable_map); @@ -408,19 +401,19 @@ static void kbasep_hwcnt_backend_csf_init_layout( }; } -static void kbasep_hwcnt_backend_csf_reset_internal_buffers( - struct kbase_hwcnt_backend_csf *backend_csf) +static void +kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; memset(backend_csf->to_user_buf, 0, user_buf_bytes); memset(backend_csf->accum_buf, 0, user_buf_bytes); - memset(backend_csf->old_sample_buf, 0, - backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); } -static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( - struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample) +static void +kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf, + u32 *sample) { u32 block_idx; const struct kbase_hwcnt_csf_physical_layout *phys_layout; @@ -434,8 +427,8 @@ static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( } } -static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( - struct kbase_hwcnt_backend_csf *backend_csf) +static void +kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf) { u32 idx; u32 *sample; @@ -446,19 +439,16 @@ static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; - kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( - backend_csf, sample); + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); } } -static void kbasep_hwcnt_backend_csf_update_user_sample( - struct kbase_hwcnt_backend_csf *backend_csf) +static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; /* Copy the data into the sample and wait for the user to get it. 
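The memcpy/clear pair above is the whole of the user-sample hand-off: the accumulator is published into a separate snapshot buffer and then reset so the next dump starts from zero. A minimal standalone sketch of that step, using hypothetical names and plain C types rather than the driver's structures:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void publish_user_sample(uint64_t *to_user, uint64_t *accum, size_t value_cnt)
{
	/* Publish the accumulated counters, then clear the accumulator so the
	 * next accumulation run starts from zero.
	 */
	memcpy(to_user, accum, value_cnt * sizeof(*accum));
	memset(accum, 0, value_cnt * sizeof(*accum));
}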
*/ - memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, - user_buf_bytes); + memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); /* After copied data into user sample, clear the accumulator values to * prepare for the next accumulator, such as the next request or @@ -468,9 +458,8 @@ static void kbasep_hwcnt_backend_csf_update_user_sample( } static void kbasep_hwcnt_backend_csf_accumulate_sample( - const struct kbase_hwcnt_csf_physical_layout *phys_layout, - size_t dump_bytes, u64 *accum_buf, const u32 *old_sample_buf, - const u32 *new_sample_buf, bool clearing_samples) + const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, + u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) { size_t block_idx; const u32 *old_block = old_sample_buf; @@ -487,10 +476,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; block_idx++) { - const u32 old_enable_mask = - old_block[phys_layout->enable_mask_offset]; - const u32 new_enable_mask = - new_block[phys_layout->enable_mask_offset]; + const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; + const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; if (new_enable_mask == 0) { /* Hardware block was unavailable or we didn't turn on @@ -503,9 +490,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( size_t ctr_idx; /* Unconditionally copy the headers. */ - for (ctr_idx = 0; - ctr_idx < phys_layout->headers_per_block; - ctr_idx++) { + for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { acc_block[ctr_idx] = new_block[ctr_idx]; } @@ -534,34 +519,25 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( * counters only, as we know previous * values are zeroes. */ - for (ctr_idx = - phys_layout - ->headers_per_block; - ctr_idx < values_per_block; - ctr_idx++) { - acc_block[ctr_idx] += - new_block[ctr_idx]; + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < values_per_block; ctr_idx++) { + acc_block[ctr_idx] += new_block[ctr_idx]; } } else { /* Hardware block was previously * available. Accumulate the delta * between old and new counter values. 
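The branches above handle the two counter-sampling modes the backend supports: hardware that clears counters after each sample, and free-running wrapping counters. A simplified, self-contained sketch of the arithmetic only, with hypothetical names, ignoring block headers, enable masks and the previously-unavailable-block case:

#include <stddef.h>
#include <stdint.h>

static void accumulate_block(uint64_t *acc, const uint32_t *new_vals, const uint32_t *old_vals,
			     size_t cnt, int clearing_samples)
{
	size_t i;

	for (i = 0; i < cnt; i++) {
		if (clearing_samples) {
			/* Counters restart from zero after every sample, so the
			 * raw value is already the delta for this interval.
			 */
			acc[i] += new_vals[i];
		} else {
			/* Free-running, wrapping counters: accumulate the
			 * difference to the previous sample; unsigned 32-bit
			 * arithmetic absorbs a single wrap between samples.
			 */
			acc[i] += (uint32_t)(new_vals[i] - old_vals[i]);
		}
	}
}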
*/ - for (ctr_idx = - phys_layout - ->headers_per_block; - ctr_idx < values_per_block; - ctr_idx++) { + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < values_per_block; ctr_idx++) { acc_block[ctr_idx] += - new_block[ctr_idx] - - old_block[ctr_idx]; + new_block[ctr_idx] - old_block[ctr_idx]; } } } else { for (ctr_idx = phys_layout->headers_per_block; ctr_idx < values_per_block; ctr_idx++) { - acc_block[ctr_idx] += - new_block[ctr_idx]; + acc_block[ctr_idx] += new_block[ctr_idx]; } } } @@ -570,18 +546,16 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( acc_block += values_per_block; } - WARN_ON(old_block != - old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); - WARN_ON(new_block != - new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - (values_per_block * phys_layout->fw_block_cnt)); (void)dump_bytes; } -static void kbasep_hwcnt_backend_csf_accumulate_samples( - struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start, - u32 insert_index_to_stop) +static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf, + u32 extract_index_to_start, + u32 insert_index_to_stop) { u32 raw_idx; unsigned long flags; @@ -598,25 +572,22 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( /* Sync all the buffers to CPU side before read the data. */ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, - backend_csf->ring_buf, - extract_index_to_start, + backend_csf->ring_buf, extract_index_to_start, insert_index_to_stop, true); /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ - for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; - raw_idx++) { + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { /* The logical "&" acts as a modulo operation since buf_count * must be a power of two. */ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); - new_sample_buf = - (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; + new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - kbasep_hwcnt_backend_csf_accumulate_sample( - &backend_csf->phys_layout, buf_dump_bytes, - backend_csf->accum_buf, old_sample_buf, new_sample_buf, - clearing_samples); + kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, + buf_dump_bytes, backend_csf->accum_buf, + old_sample_buf, new_sample_buf, + clearing_samples); old_sample_buf = new_sample_buf; } @@ -625,19 +596,16 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); /* Reset the prfcnt_en header on each sample before releasing them. */ - for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; - raw_idx++) { + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( - backend_csf, sample); + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); } /* Sync zeroed buffers to avoid coherency issues on future use. 
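The accumulation loop above relies on two properties of the ring indexes: they are free-running u32 values (hence '!=' rather than '<' as the loop bound) and the buffer count is a power of two (hence '&' as a cheap modulo). A small standalone C program, not taken from the driver, that exercises the same indexing across a 32-bit wrap:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void walk_ring(uint32_t extract_idx, uint32_t insert_idx, uint32_t buf_cnt)
{
	uint32_t raw_idx;

	/* '!=' copes with the raw indexes wrapping past UINT32_MAX;
	 * '&' equals modulo only because buf_cnt is a power of two.
	 */
	for (raw_idx = extract_idx; raw_idx != insert_idx; raw_idx++) {
		uint32_t slot = raw_idx & (buf_cnt - 1);

		printf("raw %" PRIu32 " -> slot %" PRIu32 "\n", raw_idx, slot);
	}
}

int main(void)
{
	/* Chosen so the raw index wraps: the loop visits the slots for raw
	 * indexes 4294967294, 4294967295, 0 and 1, then stops at 2.
	 */
	walk_ring(UINT32_MAX - 1, 2, 8);
	return 0;
}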
*/ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, - backend_csf->ring_buf, - extract_index_to_start, + backend_csf->ring_buf, extract_index_to_start, insert_index_to_stop, false); /* After consuming all samples between extract_idx and insert_idx, @@ -645,22 +613,20 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples( * can be released back to the ring buffer pool. */ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); - backend_csf->info->csf_if->set_extract_index( - backend_csf->info->csf_if->ctx, insert_index_to_stop); + backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx, + insert_index_to_stop); /* Update the watchdog last seen index to check any new FW auto samples * in next watchdog callback. */ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop; - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); } static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( struct kbase_hwcnt_backend_csf *backend_csf, enum kbase_hwcnt_backend_csf_enable_state new_state) { - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); if (backend_csf->enable_state != new_state) { backend_csf->enable_state = new_state; @@ -691,26 +657,22 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) (!csf_info->fw_in_protected_mode) && /* 3. dump state indicates no other dumping is in progress. */ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || - (backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { + (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { u32 extract_index; u32 insert_index; /* Read the raw extract and insert indexes from the CSF interface. */ - csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, - &extract_index, &insert_index); + csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index); /* Do watchdog request if no new FW auto samples. */ - if (insert_index == - backend_csf->watchdog_last_seen_insert_idx) { + if (insert_index == backend_csf->watchdog_last_seen_insert_idx) { /* Trigger the watchdog request. */ csf_info->csf_if->dump_request(csf_info->csf_if->ctx); /* A watchdog dump is required, change the state to * start the request process. */ - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; } } @@ -719,12 +681,10 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) * counter enabled interrupt. */ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) || - (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { + (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { /* Reschedule the timer for next watchdog callback. 
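The watchdog callback above only issues its own dump request when the insert index has not moved since the last time samples were consumed. A tiny illustrative helper, with made-up names, expressing just that check:

#include <stdbool.h>
#include <stdint.h>

static bool watchdog_needs_request(uint32_t insert_idx, uint32_t last_seen_insert_idx)
{
	/* Equal indexes mean the firmware produced no automatic samples since
	 * samples were last consumed, so the watchdog asks for one itself;
	 * last_seen_insert_idx is refreshed whenever samples are consumed.
	 */
	return insert_idx == last_seen_insert_idx;
}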
*/ - csf_info->watchdog_if->modify( - csf_info->watchdog_if->timer, - HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + csf_info->watchdog_if->modify(csf_info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); } csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); @@ -747,8 +707,7 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) u32 insert_index; WARN_ON(!work); - backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, - hwc_dump_work); + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); @@ -757,26 +716,22 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) * launched. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - WARN_ON(backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return; } - WARN_ON(backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; insert_index_to_acc = backend_csf->insert_index_to_accumulate; /* Read the raw extract and insert indexes from the CSF interface. */ - backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, - &extract_index, &insert_index); + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, + &insert_index); - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Accumulate up to the insert we grabbed at the prfcnt request * interrupt. @@ -797,22 +752,18 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) /* The backend was disabled or had an error while we were accumulating. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - WARN_ON(backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return; } - WARN_ON(backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); /* Our work here is done - set the wait object and unblock waiters. 
*/ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; complete_all(&backend_csf->dump_completed); - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); } /** @@ -832,23 +783,21 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) WARN_ON(!work); - backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, - hwc_threshold_work); + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); /* Assert the backend is not destroyed. */ WARN_ON(backend_csf != backend_csf->info->backend); /* Read the raw extract and insert indexes from the CSF interface. */ - backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, - &extract_index, &insert_index); + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, + &insert_index); /* The backend was disabled or had an error while the worker was being * launched. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return; } @@ -857,14 +806,11 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) * interfere. */ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && - (backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return; } - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Accumulate everything we possibly can. We grabbed the insert index * immediately after we acquired the lock but before we checked whether @@ -873,14 +819,13 @@ static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) * fact that our insert will not exceed the concurrent dump's * insert_to_accumulate, so we don't risk accumulating too much data. */ - kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, - insert_index); + kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); /* No need to wake up anything since it is not a user dump request. */ } -static void kbase_hwcnt_backend_csf_submit_dump_worker( - struct kbase_hwcnt_backend_csf_info *csf_info) +static void +kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info) { u32 extract_index; @@ -888,31 +833,26 @@ static void kbase_hwcnt_backend_csf_submit_dump_worker( csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); - WARN_ON(csf_info->backend->enable_state != - KBASE_HWCNT_BACKEND_CSF_ENABLED); - WARN_ON(csf_info->backend->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); + WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED); + WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); /* Save insert index now so that the dump worker only accumulates the * HWC data associated with this request. 
Extract index is not stored * as that needs to be checked when accumulating to prevent re-reading * buffers that have already been read and returned to the GPU. */ - csf_info->csf_if->get_indexes( - csf_info->csf_if->ctx, &extract_index, - &csf_info->backend->insert_index_to_accumulate); - csf_info->backend->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; + csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, + &csf_info->backend->insert_index_to_accumulate); + csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; /* Submit the accumulator task into the work queue. */ - queue_work(csf_info->backend->hwc_dump_workq, - &csf_info->backend->hwc_dump_work); + queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work); } -static void kbasep_hwcnt_backend_csf_get_physical_enable( - struct kbase_hwcnt_backend_csf *backend_csf, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_backend_csf_if_enable *enable) +static void +kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_backend_csf_if_enable *enable) { enum kbase_hwcnt_physical_set phys_counter_set; struct kbase_hwcnt_physical_enable_map phys_enable_map; @@ -924,8 +864,7 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable( */ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); - kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, - backend_csf->info->counter_set); + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set); /* Use processed enable_map to enable HWC in HW level. */ enable->fe_bm = phys_enable_map.fe_bm; @@ -937,33 +876,29 @@ static void kbasep_hwcnt_backend_csf_get_physical_enable( } /* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_csf_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) +static int +kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) { - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; struct kbase_hwcnt_backend_csf_if_enable enable; int err; - if (!backend_csf || !enable_map || - (enable_map->metadata != backend_csf->info->metadata)) + if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) return -EINVAL; - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); - kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, - &enable); + kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); /* enable_state should be DISABLED before we transfer it to enabled */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) return -EIO; - err = backend_csf->info->watchdog_if->enable( - backend_csf->info->watchdog_if->timer, - HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, - kbasep_hwcnt_backend_watchdog_timer_cb, backend_csf->info); + err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, + kbasep_hwcnt_backend_watchdog_timer_cb, + backend_csf->info); if (err) return err; @@ -981,58 +916,46 @@ static 
int kbasep_hwcnt_backend_csf_dump_enable_nolock( } /* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -static int kbasep_hwcnt_backend_csf_dump_enable( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) +static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) { int errcode; unsigned long flags; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend_csf) return -EINVAL; backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); - errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, - enable_map); - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return errcode; } static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) { - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); - while ((backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || - (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, *lock_flags); + while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || + (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags); - wait_event( - backend_csf->enable_state_waitq, - (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && - (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); + wait_event(backend_csf->enable_state_waitq, + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); - backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, - lock_flags); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags); } } /* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void -kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) { unsigned long flags; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_disable = false; WARN_ON(!backend_csf); @@ -1042,24 +965,20 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) /* Make sure we wait until any previous enable or disable have completed * before doing anything. 
*/ - kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, - &flags); + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || - backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { /* If we are already disabled or in an unrecoverable error * state, there is nothing for us to do. */ - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return; } if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); /* Only disable if we were previously enabled - in all other @@ -1071,15 +990,13 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); WARN_ON(!completion_done(&backend_csf->dump_completed)); - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Deregister the timer and block until any timer callback has completed. * We've transitioned out of the ENABLED state so we can guarantee it * won't reschedule itself. */ - backend_csf->info->watchdog_if->disable( - backend_csf->info->watchdog_if->timer); + backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer); /* Block until any async work has completed. We have transitioned out of * the ENABLED state so we can guarantee no new work will concurrently @@ -1090,11 +1007,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); if (do_disable) - backend_csf->info->csf_if->dump_disable( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); - kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, - &flags); + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); switch (backend_csf->enable_state) { case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: @@ -1103,8 +1018,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) break; case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); + backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); break; default: WARN_ON(true); @@ -1114,8 +1028,7 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) backend_csf->user_requested = false; backend_csf->watchdog_last_seen_insert_idx = 0; - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* After disable, zero the header of all buffers in the ring buffer back * to 0 to prepare for the next enable. 
@@ -1123,9 +1036,9 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); /* Sync zeroed buffers to avoid coherency issues on future use. */ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - backend_csf->info->ring_buf_cnt, false); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare * for next enable. @@ -1134,13 +1047,11 @@ kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) } /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ -static int -kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, - u64 *dump_time_ns) +static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) { unsigned long flags; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_request = false; bool watchdog_dumping = false; @@ -1153,22 +1064,18 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, * the user dump buffer is already zeroed. We can just short circuit to * the DUMP_COMPLETED state. */ - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); kbasep_hwcnt_backend_csf_cc_update(backend_csf); backend_csf->user_requested = true; - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return 0; } /* Otherwise, make sure we're already enabled. */ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return -EIO; } @@ -1181,15 +1088,12 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, * request can be processed instead of ignored. */ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && - (backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) && - (backend_csf->dump_state != - KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) { + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) && + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) { /* HWC is disabled or another user dump is ongoing, * or we're on fault. */ - backend_csf->info->csf_if->unlock( - backend_csf->info->csf_if->ctx, flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* HWC is disabled or another dump is ongoing, or we are on * fault. */ @@ -1199,8 +1103,7 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, /* Reset the completion so dump_wait() has something to wait on. 
*/ reinit_completion(&backend_csf->dump_completed); - if (backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) + if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) watchdog_dumping = true; if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && @@ -1208,15 +1111,13 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, /* Only do the request if we are fully enabled and not in * protected mode. */ - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; do_request = true; } else { /* Skip the request and waiting for ack and go straight to * checking the insert and kicking off the worker to do the dump */ - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; } /* CSF firmware might enter protected mode now, but still call request. @@ -1238,31 +1139,26 @@ kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, * ownership of the sample which watchdog requested. */ if (!watchdog_dumping) - backend_csf->info->csf_if->dump_request( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx); } else kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Modify watchdog timer to delay the regular check time since * just requested. */ - backend_csf->info->watchdog_if->modify( - backend_csf->info->watchdog_if->timer, - HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); return 0; } /* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -static int -kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) +static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) { unsigned long flags; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int errcode; if (!backend_csf) @@ -1275,26 +1171,21 @@ kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) * set. 
*/ if (backend_csf->user_requested && - ((backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || - (backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) + ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || + (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) errcode = 0; else errcode = -EIO; - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); return errcode; } /* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -static int -kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) +static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) { - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int errcode; u64 ts; @@ -1313,13 +1204,12 @@ kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) } /* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ -static int kbasep_hwcnt_backend_csf_dump_get( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) +static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) { - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; int ret; size_t clk; @@ -1329,9 +1219,9 @@ static int kbasep_hwcnt_backend_csf_dump_get( return -EINVAL; /* Extract elapsed cycle count for each clock domain if enabled. */ - kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { - if (!kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) continue; /* Reset the counter to zero if accumulation is off. */ @@ -1344,8 +1234,7 @@ static int kbasep_hwcnt_backend_csf_dump_get( * as it is undefined to call this function without a prior succeeding * one to dump_wait(). */ - ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, - dst_enable_map, accumulate); + ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); return ret; } @@ -1357,8 +1246,7 @@ static int kbasep_hwcnt_backend_csf_dump_get( * Can be safely called on a backend in any state of partial construction. * */ -static void -kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) +static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) { if (!backend_csf) return; @@ -1388,9 +1276,8 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) * * Return: 0 on success, else error code. 
*/ -static int -kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, - struct kbase_hwcnt_backend_csf **out_backend) +static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, + struct kbase_hwcnt_backend_csf **out_backend) { struct kbase_hwcnt_backend_csf *backend_csf = NULL; int errcode = -ENOMEM; @@ -1403,27 +1290,23 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, goto alloc_error; backend_csf->info = csf_info; - kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, - &backend_csf->phys_layout); + kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); - backend_csf->accum_buf = - kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); + backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); if (!backend_csf->accum_buf) goto err_alloc_acc_buf; - backend_csf->old_sample_buf = - kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); + backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); if (!backend_csf->old_sample_buf) goto err_alloc_pre_sample_buf; - backend_csf->to_user_buf = - kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); + backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; - errcode = csf_info->csf_if->ring_buf_alloc( - csf_info->csf_if->ctx, csf_info->ring_buf_cnt, - &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf); + errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, + &backend_csf->ring_buf_cpu_base, + &backend_csf->ring_buf); if (errcode) goto err_ring_buf_alloc; errcode = -ENOMEM; @@ -1432,9 +1315,9 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); /* Sync zeroed buffers to avoid coherency issues on use. 
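The create function above uses the usual goto-based unwind: each allocation failure jumps to a label that releases only what was already set up, in reverse order. A generic, self-contained sketch of that pattern using plain libc allocations rather than the driver's buffers and ring-buffer allocator:

#include <stdlib.h>

struct two_bufs {
	void *a;
	void *b;
};

static struct two_bufs *two_bufs_create(size_t a_len, size_t b_len)
{
	struct two_bufs *obj = calloc(1, sizeof(*obj));

	if (!obj)
		goto err_alloc_obj;

	obj->a = calloc(1, a_len);
	if (!obj->a)
		goto err_alloc_a;

	obj->b = calloc(1, b_len);
	if (!obj->b)
		goto err_alloc_b;

	return obj;

err_alloc_b:
	free(obj->a);
err_alloc_a:
	free(obj);
err_alloc_obj:
	return NULL;
}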
*/ - backend_csf->info->csf_if->ring_buf_sync( - backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, - backend_csf->info->ring_buf_cnt, false); + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); init_completion(&backend_csf->dump_completed); @@ -1448,10 +1331,8 @@ kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, if (!backend_csf->hwc_dump_workq) goto err_alloc_workqueue; - INIT_WORK(&backend_csf->hwc_dump_work, - kbasep_hwcnt_backend_csf_dump_worker); - INIT_WORK(&backend_csf->hwc_threshold_work, - kbasep_hwcnt_backend_csf_threshold_worker); + INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker); + INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker); backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; @@ -1481,14 +1362,12 @@ alloc_error: } /* CSF backend implementation of kbase_hwcnt_backend_init_fn */ -static int -kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend) +static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) { unsigned long flags; struct kbase_hwcnt_backend_csf *backend_csf = NULL; - struct kbase_hwcnt_backend_csf_info *csf_info = - (struct kbase_hwcnt_backend_csf_info *)info; + struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; int errcode; bool success = false; @@ -1509,11 +1388,9 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, *out_backend = (struct kbase_hwcnt_backend *)backend_csf; success = true; if (csf_info->unrecoverable_error_happened) - backend_csf->enable_state = - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; + backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; } - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); /* Destroy the new created backend if the backend has already created * before. In normal case, this won't happen if the client call init() @@ -1531,8 +1408,7 @@ kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) { unsigned long flags; - struct kbase_hwcnt_backend_csf *backend_csf = - (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; if (!backend) return; @@ -1544,8 +1420,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) */ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); backend_csf->info->backend = NULL; - backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, - flags); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); kbasep_hwcnt_backend_csf_destroy(backend_csf); } @@ -1557,8 +1432,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) * Can be safely called on a backend info in any state of partial construction. 
* */ -static void kbasep_hwcnt_backend_csf_info_destroy( - const struct kbase_hwcnt_backend_csf_info *info) +static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info) { if (!info) return; @@ -1585,10 +1459,10 @@ static void kbasep_hwcnt_backend_csf_info_destroy( * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_backend_csf_info_create( - struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, - struct kbase_hwcnt_watchdog_interface *watchdog_if, - const struct kbase_hwcnt_backend_csf_info **out_info) +static int +kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + const struct kbase_hwcnt_backend_csf_info **out_info) { struct kbase_hwcnt_backend_csf_info *info = NULL; @@ -1611,8 +1485,7 @@ static int kbasep_hwcnt_backend_csf_info_create( .counter_set = KBASE_HWCNT_SET_PRIMARY, #endif .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt, - .fw_in_protected_mode = false, - .unrecoverable_error_happened = false, + .fw_in_protected_mode = false, .unrecoverable_error_happened = false, .watchdog_if = watchdog_if, }; *out_info = info; @@ -1632,19 +1505,17 @@ kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata; } -static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf) +static void +kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) { bool do_disable = false; - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); /* We are already in or transitioning to the unrecoverable error state. * Early out. */ - if ((backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER)) return; @@ -1654,8 +1525,7 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( */ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); + backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); return; } @@ -1663,12 +1533,11 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( * disabled, we don't want to disable twice if an unrecoverable error * happens while we are disabling. */ - do_disable = (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + do_disable = + (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); + backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); /* Transition the dump to the IDLE state and unblock any waiters. The * IDLE state signifies an error. @@ -1681,15 +1550,13 @@ static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( * happens while we are disabling. 
*/ if (do_disable) - backend_csf->info->csf_if->dump_disable( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); } -static void kbasep_hwcnt_backend_csf_handle_recoverable_error( - struct kbase_hwcnt_backend_csf *backend_csf) +static void +kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) { - backend_csf->info->csf_if->assert_lock_held( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); switch (backend_csf->enable_state) { case KBASE_HWCNT_BACKEND_CSF_DISABLED: @@ -1705,8 +1572,7 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( /* A seemingly recoverable error that occurs while we are * transitioning to enabled is probably unrecoverable. */ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - backend_csf); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf); return; case KBASE_HWCNT_BACKEND_CSF_ENABLED: /* Start transitioning to the disabled state. We can't wait for @@ -1715,22 +1581,19 @@ static void kbasep_hwcnt_backend_csf_handle_recoverable_error( * disable(). */ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); /* Transition the dump to the IDLE state and unblock any * waiters. The IDLE state signifies an error. */ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; complete_all(&backend_csf->dump_completed); - backend_csf->info->csf_if->dump_disable( - backend_csf->info->csf_if->ctx); + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); return; } } -void kbase_hwcnt_backend_csf_protm_entered( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; @@ -1744,8 +1607,7 @@ void kbase_hwcnt_backend_csf_protm_entered( kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); } -void kbase_hwcnt_backend_csf_protm_exited( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; @@ -1755,8 +1617,7 @@ void kbase_hwcnt_backend_csf_protm_exited( csf_info->fw_in_protected_mode = false; } -void kbase_hwcnt_backend_csf_on_unrecoverable_error( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface) { unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; @@ -1776,8 +1637,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error( csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } -void kbase_hwcnt_backend_csf_on_before_reset( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface) { unsigned long flags; struct kbase_hwcnt_backend_csf_info *csf_info; @@ -1795,8 +1655,7 @@ void kbase_hwcnt_backend_csf_on_before_reset( backend_csf = csf_info->backend; if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && - (backend_csf->enable_state != - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { + (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { /* Before a reset occurs, we must either have been disabled * (else we lose 
data) or we should have encountered an * unrecoverable error. Either way, we will have disabled the @@ -1807,13 +1666,11 @@ void kbase_hwcnt_backend_csf_on_before_reset( * We can't wait for this disable to complete, but it doesn't * really matter, the power is being pulled. */ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); } /* A reset is the only way to exit the unrecoverable error state */ - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); } @@ -1821,8 +1678,7 @@ void kbase_hwcnt_backend_csf_on_before_reset( csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); } -void kbase_hwcnt_backend_csf_on_prfcnt_sample( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; @@ -1836,10 +1692,8 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample( backend_csf = csf_info->backend; /* Skip the dump_work if it's a watchdog request. */ - if (backend_csf->dump_state == - KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { - backend_csf->dump_state = - KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; return; } @@ -1853,8 +1707,7 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample( kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); } -void kbase_hwcnt_backend_csf_on_prfcnt_threshold( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; @@ -1871,12 +1724,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_threshold( /* Submit the threshold work into the work queue to consume the * available samples. 
*/ - queue_work(backend_csf->hwc_dump_workq, - &backend_csf->hwc_threshold_work); + queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); } -void kbase_hwcnt_backend_csf_on_prfcnt_overflow( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; @@ -1897,8 +1748,7 @@ void kbase_hwcnt_backend_csf_on_prfcnt_overflow( kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); } -void kbase_hwcnt_backend_csf_on_prfcnt_enable( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; @@ -1911,12 +1761,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable( return; backend_csf = csf_info->backend; - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); - } else if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_ENABLED) { + } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { /* Unexpected, but we are already in the right state so just * ignore it. */ @@ -1924,13 +1772,11 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable( /* Unexpected state change, assume everything is broken until * we reset. */ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); } } -void kbase_hwcnt_backend_csf_on_prfcnt_disable( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_backend_csf *backend_csf; @@ -1943,13 +1789,10 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable( return; backend_csf = csf_info->backend; - if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( - backend_csf, - KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); - } else if (backend_csf->enable_state == - KBASE_HWCNT_BACKEND_CSF_DISABLED) { + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); + } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { /* Unexpected, but we are already in the right state so just * ignore it. */ @@ -1957,13 +1800,11 @@ void kbase_hwcnt_backend_csf_on_prfcnt_disable( /* Unexpected state change, assume everything is broken until * we reset. 
*/ - kbasep_hwcnt_backend_csf_handle_unrecoverable_error( - csf_info->backend); + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); } } -int kbase_hwcnt_backend_csf_metadata_init( - struct kbase_hwcnt_backend_interface *iface) +int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; struct kbase_hwcnt_gpu_info gpu_info; @@ -1975,8 +1816,7 @@ int kbase_hwcnt_backend_csf_metadata_init( WARN_ON(!csf_info->csf_if->get_prfcnt_info); - csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, - &csf_info->prfcnt_info); + csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info); /* The clock domain counts should not exceed the number of maximum * number of clock regulators. @@ -1988,14 +1828,12 @@ int kbase_hwcnt_backend_csf_metadata_init( gpu_info.core_mask = csf_info->prfcnt_info.core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = - csf_info->prfcnt_info.prfcnt_block_size / - KBASE_HWCNT_VALUE_HW_BYTES; + csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set, &csf_info->metadata); } -void kbase_hwcnt_backend_csf_metadata_term( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface) { struct kbase_hwcnt_backend_csf_info *csf_info; @@ -2009,10 +1847,9 @@ void kbase_hwcnt_backend_csf_metadata_term( } } -int kbase_hwcnt_backend_csf_create( - struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, - struct kbase_hwcnt_watchdog_interface *watchdog_if, - struct kbase_hwcnt_backend_interface *iface) +int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + struct kbase_hwcnt_backend_interface *iface) { int errcode; const struct kbase_hwcnt_backend_csf_info *info = NULL; @@ -2024,8 +1861,7 @@ int kbase_hwcnt_backend_csf_create( if (!is_power_of_2(ring_buf_cnt)) return -EINVAL; - errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, - watchdog_if, &info); + errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info); if (errcode) return errcode; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h similarity index 77% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index e0cafbe70660..9c5a5c996ebd 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,9 @@ #ifndef _KBASE_HWCNT_BACKEND_CSF_H_ #define _KBASE_HWCNT_BACKEND_CSF_H_ -#include "mali_kbase_hwcnt_backend.h" -#include "mali_kbase_hwcnt_backend_csf_if.h" -#include "mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend @@ -47,10 +47,9 @@ * * Return: 0 on success, else error code. */ -int kbase_hwcnt_backend_csf_create( - struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, - struct kbase_hwcnt_watchdog_interface *watchdog_if, - struct kbase_hwcnt_backend_interface *iface); +int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF @@ -58,16 +57,14 @@ int kbase_hwcnt_backend_csf_create( * @iface: Non-NULL pointer to backend interface structure * Return: 0 on success, else error code. */ -int kbase_hwcnt_backend_csf_metadata_init( - struct kbase_hwcnt_backend_interface *iface); +int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF * hardware counter backend. * @iface: Non-NULL pointer to backend interface structure. */ -void kbase_hwcnt_backend_csf_metadata_term( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend @@ -77,8 +74,7 @@ void kbase_hwcnt_backend_csf_metadata_term( * Can be safely called on an all-zeroed interface, or on an already destroyed * interface. */ -void kbase_hwcnt_backend_csf_destroy( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive @@ -86,8 +82,7 @@ void kbase_hwcnt_backend_csf_destroy( * has been entered. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_protm_entered( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive @@ -95,8 +90,7 @@ void kbase_hwcnt_backend_csf_protm_entered( * been exited. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_protm_exited( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function @@ -108,8 +102,7 @@ void kbase_hwcnt_backend_csf_protm_exited( * with reset, or that may put HWC logic in state that could result in hang. For * example, on bus error, or when FW becomes unresponsive. 
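[Editorial aside, not part of the patch] The declarations above imply a create / metadata_init / metadata_term / destroy lifecycle for the CSF backend. A minimal sketch of how a caller might sequence these; example_csf_backend_setup, my_csf_if and my_watchdog_if are illustrative names, and 128 is just an example ring-buffer count (the count must be a power of two, as checked in kbase_hwcnt_backend_csf_create()):

static int example_csf_backend_setup(struct kbase_hwcnt_backend_csf_if *my_csf_if,
                                     struct kbase_hwcnt_watchdog_interface *my_watchdog_if,
                                     struct kbase_hwcnt_backend_interface *iface)
{
        int err;

        /* Create the backend interface first; 128 is an illustrative
         * ring-buffer count and must be a power of two.
         */
        err = kbase_hwcnt_backend_csf_create(my_csf_if, 128, my_watchdog_if, iface);
        if (err)
                return err;

        /* Metadata is derived from prfcnt_info queried through csf_if, so it
         * is initialised as a separate step after creation.
         */
        err = kbase_hwcnt_backend_csf_metadata_init(iface);
        if (err) {
                /* Tear the partially initialised backend back down on failure. */
                kbase_hwcnt_backend_csf_destroy(iface);
                return err;
        }

        return 0;
}

Teardown would be the reverse: kbase_hwcnt_backend_csf_metadata_term() followed by kbase_hwcnt_backend_csf_destroy().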
*/ -void kbase_hwcnt_backend_csf_on_unrecoverable_error( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be @@ -119,16 +112,14 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error( * were in it. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_before_reset( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample * complete interrupt handler. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_prfcnt_sample( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter @@ -136,31 +127,27 @@ void kbase_hwcnt_backend_csf_on_prfcnt_sample( * interrupt handler. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_prfcnt_threshold( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer * overflow interrupt handler. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_prfcnt_overflow( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled * interrupt handler. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_prfcnt_enable( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter * disabled interrupt handler. * @iface: Non-NULL pointer to HWC backend interface. */ -void kbase_hwcnt_backend_csf_on_prfcnt_disable( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h similarity index 85% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 24b26c2bd6f4..382a3adaa127 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -85,8 +85,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { * held. * @ctx: Non-NULL pointer to a CSF context. */ -typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void +kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. 
@@ -95,9 +95,8 @@ typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn( * @flags: Pointer to the memory location that would store the previous * interrupt state. */ -typedef void kbase_hwcnt_backend_csf_if_lock_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long *flags); +typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags); /** * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. @@ -106,9 +105,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn( * @flags: Previously stored interrupt state when Scheduler interrupt * spinlock was acquired. */ -typedef void kbase_hwcnt_backend_csf_if_unlock_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long flags); +typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags); /** * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance @@ -137,10 +135,10 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( * * Return: 0 on success, else error code. */ -typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, - void **cpu_dump_base, - struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); +typedef int +kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 buf_count, void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers @@ -159,10 +157,10 @@ typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn( * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU * are correctly observed. */ -typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - u32 buf_index_first, u32 buf_index_last, bool for_cpu); +typedef void +kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for @@ -171,9 +169,9 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn( * @ctx: Non-NULL pointer to a CSF interface context. * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. */ -typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); +typedef void +kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current @@ -183,8 +181,7 @@ typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn( * * Return: CSF interface timestamp in nanoseconds. */ -typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware @@ -195,10 +192,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn( * * Requires lock to be taken before calling. 
*/ -typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable); +typedef void +kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable); /** * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter @@ -207,8 +204,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn( * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. @@ -217,8 +213,7 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn( * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_dump_request_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and @@ -231,9 +226,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn( * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, - u32 *insert_index); +typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 *extract_index, u32 *insert_index); /** * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract @@ -245,8 +239,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn( * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); +typedef void +kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 extract_index); /** * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current @@ -260,9 +255,9 @@ typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn( * * Requires lock to be taken before calling. 
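[Editorial aside, not part of the patch] Several of the hooks above are annotated "Requires lock to be taken before calling"; the expectation is that callers bracket them with the interface's own lock/unlock hooks. A hypothetical caller using only members visible in this patch (the function name example_locked_dump_request is illustrative):

static void example_locked_dump_request(struct kbase_hwcnt_backend_csf_if *csf_if)
{
        unsigned long flags;

        /* The lock hook stores the previous interrupt state into flags... */
        csf_if->lock(csf_if->ctx, &flags);

        /* ...so that hooks documented as requiring the lock can be called... */
        csf_if->dump_request(csf_if->ctx);

        /* ...and the unlock hook restores the saved state afterwards. */
        csf_if->unlock(csf_if->ctx, flags);
}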
*/ -typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, - u64 clk_enable_map); +typedef void +kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u64 *cycle_counts, u64 clk_enable_map); /** * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c similarity index 73% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index ab33a0b26486..9985752a3748 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -26,12 +26,12 @@ #include #include #include -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include "csf/mali_kbase_csf_firmware.h" -#include "mali_kbase_hwcnt_backend_csf_if_fw.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" #include "mali_kbase_hwaccess_time.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" @@ -42,9 +42,6 @@ #include #endif /* CONFIG_MALI_BIFROST_NO_MALI */ -/** The number of nanoseconds in a second. */ -#define NSECS_IN_SEC 1000000000ull /* ns */ - /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -90,8 +87,8 @@ struct kbase_hwcnt_backend_csf_if_fw_ctx { struct kbase_ccswe ccswe_shader_cores; }; -static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( - struct kbase_hwcnt_backend_csf_if_ctx *ctx) +static void +kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -104,9 +101,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( kbase_csf_scheduler_spin_lock_assert_held(kbdev); } -static void -kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long *flags) +static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -119,8 +115,8 @@ kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, kbase_csf_scheduler_spin_lock(kbdev, flags); } -static void kbasep_hwcnt_backend_csf_if_fw_unlock( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) +static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -141,22 +137,19 @@ static void kbasep_hwcnt_backend_csf_if_fw_unlock( * @clk_index: Clock index * @clk_rate_hz: Clock frequency(hz) */ -static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change( - struct kbase_clk_rate_listener *rate_listener, u32 clk_index, - u32 clk_rate_hz) +static void +kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener, + u32 clk_index, u32 clk_rate_hz) { - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = - container_of(rate_listener, - struct kbase_hwcnt_backend_csf_if_fw_ctx, - rate_listener); + struct kbase_hwcnt_backend_csf_if_fw_ctx 
*fw_ctx = container_of( + rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener); u64 timestamp_ns; if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) return; timestamp_ns = ktime_get_raw_ns(); - kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, - clk_rate_hz); + kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz); } /** @@ -165,17 +158,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change( * @fw_ctx: Non-NULL pointer to CSF firmware interface context. * @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters. */ -static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map) +static void +kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, + u64 clk_enable_map) { struct kbase_device *kbdev = fw_ctx->kbdev; - if (kbase_hwcnt_clk_enable_map_enabled( - clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { /* software estimation for non-top clock domains */ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; - const struct kbase_clk_data *clk_data = - rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; u32 cur_freq; unsigned long flags; u64 timestamp_ns; @@ -186,11 +178,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( cur_freq = (u32)clk_data->clock_val; kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); - kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, - timestamp_ns, cur_freq); + kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq); - kbase_clk_rate_trace_manager_subscribe_no_lock( - rtm, &fw_ctx->rate_listener); + kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener); spin_unlock_irqrestore(&rtm->lock, flags); } @@ -203,17 +193,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( * * @fw_ctx: Non-NULL pointer to CSF firmware interface context. 
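[Editorial aside, not part of the patch] As the comments above note, cycle counts for non-top clock domains (the shader cores) are software-estimated rather than read from the GPU: a rate listener records frequency changes and kbase_ccswe derives cycles from that history. A simplified, standalone illustration of the idea follows; it is not the driver's kbase_ccswe implementation, and estimate_cycles / freq_segment are made-up names. Overflow handling and partial segments are ignored for brevity.

#include <stddef.h>
#include <stdint.h>

struct freq_segment {
        uint64_t duration_ns; /* time spent at this frequency */
        uint32_t freq_hz;     /* frequency during that time   */
};

/* Accumulate cycles = sum(frequency * time) over the recorded segments. */
static uint64_t estimate_cycles(const struct freq_segment *segs, size_t n)
{
        uint64_t cycles = 0;
        size_t i;

        for (i = 0; i < n; i++)
                cycles += ((uint64_t)segs[i].freq_hz * segs[i].duration_ns) / 1000000000ull;

        return cycles;
}

The driver's version appears to do the equivalent incrementally: kbase_ccswe_freq_change() records each (timestamp, frequency) transition and kbase_ccswe_cycle_at() evaluates the running total at a given timestamp.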
*/ -static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +static void +kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) { struct kbase_device *kbdev = fw_ctx->kbdev; struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; u64 clk_enable_map = fw_ctx->clk_enable_map; - if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, - KBASE_CLOCK_DOMAIN_SHADER_CORES)) - kbase_clk_rate_trace_manager_unsubscribe( - rtm, &fw_ctx->rate_listener); + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) + kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); } static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( @@ -244,8 +232,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( u32 prfcnt_size; u32 prfcnt_hw_size; u32 prfcnt_fw_size; - u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_HW_BYTES; + u32 prfcnt_block_size = + KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; WARN_ON(!ctx); WARN_ON(!prfcnt_info); @@ -262,10 +250,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( */ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= GPU_ID2_PRODUCT_TTUX) { - prfcnt_block_size = - PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read( - kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) - << 8; + prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) + << 8; } *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ @@ -280,17 +267,14 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( }; /* Block size must be multiple of counter size. */ - WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != - 0); + WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); /* Total size must be multiple of block size. 
*/ - WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != - 0); + WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0); #endif } static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, - void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) { struct kbase_device *kbdev; @@ -342,9 +326,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( goto page_list_alloc_error; /* Get physical page for the buffer */ - ret = kbase_mem_pool_alloc_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, - phys, false); + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false); if (ret != num_pages) goto phys_mem_pool_alloc_error; @@ -360,9 +343,8 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); /* Update MMU table */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, - gpu_va_base >> PAGE_SHIFT, phys, num_pages, - flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, + num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); if (ret) goto mmu_insert_failed; @@ -381,17 +363,15 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( fw_ring_buf->as_nr = MCU_AS_NR; *cpu_dump_base = fw_ring_buf->cpu_dump_base; - *out_ring_buf = - (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; + *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; return 0; mmu_insert_failed: vunmap(cpu_addr); vmap_error: - kbase_mem_pool_free_pages( - &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, - phys, false, false); + kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, + false, false); phys_mem_pool_alloc_error: kfree(page_list); page_list_alloc_error: @@ -401,10 +381,10 @@ phys_alloc_error: return -ENOMEM; } -static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - u32 buf_index_first, u32 buf_index_last, bool for_cpu) +static void +kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu) { struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; @@ -435,8 +415,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( * inclusive at both ends so full flushes are not 0 -> 0. */ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); - ring_buf_index_last = - (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); + ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); /* The start address is the offset of the first buffer. 
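[Editorial aside, not part of the patch] The index masking above relies on buf_count being a power of two. A quick worked example with illustrative numbers:

/*
 * Example: buf_count = 4, syncing buffers [2, 6), i.e. a full ring:
 *
 *   ring_buf_index_first = 2 & (4 - 1)       = 2
 *   ring_buf_index_last  = (6 - 1) & (4 - 1) = 1   (inclusive)
 *
 * Because first > last the range wraps, which is presumably why the
 * function syncs pages in two passes rather than as one contiguous
 * span; keeping both ends inclusive also stops a full flush from
 * degenerating into an empty 0 -> 0 range, as the comment above says.
 */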
*/ start_address = fw_ctx->buf_bytes * ring_buf_index_first; @@ -453,15 +432,11 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( struct page *pg = as_page(fw_ring_buf->phys[i]); if (for_cpu) { - kbase_sync_single_for_cpu(fw_ctx->kbdev, - kbase_dma_addr(pg), - PAGE_SIZE, - DMA_BIDIRECTIONAL); + kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), + PAGE_SIZE, DMA_BIDIRECTIONAL); } else { - kbase_sync_single_for_device(fw_ctx->kbdev, - kbase_dma_addr(pg), - PAGE_SIZE, - DMA_BIDIRECTIONAL); + kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), + PAGE_SIZE, DMA_BIDIRECTIONAL); } } @@ -473,28 +448,24 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( struct page *pg = as_page(fw_ring_buf->phys[i]); if (for_cpu) { - kbase_sync_single_for_cpu(fw_ctx->kbdev, - kbase_dma_addr(pg), PAGE_SIZE, + kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, DMA_BIDIRECTIONAL); } else { - kbase_sync_single_for_device(fw_ctx->kbdev, - kbase_dma_addr(pg), - PAGE_SIZE, + kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, DMA_BIDIRECTIONAL); } } } -static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns( - struct kbase_hwcnt_backend_csf_if_ctx *ctx) +static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx) { CSTD_UNUSED(ctx); return ktime_get_raw_ns(); } -static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) +static void +kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) { struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; @@ -513,10 +484,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( vunmap(fw_ring_buf->cpu_dump_base); - kbase_mem_pool_free_pages( - &fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - fw_ring_buf->num_pages, fw_ring_buf->phys, false, - false); + kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); kfree(fw_ring_buf->phys); @@ -524,10 +493,10 @@ static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( } } -static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable) +static void +kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable) { u32 prfcnt_config; struct kbase_device *kbdev; @@ -550,8 +519,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set); /* Configure the ring buffer base address */ - kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, - fw_ring_buf->as_nr); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, fw_ring_buf->gpu_dump_base & U32_MAX); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, @@ -561,38 +529,29 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); /* Configure the enable bitmap */ - 
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, - enable->fe_bm); - kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, - enable->shader_bm); - kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, - enable->mmu_l2_bm); - kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, - enable->tiler_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); /* Configure the HWC set and buffer size */ - kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, - prfcnt_config); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); kbdev->csf.hwcnt.enable_pending = true; /* Unmask the interrupts */ - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, - GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); /* Enable the HWC */ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, @@ -600,15 +559,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( GLB_REQ_PRFCNT_ENABLE_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); - prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, - GLB_PRFCNT_CONFIG); + prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); - kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, - enable->clk_enable_map); + kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); } -static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( - struct kbase_hwcnt_backend_csf_if_ctx *ctx) +static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx) { struct kbase_device *kbdev; struct kbase_csf_global_iface *global_iface; @@ -623,20 +579,16 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( /* Disable the HWC */ kbdev->csf.hwcnt.enable_pending = true; - kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, - GLB_REQ_PRFCNT_ENABLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); /* mask the interrupts */ - kbase_csf_firmware_global_input_mask( - global_iface, 
GLB_ACK_IRQ_MASK, 0, - GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, 0, - GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); - kbase_csf_firmware_global_input_mask( - global_iface, GLB_ACK_IRQ_MASK, 0, - GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); /* In case we have a previous request in flight when the disable * happens. @@ -646,8 +598,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); } -static void kbasep_hwcnt_backend_csf_if_fw_dump_request( - struct kbase_hwcnt_backend_csf_if_ctx *ctx) +static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx) { u32 glb_req; struct kbase_device *kbdev; @@ -670,9 +621,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_dump_request( kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); } -static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, - u32 *insert_index) +static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 *extract_index, u32 *insert_index) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; @@ -682,14 +632,15 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( WARN_ON(!insert_index); kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); - *extract_index = kbase_csf_firmware_global_input_read( - &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); - *insert_index = kbase_csf_firmware_global_output( - &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); + *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface, + GLB_PRFCNT_EXTRACT); + *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface, + GLB_PRFCNT_INSERT); } -static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx) +static void +kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 extract_idx) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; @@ -700,13 +651,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( /* Set the raw extract index to release the buffer back to the ring * buffer. */ - kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, - GLB_PRFCNT_EXTRACT, extract_idx); + kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT, + extract_idx); } -static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( - struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, - u64 clk_enable_map) +static void +kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u64 *cycle_counts, u64 clk_enable_map) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; @@ -723,12 +674,12 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( if (clk == KBASE_CLOCK_DOMAIN_TOP) { /* Read cycle count for top clock domain. 
*/ - kbase_backend_get_gpu_time_norequest( - fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL); + kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk], + NULL, NULL); } else { /* Estimate cycle count for non-top clock domain. */ - cycle_counts[clk] = kbase_ccswe_cycle_at( - &fw_ctx->ccswe_shader_cores, timestamp_ns); + cycle_counts[clk] = + kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns); } } } @@ -738,8 +689,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( * * @fw_ctx: Pointer to context to destroy. */ -static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( - struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +static void +kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) { if (!fw_ctx) return; @@ -754,9 +705,9 @@ static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( * @out_ctx: Non-NULL pointer to where info is stored on success. * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_backend_csf_if_fw_ctx_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) +static int +kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) { u8 clk; int errcode = -ENOMEM; @@ -780,8 +731,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ctx_create( ctx->clk_enable_map = 0; kbase_ccswe_init(&ctx->ccswe_shader_cores); - ctx->rate_listener.notify = - kbasep_hwcnt_backend_csf_if_fw_on_freq_change; + ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change; *out_ctx = ctx; @@ -791,8 +741,7 @@ error: return errcode; } -void kbase_hwcnt_backend_csf_if_fw_destroy( - struct kbase_hwcnt_backend_csf_if *if_fw) +void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw) { if (!if_fw) return; @@ -802,8 +751,8 @@ void kbase_hwcnt_backend_csf_if_fw_destroy( memset(if_fw, 0, sizeof(*if_fw)); } -int kbase_hwcnt_backend_csf_if_fw_create( - struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw) +int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if *if_fw) { int errcode; struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; @@ -816,8 +765,7 @@ int kbase_hwcnt_backend_csf_if_fw_create( return errcode; if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; - if_fw->assert_lock_held = - kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; + if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; @@ -828,11 +776,9 @@ int kbase_hwcnt_backend_csf_if_fw_create( if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; - if_fw->get_gpu_cycle_count = - kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; + if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; - if_fw->set_extract_index = - kbasep_hwcnt_backend_csf_if_fw_set_extract_index; + if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index; return 0; } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h similarity index 
82% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h index b69668b2e822..71d1506694f4 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_csf_if_fw.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,7 @@ #ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ #define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ -#include "mali_kbase_hwcnt_backend_csf_if.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" /** * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface @@ -36,15 +36,14 @@ * creation success. * Return: 0 on success, else error code. */ -int kbase_hwcnt_backend_csf_if_fw_create( - struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw); +int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if *if_fw); /** * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of * hardware counter backend. * @if_fw: Pointer to a CSF interface to destroy. */ -void kbase_hwcnt_backend_csf_if_fw_destroy( - struct kbase_hwcnt_backend_csf_if *if_fw); +void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw); #endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c similarity index 73% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index c995a1923583..9d9889a0e426 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -19,9 +19,9 @@ * */ -#include "mali_kbase_hwcnt_backend_jm.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include "mali_kbase.h" #include "backend/gpu/mali_kbase_pm_ca.h" #include "mali_kbase_hwaccess_instr.h" @@ -136,9 +136,8 @@ struct kbase_hwcnt_backend_jm { * * Return: 0 on success, else error code. 
*/ -static int -kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info) +static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) { size_t clk; @@ -153,13 +152,11 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, { const struct base_gpu_props *props = &kbdev->gpu_props.props; const size_t l2_count = props->l2_props.num_l2_slices; - const size_t core_mask = - props->coherency_info.group[0].core_mask; + const size_t core_mask = props->coherency_info.group[0].core_mask; info->l2_count = l2_count; info->core_mask = core_mask; - info->prfcnt_values_per_block = - KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; } #endif /* CONFIG_MALI_BIFROST_NO_MALI */ @@ -173,9 +170,8 @@ kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, return 0; } -static void kbasep_hwcnt_backend_jm_init_layout( - const struct kbase_hwcnt_gpu_info *gpu_info, - struct kbase_hwcnt_jm_physical_layout *phys_layout) +static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info, + struct kbase_hwcnt_jm_physical_layout *phys_layout) { u8 shader_core_cnt; @@ -189,32 +185,29 @@ static void kbasep_hwcnt_backend_jm_init_layout( .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, .mmu_l2_cnt = gpu_info->l2_count, .shader_cnt = shader_core_cnt, - .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + - KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + gpu_info->l2_count + shader_core_cnt, .shader_avail_mask = gpu_info->core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = gpu_info->prfcnt_values_per_block, - .counters_per_block = gpu_info->prfcnt_values_per_block - - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .counters_per_block = + gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, }; } -static void kbasep_hwcnt_backend_jm_dump_sample( - const struct kbase_hwcnt_backend_jm *const backend_jm) +static void +kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm) { size_t block_idx; const u32 *new_sample_buf = backend_jm->cpu_dump_va; const u32 *new_block = new_sample_buf; u64 *dst_buf = backend_jm->to_user_buf; u64 *dst_block = dst_buf; - const size_t values_per_block = - backend_jm->phys_layout.values_per_block; + const size_t values_per_block = backend_jm->phys_layout.values_per_block; const size_t dump_bytes = backend_jm->info->dump_bytes; - for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; - block_idx++) { + for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) { size_t ctr_idx; for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++) @@ -224,10 +217,8 @@ static void kbasep_hwcnt_backend_jm_dump_sample( dst_block += values_per_block; } - WARN_ON(new_block != - new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); - WARN_ON(dst_block != - dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); } /** @@ -237,21 +228,18 @@ static void kbasep_hwcnt_backend_jm_dump_sample( * @clk_index: Clock index * @clk_rate_hz: Clock frequency(hz) */ -static void kbasep_hwcnt_backend_jm_on_freq_change( - struct 
kbase_clk_rate_listener *rate_listener, - u32 clk_index, - u32 clk_rate_hz) +static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener, + u32 clk_index, u32 clk_rate_hz) { - struct kbase_hwcnt_backend_jm *backend_jm = container_of( - rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); + struct kbase_hwcnt_backend_jm *backend_jm = + container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); u64 timestamp_ns; if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) return; timestamp_ns = ktime_get_raw_ns(); - kbase_ccswe_freq_change( - &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); + kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); } /** @@ -261,53 +249,42 @@ static void kbasep_hwcnt_backend_jm_on_freq_change( * @enable_map: Non-NULL pointer to enable map specifying enabled counters. * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. */ -static void kbasep_hwcnt_backend_jm_cc_enable( - struct kbase_hwcnt_backend_jm *backend_jm, - const struct kbase_hwcnt_enable_map *enable_map, - u64 timestamp_ns) +static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm, + const struct kbase_hwcnt_enable_map *enable_map, + u64 timestamp_ns) { struct kbase_device *kbdev = backend_jm->kctx->kbdev; u64 clk_enable_map = enable_map->clk_enable_map; u64 cycle_count; - if (kbase_hwcnt_clk_enable_map_enabled( - clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { /* turn on the cycle counter */ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); /* Read cycle count for top clock domain. */ - kbase_backend_get_gpu_time_norequest( - kbdev, &cycle_count, NULL, NULL); + kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL); - backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = - cycle_count; + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count; } - if (kbase_hwcnt_clk_enable_map_enabled( - clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { /* software estimation for non-top clock domains */ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; - const struct kbase_clk_data *clk_data = - rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; u32 cur_freq; unsigned long flags; spin_lock_irqsave(&rtm->lock, flags); - cur_freq = (u32) clk_data->clock_val; + cur_freq = (u32)clk_data->clock_val; kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); - kbase_ccswe_freq_change( - &backend_jm->ccswe_shader_cores, - timestamp_ns, - cur_freq); + kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq); - kbase_clk_rate_trace_manager_subscribe_no_lock( - rtm, &backend_jm->rate_listener); + kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener); spin_unlock_irqrestore(&rtm->lock, flags); /* ccswe was reset. The estimated cycle is zero. */ - backend_jm->prev_cycle_count[ - KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; } /* Keep clk_enable_map for dump_request. */ @@ -319,28 +296,22 @@ static void kbasep_hwcnt_backend_jm_cc_enable( * * @backend_jm: Non-NULL pointer to backend. 
*/ -static void kbasep_hwcnt_backend_jm_cc_disable( - struct kbase_hwcnt_backend_jm *backend_jm) +static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm) { struct kbase_device *kbdev = backend_jm->kctx->kbdev; struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; u64 clk_enable_map = backend_jm->clk_enable_map; - if (kbase_hwcnt_clk_enable_map_enabled( - clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { /* turn off the cycle counter */ kbase_pm_release_gpu_cycle_counter(kbdev); } - if (kbase_hwcnt_clk_enable_map_enabled( - clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { - - kbase_clk_rate_trace_manager_unsubscribe( - rtm, &backend_jm->rate_listener); + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener); } } - /** * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with * current config information. @@ -356,38 +327,33 @@ static void kbasep_hwcnt_backend_jm_cc_disable( * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_gpu_update_curr_config( - struct kbase_device *kbdev, - struct kbase_hwcnt_curr_config *curr_config) +static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev, + struct kbase_hwcnt_curr_config *curr_config) { if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) return -EINVAL; lockdep_assert_held(&kbdev->hwaccess_lock); - curr_config->num_l2_slices = - kbdev->gpu_props.curr_config.l2_slices; - curr_config->shader_present = - kbdev->gpu_props.curr_config.shader_present; + curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices; + curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present; return 0; } /* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -static u64 kbasep_hwcnt_backend_jm_timestamp_ns( - struct kbase_hwcnt_backend *backend) +static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend) { (void)backend; return ktime_get_raw_ns(); } /* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_jm_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) +static int +kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) { int errcode; - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; struct kbase_context *kctx; struct kbase_device *kbdev; struct kbase_hwcnt_physical_enable_map phys_enable_map; @@ -406,8 +372,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock( kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); - kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, - backend_jm->info->counter_set); + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set); enable.fe_bm = phys_enable_map.fe_bm; enable.shader_bm = phys_enable_map.shader_bm; @@ -425,8 +390,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock( timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); /* Update the current configuration information. 
*/ - errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, - &backend_jm->curr_config); + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); if (errcode) goto error; @@ -446,14 +410,12 @@ error: } /* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -static int kbasep_hwcnt_backend_jm_dump_enable( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) +static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) { unsigned long flags; int errcode; - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; struct kbase_device *kbdev; if (!backend_jm) @@ -463,8 +425,7 @@ static int kbasep_hwcnt_backend_jm_dump_enable( spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( - backend, enable_map); + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -472,12 +433,10 @@ static int kbasep_hwcnt_backend_jm_dump_enable( } /* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_jm_dump_disable( - struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) { int errcode; - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; if (WARN_ON(!backend_jm) || !backend_jm->enabled) return; @@ -491,11 +450,9 @@ static void kbasep_hwcnt_backend_jm_dump_disable( } /* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -static int kbasep_hwcnt_backend_jm_dump_clear( - struct kbase_hwcnt_backend *backend) +static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend) { - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; if (!backend_jm || !backend_jm->enabled) return -EINVAL; @@ -504,12 +461,10 @@ static int kbasep_hwcnt_backend_jm_dump_clear( } /* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ -static int kbasep_hwcnt_backend_jm_dump_request( - struct kbase_hwcnt_backend *backend, - u64 *dump_time_ns) +static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) { - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; struct kbase_device *kbdev; const struct kbase_hwcnt_metadata *metadata; u64 current_cycle_count; @@ -528,28 +483,25 @@ static int kbasep_hwcnt_backend_jm_dump_request( *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); - kbase_hwcnt_metadata_for_each_clock(metadata, clk) { - if (!kbase_hwcnt_clk_enable_map_enabled( - backend_jm->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk)) continue; if (clk == KBASE_CLOCK_DOMAIN_TOP) { /* Read cycle count for top clock domain. 
*/ - kbase_backend_get_gpu_time_norequest( - kbdev, &current_cycle_count, - NULL, NULL); + kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count, + NULL, NULL); } else { /* * Estimate cycle count for non-top clock * domain. */ current_cycle_count = kbase_ccswe_cycle_at( - &backend_jm->ccswe_shader_cores, - *dump_time_ns); + &backend_jm->ccswe_shader_cores, *dump_time_ns); } backend_jm->cycle_count_elapsed[clk] = - current_cycle_count - - backend_jm->prev_cycle_count[clk]; /* * Keep the current cycle count for later calculation. @@ -563,11 +515,9 @@ static int kbasep_hwcnt_backend_jm_dump_request( } /* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -static int kbasep_hwcnt_backend_jm_dump_wait( - struct kbase_hwcnt_backend *backend) +static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) { - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; if (!backend_jm || !backend_jm->enabled) return -EINVAL; @@ -576,14 +526,12 @@ static int kbasep_hwcnt_backend_jm_dump_wait( } /* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ -static int kbasep_hwcnt_backend_jm_dump_get( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) +static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) { - struct kbase_hwcnt_backend_jm *backend_jm = - (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; size_t clk; #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) struct kbase_device *kbdev; @@ -597,16 +545,15 @@ static int kbasep_hwcnt_backend_jm_dump_get( return -EINVAL; /* Invalidate the kernel buffer before reading from it. */ - kbase_sync_mem_regions( - backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); /* Dump sample to the internal 64-bit user buffer. */ kbasep_hwcnt_backend_jm_dump_sample(backend_jm); /* Extract elapsed cycle count for each clock domain if enabled. */ - kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { - if (!kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) continue; /* Reset the counter to zero if accumulation is off. */ @@ -621,17 +568,16 @@ static int kbasep_hwcnt_backend_jm_dump_get( spin_lock_irqsave(&kbdev->hwaccess_lock, flags); /* Update the current configuration information.
*/ - errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, - &backend_jm->curr_config); + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (errcode) return errcode; #endif /* CONFIG_MALI_BIFROST_NO_MALI */ - return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, - dst_enable_map, backend_jm->pm_core_mask, - &backend_jm->curr_config, accumulate); + return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, + backend_jm->pm_core_mask, &backend_jm->curr_config, + accumulate); } /** @@ -643,10 +589,8 @@ static int kbasep_hwcnt_backend_jm_dump_get( * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_backend_jm_dump_alloc( - const struct kbase_hwcnt_backend_jm_info *info, - struct kbase_context *kctx, - u64 *gpu_dump_va) +static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, u64 *gpu_dump_va) { struct kbase_va_region *reg; u64 flags; @@ -661,16 +605,12 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( WARN_ON(!kctx); WARN_ON(!gpu_dump_va); - flags = BASE_MEM_PROT_CPU_RD | - BASE_MEM_PROT_GPU_WR | - BASEP_MEM_PERMANENT_KERNEL_MAPPING | - BASE_MEM_CACHED_CPU | - BASE_MEM_UNCACHED_GPU; + flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU; nr_pages = PFN_UP(info->dump_bytes); - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, - mmu_sync_info); + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info); if (!reg) return -ENOMEM; @@ -683,9 +623,7 @@ static int kbasep_hwcnt_backend_jm_dump_alloc( * @kctx: Non-NULL pointer to kbase context. * @gpu_dump_va: GPU dump buffer virtual address. */ -static void kbasep_hwcnt_backend_jm_dump_free( - struct kbase_context *kctx, - u64 gpu_dump_va) +static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va) { WARN_ON(!kctx); if (gpu_dump_va) @@ -698,8 +636,7 @@ static void kbasep_hwcnt_backend_jm_dump_free( * * Can be safely called on a backend in any state of partial construction. */ -static void kbasep_hwcnt_backend_jm_destroy( - struct kbase_hwcnt_backend_jm *backend) +static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend) { if (!backend) return; @@ -712,8 +649,7 @@ static void kbasep_hwcnt_backend_jm_destroy( kbase_phy_alloc_mapping_put(kctx, backend->vmap); if (backend->gpu_dump_va) - kbasep_hwcnt_backend_jm_dump_free( - kctx, backend->gpu_dump_va); + kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va); kbasep_js_release_privileged_ctx(kbdev, kctx); kbase_destroy_context(kctx); @@ -731,9 +667,8 @@ static void kbasep_hwcnt_backend_jm_destroy( * * Return: 0 on success, else error code. 
*/ -static int kbasep_hwcnt_backend_jm_create( - const struct kbase_hwcnt_backend_jm_info *info, - struct kbase_hwcnt_backend_jm **out_backend) +static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) { int errcode; struct kbase_device *kbdev; @@ -749,28 +684,25 @@ static int kbasep_hwcnt_backend_jm_create( goto alloc_error; backend->info = info; - kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, - &backend->phys_layout); + kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout); backend->kctx = kbase_create_context(kbdev, true, - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); if (!backend->kctx) goto alloc_error; kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); - errcode = kbasep_hwcnt_backend_jm_dump_alloc( - info, backend->kctx, &backend->gpu_dump_va); + errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va); if (errcode) goto error; - backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, - backend->gpu_dump_va, &backend->vmap); + backend->cpu_dump_va = + kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap); if (!backend->cpu_dump_va || !backend->vmap) goto alloc_error; - backend->to_user_buf = - kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL); + backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL); if (!backend->to_user_buf) goto alloc_error; @@ -798,9 +730,8 @@ kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) } /* JM backend implementation of kbase_hwcnt_backend_init_fn */ -static int kbasep_hwcnt_backend_jm_init( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend) +static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) { int errcode; struct kbase_hwcnt_backend_jm *backend = NULL; @@ -808,8 +739,8 @@ static int kbasep_hwcnt_backend_jm_init( if (!info || !out_backend) return -EINVAL; - errcode = kbasep_hwcnt_backend_jm_create( - (const struct kbase_hwcnt_backend_jm_info *) info, &backend); + errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info, + &backend); if (errcode) return errcode; @@ -825,8 +756,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) return; kbasep_hwcnt_backend_jm_dump_disable(backend); - kbasep_hwcnt_backend_jm_destroy( - (struct kbase_hwcnt_backend_jm *)backend); + kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); } /** @@ -835,8 +765,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) * * Can be safely called on a backend info in any state of partial construction. */ -static void kbasep_hwcnt_backend_jm_info_destroy( - const struct kbase_hwcnt_backend_jm_info *info) +static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info) { if (!info) return; @@ -852,9 +781,8 @@ static void kbasep_hwcnt_backend_jm_info_destroy( * * Return: 0 on success, else error code. 
*/ -static int kbasep_hwcnt_backend_jm_info_create( - struct kbase_device *kbdev, - const struct kbase_hwcnt_backend_jm_info **out_info) +static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_jm_info **out_info) { int errcode = -ENOMEM; struct kbase_hwcnt_backend_jm_info *info = NULL; @@ -877,15 +805,12 @@ static int kbasep_hwcnt_backend_jm_info_create( info->counter_set = KBASE_HWCNT_SET_PRIMARY; #endif - errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, - &info->hwcnt_gpu_info); + errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info); if (errcode) goto error; - errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, - info->counter_set, - &info->metadata, - &info->dump_bytes); + errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set, + &info->metadata, &info->dump_bytes); if (errcode) goto error; @@ -897,9 +822,8 @@ error: return errcode; } -int kbase_hwcnt_backend_jm_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface) +int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) { int errcode; const struct kbase_hwcnt_backend_jm_info *info = NULL; @@ -928,8 +852,7 @@ int kbase_hwcnt_backend_jm_create( return 0; } -void kbase_hwcnt_backend_jm_destroy( - struct kbase_hwcnt_backend_interface *iface) +void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface) { if (!iface) return; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h similarity index 84% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h index 1bc39066b414..4a6293c25473 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,7 @@ #ifndef _KBASE_HWCNT_BACKEND_JM_H_ #define _KBASE_HWCNT_BACKEND_JM_H_ -#include "mali_kbase_hwcnt_backend.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" struct kbase_device; @@ -42,9 +42,8 @@ struct kbase_device; * * Return: 0 on success, else error code. */ -int kbase_hwcnt_backend_jm_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface); +int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); /** * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend @@ -54,7 +53,6 @@ int kbase_hwcnt_backend_jm_create( * Can be safely called on an all-zeroed interface, or on an already destroyed * interface. 
*/ -void kbase_hwcnt_backend_jm_destroy( - struct kbase_hwcnt_backend_interface *iface); +void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface); #endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c similarity index 97% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.c rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c index 8bb7ccb49a64..564700b2d978 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c @@ -21,11 +21,12 @@ #include -#include -#include +#include +#include -#include -#include +#include +#include +#include #if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) /* Backend watch dog timer interval in milliseconds: 18 seconds. */ @@ -118,8 +119,7 @@ enum backend_watchdog_state { */ enum wd_init_state { HWCNT_JM_WD_INIT_START, - HWCNT_JM_WD_INIT_ALLOC = HWCNT_JM_WD_INIT_START, - HWCNT_JM_WD_INIT_BACKEND, + HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START, HWCNT_JM_WD_INIT_ENABLE_MAP, HWCNT_JM_WD_INIT_DUMP_BUFFER, HWCNT_JM_WD_INIT_END @@ -296,16 +296,10 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc if (!wd_backend) return; - /* disable timer thread to avoid concurrent access to shared resources */ - wd_backend->info->dump_watchdog_iface->disable( - wd_backend->info->dump_watchdog_iface->timer); + WARN_ON(state > HWCNT_JM_WD_INIT_END); - /*will exit the loop when state reaches HWCNT_JM_WD_INIT_START*/ while (state-- > HWCNT_JM_WD_INIT_START) { switch (state) { - case HWCNT_JM_WD_INIT_ALLOC: - kfree(wd_backend); - break; case HWCNT_JM_WD_INIT_BACKEND: wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend); break; @@ -319,6 +313,8 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc break; } } + + kfree(wd_backend); } /* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn @@ -326,11 +322,17 @@ kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watc */ static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend) { + struct kbase_hwcnt_backend_jm_watchdog *wd_backend = + (struct kbase_hwcnt_backend_jm_watchdog *)backend; + if (!backend) return; - kbasep_hwcnt_backend_jm_watchdog_term_partial( - (struct kbase_hwcnt_backend_jm_watchdog *)backend, HWCNT_JM_WD_INIT_END); + /* disable timer thread to avoid concurrent access to shared resources */ + wd_backend->info->dump_watchdog_iface->disable( + wd_backend->info->dump_watchdog_iface->timer); + + kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END); } /* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */ @@ -350,20 +352,20 @@ static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backen jm_info = wd_info->jm_backend_iface->info; metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); + wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL); + if (!wd_backend) { + *out_backend = NULL; + return -ENOMEM; + } + + *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){ + .info = wd_info, + .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms, + .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false } + }; + while (state < 
HWCNT_JM_WD_INIT_END && !errcode) { switch (state) { - case HWCNT_JM_WD_INIT_ALLOC: - wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL); - if (wd_backend) { - *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){ - .info = wd_info, - .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms, - .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, - .is_enabled = false } - }; - } else - errcode = -ENOMEM; - break; case HWCNT_JM_WD_INIT_BACKEND: errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend); break; @@ -823,5 +825,5 @@ void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interfac kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info); /*blanking the watchdog backend interface*/ - *iface = (struct kbase_hwcnt_backend_interface){ NULL }; + memset(iface, 0, sizeof(*iface)); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.h b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h similarity index 94% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.h rename to drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h index 5021b4fdb966..02a7952cced2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_backend_jm_watchdog.h +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,8 +32,8 @@ #ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ #define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ -#include -#include +#include +#include /** * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c similarity index 87% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c index a54f005915ae..e724572560d5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt.c @@ -23,10 +23,10 @@ * Implementation of hardware counter context and accumulator APIs. */ -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_accumulator.h" -#include "mali_kbase_hwcnt_backend.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_context.h" +#include "hwcnt/mali_kbase_hwcnt_accumulator.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include @@ -39,11 +39,7 @@ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are * any enabled counters. */ -enum kbase_hwcnt_accum_state { - ACCUM_STATE_ERROR, - ACCUM_STATE_DISABLED, - ACCUM_STATE_ENABLED -}; +enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED }; /** * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. 
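Note on the watchdog backend hunks above: the refactor pulls the wd_backend allocation out of the init state machine and the kfree() out of the partial-teardown switch, so each enum state now tracks only a resource owned by one init stage and the enclosing struct is freed unconditionally at the end of the unwind. A minimal sketch of that staged init/unwind pattern follows; struct wd, init_backend(), term_backend(), alloc_map() and free_map() are hypothetical placeholders, not the driver's actual API.

        #include <linux/slab.h>

        struct backend;                          /* hypothetical sub-object owned by INIT_BACKEND */
        struct map { void *buf; };               /* hypothetical enable map owned by INIT_MAP */
        struct wd { struct backend *backend; struct map map; };

        int init_backend(struct backend **out);  /* hypothetical helpers */
        void term_backend(struct backend *b);
        int alloc_map(struct map *m);
        void free_map(struct map *m);

        enum init_state { INIT_START, INIT_BACKEND = INIT_START, INIT_MAP, INIT_END };

        /* Undo every stage below 'state', then free the struct itself. */
        static void term_partial(struct wd *wd, enum init_state state)
        {
                if (!wd)
                        return;

                while (state-- > INIT_START) {
                        switch (state) {
                        case INIT_BACKEND:
                                term_backend(wd->backend);  /* undo INIT_BACKEND */
                                break;
                        case INIT_MAP:
                                free_map(&wd->map);         /* undo INIT_MAP */
                                break;
                        default:
                                break;
                        }
                }

                kfree(wd);  /* freed outside the switch, matching the refactor above */
        }

        static int wd_init(struct wd **out)
        {
                enum init_state state = INIT_START;
                int err = 0;
                struct wd *wd = kmalloc(sizeof(*wd), GFP_KERNEL);  /* allocated before the loop */

                if (!wd) {
                        *out = NULL;
                        return -ENOMEM;
                }

                while (state < INIT_END && !err) {
                        switch (state) {
                        case INIT_BACKEND:
                                err = init_backend(&wd->backend);
                                break;
                        case INIT_MAP:
                                err = alloc_map(&wd->map);
                                break;
                        default:
                                break;
                        }
                        if (!err)
                                state++;
                }

                if (err) {
                        /* 'state' is the stage that failed; only earlier stages are unwound. */
                        term_partial(wd, state);
                        *out = NULL;
                        return err;
                }

                *out = wd;
                return 0;
        }

On full teardown the same unwind loop is reused with term_partial(wd, INIT_END), which mirrors how the watchdog term path above first disables the timer and then calls the partial-teardown helper with HWCNT_JM_WD_INIT_END.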
@@ -130,9 +126,8 @@ struct kbase_hwcnt_context { struct workqueue_struct *wq; }; -int kbase_hwcnt_context_init( - const struct kbase_hwcnt_backend_interface *iface, - struct kbase_hwcnt_context **out_hctx) +int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) { struct kbase_hwcnt_context *hctx = NULL; @@ -149,8 +144,7 @@ int kbase_hwcnt_context_init( mutex_init(&hctx->accum_lock); hctx->accum_inited = false; - hctx->wq = - alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); + hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); if (!hctx->wq) goto err_alloc_workqueue; @@ -208,35 +202,30 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) WARN_ON(!hctx); WARN_ON(!hctx->accum_inited); - errcode = hctx->iface->init( - hctx->iface->info, &hctx->accum.backend); + errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend); if (errcode) goto error; hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); hctx->accum.state = ACCUM_STATE_ERROR; - errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, - &hctx->accum.enable_map); + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map); if (errcode) goto error; hctx->accum.enable_map_any_enabled = false; - errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, - &hctx->accum.accum_buf); + errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf); if (errcode) goto error; - errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, - &hctx->accum.scratch_map); + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map); if (errcode) goto error; hctx->accum.accumulated = false; - hctx->accum.ts_last_dump_ns = - hctx->iface->timestamp_ns(hctx->accum.backend); + hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend); return 0; @@ -252,8 +241,7 @@ error: * @hctx: Non-NULL pointer to hardware counter context. * @accumulate: True if we should accumulate before disabling, else false. 
*/ -static void kbasep_hwcnt_accumulator_disable( - struct kbase_hwcnt_context *hctx, bool accumulate) +static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate) { int errcode = 0; bool backend_enabled = false; @@ -272,8 +260,7 @@ static void kbasep_hwcnt_accumulator_disable( WARN_ON(hctx->disable_count != 0); WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); - if ((hctx->accum.state == ACCUM_STATE_ENABLED) && - (accum->enable_map_any_enabled)) + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled)) backend_enabled = true; if (!backend_enabled) @@ -297,8 +284,8 @@ static void kbasep_hwcnt_accumulator_disable( if (errcode) goto disable; - errcode = hctx->iface->dump_get(accum->backend, - &accum->accum_buf, &accum->enable_map, accum->accumulated); + errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map, + accum->accumulated); if (errcode) goto disable; @@ -336,8 +323,7 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) /* The backend only needs enabling if any counters are enabled */ if (accum->enable_map_any_enabled) - errcode = hctx->iface->dump_enable_nolock( - accum->backend, &accum->enable_map); + errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map); if (!errcode) accum->state = ACCUM_STATE_ENABLED; @@ -364,12 +350,9 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_accumulator_dump( - struct kbase_hwcnt_context *hctx, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf, - const struct kbase_hwcnt_enable_map *new_map) +static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns, + u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf, + const struct kbase_hwcnt_enable_map *new_map) { int errcode = 0; unsigned long flags; @@ -398,8 +381,7 @@ static int kbasep_hwcnt_accumulator_dump( kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); if (new_map) - new_map_any_enabled = - kbase_hwcnt_enable_map_any_enabled(new_map); + new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map); /* * We're holding accum_lock, so the accumulator state might transition @@ -426,8 +408,7 @@ static int kbasep_hwcnt_accumulator_dump( * then we'll do it ourselves after the dump. */ if (new_map) { - kbase_hwcnt_enable_map_copy( - &accum->enable_map, new_map); + kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map); accum->enable_map_any_enabled = new_map_any_enabled; } @@ -440,12 +421,10 @@ static int kbasep_hwcnt_accumulator_dump( /* Initiate the dump if the backend is enabled. 
*/ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { if (dump_buf) { - errcode = hctx->iface->dump_request( - accum->backend, &dump_time_ns); + errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); dump_requested = true; } else { - dump_time_ns = hctx->iface->timestamp_ns( - accum->backend); + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); errcode = hctx->iface->dump_clear(accum->backend); } @@ -457,8 +436,7 @@ static int kbasep_hwcnt_accumulator_dump( /* Copy any accumulation into the dest buffer */ if (accum->accumulated && dump_buf) { - kbase_hwcnt_dump_buffer_copy( - dump_buf, &accum->accum_buf, cur_map); + kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map); dump_written = true; } @@ -483,8 +461,7 @@ static int kbasep_hwcnt_accumulator_dump( * we're already enabled and holding accum_lock is impossible. */ if (new_map_any_enabled) { - errcode = hctx->iface->dump_enable( - accum->backend, new_map); + errcode = hctx->iface->dump_enable(accum->backend, new_map); if (errcode) goto error; } @@ -495,11 +472,8 @@ static int kbasep_hwcnt_accumulator_dump( /* If we dumped, copy or accumulate it into the destination */ if (dump_requested) { WARN_ON(state != ACCUM_STATE_ENABLED); - errcode = hctx->iface->dump_get( - accum->backend, - dump_buf, - cur_map, - dump_written); + errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map, + dump_written); if (errcode) goto error; dump_written = true; @@ -540,8 +514,7 @@ error: * @hctx: Non-NULL pointer to hardware counter context. * @accumulate: True if we should accumulate before disabling, else false. */ -static void kbasep_hwcnt_context_disable( - struct kbase_hwcnt_context *hctx, bool accumulate) +static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate) { unsigned long flags; @@ -563,9 +536,8 @@ static void kbasep_hwcnt_context_disable( } } -int kbase_hwcnt_accumulator_acquire( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_accumulator **accum) +int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum) { int errcode = 0; unsigned long flags; @@ -618,9 +590,7 @@ int kbase_hwcnt_accumulator_acquire( * Regardless of initial state, counters don't need to be enabled via * the backend, as the initial enable map has no enabled counters. */ - hctx->accum.state = (hctx->disable_count == 0) ? - ACCUM_STATE_ENABLED : - ACCUM_STATE_DISABLED; + hctx->accum.state = (hctx->disable_count == 0) ? 
ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED; spin_unlock_irqrestore(&hctx->state_lock, flags); @@ -728,8 +698,7 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) spin_unlock_irqrestore(&hctx->state_lock, flags); } -const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( - struct kbase_hwcnt_context *hctx) +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx) { if (!hctx) return NULL; @@ -737,8 +706,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( return hctx->iface->metadata(hctx->iface->info); } -bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, - struct work_struct *work) +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work) { if (WARN_ON(!hctx) || WARN_ON(!work)) return false; @@ -746,12 +714,10 @@ bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, return queue_work(hctx->wq, work); } -int kbase_hwcnt_accumulator_set_counters( - struct kbase_hwcnt_accumulator *accum, - const struct kbase_hwcnt_enable_map *new_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) +int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; struct kbase_hwcnt_context *hctx; @@ -767,19 +733,15 @@ int kbase_hwcnt_accumulator_set_counters( mutex_lock(&hctx->accum_lock); - errcode = kbasep_hwcnt_accumulator_dump( - hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); mutex_unlock(&hctx->accum_lock); return errcode; } -int kbase_hwcnt_accumulator_dump( - struct kbase_hwcnt_accumulator *accum, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) +int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, + u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; struct kbase_hwcnt_context *hctx; @@ -794,8 +756,7 @@ int kbase_hwcnt_accumulator_dump( mutex_lock(&hctx->accum_lock); - errcode = kbasep_hwcnt_accumulator_dump( - hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); + errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); mutex_unlock(&hctx->accum_lock); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h similarity index 90% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h index af542ea5b56b..069e02068902 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_accumulator.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_accumulator.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,9 +67,8 @@ struct kbase_hwcnt_dump_buffer; * * Return: 0 on success or error code. 
*/ -int kbase_hwcnt_accumulator_acquire( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_accumulator **accum); +int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); /** * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. @@ -102,12 +101,10 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); * * Return: 0 on success or error code. */ -int kbase_hwcnt_accumulator_set_counters( - struct kbase_hwcnt_accumulator *accum, - const struct kbase_hwcnt_enable_map *new_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); +int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); /** * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled @@ -127,11 +124,8 @@ int kbase_hwcnt_accumulator_set_counters( * * Return: 0 on success or error code. */ -int kbase_hwcnt_accumulator_dump( - struct kbase_hwcnt_accumulator *accum, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); +int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, + u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf); /** * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h similarity index 95% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h index 34423d1b60c7..89732a908789 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_context.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_context.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,9 +43,8 @@ struct kbase_hwcnt_context; * * Return: 0 on success, else error code. */ -int kbase_hwcnt_context_init( - const struct kbase_hwcnt_backend_interface *iface, - struct kbase_hwcnt_context **out_hctx); +int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); /** * kbase_hwcnt_context_term() - Terminate a hardware counter context. @@ -61,8 +60,7 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); * * Return: Non-NULL pointer to metadata, or NULL on error. */ -const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( - struct kbase_hwcnt_context *hctx); +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx); /** * kbase_hwcnt_context_disable() - Increment the disable count of the context. @@ -145,7 +143,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); * this meant progress through the power management states could be stalled * for however long that higher priority thread took. 
*/ -bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, - struct work_struct *work); +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work); #endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c similarity index 78% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c index 5f5c36f33d41..74916dab060d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.c @@ -19,8 +19,8 @@ * */ -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include @@ -32,8 +32,7 @@ enum enable_map_idx { EM_COUNT, }; -static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, - bool is_csf) +static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: @@ -56,8 +55,7 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, } } -static void kbasep_get_tiler_block_type(u64 *dst, - enum kbase_hwcnt_set counter_set) +static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: @@ -72,8 +70,7 @@ static void kbasep_get_tiler_block_type(u64 *dst, } } -static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, - bool is_csf) +static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: @@ -93,8 +90,7 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, } } -static void kbasep_get_memsys_block_type(u64 *dst, - enum kbase_hwcnt_set counter_set) +static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: @@ -122,15 +118,14 @@ static void kbasep_get_memsys_block_type(u64 *dst, * * Return: 0 on success, else error code. 
*/ -static int kbasep_hwcnt_backend_gpu_metadata_create( - const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf, - enum kbase_hwcnt_set counter_set, - const struct kbase_hwcnt_metadata **metadata) +static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, + const bool is_csf, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **metadata) { struct kbase_hwcnt_description desc; struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description - blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; size_t non_sc_block_count; size_t sc_block_count; @@ -156,22 +151,19 @@ static int kbasep_hwcnt_backend_gpu_metadata_create( kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); blks[0].inst_cnt = 1; blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* One Tiler block */ kbasep_get_tiler_block_type(&blks[1].type, counter_set); blks[1].inst_cnt = 1; blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* l2_count memsys blks */ kbasep_get_memsys_block_type(&blks[2].type, counter_set); blks[2].inst_cnt = gpu_info->l2_count; blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* * There are as many shader cores in the system as there are bits set in @@ -192,8 +184,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create( kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); blks[3].inst_cnt = sc_block_count; blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); @@ -220,8 +211,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create( * * Return: Size of buffer the GPU needs to perform a counter dump. */ -static size_t -kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) +static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) { WARN_ON(!gpu_info); @@ -229,11 +219,10 @@ kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; } -int kbase_hwcnt_jm_metadata_create( - const struct kbase_hwcnt_gpu_info *gpu_info, - enum kbase_hwcnt_set counter_set, - const struct kbase_hwcnt_metadata **out_metadata, - size_t *out_dump_bytes) +int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes) { int errcode; const struct kbase_hwcnt_metadata *metadata; @@ -250,8 +239,7 @@ int kbase_hwcnt_jm_metadata_create( * all the available L2 cache and Shader cores are allocated. 
*/ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); - errcode = kbasep_hwcnt_backend_gpu_metadata_create( - gpu_info, false, counter_set, &metadata); + errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata); if (errcode) return errcode; @@ -276,10 +264,9 @@ void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata kbase_hwcnt_metadata_destroy(metadata); } -int kbase_hwcnt_csf_metadata_create( - const struct kbase_hwcnt_gpu_info *gpu_info, - enum kbase_hwcnt_set counter_set, - const struct kbase_hwcnt_metadata **out_metadata) +int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata) { int errcode; const struct kbase_hwcnt_metadata *metadata; @@ -287,8 +274,7 @@ int kbase_hwcnt_csf_metadata_create( if (!gpu_info || !out_metadata) return -EINVAL; - errcode = kbasep_hwcnt_backend_gpu_metadata_create( - gpu_info, true, counter_set, &metadata); + errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata); if (errcode) return errcode; @@ -297,8 +283,7 @@ int kbase_hwcnt_csf_metadata_create( return 0; } -void kbase_hwcnt_csf_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata) +void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { if (!metadata) return; @@ -306,10 +291,7 @@ void kbase_hwcnt_csf_metadata_destroy( kbase_hwcnt_metadata_destroy(metadata); } -static bool is_block_type_shader( - const u64 grp_type, - const u64 blk_type, - const size_t blk) +static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk) { bool is_shader = false; @@ -326,9 +308,7 @@ static bool is_block_type_shader( return is_shader; } -static bool is_block_type_l2_cache( - const u64 grp_type, - const u64 blk_type) +static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type) { bool is_l2_cache = false; @@ -348,10 +328,8 @@ static bool is_block_type_l2_cache( } int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - u64 pm_core_mask, - const struct kbase_hwcnt_curr_config *curr_config, - bool accumulate) + const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; @@ -362,28 +340,21 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* Variables to deal with the current configuration */ int l2_count = 0; - if (!dst || !src || !dst_enable_map || - (dst_enable_map->metadata != dst->metadata)) + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block( - metadata, grp, blk, blk_inst) { - const size_t hdr_cnt = - kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); - const u64 blk_type = kbase_hwcnt_metadata_block_type( - metadata, grp, blk); + kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); const bool is_shader_core = 
is_block_type_shader( - kbase_hwcnt_metadata_group_type(metadata, grp), - blk_type, blk); + kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); const bool is_l2_cache = is_block_type_l2_cache( - kbase_hwcnt_metadata_group_type(metadata, grp), - blk_type); + kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); const bool is_undefined = kbase_hwcnt_is_block_type_undefined( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); bool hw_res_available = true; @@ -412,10 +383,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* * Skip block if no values in the destination block are enabled. */ - if (kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { + u64 *dst_blk = + kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; bool blk_powered; @@ -435,13 +405,11 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, if (blk_powered && !is_undefined && hw_res_available) { /* Only powered and defined blocks have valid data. */ if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, - ctr_cnt); + kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, + hdr_cnt, ctr_cnt); } else { - kbase_hwcnt_dump_buffer_block_copy( - dst_blk, src_blk, - (hdr_cnt + ctr_cnt)); + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); } } else { /* Even though the block might be undefined, the @@ -469,26 +437,23 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, } int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) + const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; size_t src_offset = 0; size_t grp, blk, blk_inst; - if (!dst || !src || !dst_enable_map || - (dst_enable_map->metadata != dst->metadata)) + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count(metadata, grp, - blk); + kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); const bool is_undefined = kbase_hwcnt_is_block_type_undefined( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); @@ -496,10 +461,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* * Skip block if no values in the destination block are enabled. 
*/ - if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, - blk, blk_inst)) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { + u64 *dst_blk = + kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; if (!is_undefined) { @@ -542,12 +506,9 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction * will be stored. */ -static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( - u32 phys, - u64 *lo, - u64 *hi) +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi) { - u64 dwords[2] = {0, 0}; + u64 dwords[2] = { 0, 0 }; size_t dword_idx; @@ -572,9 +533,8 @@ static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( *hi = dwords[1]; } -void kbase_hwcnt_gpu_enable_map_to_physical( - struct kbase_hwcnt_physical_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) +void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) { const struct kbase_hwcnt_metadata *metadata; u64 fe_bm[EM_COUNT] = { 0 }; @@ -588,17 +548,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical( metadata = src->metadata; - kbase_hwcnt_metadata_for_each_block( - metadata, grp, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type( - metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type( - metadata, grp, blk); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( - src, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); - if ((enum kbase_hwcnt_gpu_group_type)grp_type == - KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { const size_t map_stride = kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); size_t map_idx; @@ -649,8 +605,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); } -void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, - enum kbase_hwcnt_set src) +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) { switch (src) { case KBASE_HWCNT_SET_PRIMARY: @@ -667,9 +622,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, } } -void kbase_hwcnt_gpu_enable_map_from_physical( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_physical_enable_map *src) +void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) { const struct kbase_hwcnt_metadata *metadata; @@ -692,16 +646,13 @@ void kbase_hwcnt_gpu_enable_map_from_physical( kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], &mmu_l2_bm[EM_HI]); - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type( - metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type( - metadata, grp, blk); - u64 *blk_map = 
kbase_hwcnt_enable_map_block_instance( - dst, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); - if ((enum kbase_hwcnt_gpu_group_type)grp_type == - KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { const size_t map_stride = kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); size_t map_idx; @@ -744,29 +695,25 @@ void kbase_hwcnt_gpu_enable_map_from_physical( } } -void kbase_hwcnt_gpu_patch_dump_headers( - struct kbase_hwcnt_dump_buffer *buf, - const struct kbase_hwcnt_enable_map *enable_map) +void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; - if (WARN_ON(!buf) || WARN_ON(!enable_map) || - WARN_ON(buf->metadata != enable_map->metadata)) + if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) return; metadata = buf->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - const u64 grp_type = - kbase_hwcnt_metadata_group_type(metadata, grp); - u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( - buf, grp, blk, blk_inst); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( - enable_map, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); + const u64 *blk_map = + kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); - if ((enum kbase_hwcnt_gpu_group_type)grp_type == - KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { const size_t map_stride = kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); u64 prfcnt_bm[EM_COUNT] = { 0 }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h similarity index 92% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h index f890d451c2c1..a49c31e52f98 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h @@ -34,9 +34,8 @@ struct kbase_hwcnt_dump_buffer; #define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 #define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 #define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ - (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \ - KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) +#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) /* FrontEnd block count in V5 GPU hardware counter. */ #define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1 @@ -228,19 +227,17 @@ static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, * * Return: 0 on success, else error code. 
*/ -int kbase_hwcnt_jm_metadata_create( - const struct kbase_hwcnt_gpu_info *info, - enum kbase_hwcnt_set counter_set, - const struct kbase_hwcnt_metadata **out_metadata, - size_t *out_dump_bytes); +int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); /** * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. * * @metadata: Pointer to metadata to destroy. */ -void kbase_hwcnt_jm_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata); +void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); /** * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the @@ -252,18 +249,16 @@ void kbase_hwcnt_jm_metadata_destroy( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_csf_metadata_create( - const struct kbase_hwcnt_gpu_info *info, - enum kbase_hwcnt_set counter_set, - const struct kbase_hwcnt_metadata **out_metadata); +int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata); /** * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter * metadata. * @metadata: Pointer to metadata to destroy. */ -void kbase_hwcnt_csf_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata); +void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); /** * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw @@ -289,8 +284,7 @@ void kbase_hwcnt_csf_metadata_destroy( int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, const u64 pm_core_mask, - const struct kbase_hwcnt_curr_config *curr_config, - bool accumulate); + const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw @@ -310,8 +304,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * Return: 0 on success, else error code. */ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate); + const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -365,9 +358,8 @@ static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi) * individual counter block value, but the physical enable map uses 1 bit for * every 4 counters, shared over all instances of a block. */ -void kbase_hwcnt_gpu_enable_map_to_physical( - struct kbase_hwcnt_physical_enable_map *dst, - const struct kbase_hwcnt_enable_map *src); +void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); /** * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical @@ -376,8 +368,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( * @dst: Non-NULL pointer to destination physical SET_SELECT value. * @src: Non-NULL pointer to source counter set selection. 
*/ -void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, - enum kbase_hwcnt_set src); +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src); /** * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to @@ -393,9 +384,8 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, * more than 64, so the enable map abstraction has nowhere to store the enable * information for the 64 non-existent counters. */ -void kbase_hwcnt_gpu_enable_map_from_physical( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_physical_enable_map *src); +void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); /** * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter @@ -411,8 +401,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( * kernel-user boundary, to ensure the header is accurate for the enable map * used by the user. */ -void kbase_hwcnt_gpu_patch_dump_headers( - struct kbase_hwcnt_dump_buffer *buf, - const struct kbase_hwcnt_enable_map *enable_map); +void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); #endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c similarity index 68% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c index 2a1cde79709b..0cf2f94cfb87 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.c @@ -19,21 +19,19 @@ * */ -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_gpu_narrow.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" #include #include #include -int kbase_hwcnt_gpu_metadata_narrow_create( - const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md) +int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, + const struct kbase_hwcnt_metadata *src_md) { struct kbase_hwcnt_description desc; struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description - blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; size_t prfcnt_values_per_block; size_t blk; int err; @@ -47,18 +45,15 @@ int kbase_hwcnt_gpu_metadata_narrow_create( * count in the metadata. */ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || - (kbase_hwcnt_metadata_block_count(src_md, 0) != - KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) + (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) return -EINVAL; /* Get the values count in the first block. */ - prfcnt_values_per_block = - kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); + prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); /* check all blocks should have same values count. 
*/ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t val_cnt = - kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); if (val_cnt != prfcnt_values_per_block) return -EINVAL; } @@ -75,12 +70,10 @@ int kbase_hwcnt_gpu_metadata_narrow_create( prfcnt_values_per_block = 64; for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - src_md, 0, blk); + size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); blks[blk] = (struct kbase_hwcnt_block_description){ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), - .inst_cnt = kbase_hwcnt_metadata_block_instance_count( - src_md, 0, blk), + .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), .hdr_cnt = blk_hdr_cnt, .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, }; @@ -105,8 +98,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create( * only supports 32-bit but the created metadata uses 64-bit for * block entry. */ - metadata_narrow->dump_buf_bytes = - metadata_narrow->metadata->dump_buf_bytes >> 1; + metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; *dst_md_narrow = metadata_narrow; } else { kfree(metadata_narrow); @@ -115,8 +107,7 @@ int kbase_hwcnt_gpu_metadata_narrow_create( return err; } -void kbase_hwcnt_gpu_metadata_narrow_destroy( - const struct kbase_hwcnt_metadata_narrow *md_narrow) +void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) { if (!md_narrow) return; @@ -125,9 +116,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy( kfree(md_narrow); } -int kbase_hwcnt_dump_buffer_narrow_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf) +int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, + struct kbase_hwcnt_dump_buffer_narrow *dump_buf) { size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; @@ -137,8 +127,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc( return -EINVAL; dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = - sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; /* Make a single allocation for both dump_buf and clk_cnt_buf. 
*/ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); @@ -154,14 +143,15 @@ int kbase_hwcnt_dump_buffer_narrow_alloc( return 0; } -void kbase_hwcnt_dump_buffer_narrow_free( - struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) +void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) { if (!dump_buf_narrow) return; kfree(dump_buf_narrow->dump_buf); - *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ NULL }; + *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, + .dump_buf = NULL, + .clk_cnt_buf = NULL }; } int kbase_hwcnt_dump_buffer_narrow_array_alloc( @@ -180,8 +170,7 @@ int kbase_hwcnt_dump_buffer_narrow_array_alloc( return -EINVAL; dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * - md_narrow->metadata->clk_cnt; + clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; /* Allocate memory for the dump buffer struct array */ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); @@ -234,27 +223,22 @@ void kbase_hwcnt_dump_buffer_narrow_array_free( memset(dump_bufs, 0, sizeof(*dump_bufs)); } -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, - const u64 *src_blk, - const u64 *blk_em, - size_t val_cnt) +void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt) { size_t val; for (val = 0; val < val_cnt; val++) { - bool val_enabled = - kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); - u32 src_val = - (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); + u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; dst_blk[val] = val_enabled ? src_val : 0; } } -void kbase_hwcnt_dump_buffer_copy_strict_narrow( - struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata_narrow *metadata_narrow; size_t grp; @@ -262,68 +246,53 @@ void kbase_hwcnt_dump_buffer_copy_strict_narrow( if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != - src->metadata->grp_cnt) || + WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || WARN_ON(src->metadata->grp_cnt != 1) || WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != src->metadata->grp_metadata[0].blk_cnt) || WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0] - .blk_metadata[0] - .ctr_cnt > + WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) return; /* Don't use src metadata since src buffer is bigger than dst buffer. 
*/ metadata_narrow = dst_narrow->md_narrow; - for (grp = 0; - grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); - grp++) { + for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { size_t blk; - size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count( - metadata_narrow, grp); + size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); for (blk = 0; blk < blk_cnt; blk++) { size_t blk_inst; - size_t blk_inst_cnt = - kbase_hwcnt_metadata_narrow_block_instance_count( - metadata_narrow, grp, blk); + size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( + metadata_narrow, grp, blk); - for (blk_inst = 0; blk_inst < blk_inst_cnt; - blk_inst++) { + for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { /* The narrowed down buffer is only 32-bit. */ - u32 *dst_blk = - kbase_hwcnt_dump_buffer_narrow_block_instance( - dst_narrow, grp, blk, blk_inst); - const u64 *src_blk = - kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, - blk_inst); - size_t val_cnt = - kbase_hwcnt_metadata_narrow_block_values_count( - metadata_narrow, grp, blk); + u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( + dst_narrow, grp, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( + metadata_narrow, grp, blk); /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( - val_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); - kbase_hwcnt_dump_buffer_block_copy_strict_narrow( - dst_blk, src_blk, blk_em, val_cnt); + kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, + blk_em, val_cnt); } } } for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { - bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk); + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); - dst_narrow->clk_cnt_buf[clk] = - clk_enabled ? src->clk_cnt_buf[clk] : 0; + dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h similarity index 84% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h index af6fa19f71e3..afd236d71a7c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_gpu_narrow.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu_narrow.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,7 +22,7 @@ #ifndef _KBASE_HWCNT_GPU_NARROW_H_ #define _KBASE_HWCNT_GPU_NARROW_H_ -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include struct kbase_device; @@ -86,8 +86,8 @@ struct kbase_hwcnt_dump_buffer_narrow_array { * * Return: Number of hardware counter groups described by narrow metadata. */ -static inline size_t kbase_hwcnt_metadata_narrow_group_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow) +static inline size_t +kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow) { return kbase_hwcnt_metadata_group_count(md_narrow->metadata); } @@ -100,8 +100,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_group_count( * * Return: Type of the group grp. */ -static inline u64 kbase_hwcnt_metadata_narrow_group_type( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp) +static inline u64 +kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp) { return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp); } @@ -114,8 +115,9 @@ static inline u64 kbase_hwcnt_metadata_narrow_group_type( * * Return: Number of blocks in group grp. */ -static inline size_t kbase_hwcnt_metadata_narrow_block_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp) +static inline size_t +kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp) { return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); } @@ -131,11 +133,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_count( * Return: Number of instances of block blk in group grp. */ static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, - size_t blk) + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) { - return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, - grp, blk); + return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); } /** @@ -148,12 +148,11 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( * * Return: Number of counter headers in each instance of block blk in group grp. */ -static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, - size_t blk) +static inline size_t +kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp, size_t blk) { - return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, - grp, blk); + return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); } /** @@ -167,11 +166,9 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_headers_count( * Return: Number of counters in each instance of block blk in group grp. 
*/ static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, - size_t blk) + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) { - return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, - grp, blk); + return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); } /** @@ -184,14 +181,12 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( * Return: Number of headers plus counters in each instance of block blk * in group grp. */ -static inline size_t kbase_hwcnt_metadata_narrow_block_values_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, - size_t blk) +static inline size_t +kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp, size_t blk) { - return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, - blk) + - kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, - blk); + return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + + kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); } /** @@ -205,18 +200,13 @@ static inline size_t kbase_hwcnt_metadata_narrow_block_values_count( * * Return: u32* to the dump buffer for the block instance. */ -static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance( - const struct kbase_hwcnt_dump_buffer_narrow *buf, size_t grp, - size_t blk, size_t blk_inst) +static inline u32 * +kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, + size_t grp, size_t blk, size_t blk_inst) { - return buf->dump_buf + - buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + - buf->md_narrow->metadata->grp_metadata[grp] - .blk_metadata[blk] - .dump_buf_index + - (buf->md_narrow->metadata->grp_metadata[grp] - .blk_metadata[blk] - .dump_buf_stride * + return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + + buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + + (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); } @@ -239,17 +229,15 @@ static inline u32 *kbase_hwcnt_dump_buffer_narrow_block_instance( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_gpu_metadata_narrow_create( - const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md); +int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, + const struct kbase_hwcnt_metadata *src_md); /** * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow * metadata object. * @md_narrow: Pointer to hardware counter narrow metadata. */ -void kbase_hwcnt_gpu_metadata_narrow_destroy( - const struct kbase_hwcnt_metadata_narrow *md_narrow); +void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); /** * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. @@ -260,9 +248,8 @@ void kbase_hwcnt_gpu_metadata_narrow_destroy( * * Return: 0 on success, else error code. 
*/ -int kbase_hwcnt_dump_buffer_narrow_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf); +int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, + struct kbase_hwcnt_dump_buffer_narrow *dump_buf); /** * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. @@ -271,8 +258,7 @@ int kbase_hwcnt_dump_buffer_narrow_alloc( * Can be safely called on an all-zeroed narrow dump buffer structure, or on an * already freed narrow dump buffer. */ -void kbase_hwcnt_dump_buffer_narrow_free( - struct kbase_hwcnt_dump_buffer_narrow *dump_buf); +void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); /** * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow @@ -320,10 +306,8 @@ void kbase_hwcnt_dump_buffer_narrow_array_free( * source value is bigger than U32_MAX, or copy the value from source if the * corresponding source value is less than or equal to U32_MAX. */ -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, - const u64 *src_blk, - const u64 *blk_em, - size_t val_cnt); +void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt); /** * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a @@ -339,9 +323,8 @@ void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, * corresponding source value is bigger than U32_MAX, or copy the value from * source if the corresponding source value is less than or equal to U32_MAX. */ -void kbase_hwcnt_dump_buffer_copy_strict_narrow( - struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); #endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c similarity index 56% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c index d925ed744d3d..763eb315d9a2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,13 +19,12 @@ * */ -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include -int kbase_hwcnt_metadata_create( - const struct kbase_hwcnt_description *desc, - const struct kbase_hwcnt_metadata **out_metadata) +int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) { char *buf; struct kbase_hwcnt_metadata *metadata; @@ -56,8 +55,7 @@ int kbase_hwcnt_metadata_create( /* Block metadata */ for (grp = 0; grp < desc->grp_cnt; grp++) { - size += sizeof(struct kbase_hwcnt_block_metadata) * - desc->grps[grp].blk_cnt; + size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt; } /* Single allocation for the entire metadata */ @@ -83,8 +81,7 @@ int kbase_hwcnt_metadata_create( for (grp = 0; grp < desc->grp_cnt; grp++) { size_t blk; - const struct kbase_hwcnt_group_description *grp_desc = - desc->grps + grp; + const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; size_t group_enable_map_count = 0; @@ -94,37 +91,28 @@ int kbase_hwcnt_metadata_create( /* Bump allocate this group's block metadata */ struct kbase_hwcnt_block_metadata *blk_mds = (struct kbase_hwcnt_block_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_block_metadata) * - grp_desc->blk_cnt; + offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; /* Fill in each block in the group's information */ for (blk = 0; blk < grp_desc->blk_cnt; blk++) { - const struct kbase_hwcnt_block_description *blk_desc = - grp_desc->blks + blk; - struct kbase_hwcnt_block_metadata *blk_md = - blk_mds + blk; - const size_t n_values = - blk_desc->hdr_cnt + blk_desc->ctr_cnt; + const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; + const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; blk_md->type = blk_desc->type; blk_md->inst_cnt = blk_desc->inst_cnt; blk_md->hdr_cnt = blk_desc->hdr_cnt; blk_md->ctr_cnt = blk_desc->ctr_cnt; blk_md->enable_map_index = group_enable_map_count; - blk_md->enable_map_stride = - kbase_hwcnt_bitfield_count(n_values); + blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); blk_md->dump_buf_index = group_dump_buffer_count; - blk_md->dump_buf_stride = - KBASE_HWCNT_ALIGN_UPWARDS( - n_values, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); + blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); blk_md->avail_mask_index = group_avail_mask_bits; - group_enable_map_count += - blk_md->enable_map_stride * blk_md->inst_cnt; - group_dump_buffer_count += - blk_md->dump_buf_stride * blk_md->inst_cnt; + group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; group_avail_mask_bits += blk_md->inst_cnt; } @@ -144,8 +132,7 @@ int kbase_hwcnt_metadata_create( /* Fill in the top level metadata's information */ metadata->grp_cnt = desc->grp_cnt; metadata->grp_metadata = grp_mds; - metadata->enable_map_bytes = - enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; metadata->dump_buf_bytes = dump_buf_count * 
KBASE_HWCNT_VALUE_BYTES; metadata->avail_mask = desc->avail_mask; metadata->clk_cnt = desc->clk_cnt; @@ -155,8 +142,7 @@ int kbase_hwcnt_metadata_create( * bit per 4 bytes in the dump buffer. */ WARN_ON(metadata->dump_buf_bytes != - (metadata->enable_map_bytes * - BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); *out_metadata = metadata; return 0; @@ -167,9 +153,8 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) kfree(metadata); } -int kbase_hwcnt_enable_map_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_enable_map *enable_map) +int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map) { u64 *enable_map_buf; @@ -177,8 +162,7 @@ int kbase_hwcnt_enable_map_alloc( return -EINVAL; if (metadata->enable_map_bytes > 0) { - enable_map_buf = - kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); if (!enable_map_buf) return -ENOMEM; } else { @@ -200,9 +184,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) enable_map->metadata = NULL; } -int kbase_hwcnt_dump_buffer_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_dump_buffer *dump_buf) +int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) { size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; @@ -235,10 +218,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) memset(dump_buf, 0, sizeof(*dump_buf)); } -int kbase_hwcnt_dump_buffer_array_alloc( - const struct kbase_hwcnt_metadata *metadata, - size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs) +int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs) { struct kbase_hwcnt_dump_buffer *buffers; size_t buf_idx; @@ -251,8 +232,7 @@ int kbase_hwcnt_dump_buffer_array_alloc( return -EINVAL; dump_buf_bytes = metadata->dump_buf_bytes; - clk_cnt_buf_bytes = - sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; /* Allocate memory for the dump buffer struct array */ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); @@ -283,15 +263,13 @@ int kbase_hwcnt_dump_buffer_array_alloc( buffers[buf_idx].metadata = metadata; buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); - buffers[buf_idx].clk_cnt_buf = - (u64 *)(addr + clk_cnt_buf_offset); + buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); } return 0; } -void kbase_hwcnt_dump_buffer_array_free( - struct kbase_hwcnt_dump_buffer_array *dump_bufs) +void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) { if (!dump_bufs) return; @@ -301,84 +279,71 @@ void kbase_hwcnt_dump_buffer_array_free( memset(dump_bufs, 0, sizeof(*dump_bufs)); } -void kbase_hwcnt_dump_buffer_zero( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; - if (WARN_ON(!dst) || - WARN_ON(!dst_enable_map) || + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; 
metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { u64 *dst_blk; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } - memset(dst->clk_cnt_buf, 0, - sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); + memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); } -void kbase_hwcnt_dump_buffer_zero_strict( - struct kbase_hwcnt_dump_buffer *dst) +void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) { if (WARN_ON(!dst)) return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); - memset(dst->clk_cnt_buf, 0, - sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); + memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); } -void kbase_hwcnt_dump_buffer_zero_non_enabled( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; - if (WARN_ON(!dst) || - WARN_ON(!dst_enable_map) || + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *blk_em = + kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - if (kbase_hwcnt_metadata_block_instance_avail( - metadata, grp, blk, blk_inst)) { + if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { /* Block available, so only zero non-enabled values */ - kbase_hwcnt_dump_buffer_block_zero_non_enabled( - dst_blk, blk_em, val_cnt); + kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); } else { /* Block not available, so zero the entire thing */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); @@ -386,188 +351,159 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled( } } -void kbase_hwcnt_dump_buffer_copy( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void 
kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; size_t clk; - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || WARN_ON(dst->metadata != src->metadata) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { u64 *dst_blk; const u64 *src_blk; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) { - if (kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; } } -void kbase_hwcnt_dump_buffer_copy_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; size_t clk; - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || WARN_ON(dst->metadata != src->metadata) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *src_blk = + kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + const u64 *blk_em = + kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - 
KBASE_HWCNT_VALUE_BYTES)); + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - kbase_hwcnt_dump_buffer_block_copy_strict( - dst_blk, src_blk, blk_em, val_cnt); + kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { bool clk_enabled = - kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk); + kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; } } -void kbase_hwcnt_dump_buffer_accumulate( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; size_t clk; - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || WARN_ON(dst->metadata != src->metadata) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { u64 *dst_blk; const u64 *src_blk; size_t hdr_cnt; size_t ctr_cnt; - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); - ctr_cnt = kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, ctr_cnt); + kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) { - if (kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; } } -void kbase_hwcnt_dump_buffer_accumulate_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) +void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; size_t clk; - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) 
|| WARN_ON(dst->metadata != src->metadata) || WARN_ON(dst->metadata != dst_enable_map->metadata)) return; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); - size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *src_blk = + kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + const u64 *blk_em = + kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); /* Align upwards to include padding bytes */ - ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + hdr_cnt + ctr_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); - kbase_hwcnt_dump_buffer_block_accumulate_strict( - dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt, + ctr_cnt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) { - if (kbase_hwcnt_clk_enable_map_enabled( - dst_enable_map->clk_enable_map, clk)) + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; else dst->clk_cnt_buf[clk] = 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h similarity index 84% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h index 9397840146b4..5c5ada401768 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_types.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -104,8 +104,7 @@ #define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) /* Minimum alignment of each block of hardware counters */ -#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ - (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) /** * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. @@ -115,7 +114,7 @@ * Return: Input value if already aligned to the specified boundary, or next * (incrementing upwards) aligned value. 
*/ -#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ (value + ((alignment - (value % alignment)) % alignment)) /** @@ -307,9 +306,8 @@ struct kbase_hwcnt_dump_buffer_array { * * Return: 0 on success, else error code. */ -int kbase_hwcnt_metadata_create( - const struct kbase_hwcnt_description *desc, - const struct kbase_hwcnt_metadata **metadata); +int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **metadata); /** * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. @@ -323,8 +321,7 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); * * Return: Number of hardware counter groups described by metadata. */ -static inline size_t -kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) +static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) { if (WARN_ON(!metadata)) return 0; @@ -339,9 +336,8 @@ kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) * * Return: Type of the group grp. */ -static inline u64 -kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp) +static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, + size_t grp) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) return 0; @@ -356,9 +352,8 @@ kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, * * Return: Number of blocks in group grp. */ -static inline size_t -kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, - size_t grp) +static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, + size_t grp) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) return 0; @@ -374,9 +369,8 @@ kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, * * Return: Type of the block blk in group grp. */ -static inline u64 -kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) +static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, + size_t grp, size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -394,8 +388,9 @@ kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, * * Return: Number of instances of block blk in group grp. */ -static inline size_t kbase_hwcnt_metadata_block_instance_count( - const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -413,8 +408,9 @@ static inline size_t kbase_hwcnt_metadata_block_instance_count( * * Return: Number of counter headers in each instance of block blk in group grp. 
*/ -static inline size_t kbase_hwcnt_metadata_block_headers_count( - const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -431,8 +427,9 @@ static inline size_t kbase_hwcnt_metadata_block_headers_count( * * Return: Number of counters in each instance of block blk in group grp. */ -static inline size_t kbase_hwcnt_metadata_block_counters_count( - const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -449,8 +446,9 @@ static inline size_t kbase_hwcnt_metadata_block_counters_count( * * Return: enable map stride in each instance of block blk in group grp. */ -static inline size_t kbase_hwcnt_metadata_block_enable_map_stride( - const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, + size_t grp, size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -468,8 +466,9 @@ static inline size_t kbase_hwcnt_metadata_block_enable_map_stride( * Return: Number of headers plus counters in each instance of block blk * in group grp. */ -static inline size_t kbase_hwcnt_metadata_block_values_count( - const struct kbase_hwcnt_metadata *metadata, size_t grp, size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -490,10 +489,13 @@ static inline size_t kbase_hwcnt_metadata_block_values_count( * Iteration order is group, then block, then block instance (i.e. linearly * through memory). */ -#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ - for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ - for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ - for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; \ + (blk_inst) < \ + kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ + (blk_inst)++) /** * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail @@ -504,10 +506,9 @@ static inline size_t kbase_hwcnt_metadata_block_values_count( * * Return: The bit index into the avail mask for the block. 
*/ -static inline size_t kbase_hwcnt_metadata_block_avail_bit( - const struct kbase_hwcnt_metadata *metadata, - size_t grp, - size_t blk) +static inline size_t +kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) { if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) @@ -527,11 +528,9 @@ static inline size_t kbase_hwcnt_metadata_block_avail_bit( * * Return: true if the block instance is available, else false. */ -static inline bool kbase_hwcnt_metadata_block_instance_avail( - const struct kbase_hwcnt_metadata *metadata, - size_t grp, - size_t blk, - size_t blk_inst) +static inline bool +kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk, size_t blk_inst) { size_t bit; u64 mask; @@ -553,9 +552,8 @@ static inline bool kbase_hwcnt_metadata_block_instance_avail( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_enable_map_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_enable_map *enable_map); +int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); /** * kbase_hwcnt_enable_map_free() - Free an enable map. @@ -577,9 +575,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * Return: u64* to the bitfield(s) used as the enable map for the * block instance. */ -static inline u64 * -kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, - size_t grp, size_t blk, size_t blk_inst) +static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, + size_t grp, size_t blk, size_t blk_inst) { if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) return NULL; @@ -589,15 +586,9 @@ kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) return map->hwcnt_enable_map; - return map->hwcnt_enable_map + - map->metadata->grp_metadata[grp].enable_map_index + - map->metadata->grp_metadata[grp] - .blk_metadata[blk] - .enable_map_index + - (map->metadata->grp_metadata[grp] - .blk_metadata[blk] - .enable_map_stride * - blk_inst); + return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + + map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + + (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); } /** @@ -609,8 +600,7 @@ kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, */ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) { - return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / - KBASE_HWCNT_BITFIELD_BITS; + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS; } /** @@ -620,11 +610,8 @@ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) * @blk: Index of the block in the group. * @blk_inst: Index of the block instance in the block. 
*/ -static inline void kbase_hwcnt_enable_map_block_disable_all( - struct kbase_hwcnt_enable_map *dst, - size_t grp, - size_t blk, - size_t blk_inst) +static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, + size_t grp, size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; @@ -644,15 +631,13 @@ static inline void kbase_hwcnt_enable_map_block_disable_all( * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. * @dst: Non-NULL pointer to enable map to zero. */ -static inline void kbase_hwcnt_enable_map_disable_all( - struct kbase_hwcnt_enable_map *dst) +static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst) { if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) return; if (dst->hwcnt_enable_map != NULL) - memset(dst->hwcnt_enable_map, 0, - dst->metadata->enable_map_bytes); + memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes); dst->clk_enable_map = 0; } @@ -664,11 +649,8 @@ static inline void kbase_hwcnt_enable_map_disable_all( * @blk: Index of the block in the group. * @blk_inst: Index of the block instance in the block. */ -static inline void kbase_hwcnt_enable_map_block_enable_all( - struct kbase_hwcnt_enable_map *dst, - size_t grp, - size_t blk, - size_t blk_inst) +static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, + size_t grp, size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; @@ -683,8 +665,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_all( bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { - const u64 remaining_values = val_cnt - - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); u64 block_enable_map_mask = U64_MAX; if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) @@ -699,8 +680,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_all( * map. * @dst: Non-NULL pointer to enable map. */ -static inline void kbase_hwcnt_enable_map_enable_all( - struct kbase_hwcnt_enable_map *dst) +static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) { size_t grp, blk, blk_inst; @@ -708,8 +688,7 @@ static inline void kbase_hwcnt_enable_map_enable_all( return; kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) - kbase_hwcnt_enable_map_block_enable_all( - dst, grp, blk, blk_inst); + kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } @@ -721,9 +700,8 @@ static inline void kbase_hwcnt_enable_map_enable_all( * * The dst and src MUST have been created from the same metadata. */ -static inline void kbase_hwcnt_enable_map_copy( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) +static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) { if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || WARN_ON(dst->metadata != src->metadata)) @@ -733,8 +711,7 @@ static inline void kbase_hwcnt_enable_map_copy( if (WARN_ON(!src->hwcnt_enable_map)) return; - memcpy(dst->hwcnt_enable_map, - src->hwcnt_enable_map, + memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map, dst->metadata->enable_map_bytes); } @@ -748,9 +725,8 @@ static inline void kbase_hwcnt_enable_map_copy( * * The dst and src MUST have been created from the same metadata. 
*/ -static inline void kbase_hwcnt_enable_map_union( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) +static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) { if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || WARN_ON(dst->metadata != src->metadata)) @@ -781,11 +757,9 @@ static inline void kbase_hwcnt_enable_map_union( * * Return: true if any values in the block are enabled, else false. */ -static inline bool kbase_hwcnt_enable_map_block_enabled( - const struct kbase_hwcnt_enable_map *enable_map, - size_t grp, - size_t blk, - size_t blk_inst) +static inline bool +kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, + size_t blk, size_t blk_inst) { bool any_enabled = false; size_t val_cnt; @@ -801,15 +775,13 @@ static inline bool kbase_hwcnt_enable_map_block_enabled( bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { - const u64 remaining_values = val_cnt - - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); u64 block_enable_map_mask = U64_MAX; if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) block_enable_map_mask = (1ull << remaining_values) - 1; - any_enabled = any_enabled || - (block_enable_map[bitfld_idx] & block_enable_map_mask); + any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask); } return any_enabled; @@ -821,8 +793,8 @@ static inline bool kbase_hwcnt_enable_map_block_enabled( * * Return: true if any values are enabled, else false. */ -static inline bool kbase_hwcnt_enable_map_any_enabled( - const struct kbase_hwcnt_enable_map *enable_map) +static inline bool +kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) { size_t grp, blk, blk_inst; u64 clk_enable_map_mask; @@ -832,14 +804,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1; - if (enable_map->metadata->clk_cnt > 0 && - (enable_map->clk_enable_map & clk_enable_map_mask)) + if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) return true; - kbase_hwcnt_metadata_for_each_block( - enable_map->metadata, grp, blk, blk_inst) { - if (kbase_hwcnt_enable_map_block_enabled( - enable_map, grp, blk, blk_inst)) + kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) + { + if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) return true; } @@ -855,9 +825,7 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( * * Return: true if the value was enabled, else false. */ -static inline bool kbase_hwcnt_enable_map_block_value_enabled( - const u64 *bitfld, - size_t val_idx) +static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx) { const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; @@ -873,9 +841,7 @@ static inline bool kbase_hwcnt_enable_map_block_value_enabled( * kbase_hwcnt_enable_map_block_instance. * @val_idx: Index of the value to enable in the block instance. 
*/ -static inline void kbase_hwcnt_enable_map_block_enable_value( - u64 *bitfld, - size_t val_idx) +static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx) { const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; @@ -891,9 +857,7 @@ static inline void kbase_hwcnt_enable_map_block_enable_value( * kbase_hwcnt_enable_map_block_instance. * @val_idx: Index of the value to disable in the block instance. */ -static inline void kbase_hwcnt_enable_map_block_disable_value( - u64 *bitfld, - size_t val_idx) +static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx) { const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; @@ -911,9 +875,8 @@ static inline void kbase_hwcnt_enable_map_block_disable_value( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_dump_buffer_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_dump_buffer *dump_buf); +int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); /** * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. @@ -936,10 +899,8 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); * * Return: 0 on success, else error code. */ -int kbase_hwcnt_dump_buffer_array_alloc( - const struct kbase_hwcnt_metadata *metadata, - size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs); +int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); /** * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. @@ -948,8 +909,7 @@ int kbase_hwcnt_dump_buffer_array_alloc( * Can be safely called on an all-zeroed dump buffer array structure, or on an * already freed dump buffer array. */ -void kbase_hwcnt_dump_buffer_array_free( - struct kbase_hwcnt_dump_buffer_array *dump_bufs); +void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); /** * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block @@ -961,9 +921,8 @@ void kbase_hwcnt_dump_buffer_array_free( * * Return: u64* to the dump buffer for the block instance. */ -static inline u64 *kbase_hwcnt_dump_buffer_block_instance( - const struct kbase_hwcnt_dump_buffer *buf, size_t grp, size_t blk, - size_t blk_inst) +static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, + size_t grp, size_t blk, size_t blk_inst) { if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) return NULL; @@ -975,10 +934,7 @@ static inline u64 *kbase_hwcnt_dump_buffer_block_instance( return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + - (buf->metadata->grp_metadata[grp] - .blk_metadata[blk] - .dump_buf_stride * - blk_inst); + (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); } /** @@ -990,9 +946,8 @@ static inline u64 *kbase_hwcnt_dump_buffer_block_instance( * * The dst and dst_enable_map MUST have been created from the same metadata. 
*/ -void kbase_hwcnt_dump_buffer_zero( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. @@ -1000,8 +955,7 @@ void kbase_hwcnt_dump_buffer_zero( * kbase_hwcnt_dump_buffer_block_instance. * @val_cnt: Number of values in the block. */ -static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, - size_t val_cnt) +static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt) { if (WARN_ON(!dst_blk)) return; @@ -1017,8 +971,7 @@ static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, * Slower than the non-strict variant. * @dst: Non-NULL pointer to dump buffer. */ -void kbase_hwcnt_dump_buffer_zero_strict( - struct kbase_hwcnt_dump_buffer *dst); +void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst); /** * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in @@ -1031,9 +984,8 @@ void kbase_hwcnt_dump_buffer_zero_strict( * * The dst and dst_enable_map MUST have been created from the same metadata. */ -void kbase_hwcnt_dump_buffer_zero_non_enabled( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled @@ -1047,9 +999,8 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled( * kbase_hwcnt_enable_map_block_instance. * @val_cnt: Number of values in the block. */ -static inline void -kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, - size_t val_cnt) +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, + size_t val_cnt) { size_t val; @@ -1073,10 +1024,9 @@ kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, * The dst, src, and dst_enable_map MUST have been created from the same * metadata. */ -void kbase_hwcnt_dump_buffer_copy( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. @@ -1086,8 +1036,7 @@ void kbase_hwcnt_dump_buffer_copy( * kbase_hwcnt_dump_buffer_block_instance. * @val_cnt: Number of values in the block. */ -static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, - const u64 *src_blk, +static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk, size_t val_cnt) { if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) @@ -1113,10 +1062,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, * The dst, src, and dst_enable_map MUST have been created from the same * metadata. 
*/ -void kbase_hwcnt_dump_buffer_copy_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values @@ -1134,10 +1082,8 @@ void kbase_hwcnt_dump_buffer_copy_strict( * * After the copy, any disabled values in dst will be zero. */ -static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, - const u64 *src_blk, - const u64 *blk_em, - size_t val_cnt) +static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt) { size_t val; @@ -1145,8 +1091,7 @@ static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, return; for (val = 0; val < val_cnt; val++) { - bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( - blk_em, val); + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); dst_blk[val] = val_enabled ? src_blk[val] : 0; } @@ -1165,10 +1110,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, * The dst, src, and dst_enable_map MUST have been created from the same * metadata. */ -void kbase_hwcnt_dump_buffer_accumulate( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and @@ -1181,10 +1125,8 @@ void kbase_hwcnt_dump_buffer_accumulate( * @hdr_cnt: Number of headers in the block. * @ctr_cnt: Number of counters in the block. */ -static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, - const u64 *src_blk, - size_t hdr_cnt, - size_t ctr_cnt) +static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk, + size_t hdr_cnt, size_t ctr_cnt) { size_t ctr; @@ -1219,10 +1161,9 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, * The dst, src, and dst_enable_map MUST have been created from the same * metadata. */ -void kbase_hwcnt_dump_buffer_accumulate_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); +void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); /** * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block @@ -1241,21 +1182,19 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( * @hdr_cnt: Number of headers in the block. * @ctr_cnt: Number of counters in the block. 
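 *
 * Worked illustration (counts are hypothetical): with hdr_cnt = 4 and
 * ctr_cnt = 60, values [0..3] are treated as headers and strict-copied
 * (disabled headers become 0), while values [4..63] are counters and are
 * added into dst_blk only where blk_em marks them enabled.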
*/ -static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( - u64 *dst_blk, const u64 *src_blk, const u64 *blk_em, size_t hdr_cnt, - size_t ctr_cnt) +static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk, + const u64 *blk_em, + size_t hdr_cnt, size_t ctr_cnt) { size_t ctr; if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) return; - kbase_hwcnt_dump_buffer_block_copy_strict( - dst_blk, src_blk, blk_em, hdr_cnt); + kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt); for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { - bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( - blk_em, ctr); + bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr); if (ctr_enabled) dst_blk[ctr] += src_blk[ctr]; @@ -1270,8 +1209,7 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( * @md: Non-NULL pointer to metadata. * @clk: size_t variable used as clock iterator. */ -#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ - for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) +#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) /** * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled @@ -1281,8 +1219,7 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( * * Return: true if the index of the clock domain is enabled, else false. */ -static inline bool kbase_hwcnt_clk_enable_map_enabled( - const u64 clk_enable_map, const size_t index) +static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index) { if (WARN_ON(index >= 64)) return false; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c similarity index 75% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c index 52ecb7bed03f..d618764d3b32 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,10 @@ * */ -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_accumulator.h" -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_accumulator.h" +#include "hwcnt/mali_kbase_hwcnt_context.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include #include @@ -75,8 +75,8 @@ struct kbase_hwcnt_virtualizer_client { u64 ts_start_ns; }; -const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( - struct kbase_hwcnt_virtualizer *hvirt) +const struct kbase_hwcnt_metadata * +kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt) { if (!hvirt) return NULL; @@ -90,8 +90,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( * * Will safely free a client in any partial state of construction. 
*/ -static void kbasep_hwcnt_virtualizer_client_free( - struct kbase_hwcnt_virtualizer_client *hvcli) +static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli) { if (!hvcli) return; @@ -110,9 +109,8 @@ static void kbasep_hwcnt_virtualizer_client_free( * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_virtualizer_client_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_virtualizer_client **out_hvcli) +static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_virtualizer_client **out_hvcli) { int errcode; struct kbase_hwcnt_virtualizer_client *hvcli = NULL; @@ -145,9 +143,9 @@ error: * @hvcli: Non-NULL pointer to virtualizer client. * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. */ -static void kbasep_hwcnt_virtualizer_client_accumulate( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_dump_buffer *dump_buf) +static void +kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_dump_buffer *dump_buf) { WARN_ON(!hvcli); WARN_ON(!dump_buf); @@ -155,12 +153,10 @@ static void kbasep_hwcnt_virtualizer_client_accumulate( if (hvcli->has_accum) { /* If already some accumulation, accumulate */ - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); } else { /* If no accumulation, copy */ - kbase_hwcnt_dump_buffer_copy( - &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); } hvcli->has_accum = true; } @@ -173,8 +169,7 @@ static void kbasep_hwcnt_virtualizer_client_accumulate( * * Will safely terminate the accumulator in any partial state of initialisation. */ -static void kbasep_hwcnt_virtualizer_accumulator_term( - struct kbase_hwcnt_virtualizer *hvirt) +static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt) { WARN_ON(!hvirt); lockdep_assert_held(&hvirt->lock); @@ -194,8 +189,7 @@ static void kbasep_hwcnt_virtualizer_accumulator_term( * * Return: 0 on success, else error code. */ -static int kbasep_hwcnt_virtualizer_accumulator_init( - struct kbase_hwcnt_virtualizer *hvirt) +static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt) { int errcode; @@ -204,18 +198,15 @@ static int kbasep_hwcnt_virtualizer_accumulator_init( WARN_ON(hvirt->client_count); WARN_ON(hvirt->accum); - errcode = kbase_hwcnt_accumulator_acquire( - hvirt->hctx, &hvirt->accum); + errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum); if (errcode) goto error; - errcode = kbase_hwcnt_enable_map_alloc( - hvirt->metadata, &hvirt->scratch_map); + errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map); if (errcode) goto error; - errcode = kbase_hwcnt_dump_buffer_alloc( - hvirt->metadata, &hvirt->scratch_buf); + errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf); if (errcode) goto error; @@ -234,10 +225,9 @@ error: * * Return: 0 on success, else error code. 
*/ -static int kbasep_hwcnt_virtualizer_client_add( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map) +static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map) { int errcode = 0; u64 ts_start_ns; @@ -258,28 +248,25 @@ static int kbasep_hwcnt_virtualizer_client_add( if (hvirt->client_count == 1) { /* First client, so just pass the enable map onwards as is */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - enable_map, &ts_start_ns, &ts_end_ns, NULL); + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, + &ts_start_ns, &ts_end_ns, NULL); } else { struct kbase_hwcnt_virtualizer_client *pos; /* Make the scratch enable map the union of all enable maps */ - kbase_hwcnt_enable_map_copy( - &hvirt->scratch_map, enable_map); - list_for_each_entry(pos, &hvirt->clients, node) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry (pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, - &ts_start_ns, &ts_end_ns, - &hvirt->scratch_buf); + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); /* Accumulate into only existing clients' accumulation bufs */ if (!errcode) - list_for_each_entry(pos, &hvirt->clients, node) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); + list_for_each_entry (pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate(pos, + &hvirt->scratch_buf); } if (errcode) goto error; @@ -307,9 +294,8 @@ error: * @hvirt: Non-NULL pointer to the hardware counter virtualizer. * @hvcli: Non-NULL pointer to the virtualizer client to remove. 
*/ -static void kbasep_hwcnt_virtualizer_client_remove( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli) +static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli) { int errcode = 0; u64 ts_start_ns; @@ -329,22 +315,21 @@ static void kbasep_hwcnt_virtualizer_client_remove( struct kbase_hwcnt_virtualizer_client *pos; /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); - list_for_each_entry(pos, &hvirt->clients, node) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); + list_for_each_entry (pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, - &ts_start_ns, &ts_end_ns, - &hvirt->scratch_buf); + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); /* Accumulate into remaining clients' accumulation bufs */ - if (!errcode) - list_for_each_entry(pos, &hvirt->clients, node) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); + if (!errcode) { + list_for_each_entry (pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate(pos, + &hvirt->scratch_buf); - /* Store the most recent dump time for rate limiting */ - hvirt->ts_last_dump_ns = ts_end_ns; + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + } } WARN_ON(errcode); } @@ -370,11 +355,8 @@ static void kbasep_hwcnt_virtualizer_client_remove( * Return: 0 on success or error code. 
*/ static int kbasep_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, + struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; @@ -391,32 +373,29 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry(pos, &hvirt->clients, node) + list_for_each_entry (pos, &hvirt->clients, node) /* Ignore the enable map of the selected client */ if (pos != hvcli) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, ts_start_ns, ts_end_ns, - &hvirt->scratch_buf); + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + ts_start_ns, ts_end_ns, &hvirt->scratch_buf); if (errcode) return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry(pos, &hvirt->clients, node) + list_for_each_entry (pos, &hvirt->clients, node) if (pos != hvcli) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); + kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); /* Finally, write into the dump buf */ if (dump_buf) { const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; if (hvcli->has_accum) { - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, src, &hvcli->enable_map); + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, + &hvcli->enable_map); src = &hvcli->accum_buf; } kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); @@ -436,12 +415,10 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( return errcode; } -int kbase_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) +int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; struct kbase_hwcnt_virtualizer *hvirt; @@ -464,14 +441,12 @@ int kbase_hwcnt_virtualizer_client_set_counters( * to the accumulator, saving a fair few copies and * accumulations. 
*/ - errcode = kbase_hwcnt_accumulator_set_counters( - hvirt->accum, enable_map, - ts_start_ns, ts_end_ns, dump_buf); + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, + ts_start_ns, ts_end_ns, dump_buf); if (!errcode) { /* Update the selected client's enable map */ - kbase_hwcnt_enable_map_copy( - &hvcli->enable_map, enable_map); + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); /* Fix up the timestamps */ *ts_start_ns = hvcli->ts_start_ns; @@ -483,8 +458,7 @@ int kbase_hwcnt_virtualizer_client_set_counters( } else { /* Otherwise, do the full virtualize */ errcode = kbasep_hwcnt_virtualizer_client_set_counters( - hvirt, hvcli, enable_map, - ts_start_ns, ts_end_ns, dump_buf); + hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf); } mutex_unlock(&hvirt->lock); @@ -507,12 +481,10 @@ int kbase_hwcnt_virtualizer_client_set_counters( * * Return: 0 on success or error code. */ -static int kbasep_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) +static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; struct kbase_hwcnt_virtualizer_client *pos; @@ -525,24 +497,23 @@ static int kbasep_hwcnt_virtualizer_client_dump( lockdep_assert_held(&hvirt->lock); /* Perform the dump */ - errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, - ts_start_ns, ts_end_ns, &hvirt->scratch_buf); + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, + &hvirt->scratch_buf); if (errcode) return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry(pos, &hvirt->clients, node) + list_for_each_entry (pos, &hvirt->clients, node) if (pos != hvcli) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); + kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); /* Finally, write into the dump buf */ if (dump_buf) { const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; if (hvcli->has_accum) { - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, src, &hvcli->enable_map); + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, + &hvcli->enable_map); src = &hvcli->accum_buf; } kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); @@ -578,11 +549,8 @@ static int kbasep_hwcnt_virtualizer_client_dump( * Return: 0 on success or error code. 
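 *
 * Worked illustration (numbers are hypothetical): with a 200ms threshold in
 * dump_threshold_ns and only 150ms elapsed since ts_last_dump_ns, the
 * request is rate limited, so the client's accumulation buffer is copied
 * straight into dump_buf (hence the WARN_ON below if nothing has been
 * accumulated); once at least 200ms have elapsed, a real dump is performed
 * via kbasep_hwcnt_virtualizer_client_dump() instead.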
*/ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) + struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) { bool rate_limited = true; @@ -602,10 +570,8 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( */ rate_limited = false; } else { - const u64 ts_ns = - kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); - const u64 time_since_last_dump_ns = - ts_ns - hvirt->ts_last_dump_ns; + const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); + const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns; /* Dump period equals or exceeds the threshold */ if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) @@ -613,8 +579,8 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( } if (!rate_limited) - return kbasep_hwcnt_virtualizer_client_dump( - hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns, + dump_buf); /* If we've gotten this far, the client must have something accumulated * otherwise it is a logic error @@ -622,8 +588,7 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( WARN_ON(!hvcli->has_accum); if (dump_buf) - kbase_hwcnt_dump_buffer_copy( - dump_buf, &hvcli->accum_buf, &hvcli->enable_map); + kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map); hvcli->has_accum = false; *ts_start_ns = hvcli->ts_start_ns; @@ -633,11 +598,9 @@ static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( return 0; } -int kbase_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) +int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) { int errcode; struct kbase_hwcnt_virtualizer *hvirt; @@ -659,8 +622,8 @@ int kbase_hwcnt_virtualizer_client_dump( * to the accumulator, saving a fair few copies and * accumulations. 
*/ - errcode = kbase_hwcnt_accumulator_dump( - hvirt->accum, ts_start_ns, ts_end_ns, dump_buf); + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, + dump_buf); if (!errcode) { /* Fix up the timestamps */ @@ -681,20 +644,17 @@ int kbase_hwcnt_virtualizer_client_dump( return errcode; } -int kbase_hwcnt_virtualizer_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_virtualizer_client **out_hvcli) +int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli) { int errcode; struct kbase_hwcnt_virtualizer_client *hvcli; - if (!hvirt || !enable_map || !out_hvcli || - (enable_map->metadata != hvirt->metadata)) + if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata)) return -EINVAL; - errcode = kbasep_hwcnt_virtualizer_client_alloc( - hvirt->metadata, &hvcli); + errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli); if (errcode) return errcode; @@ -713,8 +673,7 @@ int kbase_hwcnt_virtualizer_client_create( return 0; } -void kbase_hwcnt_virtualizer_client_destroy( - struct kbase_hwcnt_virtualizer_client *hvcli) +void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli) { if (!hvcli) return; @@ -728,10 +687,8 @@ void kbase_hwcnt_virtualizer_client_destroy( kbasep_hwcnt_virtualizer_client_free(hvcli); } -int kbase_hwcnt_virtualizer_init( - struct kbase_hwcnt_context *hctx, - u64 dump_threshold_ns, - struct kbase_hwcnt_virtualizer **out_hvirt) +int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt) { struct kbase_hwcnt_virtualizer *virt; const struct kbase_hwcnt_metadata *metadata; @@ -758,8 +715,7 @@ int kbase_hwcnt_virtualizer_init( return 0; } -void kbase_hwcnt_virtualizer_term( - struct kbase_hwcnt_virtualizer *hvirt) +void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) { if (!hvirt) return; @@ -768,7 +724,7 @@ void kbase_hwcnt_virtualizer_term( if (WARN_ON(hvirt->client_count != 0)) { struct kbase_hwcnt_virtualizer_client *pos, *n; - list_for_each_entry_safe(pos, n, &hvirt->clients, node) + list_for_each_entry_safe (pos, n, &hvirt->clients, node) kbase_hwcnt_virtualizer_client_destroy(pos); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h similarity index 83% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h index 08e8e9f1d596..485ba74960f6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_virtualizer.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_virtualizer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,17 +51,14 @@ struct kbase_hwcnt_dump_buffer; * * Return: 0 on success, else error code. 
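 *
 * Minimal lifecycle sketch (illustrative only; error paths are trimmed, and
 * "hctx", "dump_threshold_ns" and "enable_map" are assumed to be set up by
 * the caller):
 *
 *   struct kbase_hwcnt_virtualizer *hvirt;
 *   struct kbase_hwcnt_virtualizer_client *hvcli;
 *
 *   if (kbase_hwcnt_virtualizer_init(hctx, dump_threshold_ns, &hvirt))
 *           return;
 *   if (kbase_hwcnt_virtualizer_client_create(hvirt, &enable_map, &hvcli)) {
 *           kbase_hwcnt_virtualizer_term(hvirt);
 *           return;
 *   }
 *   ...
 *   kbase_hwcnt_virtualizer_client_destroy(hvcli);
 *   kbase_hwcnt_virtualizer_term(hvirt);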
*/ -int kbase_hwcnt_virtualizer_init( - struct kbase_hwcnt_context *hctx, - u64 dump_threshold_ns, - struct kbase_hwcnt_virtualizer **out_hvirt); +int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt); /** * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. * @hvirt: Pointer to virtualizer to be terminated. */ -void kbase_hwcnt_virtualizer_term( - struct kbase_hwcnt_virtualizer *hvirt); +void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt); /** * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by @@ -71,8 +68,8 @@ void kbase_hwcnt_virtualizer_term( * * Return: Non-NULL pointer to metadata, or NULL on error. */ -const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( - struct kbase_hwcnt_virtualizer *hvirt); +const struct kbase_hwcnt_metadata * +kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt); /** * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. @@ -84,17 +81,15 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_virtualizer_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_virtualizer_client **out_hvcli); +int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); /** * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. * @hvcli: Pointer to the hardware counter client. */ -void kbase_hwcnt_virtualizer_client_destroy( - struct kbase_hwcnt_virtualizer_client *hvcli); +void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli); /** * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's @@ -115,12 +110,10 @@ void kbase_hwcnt_virtualizer_client_destroy( * * Return: 0 on success or error code. */ -int kbase_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); +int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); /** * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's @@ -136,11 +129,9 @@ int kbase_hwcnt_virtualizer_client_set_counters( * * Return: 0 on success or error code. 
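 *
 * Illustrative call pattern (dump_buf is assumed to have been allocated with
 * kbase_hwcnt_dump_buffer_alloc() against kbase_hwcnt_virtualizer_metadata(),
 * and process_counters() is a hypothetical consumer):
 *
 *   u64 ts_start_ns, ts_end_ns;
 *
 *   if (!kbase_hwcnt_virtualizer_client_dump(hvcli, &ts_start_ns,
 *                                            &ts_end_ns, &dump_buf))
 *           process_counters(&dump_buf, ts_start_ns, ts_end_ns);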
*/ -int kbase_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); +int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); /** * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h similarity index 84% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h index 187331866428..501c0087b7e6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,17 +50,17 @@ typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data); * * Return: 0 if the watchdog timer enabled successfully, error code otherwise. */ -typedef int kbase_hwcnt_watchdog_enable_fn( - const struct kbase_hwcnt_watchdog_info *timer, u32 period_ms, - kbase_hwcnt_watchdog_callback_fn *callback, void *user_data); +typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer, + u32 period_ms, + kbase_hwcnt_watchdog_callback_fn *callback, + void *user_data); /** * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer * * @timer: Non-NULL pointer to a watchdog timer interface context */ -typedef void -kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); +typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); /** * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout @@ -68,9 +68,8 @@ kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer); * @timer: Non-NULL pointer to a watchdog timer interface context * @delay_ms: Watchdog timer expiration in milliseconds */ -typedef void -kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer, - u32 delay_ms); +typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer, + u32 delay_ms); /** * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.c b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c similarity index 76% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.c rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c index 69b957adc4dd..4caa832cd587 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.c +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,8 +20,8 @@ */ #include "mali_kbase.h" -#include "mali_kbase_hwcnt_watchdog_if.h" -#include "mali_kbase_hwcnt_watchdog_if_timer.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h" #include #include @@ -62,12 +62,10 @@ static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work) } static int kbasep_hwcnt_watchdog_if_timer_enable( - const struct kbase_hwcnt_watchdog_info *const timer, - u32 const period_ms, kbase_hwcnt_watchdog_callback_fn *const callback, - void *const user_data) + const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms, + kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data) { - struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = - (void *)timer; + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled)) return -EINVAL; @@ -81,11 +79,10 @@ static int kbasep_hwcnt_watchdog_if_timer_enable( return 0; } -static void kbasep_hwcnt_watchdog_if_timer_disable( - const struct kbase_hwcnt_watchdog_info *const timer) +static void +kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer) { - struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = - (void *)timer; + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; if (WARN_ON(!timer)) return; @@ -97,11 +94,11 @@ static void kbasep_hwcnt_watchdog_if_timer_disable( timer_info->timer_enabled = false; } -static void kbasep_hwcnt_watchdog_if_timer_modify( - const struct kbase_hwcnt_watchdog_info *const timer, u32 const delay_ms) +static void +kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer, + u32 const delay_ms) { - struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = - (void *)timer; + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled)) return; @@ -109,8 +106,7 @@ static void kbasep_hwcnt_watchdog_if_timer_modify( mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms)); } -void kbase_hwcnt_watchdog_if_timer_destroy( - struct kbase_hwcnt_watchdog_interface *const watchdog_if) +void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if) { struct kbase_hwcnt_watchdog_if_timer_info *timer_info; @@ -125,11 +121,12 @@ void kbase_hwcnt_watchdog_if_timer_destroy( destroy_workqueue(timer_info->workq); kfree(timer_info); - *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ NULL }; + *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ + .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL + }; } -int kbase_hwcnt_watchdog_if_timer_create( - struct kbase_hwcnt_watchdog_interface *const watchdog_if) +int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if) { struct kbase_hwcnt_watchdog_if_timer_info *timer_info; @@ -140,9 +137,7 @@ int kbase_hwcnt_watchdog_if_timer_create( if (!timer_info) return -ENOMEM; - *timer_info = - (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = - false }; + *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false }; INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback); diff 
--git a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.h b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h similarity index 85% rename from drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.h rename to drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h index 3bd69c3401c4..a545ad3e39e3 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwcnt_watchdog_if_timer.h +++ b/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,7 @@ struct kbase_hwcnt_watchdog_interface; * * Return: 0 on success, error otherwise. */ -int kbase_hwcnt_watchdog_if_timer_create( - struct kbase_hwcnt_watchdog_interface *watchdog_if); +int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if); /** * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface of hardware counter @@ -44,7 +43,6 @@ int kbase_hwcnt_watchdog_if_timer_create( * * @watchdog_if: Pointer to watchdog interface to destroy */ -void kbase_hwcnt_watchdog_if_timer_destroy( - struct kbase_hwcnt_watchdog_interface *watchdog_if); +void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if); #endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */ diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h index 4479a4b8665f..6089610847b4 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_common_jm.h @@ -23,8 +23,8 @@ #define _KBASE_IPA_COUNTER_COMMON_JM_H_ #include "mali_kbase.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" /* Maximum number of IPA groups for an IPA model. */ #define KBASE_IPA_MAX_GROUP_DEF_NUM 16 diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c index 66e56e267e68..43cdf18a5e3b 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,9 +59,6 @@ .counter_block_type = block_type, \ } -#define CSHW_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ - COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_CSHW) - #define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c index 4bb880e79b4a..a32a2c207163 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -30,21 +30,15 @@ /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) #define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) /* JM counter block offsets */ #define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) -/* Tiler counter block offsets */ -#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) - /* MEMSYS counter block offsets */ #define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) /* SC counter block offsets */ -#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) -#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) #define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) #define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) #define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) @@ -52,10 +46,6 @@ #define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) #define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) #define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) -#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) -#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51) -#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) -#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) #define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) /** diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h index f722f5fadff9..c875ffb4990e 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -271,7 +271,6 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage); -#if MALI_UNIT_TEST /* Called by kbase_get_real_power() to invoke the power models. * Must be called with kbdev->ipa.lock held. * This function is only exposed for use by unit tests. 
@@ -279,7 +278,6 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long freq, unsigned long voltage); -#endif /* MALI_UNIT_TEST */ extern struct devfreq_cooling_power kbase_ipa_power_model_ops; diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 78c343cf249f..57508eb24749 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -34,6 +34,8 @@ #include "mali_kbase_ipa_simple.h" #include "mali_kbase_ipa_debugfs.h" +#if MALI_USE_CSF + /* This is used if the dynamic power for top-level is estimated separately * through the counter model. To roughly match the contribution of top-level * power in the total dynamic power, when calculated through counter model, @@ -44,6 +46,8 @@ */ #define TOP_LEVEL_DYN_COEFF_SCALER (3) +#endif /* MALI_USE_CSF */ + #if MALI_UNIT_TEST static int dummy_temp; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index f9e0099a5cbf..e4316981e635 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -344,19 +344,6 @@ enum kbase_atom_exit_protected_state { KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, }; -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. - */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - /** * struct kbase_jd_atom - object representing the atom, containing the complete * state and attributes of an atom. @@ -390,7 +377,8 @@ struct kbase_ext_res { * each allocation is read in order to enforce an * overall physical memory usage limit. * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about + * @extres: Pointer to @nr_extres VA regions containing the external + * resource allocation and other information. * @nr_extres external resources referenced by the atom. * @device_nr: indicates the coregroup with which the atom is * associated, when @@ -408,16 +396,21 @@ struct kbase_ext_res { * sync through soft jobs and for the implicit * synchronization required on access to external * resources. - * @dma_fence.fence_in: Input fence + * @dma_fence.fence_in: Points to the dma-buf input fence for this atom. + * The atom would complete only after the fence is + * signaled. * @dma_fence.fence: Points to the dma-buf output fence for this atom. + * @dma_fence.fence_cb: The object that is passed at the time of adding the + * callback that gets invoked when @dma_fence.fence_in + * is signaled. + * @dma_fence.fence_cb_added: Flag to keep a track if the callback was successfully + * added for @dma_fence.fence_in, which is supposed to be + * invoked on the signaling of fence. * @dma_fence.context: The dma-buf fence context number for this atom. A * unique context number is allocated to each katom in * the context on context creation. * @dma_fence.seqno: The dma-buf fence sequence number for this atom. 
This * is increased every time this katom uses dma-buf fence - * @dma_fence.callbacks: List of all callbacks set up to wait on other fences - * @dma_fence.dep_count: Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. * @event_code: Event code for the job chain represented by the atom, * both HW and low-level SW events are represented by * event codes. @@ -519,21 +512,17 @@ struct kbase_jd_atom { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ u16 nr_extres; - struct kbase_ext_res *extres; + struct kbase_va_region **extres; u32 device_nr; u64 jc; void *softjob_data; -#if defined(CONFIG_SYNC) - struct sync_fence *fence; - struct sync_fence_waiter sync_waiter; -#endif /* CONFIG_SYNC */ -#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) struct { /* Use the functions/API defined in mali_kbase_fence.h to * when working with this sub struct */ -#if defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; #else @@ -556,38 +545,21 @@ struct kbase_jd_atom { #else struct dma_fence *fence; #endif + + /* This is the callback object that is registered for the fence_in. + * The callback is invoked when the fence_in is signaled. + */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence_cb fence_cb; +#else + struct dma_fence_cb fence_cb; +#endif + bool fence_cb_added; + unsigned int context; atomic_t seqno; - /* This contains a list of all callbacks set up to wait on - * other fences. This atom must be held back from JS until all - * these callbacks have been called and dep_count have reached - * 0. The initial value of dep_count must be equal to the - * number of callbacks on this list. - * - * This list is protected by jctx.lock. Callbacks are added to - * this list when the atom is built and the wait are set up. - * All the callbacks then stay on the list until all callbacks - * have been called and the atom is queued, or cancelled, and - * then all callbacks are taken off the list and freed. - */ - struct list_head callbacks; - /* Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. When dep_count reaches 0 the - * atom may be queued. - * - * The special value "-1" may only be set after the count - * reaches 0, while holding jctx.lock. This indicates that the - * atom has been handled, either queued in JS or cancelled. - * - * If anyone but the dma-fence worker sets this to -1 they must - * ensure that any potentially queued worker must have - * completed before allowing the atom to be marked as unused. - * This can be done by flushing the fence work queue: - * kctx->dma_fence.wq. 
- */ - atomic_t dep_count; } dma_fence; -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE || CONFIG_SYNC_FILE */ +#endif /* CONFIG_SYNC_FILE */ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy * of some of the following members diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index 3669f7e23fa6..ea143ab49642 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -39,6 +39,7 @@ enum base_hw_feature { BASE_HW_FEATURE_GPU_SLEEP, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, BASE_HW_FEATURE_CORE_FEATURES, + BASE_HW_FEATURE_PBHA_HWU, BASE_HW_FEATURE_END }; @@ -177,5 +178,17 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[ BASE_HW_FEATURE_END }; +__attribute__((unused)) static const enum base_hw_feature base_hw_features_tTIx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_ASN_HASH, + BASE_HW_FEATURE_GPU_SLEEP, + BASE_HW_FEATURE_CORE_FEATURES, + BASE_HW_FEATURE_PBHA_HWU, + BASE_HW_FEATURE_END +}; + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index 391730106f6d..a360984acca5 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -700,5 +700,35 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1 BASE_HW_ISSUE_END }; +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_END +}; + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 7d0d0dae0279..8e4d36141368 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -70,7 +70,9 @@ #include "mali_kbase_jd_debugfs.h" #include "mali_kbase_jm.h" #include "mali_kbase_js.h" -#endif /* !MALI_USE_CSF */ +#else /* !MALI_USE_CSF */ +#include "csf/mali_kbase_debug_csf_fault.h" +#endif /* MALI_USE_CSF */ #include "ipa/mali_kbase_ipa.h" @@ -466,7 +468,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); #endif int kbase_soft_event_update(struct kbase_context *kctx, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c 
b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index 0c8f653a9bff..480e693f3c61 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -35,7 +35,7 @@ #include "backend/gpu/mali_kbase_model_linux.h" #include #endif /* CONFIG_MALI_BIFROST_NO_MALI */ -#include "mali_kbase_mem_profile_debugfs_buf_size.h" +#include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" #include "mali_kbase_mem_pool_group.h" @@ -54,8 +54,8 @@ #if !MALI_USE_CSF #include "mali_kbase_kinstr_jm.h" #endif -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_context.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_kinstr_prfcnt.h" #include "mali_kbase_vinstr.h" #if MALI_USE_CSF @@ -95,15 +95,16 @@ #include #include #include +#include #include #include /* is_compat_task/in_compat_syscall */ #include #include #include #include -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #include -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#endif /* CONFIG_SYNC_FILE */ #include #include #include @@ -123,11 +124,6 @@ #include -/* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 - #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" /** @@ -139,9 +135,6 @@ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) -#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF) -#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF) - /** * struct mali_kbase_capability_def - kbase capabilities table * @@ -173,6 +166,11 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA #endif }; +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) +/* Mutex to synchronize the probe of multiple kbase instances */ +static struct mutex kbase_probe_mutex; +#endif + /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * @@ -432,6 +430,12 @@ static struct kbase_device *to_kbase_device(struct device *dev) int assign_irqs(struct kbase_device *kbdev) { + static const char *const irq_names_caps[] = { "JOB", "MMU", "GPU" }; + +#if IS_ENABLED(CONFIG_OF) + static const char *const irq_names[] = { "job", "mmu", "gpu" }; +#endif + struct platform_device *pdev; int i; @@ -439,34 +443,31 @@ int assign_irqs(struct kbase_device *kbdev) return -ENODEV; pdev = to_platform_device(kbdev->dev); - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - return -ENOENT; - } + for (i = 0; i < ARRAY_SIZE(irq_names_caps); i++) { + int irq; #if IS_ENABLED(CONFIG_OF) - if (!strncasecmp(irq_res->name, "JOB", 4)) { - irqtag = JOB_IRQ_TAG; - } else if (!strncasecmp(irq_res->name, "MMU", 4)) { - irqtag = MMU_IRQ_TAG; - } else if (!strncasecmp(irq_res->name, "GPU", 4)) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - return -EINVAL; - } + /* We recommend using Upper case for the irq names in dts, but if + * there are devices in the world using Lower case then we should + * avoid breaking support for them. So try using names in Upper case + * first then try using Lower case names. If both attempts fail then + * we assume there is no IRQ resource specified for the GPU. 
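+		 * An illustrative (not normative) node following the
+		 * recommendation would therefore carry something like:
+		 *   interrupt-names = "JOB", "MMU", "GPU";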
+ */ + irq = platform_get_irq_byname(pdev, irq_names_caps[i]); + if (irq < 0) + irq = platform_get_irq_byname(pdev, irq_names[i]); #else - irqtag = i; + irq = platform_get_irq(pdev, i); #endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + + if (irq < 0) { + dev_err(kbdev->dev, "No IRQ resource '%s'\n", irq_names_caps[i]); + return irq; + } + + kbdev->irqs[i].irq = irq; + kbdev->irqs[i].flags = irqd_get_trigger_type(irq_get_irq_data(irq)); } return 0; @@ -663,6 +664,9 @@ static int kbase_open(struct inode *inode, struct file *filp) if (!kbdev) return -ENODEV; + /* Set address space operation for page migration */ + kbase_mem_migrate_set_address_space_ops(kbdev, filp); + /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. */ @@ -1040,52 +1044,11 @@ static int kbase_api_get_ddk_version(struct kbase_context *kctx, return len; } -/* Defaults for legacy just-in-time memory allocator initialization - * kernel calls - */ -#define DEFAULT_MAX_JIT_ALLOCATIONS 255 -#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ - -static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init_10_2 *jit_init) -{ - kctx->jit_version = 1; - - /* since no phys_pages parameter, use the maximum: va_pages */ - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, - DEFAULT_MAX_JIT_ALLOCATIONS, - JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT, - jit_init->va_pages); -} - -static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init_11_5 *jit_init) -{ - int i; - - kctx->jit_version = 2; - - for (i = 0; i < sizeof(jit_init->padding); i++) { - /* Ensure all padding bytes are 0 for potential future - * extension - */ - if (jit_init->padding[i]) - return -EINVAL; - } - - /* since no phys_pages parameter, use the maximum: va_pages */ - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, - jit_init->max_allocations, jit_init->trim_level, - jit_init->group_id, jit_init->va_pages); -} - static int kbase_api_mem_jit_init(struct kbase_context *kctx, struct kbase_ioctl_mem_jit_init *jit_init) { int i; - kctx->jit_version = 3; - for (i = 0; i < sizeof(jit_init->padding); i++) { /* Ensure all padding bytes are 0 for potential future * extension @@ -1243,7 +1206,7 @@ static int kbase_api_mem_flags_change(struct kbase_context *kctx, static int kbase_api_stream_create(struct kbase_context *kctx, struct kbase_ioctl_stream_create *stream) { -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) int fd, ret; /* Name must be NULL-terminated and padded with NULLs, so check last @@ -1265,7 +1228,7 @@ static int kbase_api_stream_create(struct kbase_context *kctx, static int kbase_api_fence_validate(struct kbase_context *kctx, struct kbase_ioctl_fence_validate *validate) { -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) return kbase_sync_fence_validate(validate->fd); #else return -ENOENT; @@ -1279,12 +1242,18 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx, int err; if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n"); + dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big"); return -EINVAL; } + if (!data->len) { + dev_err(kctx->kbdev->dev, "mem_profile_add: buffer size is 0"); + /* Should return -EINVAL, but returning -ENOMEM for backwards compat */ + return 
-ENOMEM; + } + buf = kmalloc(data->len, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(buf)) + if (!buf) return -ENOMEM; err = copy_from_user(buf, u64_to_user_ptr(data->buffer), @@ -1494,9 +1463,22 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, - heap_init->in.initial_chunks, heap_init->in.max_chunks, - heap_init->in.target_in_flight, - &heap_init->out.gpu_heap_va, &heap_init->out.first_chunk_va); + heap_init->in.initial_chunks, heap_init->in.max_chunks, + heap_init->in.target_in_flight, heap_init->in.buf_desc_va, + &heap_init->out.gpu_heap_va, + &heap_init->out.first_chunk_va); +} + +static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, + union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) +{ + kctx->jit_group_id = heap_init->in.group_id; + + return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, + heap_init->in.initial_chunks, heap_init->in.max_chunks, + heap_init->in.target_in_flight, 0, + &heap_init->out.gpu_heap_va, + &heap_init->out.first_chunk_va); } static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, @@ -1578,6 +1560,31 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, cpu_queue_info->size); } +#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +static int kbase_ioctl_read_user_page(struct kbase_context *kctx, + union kbase_ioctl_read_user_page *user_page) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + + /* As of now, only LATEST_FLUSH is supported */ + if (unlikely(user_page->in.offset != LATEST_FLUSH)) + return -EINVAL; + + /* Validating padding that must be zero */ + if (unlikely(user_page->in.padding != 0)) + return -EINVAL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!kbdev->pm.backend.gpu_powered) + user_page->out.val_lo = POWER_DOWN_LATEST_FLUSH_VALUE; + else + user_page->out.val_lo = kbase_reg_read(kbdev, USER_REG(LATEST_FLUSH)); + user_page->out.val_hi = 0; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} #endif /* MALI_USE_CSF */ static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, @@ -1779,18 +1786,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_get_ddk_version, kctx); break; - case KBASE_IOCTL_MEM_JIT_INIT_10_2: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2, - kbase_api_mem_jit_init_10_2, - struct kbase_ioctl_mem_jit_init_10_2, - kctx); - break; - case KBASE_IOCTL_MEM_JIT_INIT_11_5: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5, - kbase_api_mem_jit_init_11_5, - struct kbase_ioctl_mem_jit_init_11_5, - kctx); - break; case KBASE_IOCTL_MEM_JIT_INIT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, kbase_api_mem_jit_init, @@ -2028,6 +2023,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) union kbase_ioctl_cs_tiler_heap_init, kctx); break; + case KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13, + kbasep_cs_tiler_heap_init_1_13, + union kbase_ioctl_cs_tiler_heap_init_1_13, kctx); + break; case KBASE_IOCTL_CS_TILER_HEAP_TERM: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, kbasep_cs_tiler_heap_term, @@ -2046,6 +2046,10 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_cs_cpu_queue_info, kctx); break; + case KBASE_IOCTL_READ_USER_PAGE: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, 
kbase_ioctl_read_user_page,
+						     union kbase_ioctl_read_user_page, kctx);
+		break;
 #endif /* MALI_USE_CSF */
 #if MALI_UNIT_TEST
 	case KBASE_IOCTL_TLSTREAM_STATS:
@@ -2088,6 +2092,9 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
 	if (unlikely(!kctx))
 		return -EPERM;
 
+	if (count < data_size)
+		return -ENOBUFS;
+
 	if (atomic_read(&kctx->event_count))
 		read_event = true;
 	else
@@ -3185,10 +3192,10 @@ static ssize_t gpuinfo_show(struct device *dev,
 	  .name = "Mali-G510" },
 	{ .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 	  .name = "Mali-G310" },
-	{ .id = GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-	  .name = "Mali-TTUX" },
-	{ .id = GPU_ID2_PRODUCT_LTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-	  .name = "Mali-LTUX" },
+	{ .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+	  .name = "Mali-TTIX" },
+	{ .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+	  .name = "Mali-LTIX" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
@@ -3223,19 +3230,19 @@ static ssize_t gpuinfo_show(struct device *dev,
 		GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features);
 	const u8 nr_cores = gpu_props->num_cores;
 
-	/* Mali-TTUX_B(ig) if 10 < number of cores with ray tracing supproted.
-	 * Mali-TTUX if 10 < number of cores without ray tracing supported.
-	 * Mali-TTUX if 7 <= number of cores <= 10 regardless ray tracing.
-	 * Mali-LTUX if number of cores < 7.
+	/* Mali-G715-Immortalis if 10 < number of cores with ray tracing supported.
+	 * Mali-G715 if 10 < number of cores without ray tracing supported.
+	 * Mali-G715 if 7 <= number of cores <= 10 regardless of ray tracing.
+	 * Mali-G615 if number of cores < 7.
 	 */
 	if ((nr_cores > 10) && rt_supported)
-		product_name = "Mali-TTUX_B";
+		product_name = "Mali-G715-Immortalis";
 	else if (nr_cores >= 7)
-		product_name = "Mali-TTUX";
+		product_name = "Mali-G715";
 
 	if (nr_cores < 7) {
-		dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be LTUX", nr_cores);
-		product_name = "Mali-LTUX";
+		dev_warn(kbdev->dev, "nr_cores(%u) GPU ID must be G615", nr_cores);
+		product_name = "Mali-G615";
 	} else
 		dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n",
			product_name, nr_cores);
@@ -4511,7 +4518,7 @@ int power_control_init(struct kbase_device *kbdev)
 	for (i = 0; i < ARRAY_SIZE(regulator_names); i++) {
 		kbdev->regulators[i] = regulator_get_optional(kbdev->dev,
			regulator_names[i]);
-		if (IS_ERR_OR_NULL(kbdev->regulators[i])) {
+		if (IS_ERR(kbdev->regulators[i])) {
 			err = PTR_ERR(kbdev->regulators[i]);
 			kbdev->regulators[i] = NULL;
 			break;
@@ -4539,7 +4546,7 @@ int power_control_init(struct kbase_device *kbdev)
 	 */
 	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
 		kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i);
-		if (IS_ERR_OR_NULL(kbdev->clocks[i])) {
+		if (IS_ERR(kbdev->clocks[i])) {
 			err = PTR_ERR(kbdev->clocks[i]);
 			kbdev->clocks[i] = NULL;
 			break;
@@ -4801,52 +4808,84 @@ static const struct file_operations
 	.release = single_release,
 };
 
-int kbase_device_debugfs_init(struct kbase_device *kbdev)
+/**
+ * debugfs_ctx_defaults_init - Create the default configuration of new contexts in debugfs
+ * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver.
+ * Return: A pointer to the last dentry that it tried to create, whether successful or not.
+ * Could be NULL or encode another error value.
+ */ +static struct dentry *debugfs_ctx_defaults_init(struct kbase_device *const kbdev) { - struct dentry *debugfs_ctx_defaults_directory; - int err; /* prevent unprivileged use of debug file system * in old kernel version */ const mode_t mode = 0644; + struct dentry *dentry = debugfs_create_dir("defaults", kbdev->debugfs_ctx_directory); + struct dentry *debugfs_ctx_defaults_directory = dentry; - kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, - NULL); - if (IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)) { + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); + return dentry; + } + + debugfs_create_bool("infinite_cache", mode, + debugfs_ctx_defaults_directory, + &kbdev->infinite_cache_active_default); + + dentry = debugfs_create_file("mem_pool_max_size", mode, debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.small, + &kbase_device_debugfs_mem_pool_max_size_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create mem_pool_max_size debugfs entry\n"); + return dentry; + } + + dentry = debugfs_create_file("lp_mem_pool_max_size", mode, debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.large, + &kbase_device_debugfs_mem_pool_max_size_fops); + if (IS_ERR_OR_NULL(dentry)) + dev_err(kbdev->dev, "Unable to create lp_mem_pool_max_size debugfs entry\n"); + + return dentry; +} + +/** + * init_debugfs - Create device-wide debugfs directories and files for the Mali driver + * @kbdev: An instance of the GPU platform device, allocated from the probe method of the driver. + * Return: A pointer to the last dentry that it tried to create, whether successful or not. + * Could be NULL or encode another error value. + */ +static struct dentry *init_debugfs(struct kbase_device *kbdev) +{ + struct dentry *dentry = debugfs_create_dir(kbdev->devname, NULL); + + kbdev->mali_debugfs_directory = dentry; + if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Couldn't create mali debugfs directory: %s\n", kbdev->devname); - err = -ENOMEM; - goto out; + return dentry; } - kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", - kbdev->mali_debugfs_directory); - if (IS_ERR_OR_NULL(kbdev->debugfs_ctx_directory)) { + dentry = debugfs_create_dir("ctx", kbdev->mali_debugfs_directory); + kbdev->debugfs_ctx_directory = dentry; + if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); - err = -ENOMEM; - goto out; + return dentry; } - kbdev->debugfs_instr_directory = debugfs_create_dir("instrumentation", - kbdev->mali_debugfs_directory); - if (IS_ERR_OR_NULL(kbdev->debugfs_instr_directory)) { + dentry = debugfs_create_dir("instrumentation", kbdev->mali_debugfs_directory); + kbdev->debugfs_instr_directory = dentry; + if (IS_ERR_OR_NULL(dentry)) { dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n"); - err = -ENOMEM; - goto out; - } - - debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", - kbdev->debugfs_ctx_directory); - if (IS_ERR_OR_NULL(debugfs_ctx_defaults_directory)) { - dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); - err = -ENOMEM; - goto out; + return dentry; } kbasep_regs_history_debugfs_init(kbdev); -#if !MALI_USE_CSF +#if MALI_USE_CSF + kbase_debug_csf_fault_debugfs_init(kbdev); +#else /* MALI_USE_CSF */ kbase_debug_job_fault_debugfs_init(kbdev); #endif /* !MALI_USE_CSF */ @@ -4860,41 +4899,58 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) /* fops_* variables created by 
invocations of macro * MAKE_QUIRK_ACCESSORS() above. */ - debugfs_create_file("quirks_sc", 0644, + dentry = debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_sc_quirks); - debugfs_create_file("quirks_tiler", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_tiler_quirks); - debugfs_create_file("quirks_mmu", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_mmu_quirks); - debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, - kbdev, &fops_gpu_quirks); - - debugfs_create_bool("infinite_cache", mode, - debugfs_ctx_defaults_directory, - &kbdev->infinite_cache_active_default); - - debugfs_create_file("mem_pool_max_size", mode, - debugfs_ctx_defaults_directory, - &kbdev->mem_pool_defaults.small, - &kbase_device_debugfs_mem_pool_max_size_fops); - - debugfs_create_file("lp_mem_pool_max_size", mode, - debugfs_ctx_defaults_directory, - &kbdev->mem_pool_defaults.large, - &kbase_device_debugfs_mem_pool_max_size_fops); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - debugfs_create_file("protected_debug_mode", 0444, - kbdev->mali_debugfs_directory, kbdev, - &fops_protected_debug_mode); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create quirks_sc debugfs entry\n"); + return dentry; } - debugfs_create_file("reset", 0644, + dentry = debugfs_create_file("quirks_tiler", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_tiler_quirks); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create quirks_tiler debugfs entry\n"); + return dentry; + } + + dentry = debugfs_create_file("quirks_mmu", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create quirks_mmu debugfs entry\n"); + return dentry; + } + + dentry = debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, + kbdev, &fops_gpu_quirks); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create quirks_gpu debugfs entry\n"); + return dentry; + } + + dentry = debugfs_ctx_defaults_init(kbdev); + if (IS_ERR_OR_NULL(dentry)) + return dentry; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + dentry = debugfs_create_file("protected_debug_mode", 0444, + kbdev->mali_debugfs_directory, kbdev, + &fops_protected_debug_mode); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create protected_debug_mode debugfs entry\n"); + return dentry; + } + } + + dentry = debugfs_create_file("reset", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_trigger_reset); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create reset debugfs entry\n"); + return dentry; + } kbase_ktrace_debugfs_init(kbdev); @@ -4907,19 +4963,30 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ #if !MALI_USE_CSF - debugfs_create_file("serialize_jobs", 0644, + dentry = debugfs_create_file("serialize_jobs", 0644, kbdev->mali_debugfs_directory, kbdev, &kbasep_serialize_jobs_debugfs_fops); - + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create serialize_jobs debugfs entry\n"); + return dentry; + } + kbase_timeline_io_debugfs_init(kbdev); #endif kbase_dvfs_status_debugfs_init(kbdev); - return 0; + return dentry; +} -out: - debugfs_remove_recursive(kbdev->mali_debugfs_directory); - return err; +int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *dentry = init_debugfs(kbdev); + + if (IS_ERR_OR_NULL(dentry)) { + 
debugfs_remove_recursive(kbdev->mali_debugfs_directory); + return IS_ERR(dentry) ? PTR_ERR(dentry) : -ENOMEM; + } + return 0; } void kbase_device_debugfs_term(struct kbase_device *kbdev) @@ -5441,7 +5508,9 @@ static int kbase_platform_device_probe(struct platform_device *pdev) kbdev->dev = &pdev->dev; dev_set_drvdata(kbdev->dev, kbdev); - +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_lock(&kbase_probe_mutex); +#endif err = kbase_device_init(kbdev); if (err) { @@ -5453,10 +5522,16 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_set_drvdata(kbdev->dev, NULL); kbase_device_free(kbdev); +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_unlock(&kbase_probe_mutex); +#endif } else { dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_increment_device_id(); +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_unlock(&kbase_probe_mutex); +#endif #ifdef CONFIG_MALI_ARBITER_SUPPORT mutex_lock(&kbdev->pm.lock); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); @@ -5690,10 +5765,11 @@ static const struct dev_pm_ops kbase_pm_ops = { }; #if IS_ENABLED(CONFIG_OF) -static const struct of_device_id kbase_dt_ids[] = { - { .compatible = "arm,mali-bifrost" }, - { /* sentinel */ } -}; +static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" }, + { .compatible = "arm,mali-midgard" }, + { .compatible = "arm,mali-bifrost" }, + { .compatible = "arm,mali-valhall" }, + { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, kbase_dt_ids); #endif @@ -5708,26 +5784,29 @@ static struct platform_driver kbase_platform_driver = { }, }; -/* - * The driver will not provide a shortcut to create the Mali platform device - * anymore when using Device Tree. - */ -#if IS_ENABLED(CONFIG_OF) +#if (KERNEL_VERSION(5, 3, 0) > LINUX_VERSION_CODE) && IS_ENABLED(CONFIG_OF) module_platform_driver(kbase_platform_driver); #else - static int __init kbase_driver_init(void) { int ret; +#if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) + mutex_init(&kbase_probe_mutex); +#endif + +#ifndef CONFIG_OF ret = kbase_platform_register(); if (ret) return ret; - +#endif ret = platform_driver_register(&kbase_platform_driver); - - if (ret) +#ifndef CONFIG_OF + if (ret) { kbase_platform_unregister(); + return ret; + } +#endif return ret; } @@ -5735,14 +5814,14 @@ static int __init kbase_driver_init(void) static void __exit kbase_driver_exit(void) { platform_driver_unregister(&kbase_platform_driver); +#ifndef CONFIG_OF kbase_platform_unregister(); +#endif } module_init(kbase_driver_init); module_exit(kbase_driver_exit); - -#endif /* CONFIG_OF */ - +#endif MODULE_LICENSE("GPL"); MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." 
\
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
index f4a46c12ac92..60afde2ceb7f 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
+++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c
@@ -327,16 +327,14 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
 bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx)
 {
 	bool result = false;
-	int as_nr;
 
 	if (WARN_ON(kctx == NULL))
 		return result;
 
 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 
-	as_nr = kctx->as_nr;
 	if (atomic_read(&kctx->refcount) > 0) {
-		KBASE_DEBUG_ASSERT(as_nr >= 0);
+		KBASE_DEBUG_ASSERT(kctx->as_nr >= 0);
 
 		kbase_ctx_sched_retain_ctx_refcount(kctx);
 		KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx,
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c
new file mode 100644
index 000000000000..598d8f594644
--- /dev/null
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Debugfs interface to dump information about GPU allocations in kctx
+ */
+
+#include "mali_kbase_debug_mem_allocs.h"
+#include "mali_kbase.h"
+
+#include
+#include
+#include
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/**
+ * debug_zone_mem_allocs_show - Show information from specific rbtree
+ * @zone: Name of GPU virtual memory zone
+ * @rbtree: Pointer to the root of the rbtree associated with @zone
+ * @sfile: The debugfs entry
+ *
+ * This function is called to show information about all the GPU allocations of
+ * a particular zone within GPU virtual memory space of a context.
+ * Information such as the start virtual address and size (in bytes) is shown for
+ * every GPU allocation mapped in the zone.
+ */ +static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) +{ + struct rb_node *p; + struct kbase_va_region *reg; + const char *type_names[5] = { + "Native", + "Imported UMM", + "Imported user buf", + "Alias", + "Raw" + }; + +#define MEM_ALLOCS_HEADER \ + " VA, VA size, Commit size, Flags, Mem type\n" + seq_printf(sfile, "Zone name: %s\n:", zone); + seq_printf(sfile, MEM_ALLOCS_HEADER); + for (p = rb_first(rbtree); p; p = rb_next(p)) { + reg = rb_entry(p, struct kbase_va_region, rblink); + if (!(reg->flags & KBASE_REG_FREE)) { + seq_printf(sfile, "%16llx, %16zx, %16zx, %8lx, %s\n", + reg->start_pfn << PAGE_SHIFT, reg->nr_pages << PAGE_SHIFT, + kbase_reg_current_backed_size(reg) << PAGE_SHIFT, + reg->flags, type_names[reg->gpu_alloc->type]); + } + } +} + +/** + * debug_ctx_mem_allocs_show - Show information about GPU allocations in a kctx + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * Return: + * 0 if successfully prints data in debugfs entry file + * -1 if it encountered an error + */ +static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *const kctx = sfile->private; + + kbase_gpu_vm_lock(kctx); + + debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); + debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile); + debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); + +#if MALI_USE_CSF + debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); + debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); +#endif /* MALI_USE_CSF */ + + kbase_gpu_vm_unlock(kctx); + return 0; +} + +/* + * File operations related to debugfs entry for mem_zones + */ +static int debug_mem_allocs_open(struct inode *in, struct file *file) +{ + return single_open(file, debug_ctx_mem_allocs_show, in->i_private); +} + +static const struct file_operations kbase_debug_mem_allocs_fops = { + .owner = THIS_MODULE, + .open = debug_mem_allocs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * Initialize debugfs entry for mem_allocs + */ +void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + debugfs_create_file("mem_allocs", 0400, kctx->kctx_dentry, kctx, + &kbase_debug_mem_allocs_fops); +} +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_debug_mem_allocs_init(struct kbase_context *const kctx) +{ +} +#endif diff --git a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h similarity index 56% rename from drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h rename to drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h index b5bb22553fe2..8cf69c2cbaf9 100644 --- a/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_allocs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -19,26 +19,21 @@
 *
 */
 
-#ifndef _DMA_BUF_LOCK_H
-#define _DMA_BUF_LOCK_H
+#ifndef _KBASE_DEBUG_MEM_ALLOCS_H
+#define _KBASE_DEBUG_MEM_ALLOCS_H
 
-enum dma_buf_lock_exclusive {
-	DMA_BUF_LOCK_NONEXCLUSIVE = 0,
-	DMA_BUF_LOCK_EXCLUSIVE = -1
-};
+#include
 
-struct dma_buf_lock_k_request {
-	int count;
-	int *list_of_dma_buf_fds;
-	int timeout;
-	enum dma_buf_lock_exclusive exclusive;
-};
+/**
+ * kbase_debug_mem_allocs_init() - Initialize the mem_allocs debugfs file
+ * @kctx: Pointer to kernel base context
+ *
+ * This function creates a "mem_allocs" file for a context to show info about the
+ * GPU allocations created for that context.
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_allocs_init(struct kbase_context *kctx);
 
-#define DMA_BUF_LOCK_IOC_MAGIC '~'
-
-#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, struct dma_buf_lock_k_request)
-
-#define DMA_BUF_LOCK_IOC_MINNR 11
-#define DMA_BUF_LOCK_IOC_MAXNR 11
-
-#endif /* _DMA_BUF_LOCK_H */
+#endif
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
index d03483219b08..cb8050d9b32c 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_debug_mem_view.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2013-2015, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015, 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
 #include
 
 /**
- * kbase_debug_mem_view_init - Initialize the mem_view sysfs file
+ * kbase_debug_mem_view_init - Initialize the mem_view debugfs file
 * @kctx: Pointer to kernel base context
 *
 * This function creates a "mem_view" file which can be used to get a view of
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
index 9fafe96d14a8..1072eac6d186 100755
--- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h
@@ -35,13 +35,13 @@
 #include
 #include
 #include
-#include
+#include
 
 #if MALI_USE_CSF
-#include
+#include
 #else
-#include
-#include
+#include
+#include
 #endif
 
 #include
@@ -53,11 +53,7 @@
 
 #include
 
-#if defined(CONFIG_SYNC)
-#include
-#else
 #include "mali_kbase_fence_defs.h"
-#endif
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 #include
@@ -133,8 +129,7 @@
 /* Maximum number of pages of memory that require a permanent mapping, per
 * kbase_context
 */
-#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \
-	PAGE_SHIFT)
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT)
 
 /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
 * clients, to reduce undesired system load.
* If a virtualizer client requests a dump within this threshold period after @@ -446,36 +441,40 @@ struct kbase_pm_device_data { /** * struct kbase_mem_pool - Page based memory pool for kctx/kbdev - * @kbdev: Kbase device where memory is used - * @cur_size: Number of free pages currently in the pool (may exceed - * @max_size in some corner cases) - * @max_size: Maximum number of free pages in the pool - * @order: order = 0 refers to a pool of 4 KB pages - * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) - * @group_id: A memory group ID to be passed to a platform-specific - * memory group manager, if present. Immutable. - * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). - * @pool_lock: Lock protecting the pool - must be held when modifying - * @cur_size and @page_list - * @page_list: List of free pages in the pool - * @reclaim: Shrinker for kernel reclaim of free pages - * @next_pool: Pointer to next pool where pages can be allocated when this - * pool is empty. Pages will spill over to the next pool when - * this pool is full. Can be NULL if there is no next pool. - * @dying: true if the pool is being terminated, and any ongoing - * operations should be abandoned - * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from - * this pool, eg during a grow operation + * @kbdev: Kbase device where memory is used + * @cur_size: Number of free pages currently in the pool (may exceed + * @max_size in some corner cases) + * @max_size: Maximum number of free pages in the pool + * @order: order = 0 refers to a pool of 4 KB pages + * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. Immutable. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @pool_lock: Lock protecting the pool - must be held when modifying + * @cur_size and @page_list + * @page_list: List of free pages in the pool + * @reclaim: Shrinker for kernel reclaim of free pages + * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation. + * This is used to avoid race condition between pool termination + * and page isolation for page migration. + * @next_pool: Pointer to next pool where pages can be allocated when this + * pool is empty. Pages will spill over to the next pool when + * this pool is full. Can be NULL if there is no next pool. + * @dying: true if the pool is being terminated, and any ongoing + * operations should be abandoned + * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from + * this pool, eg during a grow operation */ struct kbase_mem_pool { struct kbase_device *kbdev; - size_t cur_size; - size_t max_size; - u8 order; - u8 group_id; - spinlock_t pool_lock; - struct list_head page_list; - struct shrinker reclaim; + size_t cur_size; + size_t max_size; + u8 order; + u8 group_id; + spinlock_t pool_lock; + struct list_head page_list; + struct shrinker reclaim; + atomic_t isolation_in_progress_cnt; struct kbase_mem_pool *next_pool; @@ -562,7 +561,7 @@ struct kbase_devfreq_opp { * @entry_set_pte: program the pte to be a valid entry to encode the physical * address of the next lower level page table and also update * the number of valid entries. - * @entry_invalidate: clear out or invalidate the pte. + * @entries_invalidate: clear out or invalidate a range of ptes. * @get_num_valid_entries: returns the number of valid entries for a specific pgd. 
* @set_num_valid_entries: sets the number of valid entries for a specific pgd * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. @@ -580,7 +579,7 @@ struct kbase_mmu_mode { void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, unsigned long flags, int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); - void (*entry_invalidate)(u64 *entry); + void (*entries_invalidate)(u64 *entry, u32 count); unsigned int (*get_num_valid_entries)(u64 *pgd); void (*set_num_valid_entries)(u64 *pgd, unsigned int num_of_valid_entries); @@ -647,6 +646,30 @@ struct kbase_process { struct rb_root dma_buf_root; }; +/** + * struct kbase_mem_migrate - Object representing an instance for managing + * page migration. + * + * @mapping: Pointer to address space struct used for page migration. + * @free_pages_list: List of deferred pages to free. Mostly used when page migration + * is enabled. Pages in memory pool that require migrating + * will be freed instead. However page cannot be freed + * right away as Linux will need to release the page lock. + * Therefore page will be added to this list and freed later. + * @free_pages_lock: This lock should be held when adding or removing pages + * from @free_pages_list. + * @free_pages_workq: Work queue to process the work items queued to free + * pages in @free_pages_list. + * @free_pages_work: Work item to free pages in @free_pages_list. + */ +struct kbase_mem_migrate { + struct address_space *mapping; + struct list_head free_pages_list; + spinlock_t free_pages_lock; + struct workqueue_struct *free_pages_workq; + struct work_struct free_pages_work; +}; + /** * struct kbase_device - Object representing an instance of GPU platform device, * allocated from the probe method of mali driver. @@ -962,6 +985,7 @@ struct kbase_process { * @pcm_dev: The priority control manager device. * @oom_notifier_block: notifier_block containing kernel-registered out-of- * memory handler. + * @mem_migrate: Per device object for managing page migration. */ struct kbase_device { u32 hw_quirks_sc; @@ -1029,6 +1053,12 @@ struct kbase_device { s8 nr_hw_address_spaces; s8 nr_user_address_spaces; + /** + * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to + * restore to L2_CONFIG upon GPU reset. + */ + u8 pbha_propagate_bits; + #if MALI_USE_CSF struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; #else @@ -1115,7 +1145,9 @@ struct kbase_device { #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ unsigned long previous_frequency; +#if !MALI_USE_CSF atomic_t job_fault_debug; +#endif /* !MALI_USE_CSF */ #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *mali_debugfs_directory; @@ -1126,11 +1158,13 @@ struct kbase_device { u64 debugfs_as_read_bitmap; #endif /* CONFIG_MALI_BIFROST_DEBUG */ +#if !MALI_USE_CSF wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; spinlock_t job_fault_event_lock; +#endif /* !MALI_USE_CSF */ #if !MALI_CUSTOMER_RELEASE struct { @@ -1250,6 +1284,8 @@ struct kbase_device { */ u32 num_of_atoms_hw_completed; #endif + + struct kbase_mem_migrate mem_migrate; }; /** @@ -1332,10 +1368,6 @@ struct kbase_file { * * @KCTX_DYING: Set when the context process is in the process of being evicted. * - * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this - * context, to disable use of implicit dma-buf fences. This is used to avoid - * potential synchronization deadlocks. 
- * * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory * allocations. For 64-bit clients it is enabled by default, and disabled by * default on 32-bit clients. Being able to clear this flag is only used for @@ -1378,7 +1410,6 @@ enum kbase_context_flags { KCTX_PRIVILEGED = 1U << 7, KCTX_SCHEDULED = 1U << 8, KCTX_DYING = 1U << 9, - KCTX_NO_IMPLICIT_SYNC = 1U << 10, KCTX_FORCE_SAME_VA = 1U << 11, KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, @@ -1417,9 +1448,6 @@ enum kbase_context_flags { * * @KCTX_DYING: Set when the context process is in the process of being evicted. * - * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this - * context, to disable use of implicit dma-buf fences. This is used to avoid - * potential synchronization deadlocks. * * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory * allocations. For 64-bit clients it is enabled by default, and disabled by @@ -1460,7 +1488,6 @@ enum kbase_context_flags { KCTX_PRIVILEGED = 1U << 7, KCTX_SCHEDULED = 1U << 8, KCTX_DYING = 1U << 9, - KCTX_NO_IMPLICIT_SYNC = 1U << 10, KCTX_FORCE_SAME_VA = 1U << 11, KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, @@ -1667,12 +1694,6 @@ struct kbase_sub_alloc { * memory allocations. * @jit_current_allocations_per_bin: Current number of in-flight just-in-time * memory allocations per bin. - * @jit_version: Version number indicating whether userspace is using - * old or new version of interface for just-in-time - * memory allocations. - * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 - * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 - * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT * @jit_group_id: A memory group ID to be passed to a platform-specific * memory group manager. * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). @@ -1826,12 +1847,6 @@ struct kbase_context { struct list_head waiting_soft_jobs; spinlock_t waiting_soft_jobs_lock; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - struct { - struct list_head waiting_resource; - struct workqueue_struct *wq; - } dma_fence; -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ int as_nr; @@ -1863,7 +1878,6 @@ struct kbase_context { u8 jit_max_allocations; u8 jit_current_allocations; u8 jit_current_allocations_per_bin[256]; - u8 jit_version; u8 jit_group_id; #if MALI_JIT_PRESSURE_LIMIT_BASE u64 jit_phys_pages_limit; @@ -1930,17 +1944,15 @@ struct kbasep_gwt_list_element { * to a @kbase_context. * @ext_res_node: List head for adding the metadata to a * @kbase_context. - * @alloc: The physical memory allocation structure - * which is mapped. - * @gpu_addr: The GPU virtual address the resource is - * mapped to. + * @reg: External resource information, containing + * the corresponding VA region * @ref: Reference count. * * External resources can be mapped into multiple contexts as well as the same * context multiple times. - * As kbase_va_region itself isn't refcounted we can't attach our extra - * information to it as it could be removed under our feet leaving external - * resources pinned. + * As kbase_va_region is refcounted, we guarantee that it will be available + * for the duration of the external resource, meaning it is sufficient to use + * it to rederive any additional data, like the GPU address. 
* This metadata structure binds a single external resource to a single * context, ensuring that per context mapping is tracked separately so it can * be overridden when needed and abuses by the application (freeing the resource @@ -1948,8 +1960,7 @@ struct kbasep_gwt_list_element { */ struct kbase_ctx_ext_res_meta { struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; + struct kbase_va_region *reg; u32 ref; }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c deleted file mode 100644 index d5f4fae091e8..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.c +++ /dev/null @@ -1,491 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2011-2016, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_BIFROST_DMA_FENCE as - * it will be set there. - */ -#include "mali_kbase_dma_fence.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void -kbase_dma_fence_work(struct work_struct *pwork); - -static void -kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); -} - -static void -kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) -{ - list_del(&katom->queue); -} - -static int -kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, - struct ww_acquire_ctx *ctx) -{ -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object *content_res = NULL; -#else - struct dma_resv *content_res = NULL; -#endif - unsigned int content_res_idx = 0; - unsigned int r; - int err = 0; - - ww_acquire_init(ctx, &reservation_ww_class); - -retry: - for (r = 0; r < info->dma_fence_resv_count; r++) { - if (info->resv_objs[r] == content_res) { - content_res = NULL; - continue; - } - - err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); - if (err) - goto error; - } - - ww_acquire_done(ctx); - return err; - -error: - content_res_idx = r; - - /* Unlock the locked one ones */ - while (r--) - ww_mutex_unlock(&info->resv_objs[r]->lock); - - if (content_res) - ww_mutex_unlock(&content_res->lock); - - /* If we deadlock try with lock_slow and retry */ - if (err == -EDEADLK) { - content_res = info->resv_objs[content_res_idx]; - ww_mutex_lock_slow(&content_res->lock, ctx); - goto retry; - } - - /* If we are here the function failed */ - ww_acquire_fini(ctx); - return err; -} - -static void -kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, - struct ww_acquire_ctx *ctx) -{ - unsigned int r; - - for (r = 0; r < info->dma_fence_resv_count; r++) - ww_mutex_unlock(&info->resv_objs[r]->lock); - 
ww_acquire_fini(ctx); -} - - - -/** - * kbase_dma_fence_queue_work() - Queue work to handle @katom - * @katom: Pointer to atom for which to queue work - * - * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and - * submit the atom. - */ -static void -kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - bool ret; - - INIT_WORK(&katom->work, kbase_dma_fence_work); - ret = queue_work(kctx->dma_fence.wq, &katom->work); - /* Warn if work was already queued, that should not happen. */ - WARN_ON(!ret); -} - -/** - * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom - * @katom: Katom to cancel - * - * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. - */ -static void -kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&katom->kctx->jctx.lock); - - /* Cancel callbacks and clean up. */ - kbase_fence_free_callbacks(katom); - - /* Mark the atom as handled in case all fences signaled just before - * canceling the callbacks and the worker was queued. - */ - kbase_fence_dep_count_set(katom, -1); - - /* Prevent job_done_nolock from being called twice on an atom when - * there is a race between job completion and cancellation. - */ - - if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { - /* Wait was cancelled - zap the atom */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (kbase_jd_done_nolock(katom, true)) - kbase_js_sched_all(katom->kctx->kbdev); - } -} - -/** - * kbase_dma_fence_work() - Worker thread called when a fence is signaled - * @pwork: work_struct containing a pointer to a katom - * - * This function will clean and mark all dependencies as satisfied - */ -static void -kbase_dma_fence_work(struct work_struct *pwork) -{ - struct kbase_jd_atom *katom; - struct kbase_jd_context *ctx; - - katom = container_of(pwork, struct kbase_jd_atom, work); - ctx = &katom->kctx->jctx; - - mutex_lock(&ctx->lock); - if (kbase_fence_dep_count_read(katom) != 0) - goto out; - - kbase_fence_dep_count_set(katom, -1); - - /* Remove atom from list of dma-fence waiting atoms. */ - kbase_dma_fence_waiters_remove(katom); - /* Cleanup callbacks. */ - kbase_fence_free_callbacks(katom); - /* - * Queue atom on GPU, unless it has already completed due to a failing - * dependency. Run kbase_jd_done_nolock() on the katom if it is completed. - */ - if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) - kbase_jd_done_nolock(katom, true); - else - kbase_jd_dep_clear_locked(katom); - -out: - mutex_unlock(&ctx->lock); -} - -static void -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) -#else -kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -#endif -{ - struct kbase_fence_cb *kcb = container_of(cb, - struct kbase_fence_cb, - fence_cb); - struct kbase_jd_atom *katom = kcb->katom; - - /* If the atom is zapped dep_count will be forced to a negative number - * preventing this callback from ever scheduling work. Which in turn - * would reschedule the atom. 
- */ - - if (kbase_fence_dep_count_dec_and_test(katom)) - kbase_dma_fence_queue_work(katom); -} - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -static int -kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, - struct reservation_object *resv, - bool exclusive) -#else -static int -kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, - struct dma_resv *resv, - bool exclusive) -#endif -{ -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *excl_fence = NULL; - struct fence **shared_fences = NULL; -#else - struct dma_fence *excl_fence = NULL; - struct dma_fence **shared_fences = NULL; -#endif - unsigned int shared_count = 0; - int err, i; - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - err = reservation_object_get_fences_rcu( -#elif (KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE) - err = dma_resv_get_fences_rcu( -#else - err = dma_resv_get_fences( -#endif - resv, - &excl_fence, - &shared_count, - &shared_fences); - if (err) - return err; - - if (excl_fence) { - err = kbase_fence_add_callback(katom, - excl_fence, - kbase_dma_fence_cb); - - /* Release our reference, taken by reservation_object_get_fences_rcu(), - * to the fence. We have set up our callback (if that was possible), - * and it's the fence's owner is responsible for singling the fence - * before allowing it to disappear. - */ - dma_fence_put(excl_fence); - - if (err) - goto out; - } - - if (exclusive) { - for (i = 0; i < shared_count; i++) { - err = kbase_fence_add_callback(katom, - shared_fences[i], - kbase_dma_fence_cb); - if (err) - goto out; - } - } - - /* Release all our references to the shared fences, taken by - * reservation_object_get_fences_rcu(). We have set up our callback (if - * that was possible), and it's the fence's owner is responsible for - * signaling the fence before allowing it to disappear. - */ -out: - for (i = 0; i < shared_count; i++) - dma_fence_put(shared_fences[i]); - kfree(shared_fences); - - if (err) { - /* - * On error, cancel and clean up all callbacks that was set up - * before the error. 
- */ - kbase_fence_free_callbacks(katom); - } - - return err; -} - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -void kbase_dma_fence_add_reservation(struct reservation_object *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive) -#else -void kbase_dma_fence_add_reservation(struct dma_resv *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive) -#endif -{ - unsigned int i; - - for (i = 0; i < info->dma_fence_resv_count; i++) { - /* Duplicate resource, ignore */ - if (info->resv_objs[i] == resv) - return; - } - - info->resv_objs[info->dma_fence_resv_count] = resv; - if (exclusive) - set_bit(info->dma_fence_resv_count, - info->dma_fence_excl_bitmap); - (info->dma_fence_resv_count)++; -} - -int kbase_dma_fence_wait(struct kbase_jd_atom *katom, - struct kbase_dma_fence_resv_info *info) -{ - int err, i; -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - struct ww_acquire_ctx ww_ctx; - - lockdep_assert_held(&katom->kctx->jctx.lock); - - fence = kbase_fence_out_new(katom); - if (!fence) { - err = -ENOMEM; - dev_err(katom->kctx->kbdev->dev, - "Error %d creating fence.\n", err); - return err; - } - - kbase_fence_dep_count_set(katom, 1); - - err = kbase_dma_fence_lock_reservations(info, &ww_ctx); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d locking reservations.\n", err); - kbase_fence_dep_count_set(katom, -1); - kbase_fence_out_remove(katom); - return err; - } - - for (i = 0; i < info->dma_fence_resv_count; i++) { -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object *obj = info->resv_objs[i]; -#else - struct dma_resv *obj = info->resv_objs[i]; -#endif - if (!test_bit(i, info->dma_fence_excl_bitmap)) { -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - err = reservation_object_reserve_shared(obj); -#else - err = dma_resv_reserve_shared(obj, 0); -#endif - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d reserving space for shared fence.\n", err); - goto end; - } - - err = kbase_dma_fence_add_reservation_callback(katom, obj, false); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d adding reservation to callback.\n", err); - goto end; - } - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - reservation_object_add_shared_fence(obj, fence); -#else - dma_resv_add_shared_fence(obj, fence); -#endif - } else { - err = kbase_dma_fence_add_reservation_callback(katom, obj, true); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d adding reservation to callback.\n", err); - goto end; - } - -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - reservation_object_add_excl_fence(obj, fence); -#else - dma_resv_add_excl_fence(obj, fence); -#endif - } - } - -end: - kbase_dma_fence_unlock_reservations(info, &ww_ctx); - - if (likely(!err)) { - /* Test if the callbacks are already triggered */ - if (kbase_fence_dep_count_dec_and_test(katom)) { - kbase_fence_dep_count_set(katom, -1); - kbase_fence_free_callbacks(katom); - } else { - /* Add katom to the list of dma-buf fence waiting atoms - * only if it is still waiting. - */ - kbase_dma_fence_waiters_add(katom); - } - } else { - /* There was an error, cancel callbacks, set dep_count to -1 to - * indicate that the atom has been handled (the caller will - * kill it for us), signal the fence, free callbacks and the - * fence. 
- */ - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); - kbase_dma_fence_signal(katom); - } - - return err; -} - -void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) -{ - struct list_head *list = &kctx->dma_fence.waiting_resource; - - while (!list_empty(list)) { - struct kbase_jd_atom *katom; - - katom = list_first_entry(list, struct kbase_jd_atom, queue); - kbase_dma_fence_waiters_remove(katom); - kbase_dma_fence_cancel_atom(katom); - } -} - -void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) -{ - /* Cancel callbacks and clean up. */ - if (kbase_fence_free_callbacks(katom)) - kbase_dma_fence_queue_work(katom); -} - -void kbase_dma_fence_signal(struct kbase_jd_atom *katom) -{ - if (!katom->dma_fence.fence) - return; - - /* Signal the atom's fence. */ - dma_fence_signal(katom->dma_fence.fence); - - kbase_fence_out_remove(katom); - - kbase_fence_free_callbacks(katom); -} - -void kbase_dma_fence_term(struct kbase_context *kctx) -{ - destroy_workqueue(kctx->dma_fence.wq); - kctx->dma_fence.wq = NULL; -} - -int kbase_dma_fence_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); - - kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", - WQ_UNBOUND, 1, kctx->pid); - if (!kctx->dma_fence.wq) - return -ENOMEM; - - return 0; -} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h deleted file mode 100644 index f0c8d069b02c..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_dma_fence.h +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_DMA_FENCE_H_ -#define _KBASE_DMA_FENCE_H_ - -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - -#include -#include -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -#include -#else -#include -#endif -#include - -/* Forward declaration from mali_kbase_defs.h */ -struct kbase_jd_atom; -struct kbase_context; - -/** - * struct kbase_dma_fence_resv_info - Structure with list of reservation objects - * @resv_objs: Array of reservation objects to attach the - * new fence to. - * @dma_fence_resv_count: Number of reservation objects in the array. - * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive. - * - * This is used by some functions to pass around a collection of data about - * reservation objects. 
- */ -struct kbase_dma_fence_resv_info { -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object **resv_objs; -#else - struct dma_resv **resv_objs; -#endif - unsigned int dma_fence_resv_count; - unsigned long *dma_fence_excl_bitmap; -}; - -/** - * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs - * @resv: Reservation object to add to the array. - * @info: Pointer to struct with current reservation info - * @exclusive: Boolean indicating if exclusive access is needed - * - * The function adds a new reservation_object to an existing array of - * reservation_objects. At the same time keeps track of which objects require - * exclusive access in dma_fence_excl_bitmap. - */ -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -void kbase_dma_fence_add_reservation(struct reservation_object *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive); -#else -void kbase_dma_fence_add_reservation(struct dma_resv *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive); -#endif - -/** - * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs - * @katom: Katom with the external dependency. - * @info: Pointer to struct with current reservation info - * - * Return: An error code or 0 if succeeds - */ -int kbase_dma_fence_wait(struct kbase_jd_atom *katom, - struct kbase_dma_fence_resv_info *info); - -/** - * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fences blocked atoms on kctx - * @kctx: Pointer to kbase context - * - * This function will cancel and clean up all katoms on @kctx that is waiting - * on dma-buf fences. - * - * Locking: jctx.lock needs to be held when calling this function. - */ -void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); - -/** - * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom - * @katom: Pointer to katom whose callbacks are to be canceled - * - * This function cancels all dma-buf fence callbacks on @katom, but does not - * cancel the katom itself. - * - * The caller is responsible for ensuring that kbase_jd_done_nolock is called on - * @katom. - * - * Locking: jctx.lock must be held when calling this function. - */ -void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom); - -/** - * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait - * @katom: Pointer to katom to signal and clean up - * - * This function will signal the @katom's fence, if it has one, and clean up - * the callback data from the katom's wait on earlier fences. - * - * Locking: jctx.lock must be held while calling this function. - */ -void kbase_dma_fence_signal(struct kbase_jd_atom *katom); - -/** - * kbase_dma_fence_term() - Terminate Mali dma-fence context - * @kctx: kbase context to terminate - */ -void kbase_dma_fence_term(struct kbase_context *kctx); - -/** - * kbase_dma_fence_init() - Initialize Mali dma-fence context - * @kctx: kbase context to initialize - * - * Return: 0 on success, error code otherwise. - */ -int kbase_dma_fence_init(struct kbase_context *kctx); - -#else /* CONFIG_MALI_BIFROST_DMA_FENCE */ -/* Dummy functions for when dma-buf fence isn't enabled. 
*/ - -static inline int kbase_dma_fence_init(struct kbase_context *kctx) -{ - return 0; -} - -static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ -#endif diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.c b/drivers/gpu/arm/bifrost/mali_kbase_fence.c index 01557cd3ba47..b16b27659e61 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,95 +59,3 @@ kbase_fence_out_new(struct kbase_jd_atom *katom) return fence; } -bool -kbase_fence_free_callbacks(struct kbase_jd_atom *katom) -{ - struct kbase_fence_cb *cb, *tmp; - bool res = false; - - lockdep_assert_held(&katom->kctx->jctx.lock); - - /* Clean up and free callbacks. */ - list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { - bool ret; - - /* Cancel callbacks that hasn't been called yet. */ - ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); - if (ret) { - int ret; - - /* Fence had not signaled, clean up after - * canceling. - */ - ret = atomic_dec_return(&katom->dma_fence.dep_count); - - if (unlikely(ret == 0)) - res = true; - } - - /* - * Release the reference taken in - * kbase_fence_add_callback(). - */ - dma_fence_put(cb->fence); - list_del(&cb->node); - kfree(cb); - } - - return res; -} - -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -int -kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct fence *fence, - fence_func_t callback) -#else -int -kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct dma_fence *fence, - dma_fence_func_t callback) -#endif -{ - int err = 0; - struct kbase_fence_cb *kbase_fence_cb; - - if (!fence) - return -EINVAL; - - kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); - if (!kbase_fence_cb) - return -ENOMEM; - - kbase_fence_cb->fence = fence; - kbase_fence_cb->katom = katom; - INIT_LIST_HEAD(&kbase_fence_cb->node); - atomic_inc(&katom->dma_fence.dep_count); - - err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, - callback); - if (err == -ENOENT) { - /* Fence signaled, get the completion result */ - err = dma_fence_get_status(fence); - - /* remap success completion to err code */ - if (err == 1) - err = 0; - - kfree(kbase_fence_cb); - atomic_dec(&katom->dma_fence.dep_count); - } else if (err) { - kfree(kbase_fence_cb); - atomic_dec(&katom->dma_fence.dep_count); - } else { - /* - * Get reference to fence that will be kept until callback gets - * cleaned up in kbase_fence_free_callbacks(). 
- */ - dma_fence_get(fence); - /* Add callback to katom's list of callbacks */ - list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); - } - - return err; -} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index 4f952ad4d509..dfe33e52b4ce 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -23,12 +23,11 @@ #define _KBASE_FENCE_H_ /* - * mali_kbase_fence.[hc] has common fence code used by both - * - CONFIG_MALI_BIFROST_DMA_FENCE - implicit DMA fences - * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel + * mali_kbase_fence.[hc] has fence code used only by + * - CONFIG_SYNC_FILE - explicit fences */ -#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #include #include "mali_kbase_fence_defs.h" @@ -40,25 +39,6 @@ extern const struct fence_ops kbase_fence_ops; extern const struct dma_fence_ops kbase_fence_ops; #endif -/** - * struct kbase_fence_cb - Mali dma-fence callback data struct - * @fence_cb: Callback function - * @katom: Pointer to katom that is waiting on this callback - * @fence: Pointer to the fence object on which this callback is waiting - * @node: List head for linking this callback to the katom - */ -struct kbase_fence_cb { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence_cb fence_cb; - struct fence *fence; -#else - struct dma_fence_cb fence_cb; - struct dma_fence *fence; -#endif - struct kbase_jd_atom *katom; - struct list_head node; -}; - /** * kbase_fence_out_new() - Creates a new output fence and puts it on the atom * @katom: Atom to create an output fence for @@ -71,7 +51,7 @@ struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); #endif -#if defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) /** * kbase_fence_fence_in_set() - Assign input fence to atom * @katom: Atom to assign input fence to @@ -102,7 +82,7 @@ static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) } } -#if defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) /** * kbase_fence_in_remove() - Removes the input fence from atom * @katom: Atom to remove input fence for @@ -153,101 +133,7 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, return dma_fence_signal(katom->dma_fence.fence); } -/** - * kbase_fence_add_callback() - Add callback on @fence to block @katom - * @katom: Pointer to katom that will be blocked by @fence - * @fence: Pointer to fence on which to set up the callback - * @callback: Pointer to function to be called when fence is signaled - * - * Caller needs to hold a reference to @fence when calling this function, and - * the caller is responsible for releasing that reference. An additional - * reference to @fence will be taken when the callback was successfully set up - * and @fence needs to be kept valid until the callback has been called and - * cleanup have been done. - * - * Return: 0 on success: fence was either already signaled, or callback was - * set up. Negative error code is returned on error. 
- */ -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) -int kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct fence *fence, - fence_func_t callback); -#else -int kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct dma_fence *fence, - dma_fence_func_t callback); -#endif - -/** - * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value - * @katom: Atom to set dep_count for - * @val: value to set dep_count to - * - * The dep_count is available to the users of this module so that they can - * synchronize completion of the wait with cancellation and adding of more - * callbacks. For instance, a user could do the following: - * - * dep_count set to 1 - * callback #1 added, dep_count is increased to 2 - * callback #1 happens, dep_count decremented to 1 - * since dep_count > 0, no completion is done - * callback #2 is added, dep_count is increased to 2 - * dep_count decremented to 1 - * callback #2 happens, dep_count decremented to 0 - * since dep_count now is zero, completion executes - * - * The dep_count can also be used to make sure that the completion only - * executes once. This is typically done by setting dep_count to -1 for the - * thread that takes on this responsibility. - */ -static inline void -kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) -{ - atomic_set(&katom->dma_fence.dep_count, val); -} - -/** - * kbase_fence_dep_count_dec_and_test() - Decrements dep_count - * @katom: Atom to decrement dep_count for - * - * See @kbase_fence_dep_count_set for general description about dep_count - * - * Return: true if value was decremented to zero, otherwise false - */ -static inline bool -kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) -{ - return atomic_dec_and_test(&katom->dma_fence.dep_count); -} - -/** - * kbase_fence_dep_count_read() - Returns the current dep_count value - * @katom: Pointer to katom - * - * See @kbase_fence_dep_count_set for general description about dep_count - * - * Return: The current dep_count value - */ -static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) -{ - return atomic_read(&katom->dma_fence.dep_count); -} - -/** - * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom - * @katom: Pointer to katom - * - * This function will free all fence callbacks on the katom's list of - * callbacks. Callbacks that have not yet been called, because their fence - * hasn't yet signaled, will first be removed from the fence. - * - * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. - * - * Return: true if dep_count reached 0, otherwise false. - */ -bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); - -#if defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) /** * kbase_fence_in_get() - Retrieve input fence for atom. 
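The dep_count scheme documented in the removed comment is an ordinary atomic countdown: one base reference for the submitter, one per registered fence callback, and completion runs only when the count hits zero. A minimal generic sketch of that pattern, outside the driver:

#include <linux/atomic.h>
#include <linux/types.h>

struct demo_waiter {
	atomic_t dep_count;	/* -1 means "not waiting on any fences" */
};

static void demo_waiter_arm(struct demo_waiter *w)
{
	atomic_set(&w->dep_count, 1);	/* base reference held while adding callbacks */
}

static void demo_waiter_callback_added(struct demo_waiter *w)
{
	atomic_inc(&w->dep_count);	/* one reference per outstanding callback */
}

/* Called by each fired callback, and once by the submitter when it has
 * finished adding callbacks. Whoever drops the count to zero completes.
 */
static bool demo_waiter_put(struct demo_waiter *w)
{
	return atomic_dec_and_test(&w->dep_count);
}

The removed kbase code also reserved -1 to mean "no fence wait in flight", which is why the job dispatcher checked kbase_fence_dep_count_read() against -1 before treating an atom as blocked.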
* @katom: Atom to get input fence from @@ -288,6 +174,6 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); #define kbase_fence_put(fence) dma_fence_put(fence) -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE || defined(CONFIG_SYNC_FILE */ +#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ #endif /* _KBASE_FENCE_H_ */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 0bea655178d5..0282aaf8eb3a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -677,9 +677,11 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) int idx; const bool asn_he = regdump.l2_config & L2_CONFIG_ASN_HASH_ENABLE_MASK; +#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) if (!asn_he && kbdev->l2_hash_values_override) dev_err(kbdev->dev, "Failed to use requested ASN_HASH, fallback to default"); +#endif for (idx = 0; idx < ASN_HASH_COUNT; idx++) dev_info(kbdev->dev, "%s ASN_HASH[%d] is [0x%08x]\n", @@ -705,10 +707,6 @@ static struct { #define PROP(name, member) \ {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ sizeof(((struct base_gpu_props *)0)->member)} -#define BACKWARDS_COMPAT_PROP(name, type) \ - { \ - KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ - } PROP(PRODUCT_ID, core_props.product_id), PROP(VERSION_STATUS, core_props.version_status), PROP(MINOR_REVISION, core_props.minor_revision), @@ -722,6 +720,10 @@ static struct { PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), #if MALI_USE_CSF +#define BACKWARDS_COMPAT_PROP(name, type) \ + { \ + KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \ + } BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8), #else PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), @@ -820,7 +822,7 @@ int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) } kprops->prop_buffer_size = size; - kprops->prop_buffer = kmalloc(size, GFP_KERNEL); + kprops->prop_buffer = kzalloc(size, GFP_KERNEL); if (!kprops->prop_buffer) { kprops->prop_buffer_size = 0; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index 1de1e29fcb75..b6a8a2e5608f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -85,6 +85,10 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LTUX: features = base_hw_features_tTUx; break; + case GPU_ID2_PRODUCT_TTIX: + case GPU_ID2_PRODUCT_LTIX: + features = base_hw_features_tTIx; + break; default: features = base_hw_features_generic; break; @@ -233,12 +237,22 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, { U32_MAX, NULL } } }, { GPU_ID2_PRODUCT_LTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, + { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TTIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LTIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTIx_r0p0 }, { U32_MAX, NULL } } }, }; @@ -396,6 +410,10 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) 
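Several of the preprocessor checks above move from #if defined(CONFIG_SYNC_FILE) to #if IS_ENABLED(CONFIG_SYNC_FILE). The behavioural difference only matters for tristate options, but IS_ENABLED() is the idiomatic kconfig test either way; a small illustration:

#include <linux/kconfig.h>

/* For a tristate CONFIG_FOO, "#ifdef CONFIG_FOO" is only true for =y, because
 * =m defines CONFIG_FOO_MODULE instead. IS_ENABLED() covers both cases.
 */
#if IS_ENABLED(CONFIG_SYNC_FILE)
#define DEMO_HAVE_SYNC_FILE 1
#else
#define DEMO_HAVE_SYNC_FILE 0
#endif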
case GPU_ID2_PRODUCT_LTUX: issues = base_hw_issues_model_tTUx; break; + case GPU_ID2_PRODUCT_TTIX: + case GPU_ID2_PRODUCT_LTIX: + issues = base_hw_issues_model_tTIx; + break; default: dev_err(kbdev->dev, diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index 5a96f924bfbd..f5faa92525c5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -35,7 +35,6 @@ #include #include -#include "mali_kbase_dma_fence.h" #include #include @@ -158,15 +157,6 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. - * Any successfully completed atom would have had all it's callbacks - * completed before the atom was run, so only flush for failed atoms. - */ - if (katom->event_code != BASE_JD_EVENT_DONE) - flush_workqueue(katom->kctx->dma_fence.wq); -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ } static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) @@ -174,10 +164,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) KBASE_DEBUG_ASSERT(katom); KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - kbase_dma_fence_signal(katom); -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - kbase_gpu_vm_lock(katom->kctx); /* only roll back if extres is non-NULL */ if (katom->extres) { @@ -185,13 +171,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); + kbase_unmap_external_resource(katom->kctx, katom->extres[res_no]); } kfree(katom->extres); katom->extres = NULL; @@ -207,26 +187,8 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { - int err_ret_val = -EINVAL; + int err = -EINVAL; u32 res_no; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - struct kbase_dma_fence_resv_info info = { - .resv_objs = NULL, - .dma_fence_resv_count = 0, - .dma_fence_excl_bitmap = NULL - }; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - /* - * When both dma-buf fence and Android native sync is enabled, we - * disable dma-buf fence for contexts that are using Android native - * fences. - */ - const bool implicit_sync = !kbase_ctx_flag(katom->kctx, - KCTX_NO_IMPLICIT_SYNC); -#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/ - const bool implicit_sync = true; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ struct base_external_resource *input_extres; KBASE_DEBUG_ASSERT(katom); @@ -240,68 +202,32 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!katom->extres) return -ENOMEM; - /* copy user buffer to the end of our real buffer. 
- * Make sure the struct sizes haven't changed in a way - * we don't support - */ - BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); - input_extres = (struct base_external_resource *) - (((unsigned char *)katom->extres) + - (sizeof(*katom->extres) - sizeof(*input_extres)) * - katom->nr_extres); + input_extres = kmalloc_array(katom->nr_extres, sizeof(*input_extres), GFP_KERNEL); + if (!input_extres) { + err = -ENOMEM; + goto failed_input_alloc; + } if (copy_from_user(input_extres, get_compat_pointer(katom->kctx, user_atom->extres_list), sizeof(*input_extres) * katom->nr_extres) != 0) { - err_ret_val = -EINVAL; - goto early_err_out; + err = -EINVAL; + goto failed_input_copy; } -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (implicit_sync) { - info.resv_objs = - kmalloc_array(katom->nr_extres, -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - sizeof(struct reservation_object *), -#else - sizeof(struct dma_resv *), -#endif - GFP_KERNEL); - if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; - } - - info.dma_fence_excl_bitmap = - kcalloc(BITS_TO_LONGS(katom->nr_extres), - sizeof(unsigned long), GFP_KERNEL); - if (!info.dma_fence_excl_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; - } - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - /* Take the processes mmap lock */ down_read(kbase_mem_get_process_mmap_lock()); /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { - struct base_external_resource *res = &input_extres[res_no]; + struct base_external_resource *user_res = &input_extres[res_no]; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - bool exclusive; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) - ? true : false; -#endif reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, - res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + katom->kctx, user_res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? */ - if (kbase_is_region_invalid_or_free(reg)) { + if (unlikely(kbase_is_region_invalid_or_free(reg))) { /* roll back */ goto failed_loop; } @@ -311,36 +237,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; } - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm); - if (!alloc) { - err_ret_val = -EINVAL; + err = kbase_map_external_resource(katom->kctx, reg, current->mm); + if (err) goto failed_loop; - } -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (implicit_sync && - reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) - struct reservation_object *resv; -#else - struct dma_resv *resv; -#endif - resv = reg->gpu_alloc->imported.umm.dma_buf->resv; - if (resv) - kbase_dma_fence_add_reservation(resv, &info, - exclusive); - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - - /* finish with updating out array with the data we found */ - /* NOTE: It is important that this is the last thing we do (or - * at least not before the first write) as we overwrite elements - * as we loop and could be overwriting ourself, so no writes - * until the last read for an element. 
- */ - katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; + katom->extres[res_no] = reg; } /* successfully parsed the extres array */ /* drop the vm lock now */ @@ -349,57 +250,33 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* Release the processes mmap lock */ up_read(kbase_mem_get_process_mmap_lock()); -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (implicit_sync) { - if (info.dma_fence_resv_count) { - int ret; - - ret = kbase_dma_fence_wait(katom, &info); - if (ret < 0) - goto failed_dma_fence_setup; - } - - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ + /* Free the buffer holding data from userspace */ + kfree(input_extres); /* all done OK */ return 0; /* error handling section */ - -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE -failed_dma_fence_setup: - /* Lock the processes mmap lock */ - down_read(kbase_mem_get_process_mmap_lock()); - - /* lock before we unmap */ - kbase_gpu_vm_lock(katom->kctx); -#endif - - failed_loop: - /* undo the loop work */ +failed_loop: + /* undo the loop work. We are guaranteed to have access to the VA region + * as we hold a reference to it until it's unmapped + */ while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg = katom->extres[res_no]; - kbase_unmap_external_resource(katom->kctx, NULL, alloc); + kbase_unmap_external_resource(katom->kctx, reg); } kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ up_read(kbase_mem_get_process_mmap_lock()); - early_err_out: +failed_input_copy: + kfree(input_extres); +failed_input_alloc: kfree(katom->extres); katom->extres = NULL; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (implicit_sync) { - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); - } -#endif - return err_ret_val; + return err; } static inline void jd_resolve_dep(struct list_head *out_list, @@ -422,10 +299,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, if (katom->event_code != BASE_JD_EVENT_DONE && (dep_type != BASE_JD_DEP_TYPE_ORDER)) { -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - kbase_dma_fence_cancel_callbacks(dep_atom); -#endif - dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -439,35 +312,8 @@ static inline void jd_resolve_dep(struct list_head *out_list, (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && !dep_atom->will_fail_event_code && !other_dep_atom->will_fail_event_code))) { - bool dep_satisfied = true; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - int dep_count; - - dep_count = kbase_fence_dep_count_read(dep_atom); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else { - /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. - * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. 
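The reworked kbase_jd_pre_external_resources() above now copies the userspace list into its own allocation and then maps each resource, rolling back exactly the resources already mapped if anything fails. A condensed sketch of that allocate / copy / map / unwind shape, with made-up names and stub map calls standing in for the driver's:

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static int demo_map_one(u64 handle) { return 0; }	/* stand-in for the real mapping */
static void demo_unmap_one(u64 handle) { }

static int demo_map_user_resources(u64 __user *user_list, u32 count)
{
	u64 *handles;
	u32 i;
	int err;

	handles = kmalloc_array(count, sizeof(*handles), GFP_KERNEL);
	if (!handles)
		return -ENOMEM;

	if (copy_from_user(handles, user_list, sizeof(*handles) * count)) {
		err = -EINVAL;
		goto free_input;
	}

	for (i = 0; i < count; i++) {
		err = demo_map_one(handles[i]);
		if (err)
			goto unwind;
	}

	kfree(handles);
	return 0;

unwind:
	while (i--)			/* undo only what was actually mapped */
		demo_unmap_one(handles[i]);
free_input:
	kfree(handles);
	return err;
}

Keeping the user-supplied array separate from katom->extres is also what lets extres hold kbase_va_region pointers directly, which the new failed_loop path relies on.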
- */ - dep_satisfied = false; - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - - if (dep_satisfied) { - dep_atom->in_jd_list = true; - list_add_tail(&dep_atom->jd_item, out_list); - } + dep_atom->in_jd_list = true; + list_add_tail(&dep_atom->jd_item, out_list); } } } @@ -526,33 +372,8 @@ static void jd_try_submitting_deps(struct list_head *out_list, dep_atom->dep[0].atom); bool dep1_valid = is_dep_valid( dep_atom->dep[1].atom); - bool dep_satisfied = true; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - int dep_count; - dep_count = kbase_fence_dep_count_read( - dep_atom); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else { - /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. - * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. - */ - dep_satisfied = false; - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - - if (dep0_valid && dep1_valid && dep_satisfied) { + if (dep0_valid && dep1_valid) { dep_atom->in_jd_list = true; list_add(&dep_atom->jd_item, out_list); } @@ -963,9 +784,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx, INIT_LIST_HEAD(&katom->queue); INIT_LIST_HEAD(&katom->jd_item); -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - kbase_fence_dep_count_set(katom, -1); -#endif /* Don't do anything if there is a mess up with dependencies. * This is done in a separate cycle to check both the dependencies at ones, otherwise @@ -1185,12 +1003,6 @@ static bool jd_submit_atom(struct kbase_context *const kctx, if (queued && !IS_GPU_ATOM(katom)) return false; -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (kbase_fence_dep_count_read(katom) != -1) - return false; - -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); @@ -1273,7 +1085,7 @@ int kbase_jd_submit(struct kbase_context *kctx, if (unlikely(jd_atom_is_v2)) { if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { dev_dbg(kbdev->dev, - "Invalid atom address %p passed to job_submit\n", + "Invalid atom address %pK passed to job_submit\n", user_addr); err = -EFAULT; break; @@ -1284,7 +1096,7 @@ int kbase_jd_submit(struct kbase_context *kctx, } else { if (copy_from_user(&user_atom, user_addr, stride) != 0) { dev_dbg(kbdev->dev, - "Invalid atom address %p passed to job_submit\n", + "Invalid atom address %pK passed to job_submit\n", user_addr); err = -EFAULT; break; @@ -1599,6 +1411,7 @@ static void jd_cancel_worker(struct work_struct *data) bool need_to_try_schedule_context; bool attr_state_changed; struct kbase_device *kbdev; + CSTD_UNUSED(need_to_try_schedule_context); /* Soft jobs should never reach this function */ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); @@ -1746,20 +1559,8 @@ void kbase_jd_zap_context(struct kbase_context *kctx) kbase_cancel_soft_job(katom); } - -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - kbase_dma_fence_cancel_all_atoms(kctx); -#endif - mutex_unlock(&kctx->jctx.lock); -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. 
- */ - flush_workqueue(kctx->dma_fence.wq); -#endif - #if IS_ENABLED(CONFIG_DEBUG_FS) kbase_debug_job_fault_kctx_unblock(kctx); #endif @@ -1796,11 +1597,10 @@ int kbase_jd_init(struct kbase_context *kctx) kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; -#if defined(CONFIG_MALI_BIFROST_DMA_FENCE) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) kctx->jctx.atoms[i].dma_fence.context = dma_fence_context_alloc(1); atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); - INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); #endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c index 87c92330dfe2..6196c0985c7e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd_debugfs.c @@ -24,8 +24,7 @@ #include #include #include -#include -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #include #endif #include @@ -38,7 +37,7 @@ struct kbase_jd_debugfs_depinfo { static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, struct seq_file *sfile) { -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_sync_fence_info info; int res; @@ -58,51 +57,7 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, default: break; } -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ - -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - struct kbase_fence_cb *cb; - - if (atom->dma_fence.fence) { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *fence = atom->dma_fence.fence; -#else - struct dma_fence *fence = atom->dma_fence.fence; -#endif - - seq_printf(sfile, -#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - "Sd(%llu#%u: %s) ", -#else - "Sd(%llu#%llu: %s) ", -#endif - fence->context, fence->seqno, - dma_fence_is_signaled(fence) ? "signaled" : - "active"); - } - - list_for_each_entry(cb, &atom->dma_fence.callbacks, - node) { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - struct fence *fence = cb->fence; -#else - struct dma_fence *fence = cb->fence; -#endif - - seq_printf(sfile, -#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - "Wd(%llu#%u: %s) ", -#else - "Wd(%llu#%llu: %s) ", -#endif - fence->context, fence->seqno, - dma_fence_is_signaled(fence) ? 
"signaled" : - "active"); - } - } -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ - +#endif /* CONFIG_SYNC_FILE */ } static void kbasep_jd_debugfs_atom_deps( diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index 1991bfa9532d..a64d7327a76b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -621,6 +621,7 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata; s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + CSTD_UNUSED(js_devdata); KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -638,15 +639,12 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) int kbasep_js_kctx_init(struct kbase_context *const kctx) { - struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; int i, j; + CSTD_UNUSED(js_kctx_info); KBASE_DEBUG_ASSERT(kctx != NULL); - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); - for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -688,6 +686,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) int js; bool update_ctx_count = false; unsigned long flags; + CSTD_UNUSED(js_kctx_info); KBASE_DEBUG_ASSERT(kctx != NULL); @@ -1800,6 +1799,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( bool runpool_ctx_attr_change = false; int kctx_as_nr; int new_ref_count; + CSTD_UNUSED(kctx_as_nr); KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); @@ -2183,6 +2183,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #endif /* Cause it to leave at some later point */ bool retained; + CSTD_UNUSED(retained); retained = kbase_ctx_sched_inc_refcount_nolock(kctx); KBASE_DEBUG_ASSERT(retained); @@ -3918,6 +3919,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) } else { unsigned long flags; bool was_retained; + CSTD_UNUSED(was_retained); /* Case c: didn't evict, but it is scheduled - it's in the Run * Pool diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index 78fa6f37ef6c..7b8961679a10 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -61,10 +61,6 @@ #define __static_assert(e, msg, ...) _Static_assert(e, msg) #endif -#ifndef ENOTSUP -#define ENOTSUP EOPNOTSUPP -#endif - /* The module printing prefix */ #define PR_ "mali_kbase_kinstr_jm: " @@ -224,11 +220,8 @@ static inline bool reader_changes_is_valid_size(const size_t size) * * Return: * (0, U16_MAX] - the number of data elements allocated - * -EINVAL - a pointer was invalid - * -ENOTSUP - we do not support allocation of the context * -ERANGE - the requested memory size was invalid * -ENOMEM - could not allocate the memory - * -EADDRINUSE - the buffer memory was already allocated */ static int reader_changes_init(struct reader_changes *const changes, const size_t size) @@ -623,31 +616,34 @@ exit: * * Return: * * 0 - no data ready - * * POLLIN - state changes have been buffered - * * -EBADF - the file descriptor did not have an attached reader - * * -EINVAL - the IO control arguments were invalid + * * EPOLLIN | EPOLLRDNORM - state changes have been buffered + * * EPOLLHUP | EPOLLERR - IO control arguments were invalid or the file + * descriptor did not have an attached reader. 
*/ static __poll_t reader_poll(struct file *const file, struct poll_table_struct *const wait) { struct reader *reader; struct reader_changes *changes; + __poll_t mask = 0; if (unlikely(!file || !wait)) - return (__poll_t)-EINVAL; + return EPOLLHUP | EPOLLERR; reader = file->private_data; if (unlikely(!reader)) - return (__poll_t)-EBADF; + return EPOLLHUP | EPOLLERR; changes = &reader->changes; - if (reader_changes_count(changes) >= changes->threshold) - return POLLIN; + return EPOLLIN | EPOLLRDNORM; poll_wait(file, &reader->wait_queue, wait); - return (reader_changes_count(changes) > 0) ? POLLIN : 0; + if (reader_changes_count(changes) > 0) + mask |= EPOLLIN | EPOLLRDNORM; + + return mask; } /* The file operations virtual function table */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c index 81758c32259c..5fb11b7b94c5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c @@ -21,8 +21,8 @@ #include "mali_kbase.h" #include "mali_kbase_kinstr_prfcnt.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" #include #include "mali_malisw.h" #include "mali_kbase_debug.h" @@ -47,9 +47,6 @@ /* The maximum allowed buffers per client */ #define MAX_BUFFER_COUNT 32 -/* The module printing prefix */ -#define KINSTR_PRFCNT_PREFIX "mali_kbase_kinstr_prfcnt: " - /** * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware * counters. @@ -224,8 +221,8 @@ static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { * @filp: Non-NULL pointer to file structure. * @wait: Non-NULL pointer to poll table. * - * Return: POLLIN if data can be read without blocking, 0 if data can not be - * read without blocking, else error code. + * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if + * data can not be read without blocking, else EPOLLHUP | EPOLLERR. */ static __poll_t kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, @@ -234,19 +231,19 @@ kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp, struct kbase_kinstr_prfcnt_client *cli; if (!filp || !wait) - return (__poll_t)-EINVAL; + return EPOLLHUP | EPOLLERR; cli = filp->private_data; if (!cli) - return (__poll_t)-EINVAL; + return EPOLLHUP | EPOLLERR; poll_wait(filp, &cli->waitq, wait); if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx)) - return POLLIN; + return EPOLLIN | EPOLLRDNORM; - return 0; + return (__poll_t)0; } /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h index 0ffc10e5c496..bbe33796e62f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.h @@ -26,7 +26,7 @@ #ifndef _KBASE_KINSTR_PRFCNT_H_ #define _KBASE_KINSTR_PRFCNT_H_ -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include struct kbase_kinstr_prfcnt_context; @@ -80,7 +80,6 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) */ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx); -#if MALI_KERNEL_TEST_API /** * kbasep_kinstr_prfcnt_get_block_info_list() - Get list of all block types * with their information. @@ -158,7 +157,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, * @cli: kinstr_prfcnt client. 
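Both poll handlers are converted above to the standard convention for __poll_t: the return value is a bitmask of EPOLL* flags, and error conditions are reported as EPOLLHUP | EPOLLERR rather than as a negative errno cast to __poll_t. A minimal sketch of that shape with a generic data-ready test:

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

struct demo_reader {
	wait_queue_head_t wait_queue;
	bool data_ready;		/* assumption: set elsewhere when data arrives */
};

static __poll_t demo_poll(struct file *file, struct poll_table_struct *wait)
{
	struct demo_reader *reader = file->private_data;
	__poll_t mask = 0;

	if (unlikely(!reader))
		return EPOLLHUP | EPOLLERR;

	poll_wait(file, &reader->wait_queue, wait);

	if (reader->data_ready)
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}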
Must not be attached to a kinstr_prfcnt context. */ void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli); -#endif /* MALI_KERNEL_TEST_API */ /** * kbase_kinstr_prfcnt_enum_info - Enumerate performance counter information. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index e0785793e26a..3743b4df999f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -44,6 +44,8 @@ #include #include +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* * Alignment of objects allocated by the GPU inside a just-in-time memory * region whose size is given by an end address @@ -66,6 +68,7 @@ */ #define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* Forward declarations */ static void free_partial_locked(struct kbase_context *kctx, @@ -429,15 +432,15 @@ void kbase_remove_va_region(struct kbase_device *kbdev, next->nr_pages += reg->nr_pages; rb_erase(&(reg->rblink), reg_rbtree); merged_back = 1; - if (merged_front) { - /* We already merged with prev, free it */ - kfree(reg); - } } } - /* If we failed to merge then we need to add a new block */ - if (!(merged_front || merged_back)) { + if (merged_front && merged_back) { + /* We already merged with prev, free it */ + kfree(reg); + } else if (!(merged_front || merged_back)) { + /* If we failed to merge then we need to add a new block */ + /* * We didn't merge anything. Try to add a new free * placeholder, and in any case, remove the original one. @@ -1416,6 +1419,7 @@ int kbase_mem_init(struct kbase_device *kbdev) memdev = &kbdev->memdev; + kbase_mem_migrate_init(kbdev); kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, KBASE_MEM_POOL_MAX_SIZE_KCTX); @@ -1478,8 +1482,7 @@ int kbase_mem_init(struct kbase_device *kbdev) kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, KBASE_MEM_POOL_MAX_SIZE_KBDEV); - err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, - &mem_pool_defaults, NULL); + err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL); } return err; @@ -1505,6 +1508,8 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_pool_group_term(&kbdev->mem_pools); + kbase_mem_migrate_term(kbdev); + WARN_ON(kbdev->total_gpu_pages); WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); @@ -1613,6 +1618,7 @@ static struct kbase_context *kbase_reg_flags_to_kctx( * alloc object will be released. * It is a bug if no alloc object exists for non-free regions. * + * If region is KBASE_REG_ZONE_MCU_SHARED it is freed */ void kbase_free_alloced_region(struct kbase_va_region *reg) { @@ -1636,6 +1642,13 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) (void *)reg); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) + /* + * This should not be reachable if called from 'mcu_shared' functions + * such as: + * kbase_csf_firmware_mcu_shared_mapping_init + * kbase_csf_firmware_mcu_shared_mapping_term + */ + kbase_unlink_event_mem_page(kctx, reg); #endif @@ -1649,8 +1662,6 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) * on the list at termination time of the region tracker. 
*/ if (!list_empty(®->gpu_alloc->evict_node)) { - mutex_unlock(&kctx->jit_evict_lock); - /* * Unlink the physical allocation before unmaking it * evictable so that the allocation isn't grown back to @@ -1661,6 +1672,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (reg->cpu_alloc != reg->gpu_alloc) reg->gpu_alloc->reg = NULL; + mutex_unlock(&kctx->jit_evict_lock); + /* * If a region has been made evictable then we must * unmake it before trying to free it. @@ -1812,8 +1825,8 @@ bad_insert: KBASE_EXPORT_TEST_API(kbase_gpu_mmap); -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable); +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1879,7 +1892,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. */ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, + kbase_jd_user_buf_unmap(kctx, alloc, reg, (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); } @@ -2004,7 +2017,8 @@ void kbase_sync_single(struct kbase_context *kctx, BUG_ON(!cpu_page); BUG_ON(offset + size > PAGE_SIZE); - dma_addr = kbase_dma_addr(cpu_page) + offset; + dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset; + if (sync_fn == KBASE_SYNC_TO_CPU) dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); @@ -2015,19 +2029,20 @@ void kbase_sync_single(struct kbase_context *kctx, void *src = NULL; void *dst = NULL; struct page *gpu_page; + dma_addr_t dma_addr; if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) return; gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); + dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_DEVICE) { src = ((unsigned char *)kmap(cpu_page)) + offset; dst = ((unsigned char *)kmap(gpu_page)) + offset; } else if (sync_fn == KBASE_SYNC_TO_CPU) { - dma_sync_single_for_cpu(kctx->kbdev->dev, - kbase_dma_addr(gpu_page) + offset, - size, DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, + DMA_BIDIRECTIONAL); src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } @@ -2035,9 +2050,8 @@ void kbase_sync_single(struct kbase_context *kctx, kunmap(gpu_page); kunmap(cpu_page); if (sync_fn == KBASE_SYNC_TO_DEVICE) - dma_sync_single_for_device(kctx->kbdev->dev, - kbase_dma_addr(gpu_page) + offset, - size, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, + DMA_BIDIRECTIONAL); } } @@ -2188,24 +2202,22 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re return -EINVAL; } - /* - * Unlink the physical allocation before unmaking it evictable so - * that the allocation isn't grown back to its last backed size - * as we're going to unmap it anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - /* - * If a region has been made evictable then we must unmake it + /* If a region has been made evictable then we must unmake it * before trying to free it. * If the memory hasn't been reclaimed it will be unmapped and freed * below, if it has been reclaimed then the operations below are no-ops. 
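kbase_sync_single() above now takes the DMA address from the tagged physical address (kbase_dma_addr_from_tagged) and syncs only the touched sub-range. The underlying streaming-DMA pattern, shown in isolation with generic names:

#include <linux/dma-mapping.h>
#include <linux/string.h>

/* Sync a sub-range of an already-mapped page around a CPU access. */
static void demo_cpu_access(struct device *dev, dma_addr_t dma_addr,
			    void *cpu_va, size_t offset, size_t size)
{
	/* Give the CPU a coherent view before it reads or modifies the range. */
	dma_sync_single_for_cpu(dev, dma_addr + offset, size, DMA_BIDIRECTIONAL);

	memset((char *)cpu_va + offset, 0, size);	/* example CPU write */

	/* Hand ownership of the range back to the device afterwards. */
	dma_sync_single_for_device(dev, dma_addr + offset, size, DMA_BIDIRECTIONAL);
}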
*/ if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); + WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE); + mutex_lock(&kctx->jit_evict_lock); + /* Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_unmake(reg->gpu_alloc); } @@ -2464,11 +2476,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, if (nr_left >= (SZ_2M / SZ_4K)) { int nr_lp = nr_left / (SZ_2M / SZ_4K); - res = kbase_mem_pool_alloc_pages( - &kctx->mem_pools.large[alloc->group_id], - nr_lp * (SZ_2M / SZ_4K), - tp, - true); + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id], + nr_lp * (SZ_2M / SZ_4K), tp, true); if (res > 0) { nr_left -= res; @@ -2567,9 +2576,8 @@ no_new_partial: #endif if (nr_left) { - res = kbase_mem_pool_alloc_pages( - &kctx->mem_pools.small[alloc->group_id], - nr_left, tp, false); + res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left, + tp, false); if (res <= 0) goto alloc_failed; } @@ -3061,6 +3069,13 @@ KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); /** * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. * @alloc: The allocation for the imported user buffer. + * + * This must only be called when terminating an alloc, when its refcount + * (number of users) has become 0. This also ensures it is only called once all + * CPU mappings have been closed. + * + * Instead call kbase_jd_user_buf_unmap() if you need to unpin pages on active + * allocations */ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); #endif @@ -3434,10 +3449,6 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, #undef KBASE_MSG_PRE } -/** - * kbase_gpu_vm_lock() - Acquire the per-context region list lock - * @kctx: KBase context - */ void kbase_gpu_vm_lock(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); @@ -3446,10 +3457,6 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); -/** - * kbase_gpu_vm_unlock() - Release the per-context region list lock - * @kctx: KBase context - */ void kbase_gpu_vm_unlock(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx != NULL); @@ -3774,6 +3781,7 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); #if MALI_USE_CSF + mutex_init(&kctx->csf.kcpu_queues.jit_lock); INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); #else /* !MALI_USE_CSF */ @@ -4211,11 +4219,11 @@ static bool jit_allow_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { -#if MALI_USE_CSF - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); -#else +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); -#endif +#else /* MALI_USE_CSF */ + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); +#endif /* !MALI_USE_CSF */ #if MALI_JIT_PRESSURE_LIMIT_BASE if (!ignore_pressure_limit && @@ -4306,11 +4314,11 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; -#if MALI_USE_CSF - lockdep_assert_held(&kctx->csf.kcpu_queues.lock); 
-#else +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); -#endif +#else /* MALI_USE_CSF */ + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); +#endif /* !MALI_USE_CSF */ if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; @@ -4518,6 +4526,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) { u64 old_pages; +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#else /* MALI_USE_CSF */ + lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock); +#endif /* !MALI_USE_CSF */ + /* JIT id not immediately available here, so use 0u */ trace_mali_jit_free(reg, 0u); @@ -4764,7 +4778,23 @@ void kbase_unpin_user_buf_page(struct page *page) #if MALI_USE_CSF static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - if (alloc->nents) { + /* In CSF builds, we keep pages pinned until the last reference is + * released on the alloc. A refcount of 0 also means we can be sure + * that all CPU mappings have been closed on this alloc, and no more + * mappings of it will be created. + * + * Further, the WARN() below captures the restriction that this + * function will not handle anything other than the alloc termination + * path, because the caller of kbase_mem_phy_alloc_put() is not + * required to hold the kctx's reg_lock, and so we could not handle + * removing an existing CPU mapping here. + * + * Refer to this function's kernel-doc comments for alternatives for + * unpinning a User buffer. + */ + + if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0, + "must only be called on terminating an allocation")) { struct page **pages = alloc->imported.user_buf.pages; long i; @@ -4772,6 +4802,8 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) for (i = 0; i < alloc->nents; i++) kbase_unpin_user_buf_page(pages[i]); + + alloc->nents = 0; } } #endif @@ -4787,6 +4819,8 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, long i; int write; + lockdep_assert_held(&kctx->reg_lock); + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) return -EINVAL; @@ -4817,6 +4851,9 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, return pinned_pages; if (pinned_pages != alloc->imported.user_buf.nr_pages) { + /* Above code already ensures there will not have been a CPU + * mapping by ensuring alloc->nents is 0 + */ for (i = 0; i < pinned_pages; i++) kbase_unpin_user_buf_page(pages[i]); return -ENOMEM; @@ -4830,23 +4867,26 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { - long pinned_pages; + int err; + long pinned_pages = 0; struct kbase_mem_phy_alloc *alloc; struct page **pages; struct tagged_addr *pa; - long i; + long i, dma_mapped_pages; unsigned long address; struct device *dev; - unsigned long offset; - unsigned long local_size; + unsigned long offset_within_page; + unsigned long remaining_size; unsigned long gwt_mask = ~0; - int err = kbase_jd_user_buf_pin_pages(kctx, reg); - /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
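The lockdep_assert_held() calls added above make the JIT locking contract (jctx.lock on JM builds, kcpu_queues.jit_lock on CSF builds) checkable at runtime on lockdep-enabled kernels, and compile away otherwise. The pattern in its simplest form, with a hypothetical context:

#include <linux/lockdep.h>
#include <linux/mutex.h>

struct demo_ctx {
	struct mutex jit_lock;
	int jit_state;
};

/* Caller must hold @ctx->jit_lock. On CONFIG_LOCKDEP kernels a violation is
 * reported immediately; on production kernels the assert has no cost.
 */
static void demo_jit_update(struct demo_ctx *ctx, int state)
{
	lockdep_assert_held(&ctx->jit_lock);
	ctx->jit_state = state;
}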
*/ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + lockdep_assert_held(&kctx->reg_lock); + + err = kbase_jd_user_buf_pin_pages(kctx, reg); + if (err) return err; @@ -4856,17 +4896,16 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; + offset_within_page = address & ~PAGE_MASK; + remaining_size = alloc->imported.user_buf.size; for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, + unsigned long map_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = dma_map_page(dev, pages[i], + offset_within_page, map_size, DMA_BIDIRECTIONAL); + err = dma_mapping_error(dev, dma_addr); if (err) goto unwind; @@ -4874,8 +4913,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + remaining_size -= map_size; + offset_within_page = 0; } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4893,13 +4932,28 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, /* fall down */ unwind: alloc->nents = 0; - while (i--) { + offset_within_page = address & ~PAGE_MASK; + remaining_size = alloc->imported.user_buf.size; + dma_mapped_pages = i; + /* Run the unmap loop in the same order as map loop */ + for (i = 0; i < dma_mapped_pages; i++) { + unsigned long unmap_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + unmap_size, DMA_BIDIRECTIONAL); + remaining_size -= unmap_size; + offset_within_page = 0; } - while (++i < pinned_pages) { + /* The user buffer could already have been previously pinned before + * entering this function, and hence there could potentially be CPU + * mappings of it + */ + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages); + + for (i = 0; i < pinned_pages; i++) { kbase_unpin_user_buf_page(pages[i]); pages[i] = NULL; } @@ -4911,21 +4965,31 @@ unwind: * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
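The rewritten mapping loop above tracks offset_within_page and remaining_size so that only the first page can be partially mapped, validates each handle with dma_mapping_error(), and on failure unmaps in the same order with the same sizes. A stripped-down sketch of that map/unwind pairing (hypothetical arrays and sizes):

#include <linux/dma-mapping.h>
#include <linux/minmax.h>
#include <linux/mm.h>

static int demo_map_user_pages(struct device *dev, struct page **pages,
			       dma_addr_t *dma_addrs, long nr_pages,
			       unsigned long first_page_offset,
			       unsigned long total_size)
{
	unsigned long offset = first_page_offset;
	unsigned long remaining = total_size;
	long i, mapped;
	int err = 0;

	for (i = 0; i < nr_pages; i++) {
		unsigned long map_size = min(PAGE_SIZE - offset, remaining);
		dma_addr_t dma_addr = dma_map_page(dev, pages[i], offset,
						   map_size, DMA_BIDIRECTIONAL);

		err = dma_mapping_error(dev, dma_addr);
		if (err)
			goto unwind;

		dma_addrs[i] = dma_addr;
		remaining -= map_size;
		offset = 0;		/* only the first page may start mid-page */
	}
	return 0;

unwind:
	/* Unmap in the same order, recomputing the same offsets and sizes. */
	mapped = i;
	offset = first_page_offset;
	remaining = total_size;
	for (i = 0; i < mapped; i++) {
		unsigned long unmap_size = min(PAGE_SIZE - offset, remaining);

		dma_unmap_page(dev, dma_addrs[i], unmap_size, DMA_BIDIRECTIONAL);
		remaining -= unmap_size;
		offset = 0;
	}
	return err;
}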
*/ -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, + struct kbase_va_region *reg, bool writeable) { long i; struct page **pages; - unsigned long size = alloc->imported.user_buf.size; + unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; + unsigned long remaining_size = alloc->imported.user_buf.size; + + lockdep_assert_held(&kctx->reg_lock); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); pages = alloc->imported.user_buf.pages; + +#if !MALI_USE_CSF + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); +#else + CSTD_UNUSED(reg); +#endif + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; + unsigned long unmap_size = + MIN(remaining_size, PAGE_SIZE - offset_within_page); dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size, DMA_BIDIRECTIONAL); if (writeable) set_page_dirty_lock(pages[i]); @@ -4934,7 +4998,8 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, pages[i] = NULL; #endif - size -= local_size; + remaining_size -= unmap_size; + offset_within_page = 0; } #if !MALI_USE_CSF alloc->nents = 0; @@ -4981,11 +5046,11 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, return 0; } -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm) +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) { - int err; + int err = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; lockdep_assert_held(&kctx->reg_lock); @@ -4994,7 +5059,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && (!reg->gpu_alloc->nents)) - goto exit; + return -EINVAL; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; if (reg->gpu_alloc->imported.user_buf @@ -5002,7 +5067,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; + return err; } } } @@ -5010,21 +5075,29 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( case KBASE_MEM_TYPE_IMPORTED_UMM: { err = kbase_mem_umm_map(kctx, reg); if (err) - goto exit; + return err; break; } default: - goto exit; + WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); + return -EINVAL; } - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; + kbase_va_region_alloc_get(kctx, reg); + kbase_mem_phy_alloc_get(alloc); + return err; } -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg) { + /* gpu_alloc was used in kbase_map_external_resources, so we need to use it for the + * unmapping operation. 
+ */ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + lockdep_assert_held(&kctx->reg_lock); + switch (alloc->type) { case KBASE_MEM_TYPE_IMPORTED_UMM: { kbase_mem_umm_unmap(kctx, reg, alloc); @@ -5036,24 +5109,32 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, if (alloc->imported.user_buf.current_mapping_usage_count == 0) { bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg) && - reg->gpu_alloc == alloc) + if (!kbase_is_region_invalid_or_free(reg)) { kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), kctx->as_nr); + } - if (reg && ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)) + if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) writeable = false; - kbase_jd_user_buf_unmap(kctx, alloc, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); + } } - } break; default: - break; + WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping", + alloc->type); + return; } kbase_mem_phy_alloc_put(alloc); + kbase_va_region_alloc_put(kctx, reg); +} + +static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg) +{ + return reg->start_pfn << PAGE_SHIFT; } struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( @@ -5069,7 +5150,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( * metadata which matches the region which is being acquired. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) { meta = walker; meta->ref++; break; @@ -5081,8 +5162,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( struct kbase_va_region *reg; /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto failed; @@ -5090,18 +5170,18 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (!meta) goto failed; - /* * Fill in the metadata object and acquire a reference * for the physical resource. */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); - meta->ref = 1; + meta->reg = reg; - if (!meta->alloc) + /* Map the external resource to the GPU allocation of the region + * and acquire the reference to the VA region + */ + if (kbase_map_external_resource(kctx, meta->reg, NULL)) goto fail_map; - - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->ref = 1; list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); } @@ -5126,7 +5206,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) * metadata which matches the region which is being released. */ list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) - if (walker->gpu_addr == gpu_addr) + if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) return walker; return NULL; @@ -5135,14 +5215,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) static void release_sticky_resource_meta(struct kbase_context *kctx, struct kbase_ctx_ext_res_meta *meta) { - struct kbase_va_region *reg; - - /* Drop the physical memory reference and free the metadata. 
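kbase_map_external_resource() above now returns an error code and, on success, takes a reference on both the VA region and its physical allocation; kbase_unmap_external_resource() drops them again. A generic sketch of that "both references or none" shape (all names hypothetical):

#include <linux/types.h>

struct demo_region { int dummy; };
struct demo_alloc { int dummy; };

/* Stand-ins for the driver's get/put and map/unmap calls. */
static void demo_region_get(struct demo_region *reg) { }
static void demo_region_put(struct demo_region *reg) { }
static void demo_alloc_get(struct demo_alloc *alloc) { }
static void demo_alloc_put(struct demo_alloc *alloc) { }
static int demo_do_map(struct demo_region *reg) { return 0; }
static void demo_do_unmap(struct demo_region *reg) { }

static int demo_map_external(struct demo_region *reg, struct demo_alloc *alloc)
{
	int err = demo_do_map(reg);

	if (err)
		return err;		/* nothing referenced on failure */

	demo_region_get(reg);		/* keep the VA region alive while mapped */
	demo_alloc_get(alloc);		/* and the physical backing with it */
	return 0;
}

static void demo_unmap_external(struct demo_region *reg, struct demo_alloc *alloc)
{
	demo_do_unmap(reg);
	demo_alloc_put(alloc);
	demo_region_put(reg);
}

Holding the region reference is what lets the new failed_loop path and the sticky-resource metadata key off the kbase_va_region pointer instead of a stored GPU address.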
*/ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); + kbase_unmap_external_resource(kctx, meta->reg); list_del(&meta->ext_res_node); kfree(meta); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index f590a449504a..5820f6d8a556 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -37,6 +37,7 @@ #include "mali_kbase_defs.h" /* Required for kbase_mem_evictable_unmake */ #include "mali_kbase_mem_linux.h" +#include "mali_kbase_mem_migrate.h" static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages); @@ -182,6 +183,89 @@ struct kbase_mem_phy_alloc { } imported; }; +/** + * enum kbase_page_status - Status of a page used for page migration. + * + * @MEM_POOL: Stable state. Page is located in a memory pool and can safely + * be migrated. + * @ALLOCATE_IN_PROGRESS: Transitory state. A page is set to this status as + * soon as it leaves a memory pool. + * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory + * pool of a dying context are being moved to the device + * memory pool. + * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU + * and has reference to kbase_mem_phy_alloc object. + * @MULTI_MAPPED: Stable state. This state is used to manage all use cases + * where a page may have "unusual" mappings. + * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't + * reference kbase_mem_phy_alloc object. Used as a page in MMU + * page table. + * @FREE_IN_PROGRESS: Transitory state. A page is set to this status as soon as + * the driver manages to acquire a lock on the page while + * unmapping it. This status means that a memory release is + * happening and it's still not complete. + * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in ALLOCATED_MAPPED or + * PT_MAPPED state, but then the driver tries to destroy the + * allocation. + * + * Pages can only be migrated in stable states. + */ +enum kbase_page_status { + MEM_POOL = 0, + ALLOCATE_IN_PROGRESS, + SPILL_IN_PROGRESS, + ALLOCATED_MAPPED, + MULTI_MAPPED, + PT_MAPPED, + FREE_IN_PROGRESS, + FREE_ISOLATED_IN_PROGRESS, +}; + +/** + * struct kbase_page_metadata - Metadata for each page in kbase + * + * @kbdev: Pointer to kbase device. + * @dma_addr: DMA address mapped to page. + * @migrate_lock: A spinlock to protect the private metadata. + * @status: Status to keep track if page can be migrated at any + * given moment. MSB will indicate if page is isolated. + * Protected by @migrate_lock. + * @data: Member in union valid based on @status. + * + * Each 4KB page will have a reference to this struct in the private field. + * This will be used to keep track of information required for Linux page + * migration functionality as well as address for DMA mapping. + */ +struct kbase_page_metadata { + dma_addr_t dma_addr; + spinlock_t migrate_lock; + u8 status; + + union { + struct { + struct kbase_mem_pool *pool; + /* Pool could be terminated after page is isolated and therefore + * won't be able to get reference to kbase device. 
+ */ + struct kbase_device *kbdev; + } mem_pool; + struct { + struct kbase_mem_phy_alloc *phy_alloc; + struct kbase_va_region *reg; + struct kbase_mmu_table *mmut; + struct page *pgd; + u64 vpfn; + size_t page_array_index; + } mapped; + struct { + struct kbase_mmu_table *mmut; + struct page *pgd; + u16 entry_info; + } pt_mapped; + } data; +}; + /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is * used to signify that a buffer was pinned when it was imported. Since the * reference count is limited by the number of atoms that can be submitted at @@ -224,8 +308,9 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc * } /** - * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings - * counter for a memory region to prevent commit and flag changes + * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings counter for a + * memory region to prevent commit and flag + * changes * * @alloc: Pointer to physical pages tracking object */ @@ -387,6 +472,13 @@ struct kbase_va_region { #define KBASE_REG_PROTECTED (1ul << 19) +/* Region belongs to a shrinker. + * + * This can either mean that it is part of the JIT/Ephemeral or tiler heap + * shrinker paths. Should be removed only after making sure that there are + * no references remaining to it in these paths, as it may cause the physical + * backing of the region to disappear during use. + */ #define KBASE_REG_DONT_NEED (1ul << 20) /* Imported buffer is padded? */ @@ -862,12 +954,9 @@ static inline size_t kbase_mem_pool_config_get_max_size( * * Return: 0 on success, negative -errno on error */ -int kbase_mem_pool_init(struct kbase_mem_pool *pool, - const struct kbase_mem_pool_config *config, - unsigned int order, - int group_id, - struct kbase_device *kbdev, - struct kbase_mem_pool *next_pool); +int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, + unsigned int order, int group_id, struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool); /** * kbase_mem_pool_term - Destroy a memory pool @@ -963,7 +1052,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. */ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed); + struct tagged_addr *pages, bool partial_allowed); /** * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool @@ -1114,6 +1203,16 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); */ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); +/** + * kbase_mem_pool_free_page - Free a page from a memory pool. + * @pool: Memory pool to free a page from + * @p: Page to free + * + * This will free any associated data stored for the page and release + * the page back to the kernel. 
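/*
 * Illustrative sketch (not part of the patch): pairing kbase_mem_alloc_page()
 * with kbase_mem_pool_free_page() as alloc/free inverses, as the kernel-doc
 * above suggests. The helper name is made up, and it assumes no extra locking
 * is required around the two calls; check the pool implementation before
 * relying on that.
 */
static int kbase_example_page_roundtrip(struct kbase_mem_pool *pool)
{
	struct page *p = kbase_mem_alloc_page(pool);

	if (!p)
		return -ENOMEM;

	/* ... temporary use of the page ... */

	/* Frees any per-page data (e.g. migration metadata) and the page itself. */
	kbase_mem_pool_free_page(pool, p);
	return 0;
}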
+ */ +void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p); + /** * kbase_region_tracker_init - Initialize the region tracker data structure * @kctx: kbase context @@ -1233,7 +1332,55 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long flags); +/** + * kbase_gpu_vm_lock() - Acquire the per-context region list lock + * @kctx: KBase context + * + * Care must be taken when making an allocation whilst holding this lock, because of interaction + * with the Kernel's OoM-killer and use of this lock in &vm_operations_struct close() handlers. + * + * If this lock is taken during a syscall, and/or the allocation is 'small' then it is safe to use. + * + * If the caller is not in a syscall, and the allocation is 'large', then it must not hold this + * lock. + * + * This is because the kernel OoM killer might target the process corresponding to that same kbase + * context, and attempt to call the context's close() handlers for its open VMAs. This is safe if + * the allocating caller is in a syscall, because the VMA close() handlers are delayed until all + * syscalls have finished (noting that no new syscalls can start as the remaining user threads will + * have been killed too), and so there is no possibility of contention between the thread + * allocating with this lock held, and the VMA close() handler. + * + * However, outside of a syscall (e.g. a kworker or other kthread), one of kbase's VMA close() + * handlers (kbase_cpu_vm_close()) also takes this lock, and so prevents the process from being + * killed until the caller of the function allocating memory has released this lock. On subsequent + * retries for allocating a page, the OoM killer would be re-invoked but skips over the process + * stuck in its close() handler. + * + * Also because the caller is not in a syscall, the page allocation code in the kernel is not aware + * that the allocation is being done on behalf of another process, and so does not realize that + * process has received a kill signal due to an OoM, and so will continually retry with the OoM + * killer until enough memory has been released, or until all other killable processes have been + * killed (at which point the kernel halts with a panic). + * + * However, if the allocation outside of a syscall is small enough to be satisfied by killing + * another process, then the allocation completes, the caller releases this lock, and + * kbase_cpu_vm_close() can unblock and allow the process to be killed. + * + * Hence, this is effectively a deadlock with kbase_cpu_vm_close(), except that if the memory + * allocation is small enough the deadlock can be resolved. For that reason, such a memory deadlock + * is NOT discovered with CONFIG_PROVE_LOCKING. + * + * If this may be called outside of a syscall, consider moving allocations outside of this lock, or + * use __GFP_NORETRY for such allocations (which will allow direct-reclaim attempts, but will + * prevent OoM kills to satisfy the allocation, and will just fail the allocation instead). 
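/*
 * Illustrative sketch (not part of the patch): following the guidance above
 * for a caller that is not in a syscall (e.g. a kworker) and must allocate
 * while holding the region list lock. The helper name is made up; it assumes
 * <linux/slab.h> and the kbase headers.
 */
static void *kbase_example_alloc_outside_syscall(struct kbase_context *kctx, size_t bytes)
{
	void *buf;

	kbase_gpu_vm_lock(kctx);
	/*
	 * __GFP_NORETRY still allows direct reclaim, but fails the allocation
	 * instead of invoking the OoM killer, which could otherwise deadlock
	 * against kbase_cpu_vm_close() waiting on this same lock.
	 */
	buf = kzalloc(bytes, GFP_KERNEL | __GFP_NORETRY);
	kbase_gpu_vm_unlock(kctx);

	return buf; /* May be NULL; the caller must tolerate failure. */
}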
+ */ void kbase_gpu_vm_lock(struct kbase_context *kctx); + +/** + * kbase_gpu_vm_unlock() - Release the per-context region list lock + * @kctx: KBase context + */ void kbase_gpu_vm_unlock(struct kbase_context *kctx); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); @@ -1311,6 +1458,7 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); +#if defined(CONFIG_MALI_VECTOR_DUMP) /** * kbase_mmu_dump() - Dump the MMU tables to a buffer. * @@ -1330,6 +1478,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); * (including if the @c nr_pages is too small) */ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); +#endif /** * kbase_sync_now - Perform cache maintenance on a memory region @@ -1449,15 +1598,21 @@ int kbasep_find_enclosing_gpu_mapping_start_and_offset( * @alloc: allocation object to add pages to * @nr_pages_requested: number of physical pages to allocate * - * Allocates \a nr_pages_requested and updates the alloc object. + * Allocates @nr_pages_requested and updates the alloc object. * - * Return: 0 if all pages have been successfully allocated. Error code otherwise + * Note: if kbase_gpu_vm_lock() is to be held around this function to ensure thread-safe updating + * of @alloc, then refer to the documentation of kbase_gpu_vm_lock() about the requirements of + * either calling during a syscall, or ensuring the allocation is small. These requirements prevent + * an effective deadlock between the kernel's OoM killer and kbase's VMA close() handlers, which + * could take kbase_gpu_vm_lock() too. * - * Note : The caller must not hold vm_lock, as this could cause a deadlock if - * the kernel OoM killer runs. If the caller must allocate pages while holding - * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + * If the requirements of kbase_gpu_vm_lock() cannot be satisfied when calling this function, but + * @alloc must still be updated in a thread-safe way, then instead use + * kbase_alloc_phy_pages_helper_locked() and restructure callers into the sequence outlined there. * * This function cannot be used from interrupt context + * + * Return: 0 if all pages have been successfully allocated. Error code otherwise */ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); @@ -1467,17 +1622,19 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * @alloc: allocation object to add pages to * @pool: Memory pool to allocate from * @nr_pages_requested: number of physical pages to allocate - * @prealloc_sa: Information about the partial allocation if the amount - * of memory requested is not a multiple of 2MB. One - * instance of struct kbase_sub_alloc must be allocated by - * the caller iff CONFIG_MALI_2MB_ALLOC is enabled. * - * Allocates \a nr_pages_requested and updates the alloc object. This function - * does not allocate new pages from the kernel, and therefore will never trigger - * the OoM killer. Therefore, it can be run while the vm_lock is held. + * @prealloc_sa: Information about the partial allocation if the amount of memory requested + * is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be + * allocated by the caller iff CONFIG_MALI_2MB_ALLOC is enabled. * - * As new pages can not be allocated, the caller must ensure there are - * sufficient pages in the pool. 
Usage of this function should look like : + * Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new + * pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be + * called whilst a thread operating outside of a syscall has held the region list lock + * (kbase_gpu_vm_lock()), as it will not cause an effective deadlock with VMA close() handlers used + * by the OoM killer. + * + * As new pages can not be allocated, the caller must ensure there are sufficient pages in the + * pool. Usage of this function should look like : * * kbase_gpu_vm_lock(kctx); * kbase_mem_pool_lock(pool) @@ -1490,24 +1647,24 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * } * kbase_alloc_phy_pages_helper_locked(pool) * kbase_mem_pool_unlock(pool) - * Perform other processing that requires vm_lock... + * // Perform other processing that requires vm_lock... * kbase_gpu_vm_unlock(kctx); * - * This ensures that the pool can be grown to the required size and that the - * allocation can complete without another thread using the newly grown pages. + * This ensures that the pool can be grown to the required size and that the allocation can + * complete without another thread using the newly grown pages. * - * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then - * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be - * alloc->imported.native.kctx->mem_pool. - * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be - * pre-allocated because we must not sleep (due to the usage of kmalloc()) - * whilst holding pool->pool_lock. - * @prealloc_sa shall be set to NULL if it has been consumed by this function - * to indicate that the caller must not free it. + * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then @pool must be one of the + * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the + * mempools from alloc->imported.native.kctx->mem_pools.small[]. + * + * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we + * must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa + * shall be set to NULL if it has been consumed by this function to indicate that the caller no + * longer owns it and should not access it further. + * + * Note: Caller must hold @pool->pool_lock * * Return: Pointer to array of allocated pages. NULL on failure. 
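/*
 * Illustrative sketch (not part of the patch): one way the lock/grow/retry
 * sequence sketched in the kernel-doc above can be written out. The helper
 * name is made up, error handling is minimal, and the exact prototypes of
 * kbase_mem_pool_grow() and kbase_alloc_phy_pages_helper_locked() should be
 * taken from the headers rather than from this sketch.
 */
static struct tagged_addr *kbase_example_alloc_locked(struct kbase_context *kctx,
						      struct kbase_mem_phy_alloc *alloc,
						      struct kbase_mem_pool *pool,
						      size_t nr_pages,
						      struct kbase_sub_alloc **prealloc_sa)
{
	struct tagged_addr *pages;

	kbase_gpu_vm_lock(kctx);
	kbase_mem_pool_lock(pool);

	/* Grow the pool with both locks dropped, then re-check its size. */
	while (kbase_mem_pool_size(pool) < nr_pages) {
		size_t shortfall = nr_pages - kbase_mem_pool_size(pool);

		kbase_mem_pool_unlock(pool);
		kbase_gpu_vm_unlock(kctx);

		if (kbase_mem_pool_grow(pool, shortfall))
			return NULL;

		kbase_gpu_vm_lock(kctx);
		kbase_mem_pool_lock(pool);
	}

	/* Pool is now large enough and still locked: no OoM killer involvement. */
	pages = kbase_alloc_phy_pages_helper_locked(alloc, pool, nr_pages, prealloc_sa);

	kbase_mem_pool_unlock(pool);
	/* ... other processing that requires the region lock ... */
	kbase_gpu_vm_unlock(kctx);

	return pages;
}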
- * - * Note : Caller must hold pool->pool_lock */ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, @@ -1546,7 +1703,7 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, struct tagged_addr *pages, size_t nr_pages_to_free); -static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) +static inline void kbase_set_dma_addr_as_priv(struct page *p, dma_addr_t dma_addr) { SetPagePrivate(p); if (sizeof(dma_addr_t) > sizeof(p->private)) { @@ -1562,7 +1719,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) } } -static inline dma_addr_t kbase_dma_addr(struct page *p) +static inline dma_addr_t kbase_dma_addr_as_priv(struct page *p) { if (sizeof(dma_addr_t) > sizeof(p->private)) return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; @@ -1570,11 +1727,34 @@ static inline dma_addr_t kbase_dma_addr(struct page *p) return (dma_addr_t)page_private(p); } -static inline void kbase_clear_dma_addr(struct page *p) +static inline void kbase_clear_dma_addr_as_priv(struct page *p) { ClearPagePrivate(p); } +static inline struct kbase_page_metadata *kbase_page_private(struct page *p) +{ + return (struct kbase_page_metadata *)page_private(p); +} + +static inline dma_addr_t kbase_dma_addr(struct page *p) +{ + if (kbase_page_migration_enabled) + return kbase_page_private(p)->dma_addr; + + return kbase_dma_addr_as_priv(p); +} + +static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa) +{ + phys_addr_t pa = as_phys_addr_t(tagged_pa); + struct page *page = pfn_to_page(PFN_DOWN(pa)); + dma_addr_t dma_addr = + is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); + + return dma_addr; +} + /** * kbase_flush_mmu_wqs() - Flush MMU workqueues. * @kbdev: Device pointer. @@ -1868,28 +2048,36 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx); /** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. - * @reg: The region to map. + * @reg: External resource to map. * @locked_mm: The mm_struct which has been locked for this operation. * - * Return: The physical allocation which backs the region on success or NULL - * on failure. + * On successful mapping, the VA region and the gpu_alloc refcounts will be + * increased, making it safe to use and store both values directly. + * + * Return: Zero on success, or negative error code. */ -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm); +int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm); /** * kbase_unmap_external_resource - Unmap an external resource from the GPU. * @kctx: kbase context. - * @reg: The region to unmap or NULL if it has already been released. - * @alloc: The physical allocation being unmapped. + * @reg: VA region corresponding to external resource + * + * On successful unmapping, the VA region and the gpu_alloc refcounts will + * be decreased. If the refcount reaches zero, both @reg and the corresponding + * allocation may be freed, so using them after returning from this function + * requires the caller to explicitly check their state. 
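/*
 * Illustrative sketch (not part of the patch): the intended pairing of the
 * two calls documented above, with the new int-returning
 * kbase_map_external_resource() and region-based
 * kbase_unmap_external_resource(). The helper name is made up; it assumes
 * the region lock is held across both calls, as the lockdep assertion added
 * to kbase_unmap_external_resource() suggests.
 */
static int kbase_example_with_ext_res(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_va_region *reg;
	int err;

	kbase_gpu_vm_lock(kctx);

	reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
	if (kbase_is_region_invalid_or_free(reg)) {
		err = -ENOENT;
		goto out_unlock;
	}

	/* On success this takes a reference on both the VA region and its gpu_alloc. */
	err = kbase_map_external_resource(kctx, reg, NULL);
	if (err)
		goto out_unlock;

	/* ... use the mapped resource ... */

	/* Drops both references; reg may be freed once this returns. */
	kbase_unmap_external_resource(kctx, reg);

out_unlock:
	kbase_gpu_vm_unlock(kctx);
	return err;
}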
*/ -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); +void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg); /** * kbase_unpin_user_buf_page - Unpin a page of a user buffer. * @page: page to unpin + * + * The caller must have ensured that there are no CPU mappings for @page (as + * might be created from the struct kbase_mem_phy_alloc that tracks @page), and + * that userspace will not be able to recreate the CPU mappings again. */ void kbase_unpin_user_buf_page(struct page *page); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index c373cf82ea37..9899cef317ac 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -83,24 +84,18 @@ #define IR_THRESHOLD_STEPS (256u) #if MALI_USE_CSF -static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, - struct vm_area_struct *vma); -static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, - struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, struct vm_area_struct *vma); #endif -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map); +static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags); static void kbase_vunmap_phy_pages(struct kbase_context *kctx, struct kbase_vmap_struct *map); static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - static bool is_process_exiting(struct vm_area_struct *vma) { /* PF_EXITING flag can't be reliably used here for the detection @@ -198,20 +193,12 @@ static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) return -EINVAL; - if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - - atomic_read(&kctx->permanent_mapped_pages))) { - dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", - (u64)size, - KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, - atomic_read(&kctx->permanent_mapped_pages)); - return -ENOMEM; - } - kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); if (!kern_mapping) return -ENOMEM; - err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); + err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping, + KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); if (err < 0) goto vmap_fail; @@ -219,7 +206,6 @@ static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, reg->flags &= ~KBASE_REG_GROWABLE; reg->cpu_alloc->permanent_map = kern_mapping; - atomic_add(size, &kctx->permanent_mapped_pages); return 0; vmap_fail: @@ -235,13 +221,6 @@ void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, kfree(alloc->permanent_map); alloc->permanent_map = NULL; - - /* Mappings are only done on cpu_alloc, so don't need to worry about - * this being reduced a second time if a separate 
gpu_alloc is - * freed - */ - WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages)); - atomic_sub(alloc->nents, &kctx->permanent_mapped_pages); } void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, @@ -499,7 +478,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *gpu_va = (u64) cookie; } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1, + size_t align = 1; + +#ifdef CONFIG_MALI_2MB_ALLOC + /* If there's enough (> 33 bits) of GPU VA space, align to 2MB + * boundaries. The similar condition is used for mapping from + * the SAME_VA zone inside kbase_context_get_unmapped_area(). + */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { + if (va_pages >= (SZ_2M / SZ_4K)) + align = (SZ_2M / SZ_4K); + } + if (*gpu_va) + align = 1; +#if !MALI_USE_CSF + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) + align = 1; +#endif /* !MALI_USE_CSF */ +#endif /* CONFIG_MALI_2MB_ALLOC */ + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align, mmu_sync_info) != 0) { dev_warn(dev, "Failed to map memory on GPU"); kbase_gpu_vm_unlock(kctx); @@ -675,24 +672,36 @@ out_unlock: * @s: Shrinker * @sc: Shrinker control * - * Return: Number of pages which can be freed. + * Return: Number of pages which can be freed or SHRINK_EMPTY if no page remains. */ static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { - struct kbase_context *kctx; - - kctx = container_of(s, struct kbase_context, reclaim); + struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim); + int evict_nents = atomic_read(&kctx->evict_nents); + unsigned long nr_freeable_items; WARN((sc->gfp_mask & __GFP_ATOMIC), "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", sc->gfp_mask); WARN(in_atomic(), - "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. gfp_mask==%x\n", + "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", sc->gfp_mask); - return atomic_read(&kctx->evict_nents); + if (unlikely(evict_nents < 0)) { + dev_err(kctx->kbdev->dev, "invalid evict_nents(%d)", evict_nents); + nr_freeable_items = 0; + } else { + nr_freeable_items = evict_nents; + } + +#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE + if (nr_freeable_items == 0) + nr_freeable_items = SHRINK_EMPTY; +#endif + + return nr_freeable_items; } /** @@ -701,8 +710,8 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, * @s: Shrinker * @sc: Shrinker control * - * Return: Number of pages freed (can be less then requested) or -1 if the - * shrinker failed to free pages in its pool. + * Return: Number of pages freed (can be less then requested) or + * SHRINK_STOP if reclaim isn't possible. * * Note: * This function accesses region structures without taking the region lock, @@ -730,17 +739,15 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { int err; + if (!alloc->reg) + continue; + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, 0, alloc->nents); - if (err != 0) { - /* - * Failed to remove GPU mapping, tell the shrinker - * to stop trying to shrink our slab even though we - * have pages in it. - */ - freed = -1; - goto out_unlock; - } + + /* Failed to remove GPU mapping, proceed to next one. 
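/*
 * Illustrative sketch (not part of the patch): the count_objects convention
 * adopted by the shrinker hunk above - report the freeable count, or
 * SHRINK_EMPTY on kernels >= 4.19 when there is nothing to reclaim. Names
 * prefixed "example_" are made up; assumes <linux/shrinker.h> and
 * <linux/version.h>.
 */
static atomic_t example_freeable_count = ATOMIC_INIT(0);

static unsigned long example_reclaim_count_objects(struct shrinker *s,
						   struct shrink_control *sc)
{
	int nr = atomic_read(&example_freeable_count);

	if (nr <= 0) {
#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE
		/* Tell the shrinker core this shrinker is empty rather than returning 0. */
		return SHRINK_EMPTY;
#else
		return 0;
#endif
	}

	return (unsigned long)nr;
}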
*/ + if (err != 0) + continue; /* * Update alloc->evicted before freeing the backing so the @@ -764,7 +771,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, if (freed > sc->nr_to_scan) break; } -out_unlock: + mutex_unlock(&kctx->jit_evict_lock); return freed; @@ -964,6 +971,15 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + /* There is no use case to support MEM_FLAGS_CHANGE ioctl for allocations + * that have NO_USER_FREE flag set, to mark them as evictable/reclaimable. + * This would usually include JIT allocations, Tiler heap related allocations + * & GPU queue ringbuffer and none of them needs to be explicitly marked + * as evictable by Userspace. + */ + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + /* Is the region being transitioning between not needed and needed? */ prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; @@ -1536,13 +1552,15 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_context *kctx, unsigned long address, unsigned long size, u64 *va_pages, u64 *flags) { - long i; + long i, dma_mapped_pages; struct kbase_va_region *reg; struct rb_root *rbtree; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); + unsigned long offset_within_page; + unsigned long remaining_size; struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; int write; @@ -1688,29 +1706,27 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (pages) { struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + offset_within_page = user_buf->address & ~PAGE_MASK; + remaining_size = user_buf->size; for (i = 0; i < faulted_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; + unsigned long map_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = dma_map_page(dev, pages[i], + offset_within_page, map_size, DMA_BIDIRECTIONAL); - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - local_size -= min; - offset = 0; + remaining_size -= map_size; + offset_within_page = 0; } reg->gpu_alloc->nents = faulted_pages; @@ -1719,13 +1735,26 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( return reg; unwind_dma_map: - while (i--) { + offset_within_page = user_buf->address & ~PAGE_MASK; + remaining_size = user_buf->size; + dma_mapped_pages = i; + /* Run the unmap loop in the same order as map loop */ + for (i = 0; i < dma_mapped_pages; i++) { + unsigned long unmap_size = + MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_unmap_page(kctx->kbdev->dev, user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); + unmap_size, DMA_BIDIRECTIONAL); + remaining_size -= unmap_size; + offset_within_page = 0; } fault_mismatch: if (pages) { + /* In this case, the region was not yet in the region tracker, + * and so there are no CPU mappings to remove 
before we unpin + * the page + */ for (i = 0; i < faulted_pages; i++) kbase_unpin_user_buf_page(pages[i]); } @@ -1750,6 +1779,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 gpu_va; size_t i; bool coherent; + uint64_t max_stride; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -1782,7 +1812,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (!nents) goto bad_nents; - if (stride > U64_MAX / nents) + max_stride = div64_u64(U64_MAX, nents); + + if (stride > max_stride) goto bad_size; if ((nents * stride) > (U64_MAX / PAGE_SIZE)) @@ -2156,22 +2188,9 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, (old_pages - new_pages)<gpu_alloc; @@ -2242,10 +2261,13 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - /* can't grow regions which are ephemeral */ + if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; + if (reg->flags & KBASE_REG_NO_USER_FREE) + goto out_unlock; + #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED /* Reject resizing commit size */ if (reg->flags & KBASE_REG_PF_GROW) @@ -2628,7 +2650,6 @@ static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) while (kbase_jit_evict(kctx)) ; } -#endif static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, @@ -2645,9 +2666,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; -#ifdef CONFIG_MALI_VECTOR_DUMP kbase_free_unused_jit_allocations(kctx); -#endif kaddr = kbase_mmu_dump(kctx, nr_pages); @@ -2695,7 +2714,7 @@ out_va_region: out: return err; } - +#endif void kbase_os_mem_map_lock(struct kbase_context *kctx) { @@ -2836,6 +2855,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = -EINVAL; goto out_unlock; case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): +#if defined(CONFIG_MALI_VECTOR_DUMP) /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); if (err != 0) @@ -2843,6 +2863,11 @@ int kbase_context_mmap(struct kbase_context *const kctx, /* free the region on munmap */ free_on_close = 1; break; +#else + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ #if MALI_USE_CSF case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): kbase_gpu_vm_unlock(kctx); @@ -2930,7 +2955,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); - +#if defined(CONFIG_MALI_VECTOR_DUMP) if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on * the pages, so we can now free the kernel mapping @@ -2949,7 +2974,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, */ vma->vm_pgoff = PFN_DOWN(vma->vm_start); } - +#endif /* defined(CONFIG_MALI_VECTOR_DUMP) */ out_unlock: kbase_gpu_vm_unlock(kctx); out: @@ -2991,9 +3016,9 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map) +static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags) { unsigned long page_index; unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; @@ 
-3004,6 +3029,12 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, pgprot_t prot; size_t i; + if (WARN_ON(vmap_flags & ~KBASE_VMAP_INPUT_FLAGS)) + return -EINVAL; + + if (WARN_ON(kbase_is_region_invalid_or_free(reg))) + return -EINVAL; + if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) return -EINVAL; @@ -3020,6 +3051,17 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, if (page_index + page_count > kbase_reg_current_backed_size(reg)) return -ENOMEM; + if ((vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) && + (page_count > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + atomic_read(&kctx->permanent_mapped_pages)))) { + dev_warn( + kctx->kbdev->dev, + "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", + (u64)page_count, KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, + atomic_read(&kctx->permanent_mapped_pages)); + return -ENOMEM; + } + if (reg->flags & KBASE_REG_DONT_NEED) return -EINVAL; @@ -3058,59 +3100,73 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); map->size = size; - map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && - !kbase_mem_is_imported(map->gpu_alloc->type); + map->flags = vmap_flags; + if ((reg->flags & KBASE_REG_CPU_CACHED) && !kbase_mem_is_imported(map->gpu_alloc->type)) + map->flags |= KBASE_VMAP_FLAG_SYNC_NEEDED; - if (map->sync_needed) + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + if (vmap_flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) + atomic_add(page_count, &kctx->permanent_mapped_pages); + kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); return 0; } +void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, + size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags) +{ + u64 offset_bytes; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(kbase_is_region_invalid_or_free(reg))) + return NULL; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ + if ((reg->flags & prot_request) != prot_request) + return NULL; + + offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map, vmap_flags); + if (err < 0) + goto fail_vmap_phy_pages; + + return map->addr; + +fail_vmap_phy_pages: + kbase_mem_phy_alloc_put(cpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); + return NULL; +} + void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, unsigned long prot_request, struct kbase_vmap_struct *map) { struct kbase_va_region *reg; void *addr = NULL; - u64 offset_bytes; - struct kbase_mem_phy_alloc *cpu_alloc; - struct kbase_mem_phy_alloc *gpu_alloc; - int err; kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - gpu_addr); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; - /* check access permissions can be satisfied - * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} - */ - if ((reg->flags & prot_request) != prot_request) - goto 
out_unlock; - - offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); - cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); - if (err < 0) - goto fail_vmap_phy_pages; - - addr = map->addr; + addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); out_unlock: kbase_gpu_vm_unlock(kctx); return addr; - -fail_vmap_phy_pages: - kbase_gpu_vm_unlock(kctx); - kbase_mem_phy_alloc_put(cpu_alloc); - kbase_mem_phy_alloc_put(gpu_alloc); - - return NULL; } void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, @@ -3133,16 +3189,23 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); - if (map->sync_needed) + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { + size_t page_count = PFN_UP(map->offset_in_page + map->size); + + WARN_ON(page_count > atomic_read(&kctx->permanent_mapped_pages)); + atomic_sub(page_count, &kctx->permanent_mapped_pages); + } kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); + map->offset_in_page = 0; map->cpu_pages = NULL; map->gpu_pages = NULL; map->addr = NULL; map->size = 0; - map->sync_needed = false; + map->flags = 0; } void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) @@ -3266,9 +3329,27 @@ static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); } +static int +#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE || \ + KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE) +kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma) +#else +kbase_csf_user_io_pages_vm_mremap(struct vm_area_struct *vma, unsigned long flags) +#endif +{ + pr_debug("Unexpected call to mremap method for User IO pages mapping vma\n"); + return -EINVAL; +} + +static int kbase_csf_user_io_pages_vm_split(struct vm_area_struct *vma, unsigned long addr) +{ + pr_debug("Unexpected call to split method for User IO pages mapping vma\n"); + return -EINVAL; +} + static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) { - WARN(1, "Unexpected attempt to clone private vma\n"); + pr_debug("Unexpected call to the open method for User IO pages mapping vma\n"); vma->vm_private_data = NULL; } @@ -3280,8 +3361,10 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) int err; bool reset_prevented = false; - if (WARN_ON(!queue)) + if (!queue) { + pr_debug("Close method called for the new User IO pages mapping vma\n"); return; + } kctx = queue->kctx; kbdev = kctx->kbdev; @@ -3325,9 +3408,12 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) struct memory_group_manager_device *mgm_dev; /* Few sanity checks up front */ - if ((nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || - (vma->vm_pgoff != queue->db_file_offset)) + if (!queue || (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || + (vma->vm_pgoff != queue->db_file_offset)) { + pr_warn("Unexpected CPU page fault on User IO pages mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; + } kbdev = queue->kctx->kbdev; mgm_dev = kbdev->mgm_dev; @@ -3382,6 +3468,12 @@ exit: static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { .open = kbase_csf_user_io_pages_vm_open, .close = kbase_csf_user_io_pages_vm_close, +#if KERNEL_VERSION(5, 11, 0) <= 
LINUX_VERSION_CODE + .may_split = kbase_csf_user_io_pages_vm_split, +#else + .split = kbase_csf_user_io_pages_vm_split, +#endif + .mremap = kbase_csf_user_io_pages_vm_mremap, .fault = kbase_csf_user_io_pages_vm_fault }; @@ -3461,13 +3553,71 @@ map_failed: return err; } +/** + * kbase_csf_user_reg_vm_open - VMA open function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * Note: + * This function isn't expected to be called. If called (i.e> mremap), + * set private_data as NULL to indicate to close() and fault() functions. + */ +static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma) +{ + pr_debug("Unexpected call to the open method for USER register mapping"); + vma->vm_private_data = NULL; +} + +/** + * kbase_csf_user_reg_vm_close - VMA close function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + */ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; - WARN_ON(!kctx->csf.user_reg_vma); + if (!kctx) { + pr_debug("Close function called for the unexpected mapping"); + return; + } + + if (unlikely(!kctx->csf.user_reg_vma)) + dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL"); kctx->csf.user_reg_vma = NULL; + + mutex_lock(&kctx->kbdev->csf.reg_lock); + if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0)) + dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter"); + else + kctx->kbdev->csf.nr_user_page_mapped--; + mutex_unlock(&kctx->kbdev->csf.reg_lock); +} + +/** + * kbase_csf_user_reg_vm_mremap - VMA mremap function for the USER page + * + * @vma: Pointer to the struct containing information about + * the userspace mapping of USER page. + * + * Return: -EINVAL + * + * Note: + * User space must not attempt mremap on USER page mapping. + * This function will return an error to fail the attempt. 
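/*
 * Illustrative sketch (not part of the patch): the overall shape of a
 * vm_operations_struct that rejects splitting and mremap(), consolidating
 * the kernel-version guards used by the User IO pages and USER page
 * mappings above (.may_split replaced .split in 5.11, and the mremap
 * prototype differs only in the 5.11..5.12 window). "example_" names are
 * made up.
 */
static int example_vm_split(struct vm_area_struct *vma, unsigned long addr)
{
	return -EINVAL;
}

static int
#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \
     (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE))
example_vm_mremap(struct vm_area_struct *vma)
#else
example_vm_mremap(struct vm_area_struct *vma, unsigned long flags)
#endif
{
	return -EINVAL;
}

static const struct vm_operations_struct example_pinned_vm_ops = {
#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
	.may_split = example_vm_split,
#else
	.split = example_vm_split,
#endif
	.mremap = example_vm_mremap,
};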
+ */ +static int +#if ((KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 11, 0) > LINUX_VERSION_CODE)) +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma) +#else +kbase_csf_user_reg_vm_mremap(struct vm_area_struct *vma, unsigned long flags) +#endif +{ + pr_debug("Unexpected call to mremap method for USER page mapping vma\n"); + return -EINVAL; } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3480,19 +3630,24 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; #endif struct kbase_context *kctx = vma->vm_private_data; - struct kbase_device *kbdev = kctx->kbdev; - struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; - unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); + struct kbase_device *kbdev; + struct memory_group_manager_device *mgm_dev; + unsigned long pfn; size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); vm_fault_t ret = VM_FAULT_SIGBUS; unsigned long flags; /* Few sanity checks up front */ - if (WARN_ON(nr_pages != 1) || - WARN_ON(vma != kctx->csf.user_reg_vma) || - WARN_ON(vma->vm_pgoff != - PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) + if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) || + (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) { + pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n", + current->comm, current->tgid, current->pid); return VM_FAULT_SIGBUS; + } + + kbdev = kctx->kbdev; + mgm_dev = kbdev->mgm_dev; + pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); mutex_lock(&kbdev->csf.reg_lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3517,14 +3672,31 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) } static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { + .open = kbase_csf_user_reg_vm_open, .close = kbase_csf_user_reg_vm_close, + .mremap = kbase_csf_user_reg_vm_mremap, .fault = kbase_csf_user_reg_vm_fault }; +/** + * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page. + * + * @kctx: Pointer of the kernel context. + * @vma: Pointer to the struct containing the information about + * the userspace mapping of USER page. + * + * Return: 0 on success, error code otherwise. + * + * Note: + * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf. + * But this function needs to be kept for backward-compatibility as old Base (<=1.12) + * will try to mmap USER page for direct access when it creates a base context. 
+ */ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, struct vm_area_struct *vma) { size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + struct kbase_device *kbdev = kctx->kbdev; /* Few sanity checks */ if (kctx->csf.user_reg_vma) @@ -3548,6 +3720,17 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, kctx->csf.user_reg_vma = vma; + mutex_lock(&kbdev->csf.reg_lock); + kbdev->csf.nr_user_page_mapped++; + + if (!kbdev->csf.mali_file_inode) + kbdev->csf.mali_file_inode = kctx->filp->f_inode; + + if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode)) + dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts"); + + mutex_unlock(&kbdev->csf.reg_lock); + vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 5e5d991105a6..5b12e181bf4c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -217,6 +217,26 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); */ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); +typedef unsigned int kbase_vmap_flag; + +/* Sync operations are needed on beginning and ending of access to kernel-mapped GPU memory. + * + * This is internal to the struct kbase_vmap_struct and should not be passed in by callers of + * kbase_vmap-related functions. + */ +#define KBASE_VMAP_FLAG_SYNC_NEEDED (((kbase_vmap_flag)1) << 0) + +/* Permanently mapped memory accounting (including enforcing limits) should be done on the + * kernel-mapped GPU memory. + * + * This should be used if the kernel mapping is going to live for a potentially long time, for + * example if it will persist after the caller has returned. + */ +#define KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING (((kbase_vmap_flag)1) << 1) + +/* Set of flags that can be passed into kbase_vmap-related functions */ +#define KBASE_VMAP_INPUT_FLAGS (KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) + struct kbase_vmap_struct { off_t offset_in_page; struct kbase_mem_phy_alloc *cpu_alloc; @@ -225,9 +245,55 @@ struct kbase_vmap_struct { struct tagged_addr *gpu_pages; void *addr; size_t size; - bool sync_needed; + kbase_vmap_flag flags; }; +/** + * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region or NULL if there isn't one + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, negative -errno on error + * + * Unmap the shrunk pages from the GPU mapping. Note that the size of the region + * itself is unmodified as we still need to reserve the VA, only the page tables + * will be modified by this function. 
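/*
 * Illustrative sketch (not part of the patch): a short-lived kernel mapping
 * made through kbase_vmap_prot() and torn down with kbase_vunmap(), using
 * the kbase_vmap_struct declared in this header. The helper name is made up
 * and error handling is minimal.
 */
static int kbase_example_read_u32(struct kbase_context *kctx, u64 gpu_addr, u32 *out)
{
	struct kbase_vmap_struct map;
	u32 *cpu_ptr;

	/* Refuse the mapping unless the region is CPU-readable. */
	cpu_ptr = kbase_vmap_prot(kctx, gpu_addr, sizeof(*out), KBASE_REG_CPU_RD, &map);
	if (!cpu_ptr)
		return -EINVAL;

	*out = *cpu_ptr;

	kbase_vunmap(kctx, &map);
	return 0;
}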
+ */ +int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_vmap_reg - Map part of an existing region into the kernel safely, only if the requested + * access permissions are supported + * @kctx: Context @reg belongs to + * @reg: The GPU region to map part of + * @gpu_addr: Start address of VA range to map, which must be within @reg + * @size: Size of VA range, which when added to @gpu_addr must be within @reg + * @prot_request: Flags indicating how the caller will then access the memory + * @map: Structure to be given to kbase_vunmap() on freeing + * @vmap_flags: Flags of type kbase_vmap_flag + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Variant of kbase_vmap_prot() that can be used given an existing region. + * + * The caller must satisfy one of the following for @reg: + * * It must have been obtained by finding it on the region tracker, and the region lock must not + * have been released in the mean time. + * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region + * lock is now held again. + * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * region lock is now held again. + * + * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. + * + * Refer to kbase_vmap_prot() for more information on the operation of this function. + */ +void *kbase_vmap_reg(struct kbase_context *kctx, struct kbase_va_region *reg, u64 gpu_addr, + size_t size, unsigned long prot_request, struct kbase_vmap_struct *map, + kbase_vmap_flag vmap_flags); /** * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c new file mode 100644 index 000000000000..dfa70252bcf1 --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel page migration implementation. + */ + +#include + +#include +#include + +/* Global integer used to determine if module parameter value has been + * provided and if page migration feature is enabled. + * Feature is disabled on all platforms by default. 
+ */ +int kbase_page_migration_enabled; +module_param(kbase_page_migration_enabled, int, 0444); +KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +{ + struct kbase_page_metadata *page_md = + kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); + + if (!page_md) + return false; + + SetPagePrivate(p); + set_page_private(p, (unsigned long)page_md); + page_md->dma_addr = dma_addr; + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + spin_lock_init(&page_md->migrate_lock); + + lock_page(p); + if (kbdev->mem_migrate.mapping) + __SetPageMovable(p, kbdev->mem_migrate.mapping); + unlock_page(p); + + return true; +} + +static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) +{ + struct device *const dev = kbdev->dev; + struct kbase_page_metadata *page_md; + dma_addr_t dma_addr; + + page_md = kbase_page_private(p); + if (!page_md) + return; + + dma_addr = kbase_dma_addr(p); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + kfree(page_md); + ClearPagePrivate(p); +} + +static void kbase_free_pages_worker(struct work_struct *work) +{ + struct kbase_mem_migrate *mem_migrate = + container_of(work, struct kbase_mem_migrate, free_pages_work); + struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate); + struct page *p, *tmp; + LIST_HEAD(free_list); + + spin_lock(&mem_migrate->free_pages_lock); + list_splice_init(&mem_migrate->free_pages_list, &free_list); + spin_unlock(&mem_migrate->free_pages_lock); + + list_for_each_entry_safe(p, tmp, &free_list, lru) { + list_del_init(&p->lru); + + lock_page(p); + if (PageMovable(p)) + __ClearPageMovable(p); + unlock_page(p); + + kbase_free_page_metadata(kbdev, p); + __free_pages(p, 0); + } +} + +void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) +{ + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + spin_lock(&mem_migrate->free_pages_lock); + list_add(&p->lru, &mem_migrate->free_pages_list); + spin_unlock(&mem_migrate->free_pages_lock); +} + +/** + * kbase_page_isolate - Isolate a page for migration. + * + * @p: Pointer of the page struct of page to isolate. + * @mode: LRU Isolation modes. + * + * Callback function for Linux to isolate a page and prepare it for migration. + * + * Return: true on success, false otherwise. + */ +static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) +{ + bool status_mem_pool = false; + struct kbase_mem_pool *mem_pool = NULL; + struct kbase_page_metadata *page_md = kbase_page_private(p); + + CSTD_UNUSED(mode); + + if (!spin_trylock(&page_md->migrate_lock)) + return false; + + if (WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return false; + } + + switch (PAGE_STATUS_GET(page_md->status)) { + case MEM_POOL: + /* Prepare to remove page from memory pool later only if pool is not + * in the process of termination. + */ + mem_pool = page_md->data.mem_pool.pool; + status_mem_pool = true; + preempt_disable(); + atomic_inc(&mem_pool->isolation_in_progress_cnt); + break; + case ALLOCATED_MAPPED: + case PT_MAPPED: + /* Only pages in a memory pool can be isolated for now. */ + break; + case SPILL_IN_PROGRESS: + case ALLOCATE_IN_PROGRESS: + case FREE_IN_PROGRESS: + /* Transitory state: do nothing. */ + break; + default: + /* State should always fall in one of the previous cases! 
+ * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * that state only applies to pages that are already isolated. + */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + } + + spin_unlock(&page_md->migrate_lock); + + /* If the page is still in the memory pool: try to remove it. This will fail + * if pool lock is taken which could mean page no longer exists in pool. + */ + if (status_mem_pool) { + if (!spin_trylock(&mem_pool->pool_lock)) { + atomic_dec(&mem_pool->isolation_in_progress_cnt); + preempt_enable(); + return false; + } + + spin_lock(&page_md->migrate_lock); + /* Check status again to ensure page has not been removed from memory pool. */ + if (PAGE_STATUS_GET(page_md->status) == MEM_POOL) { + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + list_del_init(&p->lru); + mem_pool->cur_size--; + } + spin_unlock(&page_md->migrate_lock); + spin_unlock(&mem_pool->pool_lock); + atomic_dec(&mem_pool->isolation_in_progress_cnt); + preempt_enable(); + } + + return IS_PAGE_ISOLATED(page_md->status); +} + +/** + * kbase_page_migrate - Migrate content of old page to new page provided. + * + * @mapping: Pointer to address_space struct associated with pages. + * @new_page: Pointer to the page struct of new page. + * @old_page: Pointer to the page struct of old page. + * @mode: Mode to determine if migration will be synchronised. + * + * Callback function for Linux to migrate the content of the old page to the + * new page provided. + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, + struct page *old_page, enum migrate_mode mode) +{ + int err = 0; + bool status_mem_pool = false; + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_device *kbdev; + + CSTD_UNUSED(mapping); + CSTD_UNUSED(mode); + + if (!spin_trylock(&page_md->migrate_lock)) + return -EAGAIN; + + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return -EINVAL; + } + + switch (PAGE_STATUS_GET(page_md->status)) { + case MEM_POOL: + status_mem_pool = true; + kbdev = page_md->data.mem_pool.kbdev; + break; + case ALLOCATED_MAPPED: + case PT_MAPPED: + case FREE_ISOLATED_IN_PROGRESS: + case MULTI_MAPPED: + /* So far, only pages in a memory pool can be migrated. */ + default: + /* State should always fall in one of the previous cases! */ + err = -EAGAIN; + break; + } + + spin_unlock(&page_md->migrate_lock); + + if (status_mem_pool) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_metadata(kbdev, old_page); + __ClearPageMovable(old_page); + + /* Just free new page to avoid lock contention. */ + INIT_LIST_HEAD(&new_page->lru); + set_page_private(new_page, 0); + kbase_free_page_later(kbdev, new_page); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + + return err; +} + +/** + * kbase_page_putback - Return isolated page back to kbase. + * + * @p: Pointer of the page struct of page. + * + * Callback function for Linux to return isolated page back to kbase. This + * will only be called for a page that has been isolated but failed to + * migrate. This function will put back the given page to the state it was + * in before it was isolated. 
+ */ +static void kbase_page_putback(struct page *p) +{ + bool status_mem_pool = false; + struct kbase_page_metadata *page_md = kbase_page_private(p); + struct kbase_device *kbdev; + + spin_lock(&page_md->migrate_lock); + + /* Page must have been isolated to reach here but metadata is incorrect. */ + WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + + switch (PAGE_STATUS_GET(page_md->status)) { + case MEM_POOL: + status_mem_pool = true; + kbdev = page_md->data.mem_pool.kbdev; + break; + case ALLOCATED_MAPPED: + case PT_MAPPED: + case FREE_ISOLATED_IN_PROGRESS: + /* Only pages in a memory pool can be isolated for now. + * Therefore only pages in a memory pool can be 'putback'. + */ + break; + default: + /* State should always fall in one of the previous cases! */ + break; + } + + spin_unlock(&page_md->migrate_lock); + + /* If page was in a memory pool then just free it to avoid lock contention. */ + if (!WARN_ON(!status_mem_pool)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + __ClearPageMovable(p); + list_del_init(&p->lru); + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } +} + +static const struct address_space_operations kbase_address_space_ops = { + .isolate_page = kbase_page_isolate, + .migratepage = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; + +void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) +{ + if (filp) { + filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; + + if (!kbdev->mem_migrate.mapping) + kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; + else + WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + } +} + +void kbase_mem_migrate_init(struct kbase_device *kbdev) +{ + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + if (kbase_page_migration_enabled < 0) + kbase_page_migration_enabled = 0; + + spin_lock_init(&mem_migrate->free_pages_lock); + INIT_LIST_HEAD(&mem_migrate->free_pages_list); + + mem_migrate->free_pages_workq = + alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); +} + +void kbase_mem_migrate_term(struct kbase_device *kbdev) +{ + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + if (mem_migrate->free_pages_workq) + destroy_workqueue(mem_migrate->free_pages_workq); +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h new file mode 100644 index 000000000000..6610c0ccc40c --- /dev/null +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/** + * DOC: Base kernel page migration implementation. + */ + +#define PAGE_STATUS_MASK ((u8)0x7F) +#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK) +#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK)) +#define PAGE_ISOLATE_SHIFT (7) +#define PAGE_ISOLATE_SET(status, value) \ + ((status & PAGE_STATUS_MASK) | (value << PAGE_ISOLATE_SHIFT)) +#define IS_PAGE_ISOLATED(status) ((bool)(status & ~PAGE_STATUS_MASK)) + +/* Global integer used to determine if module parameter value has been + * provided and if page migration feature is enabled. + */ +extern int kbase_page_migration_enabled; + +/** + * kbase_alloc_page_metadata - Allocate and initialize page metadata + * @kbdev: Pointer to kbase device. + * @p: Page to assign metadata to. + * @dma_addr: DMA address mapped to paged. + * + * This will allocate memory for the page's metadata, initialize it and + * assign a reference to the page's private field. Importantly, once + * the metadata is set and ready this function will mark the page as + * movable. + * + * Return: true if successful or false otherwise. + */ +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); + +/** + * kbase_free_page_later - Defer freeing of given page. + * @kbdev: Pointer to kbase device + * @p: Page to free + * + * This will add given page to a list of pages which will be freed at + * a later time. + */ +void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); + +/* + * kbase_mem_migrate_set_address_space_ops - Set address space operations + * + * @kbdev: Pointer to object representing an instance of GPU platform device. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * + * Assign address space operations to the given file struct @filp and + * add a reference to @kbdev. + */ +void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); + +/* + * kbase_mem_migrate_init - Initialise kbase page migration + * + * @kbdev: Pointer to kbase device + * + * Enables page migration by default based on GPU and setup work queue to + * defer freeing pages during page migration callbacks. + */ +void kbase_mem_migrate_init(struct kbase_device *kbdev); + +/* + * kbase_mem_migrate_term - Terminate kbase page migration + * + * @kbdev: Pointer to kbase device + * + * This will flush any work left to free pages from page migration + * and destroy workqueue associated. + */ +void kbase_mem_migrate_term(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index 4103bd1c93d0..dce066db7385 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -56,13 +57,36 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } +static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, + struct list_head *page_list, size_t *list_size) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + + lockdep_assert_held(&pool->pool_lock); + + spin_lock(&page_md->migrate_lock); + /* Only update page status and add the page to the memory pool if it is not isolated */ + if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); + page_md->data.mem_pool.pool = pool; + page_md->data.mem_pool.kbdev = pool->kbdev; + list_move(&p->lru, page_list); + (*list_size)++; + } + spin_unlock(&page_md->migrate_lock); +} + static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { lockdep_assert_held(&pool->pool_lock); - list_add(&p->lru, &pool->page_list); - pool->cur_size++; + if (!pool->order && kbase_page_migration_enabled) + set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + else { + list_add(&p->lru, &pool->page_list); + pool->cur_size++; + } pool_dbg(pool, "added page\n"); } @@ -79,8 +103,15 @@ static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, { lockdep_assert_held(&pool->pool_lock); - list_splice(page_list, &pool->page_list); - pool->cur_size += nr_pages; + if (!pool->order && kbase_page_migration_enabled) { + struct page *p, *tmp; + + list_for_each_entry_safe(p, tmp, page_list, lru) + set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + } else { + list_splice(page_list, &pool->page_list); + pool->cur_size += nr_pages; + } pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -93,7 +124,8 @@ static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, kbase_mem_pool_unlock(pool); } -static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) +static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool, + enum kbase_page_status status) { struct page *p; @@ -103,6 +135,16 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) return NULL; p = list_first_entry(&pool->page_list, struct page, lru); + + if (!pool->order && kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = kbase_page_private(p); + + spin_lock(&page_md->migrate_lock); + WARN_ON(PAGE_STATUS_GET(page_md->status) != (u8)MEM_POOL); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } + list_del_init(&p->lru); pool->cur_size--; @@ -111,12 +153,13 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) return p; } -static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) +static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool, + enum kbase_page_status status) { struct page *p; kbase_mem_pool_lock(pool); - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, status); kbase_mem_pool_unlock(pool); return p; @@ -126,9 +169,9 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, struct page *p) { struct device *dev = pool->kbdev->dev; + dma_addr_t dma_addr = pool->order ? 
kbase_dma_addr_as_priv(p) : kbase_dma_addr(p); - dma_sync_single_for_device(dev, kbase_dma_addr(p), - (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); } static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, @@ -154,7 +197,7 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) { struct page *p; - gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO; + gfp_t gfp = __GFP_ZERO; struct kbase_device *const kbdev = pool->kbdev; struct device *const dev = kbdev->dev; dma_addr_t dma_addr; @@ -162,7 +205,9 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* don't warn on higher order failures */ if (pool->order) - gfp |= __GFP_NOWARN; + gfp |= GFP_HIGHUSER | __GFP_NOWARN; + else + gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, pool->group_id, gfp, pool->order); @@ -178,30 +223,52 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) return NULL; } - WARN_ON(dma_addr != page_to_phys(p)); - for (i = 0; i < (1u << pool->order); i++) - kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i); + /* Setup page metadata for 4KB pages when page migration is enabled */ + if (!pool->order && kbase_page_migration_enabled) { + INIT_LIST_HEAD(&p->lru); + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, + pool->order); + return NULL; + } + } else { + WARN_ON(dma_addr != page_to_phys(p)); + for (i = 0; i < (1u << pool->order); i++) + kbase_set_dma_addr_as_priv(p + i, dma_addr + PAGE_SIZE * i); + } return p; } -static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, - struct page *p) +static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) { - struct kbase_device *const kbdev = pool->kbdev; - struct device *const dev = kbdev->dev; - dma_addr_t dma_addr = kbase_dma_addr(p); - int i; + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; - dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order), - DMA_BIDIRECTIONAL); - for (i = 0; i < (1u << pool->order); i++) - kbase_clear_dma_addr(p+i); + if (!pool->order && kbase_page_migration_enabled) + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); +} - kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, - pool->group_id, p, pool->order); +void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) +{ + struct kbase_device *kbdev = pool->kbdev; - pool_dbg(pool, "freed page to kernel\n"); + if (!pool->order && kbase_page_migration_enabled) { + kbase_free_page_later(kbdev, p); + pool_dbg(pool, "page to be freed to kernel later\n"); + } else { + int i; + dma_addr_t dma_addr = kbase_dma_addr_as_priv(p); + + for (i = 0; i < (1u << pool->order); i++) + kbase_clear_dma_addr_as_priv(p + i); + + dma_unmap_page(kbdev->dev, dma_addr, (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); + + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); + + pool_dbg(pool, "freed page to kernel\n"); + } } static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, @@ -213,10 +280,13 @@ static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, lockdep_assert_held(&pool->pool_lock); for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { - p = 
kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); kbase_mem_pool_free_page(pool, p); } + /* Freeing of pages will be deferred when page migration is enabled. */ + enqueue_free_pool_pages_work(pool); + return i; } @@ -232,8 +302,7 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, return nr_freed; } -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, - size_t nr_to_grow) +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow) { struct page *p; size_t i; @@ -268,6 +337,7 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, return 0; } +KBASE_EXPORT_TEST_API(kbase_mem_pool_grow); void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) { @@ -323,6 +393,9 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { kbase_mem_pool_unlock(pool); + /* Tell shrinker to skip reclaim + * even though freeable pages are available + */ return 0; } pool_size = kbase_mem_pool_size(pool); @@ -342,7 +415,10 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, kbase_mem_pool_lock(pool); if (pool->dont_reclaim && !pool->dying) { kbase_mem_pool_unlock(pool); - return 0; + /* Tell shrinker that reclaim can't be made and + * do not attempt again for this reclaim context. + */ + return SHRINK_STOP; } pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); @@ -356,12 +432,9 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, return freed; } -int kbase_mem_pool_init(struct kbase_mem_pool *pool, - const struct kbase_mem_pool_config *config, - unsigned int order, - int group_id, - struct kbase_device *kbdev, - struct kbase_mem_pool *next_pool) +int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, + unsigned int order, int group_id, struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool) { if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { @@ -375,6 +448,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, pool->kbdev = kbdev; pool->next_pool = next_pool; pool->dying = false; + atomic_set(&pool->isolation_in_progress_cnt, 0); spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); @@ -392,6 +466,7 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, return 0; } +KBASE_EXPORT_TEST_API(kbase_mem_pool_init); void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) { @@ -423,14 +498,14 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Zero pages first without holding the next_pool lock */ for (i = 0; i < nr_to_spill; i++) { - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS); list_add(&p->lru, &spill_list); } } while (!kbase_mem_pool_is_empty(pool)) { /* Free remaining pages to kernel */ - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); list_add(&p->lru, &free_list); } @@ -451,8 +526,18 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) kbase_mem_pool_free_page(pool, p); } + /* Freeing of pages will be deferred when page migration is enabled. */ + enqueue_free_pool_pages_work(pool); + + /* Before returning wait to make sure there are no pages undergoing page isolation + * which will require reference to this pool. 
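The busy-wait just above pairs with the isolation_in_progress_cnt handling in kbase_page_isolate() earlier in this patch: the counter is only raised across a short critical section that ends with preempt_enable() in the isolate path, so the spin in pool termination is expected to be brief. A condensed sketch of the protocol being relied upon (illustrative, not driver code):

    /* Isolation side: counter held only across a short critical section. */
    atomic_inc(&mem_pool->isolation_in_progress_cnt);
    /* ... inspect page metadata, optionally take pool_lock, update status ... */
    atomic_dec(&mem_pool->isolation_in_progress_cnt);
    preempt_enable();

    /* Termination side: wait for any in-flight isolation before the pool goes away. */
    while (atomic_read(&pool->isolation_in_progress_cnt))
            cpu_relax();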
+ */ + while (atomic_read(&pool->isolation_in_progress_cnt)) + cpu_relax(); + pool_dbg(pool, "terminated\n"); } +KBASE_EXPORT_TEST_API(kbase_mem_pool_term); struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) { @@ -460,7 +545,7 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) do { pool_dbg(pool, "alloc()\n"); - p = kbase_mem_pool_remove(pool); + p = kbase_mem_pool_remove(pool, ALLOCATE_IN_PROGRESS); if (p) return p; @@ -478,7 +563,7 @@ struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) lockdep_assert_held(&pool->pool_lock); pool_dbg(pool, "alloc_locked()\n"); - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); if (p) return p; @@ -505,6 +590,8 @@ void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, } else { /* Free page */ kbase_mem_pool_free_page(pool, p); + /* Freeing of pages will be deferred when page migration is enabled. */ + enqueue_free_pool_pages_work(pool); } } @@ -524,11 +611,13 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, } else { /* Free page */ kbase_mem_pool_free_page(pool, p); + /* Freeing of pages will be deferred when page migration is enabled. */ + enqueue_free_pool_pages_work(pool); } } int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) + struct tagged_addr *pages, bool partial_allowed) { struct page *p; size_t nr_from_pool; @@ -550,7 +639,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, while (nr_from_pool--) { int j; - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -566,8 +655,8 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, if (i != nr_4k_pages && pool->next_pool) { /* Allocate via next pool */ - err = kbase_mem_pool_alloc_pages(pool->next_pool, - nr_4k_pages - i, pages + i, partial_allowed); + err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i, + partial_allowed); if (err < 0) goto err_rollback; @@ -638,7 +727,7 @@ int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, for (i = 0; i < nr_pages_internal; i++) { int j; - p = kbase_mem_pool_remove_locked(pool); + p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); if (pool->order) { *pages++ = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -745,6 +834,7 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, size_t nr_to_pool; LIST_HEAD(to_pool_list); size_t i = 0; + bool pages_released = false; pool_dbg(pool, "free_pages(%zu):\n", nr_pages); @@ -782,8 +872,13 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, kbase_mem_pool_free_page(pool, p); pages[i] = as_tagged(0); + pages_released = true; } + /* Freeing of pages will be deferred when page migration is enabled. 
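The deferred freeing referred to in this and the surrounding comments works by parking pages on kbdev->mem_migrate.free_pages_list via kbase_free_page_later() and draining them from the free_pages_workq worker. The real drain worker (kbase_free_pages_worker) is defined earlier in this patch, outside this excerpt; the sketch below only shows the general shape such a worker takes and may differ in detail:

    static void example_free_pages_worker(struct work_struct *work)
    {
            struct kbase_mem_migrate *mem_migrate =
                    container_of(work, struct kbase_mem_migrate, free_pages_work);
            struct page *p, *tmp;
            LIST_HEAD(local_list);

            /* Detach the pending pages so the lock is not held while freeing. */
            spin_lock(&mem_migrate->free_pages_lock);
            list_splice_init(&mem_migrate->free_pages_list, &local_list);
            spin_unlock(&mem_migrate->free_pages_lock);

            list_for_each_entry_safe(p, tmp, &local_list, lru) {
                    list_del_init(&p->lru);
                    /* DMA-unmap, release page metadata and return the page to
                     * the kernel here (omitted in this sketch).
                     */
            }
    }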
*/ + if (pages_released) + enqueue_free_pool_pages_work(pool); + pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); } @@ -796,6 +891,7 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, size_t nr_to_pool; LIST_HEAD(to_pool_list); size_t i = 0; + bool pages_released = false; lockdep_assert_held(&pool->pool_lock); @@ -826,7 +922,12 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, kbase_mem_pool_free_page(pool, p); pages[i] = as_tagged(0); + pages_released = true; } + /* Freeing of pages will be deferred when page migration is enabled. */ + if (pages_released) + enqueue_free_pool_pages_work(pool); + pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c index 8d7bb4d68854..49c4b041e13a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,29 +43,22 @@ void kbase_mem_pool_group_config_set_max_size( } } -int kbase_mem_pool_group_init( - struct kbase_mem_pool_group *const mem_pools, - struct kbase_device *const kbdev, - const struct kbase_mem_pool_group_config *const configs, - struct kbase_mem_pool_group *next_pools) +int kbase_mem_pool_group_init(struct kbase_mem_pool_group *const mem_pools, + struct kbase_device *const kbdev, + const struct kbase_mem_pool_group_config *const configs, + struct kbase_mem_pool_group *next_pools) { int gid, err = 0; for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { - err = kbase_mem_pool_init(&mem_pools->small[gid], - &configs->small[gid], - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, - gid, - kbdev, - next_pools ? &next_pools->small[gid] : NULL); + err = kbase_mem_pool_init(&mem_pools->small[gid], &configs->small[gid], + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, gid, kbdev, + next_pools ? &next_pools->small[gid] : NULL); if (!err) { - err = kbase_mem_pool_init(&mem_pools->large[gid], - &configs->large[gid], - KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, - gid, - kbdev, - next_pools ? &next_pools->large[gid] : NULL); + err = kbase_mem_pool_init(&mem_pools->large[gid], &configs->large[gid], + KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, gid, kbdev, + next_pools ? 
&next_pools->large[gid] : NULL); if (err) kbase_mem_pool_term(&mem_pools->small[gid]); } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h index f97f47d15b7d..fe8ce775258f 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool_group.h @@ -86,10 +86,9 @@ void kbase_mem_pool_group_config_set_max_size( * * Return: 0 on success, otherwise a negative error code */ -int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, - struct kbase_device *kbdev, - const struct kbase_mem_pool_group_config *configs, - struct kbase_mem_pool_group *next_pools); +int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, struct kbase_device *kbdev, + const struct kbase_mem_pool_group_config *configs, + struct kbase_mem_pool_group *next_pools); /** * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c index 90406b2bf1fe..b65f9e7b5162 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha.c @@ -209,20 +209,13 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev) } } -int kbase_pbha_read_dtb(struct kbase_device *kbdev) +static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, + const struct device_node *pbha_node) { u32 dtb_data[SYSC_ALLOC_COUNT * sizeof(u32) * DTB_SET_SIZE]; - const struct device_node *pbha_node; int sz, i; bool valid = true; - if (!kbasep_pbha_supported(kbdev)) - return 0; - - pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha"); - if (!pbha_node) - return 0; - sz = of_property_count_elems_of_size(pbha_node, "int_id_override", sizeof(u32)); if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { @@ -256,3 +249,58 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev) } return 0; } + +#if MALI_USE_CSF +static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, + const struct device_node *pbha_node) +{ + u32 bits; + int err; + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) + return 0; + + err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + + if (err < 0) { + if (err != -EINVAL) { + dev_err(kbdev->dev, + "DTB value for propagate_bits is improperly formed (err=%d)\n", + err); + return err; + } + } + + if (bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) { + dev_err(kbdev->dev, "Bad DTB value for propagate_bits: 0x%x\n", bits); + return -EINVAL; + } + + kbdev->pbha_propagate_bits = bits; + return 0; +} +#endif + +int kbase_pbha_read_dtb(struct kbase_device *kbdev) +{ + const struct device_node *pbha_node; + int err; + + if (!kbasep_pbha_supported(kbdev)) + return 0; + + pbha_node = of_get_child_by_name(kbdev->dev->of_node, "pbha"); + if (!pbha_node) + return 0; + + err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); + +#if MALI_USE_CSF + if (err < 0) + return err; + + err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); +#endif + + return err; +} diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c index 4130dd609157..1cc29c700e5a 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pbha_debugfs.c @@ -20,13 +20,15 @@ */ #include "mali_kbase_pbha_debugfs.h" - #include "mali_kbase_pbha.h" - #include #include #include +#if MALI_USE_CSF +#include 
"backend/gpu/mali_kbase_pm_internal.h" +#endif + static int int_id_overrides_show(struct seq_file *sfile, void *data) { struct kbase_device *kbdev = sfile->private; @@ -108,6 +110,90 @@ static int int_id_overrides_open(struct inode *in, struct file *file) return single_open(file, int_id_overrides_show, in->i_private); } +#if MALI_USE_CSF +/** + * propagate_bits_show - Read PBHA bits from L2_CONFIG out to debugfs. + * + * @sfile: The debugfs entry. + * @data: Data associated with the entry. + * + * Return: 0 in all cases. + */ +static int propagate_bits_show(struct seq_file *sfile, void *data) +{ + struct kbase_device *kbdev = sfile->private; + u32 l2_config_val; + + kbase_csf_scheduler_pm_active(kbdev); + kbase_pm_wait_for_l2_powered(kbdev); + l2_config_val = L2_CONFIG_PBHA_HWU_GET(kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG))); + kbase_csf_scheduler_pm_idle(kbdev); + + seq_printf(sfile, "PBHA Propagate Bits: 0x%x\n", l2_config_val); + return 0; +} + +static int propagate_bits_open(struct inode *in, struct file *file) +{ + return single_open(file, propagate_bits_show, in->i_private); +} + +/** + * propagate_bits_write - Write input value from debugfs to PBHA bits of L2_CONFIG register. + * + * @file: Pointer to file struct of debugfs node. + * @ubuf: Pointer to user buffer with value to be written. + * @count: Size of user buffer. + * @ppos: Not used. + * + * Return: Size of buffer passed in when successful, but error code E2BIG/EINVAL otherwise. + */ +static ssize_t propagate_bits_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + struct seq_file *sfile = file->private_data; + struct kbase_device *kbdev = sfile->private; + /* 32 characters should be enough for the input string in any base */ + char raw_str[32]; + unsigned long propagate_bits; + + if (count >= sizeof(raw_str)) + return -E2BIG; + if (copy_from_user(raw_str, ubuf, count)) + return -EINVAL; + raw_str[count] = '\0'; + if (kstrtoul(raw_str, 0, &propagate_bits)) + return -EINVAL; + + /* Check propagate_bits input argument does not + * exceed the maximum size of the propagate_bits mask. + */ + if (propagate_bits > (L2_CONFIG_PBHA_HWU_MASK >> L2_CONFIG_PBHA_HWU_SHIFT)) + return -EINVAL; + /* Cast to u8 is safe as check is done already to ensure size is within + * correct limits. 
+ */ + kbdev->pbha_propagate_bits = (u8)propagate_bits; + + /* GPU Reset will set new values in L2 config */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { + kbase_reset_gpu(kbdev); + kbase_reset_gpu_wait(kbdev); + } + + return count; +} + +static const struct file_operations pbha_propagate_bits_fops = { + .owner = THIS_MODULE, + .open = propagate_bits_open, + .read = seq_read, + .write = propagate_bits_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* MALI_USE_CSF */ + static const struct file_operations pbha_int_id_overrides_fops = { .owner = THIS_MODULE, .open = int_id_overrides_open, @@ -132,5 +218,10 @@ void kbase_pbha_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("int_id_overrides", mode, debugfs_pbha_dir, kbdev, &pbha_int_id_overrides_fops); +#if MALI_USE_CSF + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) + debugfs_create_file("propagate_bits", mode, debugfs_pbha_dir, kbdev, + &pbha_propagate_bits_fops); +#endif /* MALI_USE_CSF */ } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c index 761a636b4cbf..265c676f13fa 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_platform_fake.c @@ -32,12 +32,12 @@ */ #include +#ifndef CONFIG_OF + #define PLATFORM_CONFIG_RESOURCE_COUNT 4 -#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 static struct platform_device *mali_device; -#ifndef CONFIG_OF /** * kbasep_config_parse_io_resources - Convert data in struct kbase_io_resources * struct to Linux-specific resources @@ -73,14 +73,11 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io linux_resources[3].end = io_resources->gpu_irq_number; linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; } -#endif /* CONFIG_OF */ int kbase_platform_register(void) { struct kbase_platform_config *config; -#ifndef CONFIG_OF struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; -#endif int err; config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ @@ -93,7 +90,6 @@ int kbase_platform_register(void) if (mali_device == NULL) return -ENOMEM; -#ifndef CONFIG_OF kbasep_config_parse_io_resources(config->io_resources, resources); err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); if (err) { @@ -101,7 +97,6 @@ int kbase_platform_register(void) mali_device = NULL; return err; } -#endif /* CONFIG_OF */ err = platform_device_add(mali_device); if (err) { @@ -120,3 +115,5 @@ void kbase_platform_unregister(void) platform_device_unregister(mali_device); } EXPORT_SYMBOL(kbase_platform_unregister); + +#endif /* CONFIG_OF */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_pm.c b/drivers/gpu/arm/bifrost/mali_kbase_pm.c index 68c1b9bb25e8..62a132816a42 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_pm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h index ff631e91824f..48ea9954b17c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h @@ -236,6 +236,18 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev); */ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); +/** + * kbase_reset_gpu_not_pending - Reports if the GPU reset isn't pending + * + * @kbdev: Device pointer + * + * Note that unless appropriate locks are held when using this function, the + * state could change immediately afterwards. + * + * Return: True if the GPU reset isn't pending. + */ +bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev); + /** * kbase_reset_gpu_wait - Wait for a GPU reset to complete * @kbdev: Device pointer diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index 5808a2e893cc..b0c5126afcbe 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -23,7 +23,7 @@ #include #include -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #include #endif #include @@ -204,7 +204,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) return 0; } -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) /* Called by the explicit fence mechanism when a fence wait has completed */ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) { @@ -925,26 +925,6 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, #if !MALI_USE_CSF -/* - * Sizes of user data to copy for each just-in-time memory interface version - * - * In interface version 2 onwards this is the same as the struct size, allowing - * copying of arrays of structures from userspace. - * - * In interface version 1 the structure size was variable, and hence arrays of - * structures cannot be supported easily, and were not a feature present in - * version 1 anyway. - */ -static const size_t jit_info_copy_size_for_jit_version[] = { - /* in jit_version 1, the structure did not have any end padding, hence - * it could be a different size on 32 and 64-bit clients. 
We therefore - * do not copy past the last member - */ - [1] = offsetofend(struct base_jit_alloc_info_10_2, id), - [2] = sizeof(struct base_jit_alloc_info_11_5), - [3] = sizeof(struct base_jit_alloc_info) -}; - static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) { __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; @@ -954,18 +934,11 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) u32 count; int ret; u32 i; - size_t jit_info_user_copy_size; - - WARN_ON(kctx->jit_version >= - ARRAY_SIZE(jit_info_copy_size_for_jit_version)); - jit_info_user_copy_size = - jit_info_copy_size_for_jit_version[kctx->jit_version]; - WARN_ON(jit_info_user_copy_size > sizeof(*info)); /* For backwards compatibility, and to prevent reading more than 1 jit * info struct on jit version 1 */ - if (katom->nr_extres == 0 || kctx->jit_version == 1) + if (katom->nr_extres == 0) katom->nr_extres = 1; count = katom->nr_extres; @@ -985,8 +958,8 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) katom->softjob_data = info; - for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) { - if (copy_from_user(info, data, jit_info_user_copy_size) != 0) { + for (i = 0; i < count; i++, info++, data += sizeof(*info)) { + if (copy_from_user(info, data, sizeof(*info)) != 0) { ret = -EINVAL; goto free_info; } @@ -994,8 +967,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) * kernel struct. For jit version 1, this also clears the * padding bytes */ - memset(((u8 *)info) + jit_info_user_copy_size, 0, - sizeof(*info) - jit_info_user_copy_size); + memset(((u8 *)info) + sizeof(*info), 0, sizeof(*info) - sizeof(*info)); ret = kbasep_jit_alloc_validate(kctx, info); if (ret) @@ -1476,10 +1448,11 @@ static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) if (!kbase_sticky_resource_acquire(katom->kctx, gpu_addr)) goto failed_loop; - } else + } else { if (!kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr)) failed = true; + } } /* @@ -1539,7 +1512,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) ret = kbase_dump_cpu_gpu_time(katom); break; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: katom->event_code = kbase_sync_fence_out_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? @@ -1599,7 +1572,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) void kbase_cancel_soft_job(struct kbase_jd_atom *katom) { switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_WAIT: kbase_sync_fence_in_cancel_wait(katom); break; @@ -1622,7 +1595,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) return -EINVAL; } break; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: { struct base_fence fence; @@ -1668,20 +1641,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) fence.basep.fd); if (ret < 0) return ret; - -#ifdef CONFIG_MALI_BIFROST_DMA_FENCE - /* - * Set KCTX_NO_IMPLICIT_FENCE in the context the first - * time a soft fence wait job is observed. This will - * prevent the implicit dma-buf fence to conflict with - * the Android native sync fences. 
- */ - if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) - kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); -#endif /* CONFIG_MALI_BIFROST_DMA_FENCE */ } break; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#endif /* CONFIG_SYNC_FILE */ case BASE_JD_REQ_SOFT_JIT_ALLOC: return kbase_jit_allocate_prepare(katom); case BASE_JD_REQ_SOFT_JIT_FREE: @@ -1714,7 +1676,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ break; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: /* If fence has not yet been signaled, do it now */ kbase_sync_fence_out_trigger(katom, katom->event_code == @@ -1724,7 +1686,7 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) /* Release katom's reference to fence object */ kbase_sync_fence_in_remove(katom); break; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#endif /* CONFIG_SYNC_FILE */ case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync.h b/drivers/gpu/arm/bifrost/mali_kbase_sync.h index fcc9b6fe0195..3d2053bee08e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync.h @@ -30,9 +30,6 @@ #include #include -#if IS_ENABLED(CONFIG_SYNC) -#include -#endif #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_fence_defs.h" #include @@ -181,7 +178,7 @@ int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info); #endif /* !MALI_USE_CSF */ -#if defined(CONFIG_SYNC_FILE) +#if IS_ENABLED(CONFIG_SYNC_FILE) #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) void kbase_sync_fence_info_get(struct fence *fence, struct kbase_sync_fence_info *info); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c deleted file mode 100644 index 8c5cb6c3838e..000000000000 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync_android.c +++ /dev/null @@ -1,514 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -/* - * Code for supporting explicit Android fences (CONFIG_SYNC) - * Known to be good for kernels 4.5 and earlier. 
- * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels - * (see mali_kbase_sync_file.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include "sync.h" -#include -#include - -struct mali_sync_timeline { - struct sync_timeline timeline; - atomic_t counter; - atomic_t signaled; -}; - -struct mali_sync_pt { - struct sync_pt pt; - int order; - int result; -}; - -static struct mali_sync_timeline *to_mali_sync_timeline( - struct sync_timeline *timeline) -{ - return container_of(timeline, struct mali_sync_timeline, timeline); -} - -static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) -{ - return container_of(pt, struct mali_sync_pt, pt); -} - -static struct sync_pt *timeline_dup(struct sync_pt *pt) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_pt *new_mpt; - struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), - sizeof(struct mali_sync_pt)); - - if (!new_pt) - return NULL; - - new_mpt = to_mali_sync_pt(new_pt); - new_mpt->order = mpt->order; - new_mpt->result = mpt->result; - - return new_pt; -} - -static int timeline_has_signaled(struct sync_pt *pt) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_timeline *mtl = to_mali_sync_timeline( - sync_pt_parent(pt)); - int result = mpt->result; - - int diff = atomic_read(&mtl->signaled) - mpt->order; - - if (diff >= 0) - return (result < 0) ? result : 1; - - return 0; -} - -static int timeline_compare(struct sync_pt *a, struct sync_pt *b) -{ - struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); - struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); - - int diff = ma->order - mb->order; - - if (diff == 0) - return 0; - - return (diff < 0) ? -1 : 1; -} - -static void timeline_value_str(struct sync_timeline *timeline, char *str, - int size) -{ - struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); - - snprintf(str, size, "%d", atomic_read(&mtl->signaled)); -} - -static void pt_value_str(struct sync_pt *pt, char *str, int size) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - - snprintf(str, size, "%d(%d)", mpt->order, mpt->result); -} - -static struct sync_timeline_ops mali_timeline_ops = { - .driver_name = "Mali", - .dup = timeline_dup, - .has_signaled = timeline_has_signaled, - .compare = timeline_compare, - .timeline_value_str = timeline_value_str, - .pt_value_str = pt_value_str, -}; - -/* Allocates a timeline for Mali - * - * One timeline should be allocated per API context. 
- */ -static struct sync_timeline *mali_sync_timeline_alloc(const char *name) -{ - struct sync_timeline *tl; - struct mali_sync_timeline *mtl; - - tl = sync_timeline_create(&mali_timeline_ops, - sizeof(struct mali_sync_timeline), name); - if (!tl) - return NULL; - - /* Set the counter in our private struct */ - mtl = to_mali_sync_timeline(tl); - atomic_set(&mtl->counter, 0); - atomic_set(&mtl->signaled, 0); - - return tl; -} - -static int kbase_stream_close(struct inode *inode, struct file *file) -{ - struct sync_timeline *tl; - - tl = (struct sync_timeline *)file->private_data; - sync_timeline_destroy(tl); - return 0; -} - -static const struct file_operations stream_fops = { - .owner = THIS_MODULE, - .release = kbase_stream_close, -}; - -int kbase_sync_fence_stream_create(const char *name, int *const out_fd) -{ - struct sync_timeline *tl; - - if (!out_fd) - return -EINVAL; - - tl = mali_sync_timeline_alloc(name); - if (!tl) - return -EINVAL; - - *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC); - - if (*out_fd < 0) { - sync_timeline_destroy(tl); - return -EINVAL; - } - - return 0; -} - -#if !MALI_USE_CSF -/* Allocates a sync point within the timeline. - * - * The timeline must be the one allocated by kbase_sync_timeline_alloc - * - * Sync points must be triggered in *exactly* the same order as they are - * allocated. - */ -static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) -{ - struct sync_pt *pt = sync_pt_create(parent, - sizeof(struct mali_sync_pt)); - struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); - struct mali_sync_pt *mpt; - - if (!pt) - return NULL; - - mpt = to_mali_sync_pt(pt); - mpt->order = atomic_inc_return(&mtl->counter); - mpt->result = 0; - - return pt; -} - -int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) -{ - struct sync_timeline *tl; - struct sync_pt *pt; - struct sync_fence *fence; - int fd; - struct file *tl_file; - - tl_file = fget(tl_fd); - if (tl_file == NULL) - return -EBADF; - - if (tl_file->f_op != &stream_fops) { - fd = -EBADF; - goto out; - } - - tl = tl_file->private_data; - - pt = kbase_sync_pt_alloc(tl); - if (!pt) { - fd = -EFAULT; - goto out; - } - - fence = sync_fence_create("mali_fence", pt); - if (!fence) { - sync_pt_free(pt); - fd = -EFAULT; - goto out; - } - - /* from here the fence owns the sync_pt */ - - /* create a fd representing the fence */ - fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (fd < 0) { - sync_pt_free(pt); - sync_fence_put(fence); - katom->fence = NULL; - goto out; - } - - /* Place the successfully created fence in katom */ - katom->fence = fence; - - /* bind fence to the new fd */ - sync_fence_install(fence, fd); -out: - fput(tl_file); - - return fd; -} - -int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) -{ - katom->fence = sync_fence_fdget(fd); - return katom->fence ? 0 : -ENOENT; -} -#endif /* !MALI_USE_CSF */ - -int kbase_sync_fence_validate(int fd) -{ - struct sync_fence *fence; - - fence = sync_fence_fdget(fd); - if (!fence) - return -EINVAL; - - sync_fence_put(fence); - return 0; -} - -#if !MALI_USE_CSF -/* Returns true if the specified timeline is allocated by Mali */ -static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) -{ - return timeline->ops == &mali_timeline_ops; -} - -/* Signals a particular sync point - * - * Sync points must be triggered in *exactly* the same order as they are - * allocated. 
- * - * If they are signaled in the wrong order then a message will be printed in - * debug builds and otherwise attempts to signal order sync_pts will be ignored. - * - * result can be negative to indicate error, any other value is interpreted as - * success. - */ -static void kbase_sync_signal_pt(struct sync_pt *pt, int result) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_timeline *mtl = to_mali_sync_timeline( - sync_pt_parent(pt)); - int signaled; - int diff; - - mpt->result = result; - - do { - signaled = atomic_read(&mtl->signaled); - - diff = signaled - mpt->order; - - if (diff > 0) { - /* The timeline is already at or ahead of this point. - * This should not happen unless userspace has been - * signaling fences out of order, so warn but don't - * violate the sync_pt API. - * The warning is only in debug builds to prevent - * a malicious user being able to spam dmesg. - */ -#ifdef CONFIG_MALI_BIFROST_DEBUG - pr_err("Fences were triggered in a different order to allocation!"); -#endif /* CONFIG_MALI_BIFROST_DEBUG */ - return; - } - } while (atomic_cmpxchg(&mtl->signaled, - signaled, mpt->order) != signaled); -} - -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) -{ - struct sync_pt *pt; - struct sync_timeline *timeline; - - if (!katom->fence) - return BASE_JD_EVENT_JOB_CANCELLED; - - if (katom->fence->num_fences != 1) { - /* Not exactly one item in the list - so it didn't (directly) - * come from us - */ - return BASE_JD_EVENT_JOB_CANCELLED; - } - - pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); - timeline = sync_pt_parent(pt); - - if (!kbase_sync_timeline_is_ours(timeline)) { - /* Fence has a sync_pt which isn't ours! */ - return BASE_JD_EVENT_JOB_CANCELLED; - } - - kbase_sync_signal_pt(pt, result); - - sync_timeline_signal(timeline); - - kbase_sync_fence_out_remove(katom); - - return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; -} - -static inline int kbase_fence_get_status(struct sync_fence *fence) -{ - if (!fence) - return -ENOENT; - - return atomic_read(&fence->status); -} - -static void kbase_fence_wait_callback(struct sync_fence *fence, - struct sync_fence_waiter *waiter) -{ - struct kbase_jd_atom *katom = container_of(waiter, - struct kbase_jd_atom, sync_waiter); - struct kbase_context *kctx = katom->kctx; - - /* Propagate the fence status to the atom. - * If negative then cancel this atom and its dependencies. - */ - if (kbase_fence_get_status(fence) < 0) - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - /* To prevent a potential deadlock we schedule the work onto the - * job_done_wq workqueue - * - * The issue is that we may signal the timeline while holding - * kctx->jctx.lock and the callbacks are run synchronously from - * sync_timeline_signal. So we simply defer the work. 
- */ - - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} - -int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) -{ - int ret; - - sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); - - ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); - - if (ret == 1) { - /* Already signaled */ - return 0; - } - - if (ret < 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job - */ - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); - } - - return 1; -} - -void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) -{ - if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { - /* The wait wasn't cancelled - leave the cleanup for - * kbase_fence_wait_callback - */ - return; - } - - /* Wait was cancelled - zap the atoms */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - kbasep_remove_waiting_soft_job(katom); - kbase_finish_soft_job(katom); - - if (kbase_jd_done_nolock(katom, true)) - kbase_js_sched_all(katom->kctx->kbdev); -} - -void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -{ - if (katom->fence) { - sync_fence_put(katom->fence); - katom->fence = NULL; - } -} - -void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) -{ - if (katom->fence) { - sync_fence_put(katom->fence); - katom->fence = NULL; - } -} - -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ - u32 string_len; - - if (!katom->fence) - return -ENOENT; - - info->fence = katom->fence; - info->status = kbase_fence_get_status(katom->fence); - - string_len = strscpy(info->name, katom->fence->name, sizeof(info->name)); - string_len += sizeof(char); - /* Make sure that the source string fit into the buffer. */ - KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name)); - CSTD_UNUSED(string_len); - - return 0; -} - -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ - u32 string_len; - - if (!katom->fence) - return -ENOENT; - - info->fence = katom->fence; - info->status = kbase_fence_get_status(katom->fence); - - string_len = strscpy(info->name, katom->fence->name, sizeof(info->name)); - string_len += sizeof(char); - /* Make sure that the source string fit into the buffer. */ - KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name)); - CSTD_UNUSED(string_len); - - return 0; -} - -#ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG -void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) -{ - /* Dump out the full state of all the Android sync fences. - * The function sync_dump() isn't exported to modules, so force - * sync_fence_wait() to time out to trigger sync_dump(). - */ - if (katom->fence) - sync_fence_wait(katom->fence, 1); -} -#endif -#endif /* !MALI_USE_CSF */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c index e08a87210fbc..9360324cfee6 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_sync_file.c @@ -21,9 +21,6 @@ /* * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) - * Introduced in kernel 4.9. 
- * Android explicit fences (CONFIG_SYNC) can be used for older kernels - * (see mali_kbase_sync_android.c) */ #include @@ -112,10 +109,13 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) struct dma_fence *fence = sync_file_get_fence(fd); #endif + lockdep_assert_held(&katom->kctx->jctx.lock); + if (!fence) return -ENOENT; kbase_fence_fence_in_set(katom, fence); + katom->dma_fence.fence_cb_added = false; return 0; } @@ -167,36 +167,31 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, struct dma_fence_cb *cb) #endif { - struct kbase_fence_cb *kcb = container_of(cb, - struct kbase_fence_cb, - fence_cb); - struct kbase_jd_atom *katom = kcb->katom; + struct kbase_jd_atom *katom = container_of(cb, struct kbase_jd_atom, + dma_fence.fence_cb); struct kbase_context *kctx = katom->kctx; /* Cancel atom if fence is erroneous */ + if (dma_fence_is_signaled(katom->dma_fence.fence_in) && #if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) - if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error < 0) + katom->dma_fence.fence_in->error < 0) #else - if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) + katom->dma_fence.fence_in->status < 0) #endif katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (kbase_fence_dep_count_dec_and_test(katom)) { - /* We take responsibility of handling this */ - kbase_fence_dep_count_set(katom, -1); - /* To prevent a potential deadlock we schedule the work onto the - * job_done_wq workqueue - * - * The issue is that we may signal the timeline while holding - * kctx->jctx.lock and the callbacks are run synchronously from - * sync_timeline_signal. So we simply defer the work. - */ - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); - } + /* To prevent a potential deadlock we schedule the work onto the + * job_done_wq workqueue + * + * The issue is that we may signal the timeline while holding + * kctx->jctx.lock and the callbacks are run synchronously from + * sync_timeline_signal. So we simply defer the work. + */ + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); } int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) @@ -208,53 +203,77 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) struct dma_fence *fence; #endif - fence = kbase_fence_in_get(katom); + lockdep_assert_held(&katom->kctx->jctx.lock); + + fence = katom->dma_fence.fence_in; if (!fence) return 0; /* no input fence to wait for, good to go! */ - kbase_fence_dep_count_set(katom, 1); + err = dma_fence_add_callback(fence, &katom->dma_fence.fence_cb, + kbase_fence_wait_callback); + if (err == -ENOENT) { + int fence_status = dma_fence_get_status(fence); - err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback); - - kbase_fence_put(fence); - - if (likely(!err)) { - /* Test if the callbacks are already triggered */ - if (kbase_fence_dep_count_dec_and_test(katom)) { - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); - return 0; /* Already signaled, good to go right now */ + if (fence_status == 1) { + /* Fence is already signaled with no error. The completion + * for FENCE_WAIT softjob can be done right away. + */ + return 0; } - /* Callback installed, so we just need to wait for it... 
*/ - } else { - /* Failure */ - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); + /* Fence shouldn't be in not signaled state */ + if (!fence_status) { + struct kbase_sync_fence_info info; - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_sync_fence_in_info_get(katom, &info); - /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job + dev_warn(katom->kctx->kbdev->dev, + "Unexpected status for fence %s of ctx:%d_%d atom:%d", + info.name, katom->kctx->tgid, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + } + + /* If fence is signaled with an error, then the FENCE_WAIT softjob is + * considered to be failed. */ - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); } - return 1; /* completion to be done later by callback/worker */ + if (unlikely(err)) { + /* We should cause the dependent jobs in the bag to be failed. */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* The completion for FENCE_WAIT softjob can be done right away. */ + return 0; + } + + /* Callback was successfully installed */ + katom->dma_fence.fence_cb_added = true; + + /* Completion to be done later by callback/worker */ + return 1; } void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) { - if (!kbase_fence_free_callbacks(katom)) { - /* The wait wasn't cancelled - - * leave the cleanup for kbase_fence_wait_callback - */ - return; - } + lockdep_assert_held(&katom->kctx->jctx.lock); - /* Take responsibility of completion */ - kbase_fence_dep_count_set(katom, -1); + if (katom->dma_fence.fence_cb_added) { + if (!dma_fence_remove_callback(katom->dma_fence.fence_in, + &katom->dma_fence.fence_cb)) { + /* The callback is already removed so leave the cleanup + * for kbase_fence_wait_callback. + */ + return; + } + } else { + struct kbase_sync_fence_info info; + + kbase_sync_fence_in_info_get(katom, &info); + dev_warn(katom->kctx->kbdev->dev, + "Callback was not added earlier for fence %s of ctx:%d_%d atom:%d", + info.name, katom->kctx->tgid, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + } /* Wait was cancelled - zap the atoms */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; @@ -273,8 +292,29 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) { - kbase_fence_free_callbacks(katom); + lockdep_assert_held(&katom->kctx->jctx.lock); + + if (katom->dma_fence.fence_cb_added) { + bool removed = dma_fence_remove_callback(katom->dma_fence.fence_in, + &katom->dma_fence.fence_cb); + + /* Here it is expected that the callback should have already been removed + * previously either by kbase_sync_fence_in_cancel_wait() or when the fence + * was signaled and kbase_sync_fence_wait_worker() was called. 
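The warnings and early returns in these hunks rely on a few dma-fence API behaviours: dma_fence_add_callback() returns -ENOENT, without installing the callback, if the fence is already signalled; dma_fence_get_status() distinguishes pending (0), signalled without error (1) and signalled with error (negative); and dma_fence_remove_callback() returns true only if the callback is removed before it has run. A condensed illustration, where my_callback and the example_* helpers are placeholder names rather than driver code:

    #include <linux/dma-fence.h>

    static void my_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
    {
            /* Placeholder: the real driver defers completion work to a workqueue here. */
    }

    static int example_wait_on_fence(struct dma_fence *fence, struct dma_fence_cb *cb)
    {
            int err = dma_fence_add_callback(fence, cb, my_callback);

            if (err == -ENOENT) {
                    /* Already signalled: the callback was not installed and will
                     * never run, so check how the fence completed right away.
                     */
                    return dma_fence_get_status(fence); /* 1 = OK, < 0 = error */
            }
            if (err)
                    return err; /* callback could not be installed */

            return 0; /* callback installed; completion handled asynchronously */
    }

    static bool example_cancel_wait(struct dma_fence *fence, struct dma_fence_cb *cb)
    {
            /* True means the callback was removed before it ran, so the caller
             * owns cleanup; false means the callback has run (or is running).
             */
            return dma_fence_remove_callback(fence, cb);
    }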
+ */ + if (removed) { + struct kbase_sync_fence_info info; + + kbase_sync_fence_in_info_get(katom, &info); + dev_warn(katom->kctx->kbdev->dev, + "Callback was not removed earlier for fence %s of ctx:%d_%d atom:%d", + info.name, katom->kctx->tgid, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + } + } + kbase_fence_in_remove(katom); + katom->dma_fence.fence_cb_added = false; } #endif /* !MALI_USE_CSF */ @@ -288,7 +328,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, { info->fence = fence; - /* translate into CONFIG_SYNC status: + /* Translate into the following status, with support for error handling: * < 0 : error * 0 : active * 1 : signaled diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index abcf53041069..853c89796d44 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -20,11 +20,11 @@ */ #include "mali_kbase_vinstr.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" +#include "hwcnt/mali_kbase_hwcnt_virtualizer.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" #include -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_gpu_narrow.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" #include #include "mali_malisw.h" #include "mali_kbase_debug.h" @@ -1034,24 +1034,25 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( * @filp: Non-NULL pointer to file structure. * @wait: Non-NULL pointer to poll table. * - * Return: POLLIN if data can be read without blocking, 0 if data can not be - * read without blocking, else error code. + * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if + * data can not be read without blocking, else EPOLLHUP | EPOLLERR. */ static __poll_t kbasep_vinstr_hwcnt_reader_poll(struct file *filp, poll_table *wait) { struct kbase_vinstr_client *cli; if (!filp || !wait) - return (__poll_t)-EINVAL; + return EPOLLHUP | EPOLLERR; cli = filp->private_data; if (!cli) - return (__poll_t)-EINVAL; + return EPOLLHUP | EPOLLERR; poll_wait(filp, &cli->waitq, wait); if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) - return POLLIN; - return 0; + return EPOLLIN | EPOLLRDNORM; + + return (__poll_t)0; } /** diff --git a/drivers/gpu/arm/bifrost/mali_malisw.h b/drivers/gpu/arm/bifrost/mali_malisw.h index d25c29fda63b..d9db189e8684 100644 --- a/drivers/gpu/arm/bifrost/mali_malisw.h +++ b/drivers/gpu/arm/bifrost/mali_malisw.h @@ -97,16 +97,12 @@ */ #define CSTD_STR2(x) CSTD_STR1(x) -/* LINUX_VERSION_CODE < 5.4 */ -#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) -#if defined(GCC_VERSION) && GCC_VERSION >= 70000 + #ifndef fallthrough + #define fallthrough __fallthrough + #endif /* fallthrough */ + #ifndef __fallthrough #define __fallthrough __attribute__((fallthrough)) #endif /* __fallthrough */ -#define fallthrough __fallthrough -#else -#define fallthrough CSTD_NOP(...) 
/* fallthrough */ -#endif /* GCC_VERSION >= 70000 */ -#endif /* KERNEL_VERSION(5, 4, 0) */ #endif /* _MALISW_H_ */ diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index 04f5cdf42b84..db2086079c14 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -122,6 +122,8 @@ void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, access_type, kbase_gpu_access_type_name(fault->status), source_id); + kbase_debug_csf_fault_notify(kbdev, NULL, DF_GPU_PAGE_FAULT); + /* Report MMU fault for all address spaces (except MCU_AS_NR) */ for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) submit_work_pagefault(kbdev, as_no, fault); @@ -188,6 +190,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAR_FAULT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } /* @@ -249,6 +252,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ + kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); /* Switching to UNMAPPED mode above would have enabled the firmware to * recover from the fault (if the memory access was made by firmware) * and it can then respond to CSG termination requests to be sent now. @@ -262,6 +266,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } /** diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 3130b332dec2..22786f0748ce 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -94,6 +94,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + } /* diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index fc7c8923ab07..8f261d439909 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -110,7 +110,8 @@ static void mmu_hw_operation_end(struct kbase_device *kbdev) /** * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done - * through GPU_CONTROL interface + * through GPU_CONTROL interface. + * * @kbdev: kbase device to check GPU model ID on. * * This function returns whether a cache flush for page table update should @@ -137,6 +138,42 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev) * * Issue a cache flush physical range command. 
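 *
 * As implemented below, KBASE_MMU_OP_FLUSH_PT is issued as
 * GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2 (L2 clean + invalidate only) and
 * KBASE_MMU_OP_FLUSH_MEM as GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC
 * (L2 plus load/store caches); any other operation type is rejected with a
 * warning. A flush that fails to complete is treated as a GPU hang and
 * triggers a soft-reset.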
*/ +#if MALI_USE_CSF +static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes, + enum kbase_mmu_op_type op) +{ + u32 flush_op; + int ret; + + if (WARN_ON(kbdev == NULL)) + return; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Translate operation to command */ + if (op == KBASE_MMU_OP_FLUSH_PT) { + flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; + } else if (op == KBASE_MMU_OP_FLUSH_MEM) { + flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; + } else { + dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + return; + } + + ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op); + + if (ret) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover + */ + dev_err(kbdev->dev, + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } +} +#endif /** * mmu_invalidate() - Perform an invalidate operation on MMU caches. @@ -177,39 +214,15 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { - int err; - bool gpu_powered; + int err = 0; unsigned long flags; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - gpu_powered = kbdev->pm.backend.gpu_powered; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* GPU is off so there's no need to perform flush/invalidate. - * But even if GPU is not actually powered down, after gpu_powered flag - * was set to false, it is still safe to skip the flush/invalidate. - * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE - * which is sent when address spaces are restored after gpu_powered flag - * is set to true. Flushing of L2 cache is certainly not required as L2 - * cache is definitely off if gpu_powered is false. - */ - if (!gpu_powered) - return; - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU has just been powered off due to system suspend. - * So again, no need to perform flush/invalidate. - */ - return; - } - /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_flush(kbdev, as, op_param); - mmu_hw_operation_end(kbdev); + if (kbdev->pm.backend.gpu_powered) + err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param); if (err) { /* Flush failed to complete, assume the GPU has hung and @@ -222,10 +235,9 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as kbase_reset_gpu(kbdev); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - - kbase_pm_context_idle(kbdev); } /** @@ -246,9 +258,6 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as * invalidating the TLBs. - * - * If operation is set to KBASE_MMU_OP_UNLOCK then this function will only - * invalidate the MMU caches and TLBs. 
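 *
 * A typical call site fills in a struct kbase_mmu_hw_op_param first, for
 * example (values illustrative):
 *
 *   struct kbase_mmu_hw_op_param op_param = {
 *           .vpfn = vpfn,
 *           .nr = nr,
 *           .op = KBASE_MMU_OP_FLUSH_PT,
 *           .kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
 *           .mmu_sync_info = mmu_sync_info,
 *           .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
 *   };
 *   mmu_flush_invalidate(kbdev, kctx, kctx->as_nr, &op_param);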
*/ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) @@ -327,6 +336,31 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct mutex_unlock(&kbdev->mmu_hw_mutex); } +static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op) +{ +#if MALI_USE_CSF + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && + kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#endif +} + +static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) +{ + /* In non-coherent system, ensure the GPU can read + * the pages from memory + */ + if (kbdev->system_coherency == COHERENCY_NONE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); +} + /** * kbase_mmu_sync_pgd() - sync page directory to memory when needed. * @kbdev: Device pointer. @@ -350,13 +384,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context phys_addr_t phys, dma_addr_t handle, size_t size, enum kbase_mmu_op_type flush_op) { - /* In non-coherent system, ensure the GPU can read - * the pages from memory - */ - if (kbdev->system_coherency == COHERENCY_NONE) - dma_sync_single_for_device(kbdev->dev, handle, size, - DMA_TO_DEVICE); + kbase_mmu_sync_pgd_cpu(kbdev, handle, size); + kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op); } /* @@ -383,22 +413,75 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, * @level: The level of MMU page table. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. + * @free_pgds_list: Linked list of the page directory pages to free. */ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + struct list_head *free_pgds_list); /** * kbase_mmu_free_pgd() - Free memory of the page directory * * @kbdev: Device pointer. * @mmut: GPU MMU page table. * @pgd: Physical address of page directory to be freed. - * @dirty: Flag to indicate whether the page may be dirty in the cache. + * + * This function is supposed to be called with mmu_lock held and after + * ensuring that GPU won't be able to access the page. */ -static void kbase_mmu_free_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - bool dirty); +static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd) +{ + struct page *p; + + lockdep_assert_held(&mmut->mmu_lock); + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +/** + * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @free_pgds_list: Linked list of the page directory pages to free. + * + * This function will call kbase_mmu_free_pgd() on each page directory page + * present in the @free_pgds_list. + * + * The function is supposed to be called after the GPU cache and MMU TLB has + * been invalidated post the teardown loop. + */ +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct list_head *free_pgds_list) +{ + struct page *page, *next_page; + + mutex_lock(&mmut->mmu_lock); + + list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { + list_del_init(&page->lru); + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); + } + + mutex_unlock(&mmut->mmu_lock); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -484,8 +567,6 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, u64 start_pfn, size_t nr, u32 kctx_id, u64 dirty_pgds) { - int err; - /* Calls to this function are inherently synchronous, with respect to * MMU operations. */ @@ -509,12 +590,11 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, - &op_param); + kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } else { mmu_hw_operation_begin(kbdev); - err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); + kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } @@ -552,7 +632,6 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, struct tagged_addr *fault_phys_addr; struct kbase_fault *fault; u64 fault_pfn, pfn_offset; - int ret; int as_no; u64 dirty_pgds = 0; @@ -613,8 +692,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } /* Now make this faulting page writable to GPU. */ - ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, + region->gpu_alloc->group_id, &dirty_pgds); kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, dirty_pgds); @@ -648,31 +727,68 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, } #endif -#define MAX_POOL_LEVEL 2 +/** + * estimate_pool_space_required - Determine how much a pool should be grown by to support a future + * allocation + * @pool: The memory pool to check, including its linked pools + * @pages_required: Number of 4KiB pages require for the pool to support a future allocation + * + * The value returned is accounting for the size of @pool and the size of each memory pool linked to + * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to + * allocate from. + * + * Note: this is only an estimate, because even during the calculation the memory pool(s) involved + * can be updated to be larger or smaller. 
Hence, the result is only a guide as to whether an + * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller + * should keep attempting an allocation and then re-growing with a new value queried form this + * function until the allocation succeeds. + * + * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an + * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the + * allocation. + */ +static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required) +{ + size_t pages_still_required; + + for (pages_still_required = pages_required; pool != NULL && pages_still_required; + pool = pool->next_pool) { + size_t pool_size_4k; + + kbase_mem_pool_lock(pool); + + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + if (pool_size_4k >= pages_still_required) + pages_still_required = 0; + else + pages_still_required -= pool_size_4k; + + kbase_mem_pool_unlock(pool); + } + return pages_still_required; +} /** * page_fault_try_alloc - Try to allocate memory from a context pool * @kctx: Context pointer * @region: Region to grow - * @new_pages: Number of 4 kB pages to allocate - * @pages_to_grow: Pointer to variable to store number of outstanding pages on - * failure. This can be either 4 kB or 2 MB pages, depending on - * the number of pages requested. - * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true - * for 2 MB, false for 4 kB. + * @new_pages: Number of 4 KiB pages to allocate + * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be + * either 4 KiB or 2 MiB pages, depending on the number of pages requested. + * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for + * 4 KiB. * @prealloc_sas: Pointer to kbase_sub_alloc structures * - * This function will try to allocate as many pages as possible from the context - * pool, then if required will try to allocate the remaining pages from the - * device pool. + * This function will try to allocate as many pages as possible from the context pool, then if + * required will try to allocate the remaining pages from the device pool. * - * This function will not allocate any new memory beyond that is already - * present in the context or device pools. This is because it is intended to be - * called with the vm_lock held, which could cause recursive locking if the - * allocation caused the out-of-memory killer to run. + * This function will not allocate any new memory beyond that is already present in the context or + * device pools. This is because it is intended to be called whilst the thread has acquired the + * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is + * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close(). * - * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be - * a count of 2 MB pages, otherwise it will be a count of 4 kB pages. + * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB + * pages, otherwise it will be a count of 4 KiB pages. 
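 *
 * For example (illustrative numbers): with 2 MiB pages enabled, a fault needing
 * new_pages = 1024 (4 MiB) reports any shortfall through *pages_to_grow as a
 * count of 2 MiB pages, whereas one needing new_pages = 16 (64 KiB) reports it
 * as a count of 4 KiB pages.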
* * Return: true if successful, false on failure */ @@ -681,13 +797,15 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, int *pages_to_grow, bool *grow_2mb_pool, struct kbase_sub_alloc **prealloc_sas) { - struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL}; - struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL}; - size_t pages_alloced[MAX_POOL_LEVEL] = {0}; + size_t total_gpu_pages_alloced = 0; + size_t total_cpu_pages_alloced = 0; struct kbase_mem_pool *pool, *root_pool; - int pool_level = 0; bool alloc_failed = false; size_t pages_still_required; + size_t total_mempools_free_4k = 0; + + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mem_partials_lock); if (WARN_ON(region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { @@ -711,27 +829,10 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, if (region->gpu_alloc != region->cpu_alloc) new_pages *= 2; - pages_still_required = new_pages; - /* Determine how many pages are in the pools before trying to allocate. * Don't attempt to allocate & free if the allocation can't succeed. */ - for (pool = root_pool; pool != NULL; pool = pool->next_pool) { - size_t pool_size_4k; - - kbase_mem_pool_lock(pool); - - pool_size_4k = kbase_mem_pool_size(pool) << pool->order; - if (pool_size_4k >= pages_still_required) - pages_still_required = 0; - else - pages_still_required -= pool_size_4k; - - kbase_mem_pool_unlock(pool); - - if (!pages_still_required) - break; - } + pages_still_required = estimate_pool_space_required(root_pool, new_pages); if (pages_still_required) { /* Insufficient pages in pools. Don't try to allocate - just @@ -742,11 +843,11 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return false; } - /* Since we've dropped the pool locks, the amount of memory in the pools - * may change between the above check and the actual allocation. + /* Since we're not holding any of the mempool locks, the amount of memory in the pools may + * change between the above estimate and the actual allocation. 
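+	 * For example, another thread may allocate from or return pages to the
+	 * same pools in parallel, which is why the loop below re-reads each
+	 * pool's size under the pool lock before allocating from it.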
*/ - pool = root_pool; - for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) { + pages_still_required = new_pages; + for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) { size_t pool_size_4k; size_t pages_to_alloc_4k; size_t pages_to_alloc_4k_per_alloc; @@ -755,93 +856,91 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, /* Allocate as much as possible from this pool*/ pool_size_4k = kbase_mem_pool_size(pool) << pool->order; - pages_to_alloc_4k = MIN(new_pages, pool_size_4k); + total_mempools_free_4k += pool_size_4k; + pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k); if (region->gpu_alloc == region->cpu_alloc) pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; else pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; - pages_alloced[pool_level] = pages_to_alloc_4k; if (pages_to_alloc_4k) { - gpu_pages[pool_level] = - kbase_alloc_phy_pages_helper_locked( - region->gpu_alloc, pool, - pages_to_alloc_4k_per_alloc, - &prealloc_sas[0]); + struct tagged_addr *gpu_pages = + kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[0]); - if (!gpu_pages[pool_level]) { + if (!gpu_pages) alloc_failed = true; - } else if (region->gpu_alloc != region->cpu_alloc) { - cpu_pages[pool_level] = - kbase_alloc_phy_pages_helper_locked( - region->cpu_alloc, pool, - pages_to_alloc_4k_per_alloc, - &prealloc_sas[1]); + else + total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc; - if (!cpu_pages[pool_level]) + if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) { + struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked( + region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc, + &prealloc_sas[1]); + + if (!cpu_pages) alloc_failed = true; + else + total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc; } } kbase_mem_pool_unlock(pool); if (alloc_failed) { - WARN_ON(!new_pages); - WARN_ON(pages_to_alloc_4k >= new_pages); - WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages); + WARN_ON(!pages_still_required); + WARN_ON(pages_to_alloc_4k >= pages_still_required); + WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required); break; } - new_pages -= pages_to_alloc_4k; - - if (!new_pages) - break; - - pool = pool->next_pool; - if (!pool) - break; + pages_still_required -= pages_to_alloc_4k; } - if (new_pages) { - /* Allocation was unsuccessful */ - int max_pool_level = pool_level; - - pool = root_pool; - - /* Free memory allocated so far */ - for (pool_level = 0; pool_level <= max_pool_level; - pool_level++) { - kbase_mem_pool_lock(pool); - - if (region->gpu_alloc != region->cpu_alloc) { - if (pages_alloced[pool_level] && - cpu_pages[pool_level]) - kbase_free_phy_pages_helper_locked( - region->cpu_alloc, - pool, cpu_pages[pool_level], - pages_alloced[pool_level]); - } - - if (pages_alloced[pool_level] && gpu_pages[pool_level]) - kbase_free_phy_pages_helper_locked( - region->gpu_alloc, - pool, gpu_pages[pool_level], - pages_alloced[pool_level]); - - kbase_mem_pool_unlock(pool); - - pool = pool->next_pool; - } - - /* - * If the allocation failed despite there being enough memory in - * the pool, then just fail. Otherwise, try to grow the memory - * pool. + if (pages_still_required) { + /* Allocation was unsuccessful. 
We have dropped the mem_pool lock after allocation, + * so must in any case use kbase_free_phy_pages_helper() rather than + * kbase_free_phy_pages_helper_locked() */ - if (alloc_failed) + if (total_gpu_pages_alloced > 0) + kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced); + if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0) + kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced); + + if (alloc_failed) { + /* Note that in allocating from the above memory pools, we always ensure + * never to request more than is available in each pool with the pool's + * lock held. Hence failing to allocate in such situations would be unusual + * and we should cancel the growth instead (as re-growing the memory pool + * might not fix the situation) + */ + dev_warn( + kctx->kbdev->dev, + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, + total_mempools_free_4k); *pages_to_grow = 0; - else - *pages_to_grow = new_pages; + } else { + /* Tell the caller to try to grow the memory pool + * + * Freeing pages above may have spilled or returned them to the OS, so we + * have to take into account how many are still in the pool before giving a + * new estimate for growth required of the pool. We can just re-estimate a + * new value. + */ + pages_still_required = estimate_pool_space_required(root_pool, new_pages); + if (pages_still_required) { + *pages_to_grow = pages_still_required; + } else { + /* It's possible another thread could've grown the pool to be just + * big enough after we rolled back the allocation. Request at least + * one more page to ensure the caller doesn't fail the growth by + * conflating it with the alloc_failed case above + */ + *pages_to_grow = 1u; + } + } return false; } @@ -975,20 +1074,24 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) goto fault_done; } +page_fault_retry: #ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs if necessary */ + /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); if (!prealloc_sas[i]) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + + if (!prealloc_sas[i]) { + kbase_mmu_report_fault_and_kill( + kctx, faulting_as, "Failed pre-allocating memory for sub-allocations' metadata", fault); - goto fault_done; + goto fault_done; + } } } #endif /* CONFIG_MALI_2MB_ALLOC */ -page_fault_retry: /* so we have a translation fault, * let's see if it is for growable memory */ @@ -1372,13 +1475,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { u64 *page; - int i; struct page *p; phys_addr_t pgd; p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); if (!p) - return 0; + return KBASE_MMU_INVALID_PGD_ADDRESS; page = kmap(p); if (page == NULL) @@ -1406,13 +1508,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kbdev->mmu_mode->entry_invalidate(&page[i]); + kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES); - /* MMU cache flush strategy is NONE because this page is newly created, therefore - * there is no content to clean or 
invalidate in the GPU caches. + /* As this page is newly created, therefore there is no content to + * clean or invalidate in the GPU caches. */ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd, kbase_dma_addr(p), PAGE_SIZE, KBASE_MMU_OP_NONE); + kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); return pgd; @@ -1420,7 +1521,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, alloc_free: kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); - return 0; + return KBASE_MMU_INVALID_PGD_ADDRESS; } /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the @@ -1452,16 +1553,12 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * return -EINVAL; } - target_pgd = kbdev->mmu_mode->pte_to_phy_addr( - kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); - - if (!target_pgd) { - enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; + if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { unsigned int current_valid_entries; u64 managed_pte; + target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - if (!target_pgd) { + if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", __func__); kunmap(p); @@ -1477,22 +1574,24 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * /* Rely on the caller to update the address space flags. */ if (newly_created_pgd && !*newly_created_pgd) { *newly_created_pgd = true; - /* If code reaches here we know parent PGD of target PGD was - * not newly created and should be flushed. - */ - flush_op = KBASE_MMU_OP_FLUSH_PT; - if (dirty_pgds) *dirty_pgds |= 1ULL << level; } - /* MMU cache flush strategy is FLUSH_PT because a new entry is added - * to an existing PGD which may be stored in GPU caches and needs a - * "clean" operation. An "invalidation" operation is not required here - * as this entry points to a new page and cannot be present in GPU - * caches. + /* A new valid entry is added to an existing PGD. Perform the + * invalidate operation for GPU cache as it could be having a + * cacheline that contains the entry (in an invalid form). + * Even if the parent PGD was newly created, invalidation of + * GPU cache is still needed. For explanation, please refer + * the comment in kbase_mmu_insert_pages_no_flush(). 
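+	 * An example of the interleaving that the flush below guards against
+	 * (illustrative):
+	 *
+	 *   CPU: writes the ATEs into the new bottom level page (CPU cache only)
+	 *   CPU: writes the parent PGD entry pointing at that page
+	 *   GPU: accesses the mapped VA range and pulls the stale (invalid)
+	 *        entries into the L2 cache
+	 *   CPU: cleans its cache, but the GPU keeps serving the stale L2 line
+	 *        and faults repeatedly until that line is evicted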
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd, kbase_dma_addr(p), PAGE_SIZE, flush_op); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + } else { + target_pgd = kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } kunmap(p); @@ -1540,9 +1639,9 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, - u64 to_vpfn, u64 *dirty_pgds) + u64 to_vpfn, u64 *dirty_pgds, + struct list_head *free_pgds_list) { - phys_addr_t pgd; u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1555,7 +1654,6 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode = kbdev->mmu_mode; while (vpfn < to_vpfn) { - unsigned int i; unsigned int idx = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; unsigned int pcount = 0; @@ -1563,6 +1661,8 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, int level; u64 *page; phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); register unsigned int num_of_valid_entries; @@ -1570,18 +1670,17 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, count = left; /* need to check if this is a 2MB page or a 4kB */ - pgd = mmut->pgd; - for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kmap(phys_to_page(pgd)); + page = kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(phys_to_page(pgd)); + kunmap(p); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); + p = phys_to_page(pgd); } switch (level) { @@ -1608,35 +1707,70 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, else num_of_valid_entries -= pcount; - if (!num_of_valid_entries) { - kunmap(phys_to_page(pgd)); + /* Invalidate the entries we added */ + mmu_mode->entries_invalidate(&page[idx], pcount); - kbase_mmu_free_pgd(kbdev, mmut, pgd, true); + if (!num_of_valid_entries) { + kunmap(p); + + list_add(&p->lru, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds); + KBASE_MMU_OP_NONE, dirty_pgds, + free_pgds_list); vpfn += count; continue; } - /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[idx + i]); - mmu_mode->set_num_valid_entries(page, num_of_valid_entries); /* MMU cache flush strategy is NONE because GPU cache maintenance is * going to be done by the caller */ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), - kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, 8 * pcount, + kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kunmap(phys_to_page(pgd)); + kunmap(p); next: vpfn += count; } } +static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, const u64 vpfn, + size_t nr, u64 dirty_pgds, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + struct kbase_mmu_hw_op_param op_param; + int as_nr = 0; + + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = 
KBASE_MMU_OP_FLUSH_PT; + op_param.mmu_sync_info = mmu_sync_info; + op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; + op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + +#if MALI_USE_CSF + as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR; +#else + WARN_ON(!mmut->kctx); +#endif + + /* MMU cache flush strategy depends on whether GPU control commands for + * flushing physical address ranges are supported. The new physical pages + * are not present in GPU caches therefore they don't need any cache + * maintenance, but PGDs in the page table may or may not be created anew. + * + * Operations that affect the whole GPU cache shall only be done if it's + * impossible to update physical ranges. + */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); + else + mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); +} + /* * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' */ @@ -1657,8 +1791,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, int err; struct kbase_device *kbdev; enum kbase_mmu_op_type flush_op; - struct kbase_mmu_hw_op_param op_param; u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); if (WARN_ON(kctx == NULL)) return -EINVAL; @@ -1672,15 +1806,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - /* Set up MMU flush operation parameters. */ - op_param = (struct kbase_mmu_hw_op_param){ - .vpfn = vpfn, - .nr = nr, - .op = KBASE_MMU_OP_FLUSH_PT, - .kctx_id = kctx->id, - .mmu_sync_info = mmu_sync_info, - }; - mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1725,7 +1850,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds); + &dirty_pgds, &free_pgds_list); } goto fail_unlock; } @@ -1740,7 +1865,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds); + &dirty_pgds, &free_pgds_list); } err = -ENOMEM; goto fail_unlock; @@ -1791,53 +1916,21 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } mutex_unlock(&kctx->mmu.mmu_lock); - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - /* If FLUSH_PA_RANGE is supported then existing PGDs will have been flushed - * and all that remains is TLB (or MMU cache) invalidation which is done via - * MMU UNLOCK command. 
- */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - mmu_invalidate(kbdev, kctx, kctx->as_nr, &op_param); - else - mmu_flush_invalidate(kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); + return 0; fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, kctx->as_nr, &op_param); - else - mmu_flush_invalidate(kbdev, kctx, kctx->as_nr, &op_param); + + mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, + mmu_sync_info); + kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); + return err; } -static void kbase_mmu_free_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - bool dirty) -{ - struct page *p; - - lockdep_assert_held(&mmut->mmu_lock); - - p = pfn_to_page(PFN_DOWN(pgd)); - - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], - p, dirty); - - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); -} - u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1859,6 +1952,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; + LIST_HEAD(free_pgds_list); /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -1879,7 +1973,6 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu struct page *p; int cur_level; register unsigned int num_of_valid_entries; - enum kbase_mmu_op_type flush_op; bool newly_created_pgd = false; if (count > remain) @@ -1919,7 +2012,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds); + insert_vpfn, dirty_pgds, + &free_pgds_list); } goto fail_unlock; } @@ -1934,7 +2028,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds); + insert_vpfn, dirty_pgds, + &free_pgds_list); } err = -ENOMEM; goto fail_unlock; @@ -1945,20 +2040,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu if (cur_level == MIDGARD_MMU_LEVEL(2)) { int level_index = (insert_vpfn >> 9) & 0x1FF; - u64 *target = &pgd_page[level_index]; - - if (mmu_mode->pte_is_valid(*target, cur_level)) { - kbase_mmu_free_pgd( - kbdev, mmut, - kbdev->mmu_mode->pte_to_phy_addr( - kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, - cur_level, *target)), - false); - num_of_valid_entries--; - } - *target = kbase_mmu_create_ate(kbdev, *phys, flags, - cur_level, group_id); + pgd_page[level_index] = + kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id); num_of_valid_entries++; } else { @@ -1983,36 +2066,48 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mmu_mode->set_num_valid_entries(pgd_page, 
num_of_valid_entries); - if (dirty_pgds && count > 0 && !newly_created_pgd) + if (dirty_pgds && !newly_created_pgd) *dirty_pgds |= 1ULL << cur_level; phys += count; insert_vpfn += count; remain -= count; - /* For the most part, the creation of a new virtual memory mapping does - * not require cache flush operations, because the operation results - * into the creation of new memory pages which are not present in GPU - * caches. Therefore the defaul operation is NONE. - * - * However, it is quite common for the mapping to start and/or finish - * at an already existing PGD. Moreover, the PTEs modified are not - * necessarily aligned with GPU cache lines. Therefore, GPU cache - * maintenance is required for existing PGDs. + /* Even if mmu_get_pgd_at_level() allocated a new bottom level + * table page, the invalidation of L2 cache is still needed for + * for the valid entries written in that page. This is because a + * race can happen as soon as the entry of parent level table is + * updated to point to the page of bottom level table. + * GPU can try to access within the the same virtual range that + * is being mapped, before the valid entries of bottom level table + * page are flushed to the memory from the CPU's cache. And if that + * happens then the invalid entries from memory could get fetched + * into the L2 cache and so those entries won't be affected by the + * MMU TLB invalidation done by sending the UNLOCK command. + * If the memory is growable then this could result in unexpected + * page faults happening repeatedly, until the invalid entry is + * evicted from the L2 cache, as Driver would consider the page + * faults for mapped memory as duplicate and won't take any action + * effectively. */ - flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; - kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), - flush_op); + KBASE_MMU_OP_FLUSH_PT); kunmap(p); } - err = 0; + mutex_unlock(&mmut->mmu_lock); + + return 0; fail_unlock: mutex_unlock(&mmut->mmu_lock); + + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, + CALLER_MMU_ASYNC); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + return err; } @@ -2027,8 +2122,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, enum kbase_caller_mmu_sync_info mmu_sync_info) { int err; - struct kbase_mmu_hw_op_param op_param = { 0 }; u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); /* Early out if there is nothing to do */ if (nr == 0) @@ -2036,28 +2131,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds); + if (err) + return err; - op_param.vpfn = vpfn; - op_param.nr = nr; - op_param.op = KBASE_MMU_OP_FLUSH_PT; - op_param.mmu_sync_info = mmu_sync_info; - op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF; - op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); - /* MMU cache flush strategy depends on whether GPU control commands for - * flushing physical address ranges are supported. The new physical pages - * are not present in GPU caches there for they don't need any cache - * maintenance, but PGDs in the page table may or may not be created anew. - * - * Operations that affect the whole GPU cache shall only be done if it's - * impossible to update physical ranges. 
- */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); - else - mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); - - return err; + return 0; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); @@ -2173,7 +2252,8 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, + struct list_head *free_pgds_list) { int current_level; @@ -2181,36 +2261,42 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0); current_level--) { - u64 *current_page = kmap(phys_to_page(pgds[current_level])); + phys_addr_t current_pgd = pgds[current_level]; + struct page *p = phys_to_page(current_pgd); + u64 *current_page = kmap(p); unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); + int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; /* We need to track every level that needs updating */ if (dirty_pgds) *dirty_pgds |= 1ULL << current_level; + kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { - kunmap(phys_to_page(pgds[current_level])); + kunmap(p); - kbase_mmu_free_pgd(kbdev, mmut, pgds[current_level], - true); + /* Ensure the cacheline containing the last valid entry + * of PGD is invalidated from the GPU cache, before the + * PGD page is freed. + */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, + current_pgd + (index * sizeof(u64)), + sizeof(u64), flush_op); + + list_add(&p->lru, free_pgds_list); } else { - int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; - - kbdev->mmu_mode->entry_invalidate(¤t_page[index]); - current_valid_entries--; kbdev->mmu_mode->set_num_valid_entries( current_page, current_valid_entries); - kbase_mmu_sync_pgd( - kbdev, mmut->kctx, pgds[current_level] + (index * sizeof(u64)), - kbase_dma_addr(phys_to_page(pgds[current_level])) + 8 * index, - 8 * 1, flush_op); + kunmap(p); - kunmap(phys_to_page(pgds[current_level])); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), + kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), + flush_op); break; } } @@ -2239,15 +2325,33 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, struct tagged_addr *phys, struct kbase_mmu_hw_op_param *op_param) { - if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Full cache flush through the MMU_COMMAND */ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param); - return; } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) { + /* Full cache flush through the GPU_CONTROL */ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param); - return; } +#if MALI_USE_CSF + else { + /* Partial GPU cache flush with MMU cache invalidation */ + unsigned long irq_flags; + unsigned int i; + bool flush_done = false; + mmu_invalidate(kbdev, kctx, as_nr, op_param); + + for (i = 0; !flush_done && i < op_param->nr; i++) { + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, + KBASE_MMU_OP_FLUSH_MEM); + else + flush_done = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + } + } +#endif } /** @@ -2282,15 +2386,14 @@ static void 
mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, int as_nr) { - phys_addr_t pgd; u64 start_vpfn = vpfn; size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; - unsigned int i; int err = -EFAULT; u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -2328,19 +2431,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table u64 *page; phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; register unsigned int num_of_valid_entries; + phys_addr_t pgd = mmut->pgd; + struct page *p = phys_to_page(pgd); if (count > nr) count = nr; - /* need to check if this is a 2MB or a 4kB page */ - pgd = mmut->pgd; - + /* need to check if this is a 2MB page or a 4kB */ for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); + page = kmap(p); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -2366,9 +2469,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table next_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); + kunmap(p); pgds[level] = pgd; - kunmap(phys_to_page(pgd)); pgd = next_pgd; + p = phys_to_page(pgd); } switch (level) { @@ -2377,7 +2481,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table dev_warn(kbdev->dev, "%s: No support for ATEs at level %d\n", __func__, level); - kunmap(phys_to_page(pgd)); + kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -2412,30 +2516,38 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table else num_of_valid_entries -= pcount; - if (!num_of_valid_entries) { - kunmap(phys_to_page(pgd)); + /* Invalidate the entries we added */ + mmu_mode->entries_invalidate(&page[index], pcount); - kbase_mmu_free_pgd(kbdev, mmut, pgd, true); + if (!num_of_valid_entries) { + kunmap(p); + + /* Ensure the cacheline(s) containing the last valid entries + * of PGD is invalidated from the GPU cache, before the + * PGD page is freed. 
+ */ + kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, + pgd + (index * sizeof(u64)), + pcount * sizeof(u64), flush_op); + + list_add(&p->lru, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - flush_op, &dirty_pgds); + flush_op, &dirty_pgds, + &free_pgds_list); vpfn += count; nr -= count; continue; } - /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[index + i]); - mmu_mode->set_num_valid_entries(page, num_of_valid_entries); kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(phys_to_page(pgd)) + 8 * index, 8 * pcount, + kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); next: - kunmap(phys_to_page(pgd)); + kunmap(p); vpfn += count; nr -= count; } @@ -2454,6 +2566,8 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + return err; } @@ -2627,49 +2741,45 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd, int level) { - phys_addr_t target_pgd; u64 *pgd_page; int i; - struct kbase_mmu_mode const *mmu_mode; - u64 *pgd_page_buffer; + struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; + struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; + u64 *pgd_page_buffer = NULL; lockdep_assert_held(&mmut->mmu_lock); - /* Early-out. No need to kmap to check entries for L3 PGD. */ - if (level == MIDGARD_MMU_BOTTOMLEVEL) { - kbase_mmu_free_pgd(kbdev, mmut, pgd, true); - return; - } - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. */ if (WARN_ON(pgd_page == NULL)) return; - /* Copy the page to our preallocated buffer so that we can minimize - * kmap_atomic usage - */ - pgd_page_buffer = mmut->mmu_teardown_pages[level]; - memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + if (level < MIDGARD_MMU_BOTTOMLEVEL) { + /* Copy the page to our preallocated buffer so that we can minimize + * kmap_atomic usage + */ + pgd_page_buffer = mmut->mmu_teardown_pages[level]; + memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + } + + /* Invalidate page after copying */ + mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; - mmu_mode = kbdev->mmu_mode; - - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - target_pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, - level, pgd_page[i])); - - if (target_pgd) { + if (level < MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { if (mmu_mode->pte_is_valid(pgd_page[i], level)) { - mmu_teardown_level(kbdev, mmut, - target_pgd, - level + 1); + phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr( + mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev, + MGM_DEFAULT_PTE_GROUP, + level, pgd_page[i])); + + mmu_teardown_level(kbdev, mmut, target_pgd, level + 1); } } } - kbase_mmu_free_pgd(kbdev, mmut, pgd, true); + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2685,7 +2795,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; - mmut->pgd = 0; + mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ for (level = MIDGARD_MMU_TOPLEVEL; @@ -2703,7 +2813,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, * 
kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. */ - while (!mmut->pgd) { + while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { int err; err = kbase_mem_pool_grow( @@ -2726,7 +2836,7 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; - if (mmut->pgd) { + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); mutex_unlock(&mmut->mmu_lock); @@ -2750,6 +2860,7 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, int i) destroy_workqueue(kbdev->as[i].pf_wq); } +#ifdef CONFIG_MALI_VECTOR_DUMP static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) { @@ -2891,6 +3002,7 @@ fail_free: return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); +#endif /* CONFIG_MALI_VECTOR_DUMP */ void kbase_mmu_bus_fault_worker(struct work_struct *data) { diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h index 53d1d194eca7..848570f2a6dd 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -25,6 +25,7 @@ #include #define KBASE_MMU_PAGE_ENTRIES 512 +#define KBASE_MMU_INVALID_PGD_ADDRESS (~(phys_addr_t)0) struct kbase_context; struct kbase_mmu_table; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index c9e5ef288ff8..cc764b483f05 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -28,6 +28,26 @@ #include #include +#if MALI_USE_CSF +/** + * mmu_has_flush_skip_pgd_levels() - Check if the GPU has the feature + * AS_LOCKADDR_FLUSH_SKIP_LEVELS + * + * @gpu_props: GPU properties for the GPU instance. + * + * This function returns whether a cache flush can apply the skip flags of + * AS_LOCKADDR_FLUSH_SKIP_LEVELS. + * + * Return: True if cache flush has the said feature. + */ +static bool mmu_has_flush_skip_pgd_levels(struct kbase_gpu_props const *gpu_props) +{ + u32 const signature = + gpu_props->props.raw_props.gpu_id & (GPU_ID2_ARCH_MAJOR | GPU_ID2_ARCH_REV); + + return signature >= (u32)GPU_ID2_PRODUCT_MAKE(12, 0, 4, 0); +} +#endif /** * lock_region() - Generate lockaddr to lock memory region in MMU @@ -105,7 +125,7 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, * therefore the highest bit that differs is bit #16 * and the region size (as a logarithm) is 16 + 1 = 17, i.e. 128 kB. */ - lockaddr_size_log2 = fls(lockaddr_base ^ lockaddr_end); + lockaddr_size_log2 = fls64(lockaddr_base ^ lockaddr_end); /* Cap the size against minimum and maximum values allowed. */ if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) @@ -126,6 +146,13 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr, */ *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); *lockaddr |= lockaddr_size_log2 - 1; + +#if MALI_USE_CSF + if (mmu_has_flush_skip_pgd_levels(gpu_props)) + *lockaddr = + AS_LOCKADDR_FLUSH_SKIP_LEVELS_SET(*lockaddr, op_param->flush_skip_levels); +#endif + return 0; } @@ -207,21 +234,18 @@ static int wait_cores_power_trans_complete(struct kbase_device *kbdev) * implicit unlock. * @as_nr: Address space number for which MMU command needs to be * sent. 
- * @hwaccess_locked: Flag to indicate if hwaccess_lock is held by the caller. * - * This functions ensures that the flush of LSC is not missed for the pages that + * This function ensures that the flush of LSC is not missed for the pages that * were unmapped from the GPU, due to the power down transition of shader cores. * * Return: 0 if the WA was successfully applied, non-zero otherwise. */ -static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, - u32 *mmu_cmd, unsigned int as_nr, bool hwaccess_locked) +static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_cmd, + unsigned int as_nr) { - unsigned long flags = 0; int ret = 0; - if (!hwaccess_locked) - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); /* Check if L2 is OFF. The cores also must be OFF if L2 is not up, so * the workaround can be safely skipped. @@ -230,23 +254,22 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) { dev_warn(kbdev->dev, "Unexpected mmu command received"); - ret = -EINVAL; - goto unlock; + return -EINVAL; } /* Wait for the LOCK MMU command to complete, issued by the caller */ ret = wait_ready(kbdev, as_nr); if (ret) - goto unlock; + return ret; ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_LSC); if (ret) - goto unlock; + return ret; ret = wait_cores_power_trans_complete(kbdev); if (ret) - goto unlock; + return ret; /* As LSC is guaranteed to have been flushed we can use FLUSH_PT * MMU command to only flush the L2. @@ -254,10 +277,6 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, *mmu_cmd = AS_COMMAND_FLUSH_PT; } -unlock: - if (!hwaccess_locked) - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return ret; } #endif @@ -487,8 +506,16 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * FLUSH_MEM/PT command is deprecated. 
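 *
 * In outline, apply_hw_issue_GPU2019_3901_wa() (defined above) performs the
 * following with the hwaccess lock held and the L2 powered up:
 *
 *   wait_ready(kbdev, as_nr);                   - LOCK issued by the caller
 *   kbase_gpu_cache_flush_and_busy_wait(kbdev,
 *           GPU_COMMAND_CACHE_CLN_INV_LSC);     - flush the LSC explicitly
 *   wait_cores_power_trans_complete(kbdev);
 *   *mmu_cmd = AS_COMMAND_FLUSH_PT;             - only the L2 is left to flush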
*/ if (mmu_cmd == AS_COMMAND_FLUSH_MEM) { - ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, - as->number, hwaccess_locked); + if (!hwaccess_locked) { + unsigned long flags = 0; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number); + } + if (ret) return ret; } diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c index dfbdee17782b..f2c627482c18 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_mode_aarch64.c @@ -35,10 +35,8 @@ #define ENTRY_IS_INVAL 2ULL #define ENTRY_IS_PTE 3ULL -#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ #define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ #define ENTRY_ACCESS_RO (3ULL << 6) -#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ #define ENTRY_ACCESS_BIT (1ULL << 10) #define ENTRY_NX_BIT (1ULL << 54) @@ -194,25 +192,26 @@ static void entry_set_pte(u64 *entry, phys_addr_t phy) page_table_entry_set(entry, (phy & PAGE_MASK) | ENTRY_ACCESS_BIT | ENTRY_IS_PTE); } -static void entry_invalidate(u64 *entry) +static void entries_invalidate(u64 *entry, u32 count) { - page_table_entry_set(entry, ENTRY_IS_INVAL); + u32 i; + + for (i = 0; i < count; i++) + page_table_entry_set(entry + i, ENTRY_IS_INVAL); } -static const struct kbase_mmu_mode aarch64_mode = { - .update = mmu_update, - .get_as_setup = kbase_mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .get_num_valid_entries = get_num_valid_entries, - .set_num_valid_entries = set_num_valid_entries, - .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE -}; +static const struct kbase_mmu_mode aarch64_mode = { .update = mmu_update, + .get_as_setup = kbase_mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entries_invalidate = entries_invalidate, + .get_num_valid_entries = get_num_valid_entries, + .set_num_valid_entries = set_num_valid_entries, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) { diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild index 2eecd6635948..60a52d80fa8e 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild +++ b/drivers/gpu/arm/bifrost/platform/devicetree/Kbuild @@ -20,6 +20,5 @@ bifrost_kbase-y += \ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_platform.o \ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.c deleted file mode 100644 index 2eebed022a59..000000000000 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) 
COPYRIGHT 2021-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#include -#include -#include -#include "mali_kbase_config_platform.h" -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -struct kbase_platform_funcs_conf platform_funcs = { - .platform_init_func = NULL, - .platform_term_func = NULL, - .platform_late_init_func = NULL, - .platform_late_term_func = NULL, -}; diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h index 743885ffad0e..584a7217d300 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_config_platform.h @@ -33,13 +33,12 @@ * Attached value: pointer to @ref kbase_platform_funcs_conf * Default value: See @ref kbase_platform_funcs_conf */ -#define PLATFORM_FUNCS (&platform_funcs) +#define PLATFORM_FUNCS (NULL) #define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) extern struct kbase_pm_callback_conf pm_callbacks; extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; -extern struct kbase_platform_funcs_conf platform_funcs; /** * AUTO_SUSPEND_DELAY - Autosuspend delay * diff --git a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c index 07b09f868735..2687bee96ec9 100644 --- a/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/devicetree/mali_kbase_runtime_pm.c @@ -29,7 +29,6 @@ #include "mali_kbase_config_platform.h" - static void enable_gpu_power_control(struct kbase_device *kbdev) { unsigned int i; @@ -82,8 +81,7 @@ static int pm_callback_power_on(struct kbase_device *kbdev) int error; unsigned long flags; - dev_dbg(kbdev->dev, "%s %p\n", __func__, - (void *)kbdev->dev->pm_domain); + dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(kbdev->pm.backend.gpu_powered); @@ -298,5 +296,3 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_runtime_gpu_active_callback = NULL, #endif }; - - diff --git a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c index c00cbcb17d39..910d4b4fd3e1 100644 --- a/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/bifrost/platform/meson/mali_kbase_runtime_pm.c @@ -149,7 +149,7 @@ static int pm_callback_power_on(struct kbase_device *kbdev) int ret = 1; /* Assume GPU has been powered off */ int error; - dev_dbg(kbdev->dev, "%s %p\n", __func__, (void *)kbdev->dev->pm_domain); + dev_dbg(kbdev->dev, "%s %pK\n", __func__, (void *)kbdev->dev->pm_domain); #ifdef KBASE_PM_RUNTIME 
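/*
 * Illustrative note (hypothetical demo_* name, not the kbase code): the
 * dev_dbg() lines above switch from "%p" to "%pK", which lets the kernel
 * hash or hide the pointer according to the kptr_restrict sysctl instead of
 * leaking a raw kernel address into the log. Minimal usage sketch:
 */
#include <linux/device.h>

static void demo_log_pm_domain(struct device *dev)
{
	/* %pK respects kptr_restrict; plain %p is hashed on recent kernels,
	 * but %pK states the intent explicitly and behaves on older ones too.
	 */
	dev_dbg(dev, "%s %pK\n", __func__, (void *)dev->pm_domain);
}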
error = pm_runtime_get_sync(kbdev->dev); diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig index 738dbd42aac7..67b38a28cf96 100644 --- a/drivers/gpu/arm/bifrost/tests/Mconfig +++ b/drivers/gpu/arm/bifrost/tests/Mconfig @@ -26,8 +26,8 @@ menuconfig MALI_KUTF This option will build the Mali testing framework modules. Modules: - - kutf.ko - - kutf_test.ko + - kutf.ko + - kutf_test.ko config MALI_KUTF_IRQ_TEST bool "Build Mali KUTF IRQ test module" @@ -38,7 +38,7 @@ config MALI_KUTF_IRQ_TEST It can determine the latency of the Mali GPU IRQ on your system. Modules: - - mali_kutf_irq_test.ko + - mali_kutf_irq_test.ko config MALI_KUTF_CLK_RATE_TRACE bool "Build Mali KUTF Clock rate trace test module" @@ -50,7 +50,7 @@ config MALI_KUTF_CLK_RATE_TRACE basic trace test in the system. Modules: - - mali_kutf_clk_rate_trace_test_portal.ko + - mali_kutf_clk_rate_trace_test_portal.ko config MALI_KUTF_MGM_INTEGRATION_TEST bool "Build Mali KUTF MGM integration test module" @@ -62,12 +62,12 @@ config MALI_KUTF_MGM_INTEGRATION_TEST group ids. Modules: - - mali_kutf_mgm_integration_test.ko + - mali_kutf_mgm_integration_test.ko # Enable MALI_BIFROST_DEBUG for KUTF modules support config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BACKEND_KERNEL - default n + bool + default y if UNIT_TEST_CODE && BACKEND_KERNEL + default n diff --git a/drivers/gpu/arm/bifrost/tests/build.bp b/drivers/gpu/arm/bifrost/tests/build.bp index 9d6137d17d5f..7abae237f9c3 100644 --- a/drivers/gpu/arm/bifrost/tests/build.bp +++ b/drivers/gpu/arm/bifrost/tests/build.bp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ bob_defaults { "include", "./../../", "./../", - "./" + "./", ], } diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c index 2d7289daca20..a6f54b61d4ad 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -825,7 +825,7 @@ static void *mali_kutf_clk_rate_trace_create_fixture( if (!data) return NULL; - *data = (const struct kutf_clk_rate_trace_fixture_data){ NULL }; + memset(data, 0, sizeof(*data)); pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); spin_lock(&kbdev->pm.clk_rtm.lock); diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index 2d6e68946c00..f2a014d9b5ca 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -51,8 +51,6 @@ struct kutf_irq_fixture_data { struct kbase_device *kbdev; }; -#define SEC_TO_NANO(s) ((s)*1000000000LL) - /* ID for the GPU IRQ */ #define GPU_IRQ_HANDLER 2 @@ -212,6 +210,11 @@ static void mali_kutf_irq_latency(struct kutf_context *context) average_time += irq_time - start_time; udelay(10); + /* Sleep for a ms, every 10000 iterations, to avoid misleading warning + * of CPU softlockup when all GPU IRQs keep going to the same CPU. + */ + if (!(i % 10000)) + msleep(1); } /* Go back to default handler */ diff --git a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp index 2e4a083863e4..8b995f8a0a07 100644 --- a/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp +++ b/drivers/gpu/arm/bifrost/tests/mali_kutf_mgm_integration_test/build.bp @@ -38,4 +38,4 @@ bob_kernel_module { kbuild_options: ["CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST=y"], enabled: true, }, -} \ No newline at end of file +} diff --git a/drivers/gpu/arm/bifrost/tl/Kbuild b/drivers/gpu/arm/bifrost/tl/Kbuild index 6e472dff8a78..1c684d489d6f 100644 --- a/drivers/gpu/arm/bifrost/tl/Kbuild +++ b/drivers/gpu/arm/bifrost/tl/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c index 7160522a0be6..334248867c7c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. 
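/*
 * Illustrative sketch (hypothetical demo_* names, not the kbase test code):
 * the IRQ-latency loop above now calls msleep(1) once every 10000 iterations
 * so that a long run of udelay(10) busy-waits pinned to one CPU does not
 * trigger the soft-lockup watchdog. The general pattern:
 */
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/timekeeping.h>

static u64 demo_measure_average_ns(unsigned int iterations)
{
	u64 total_ns = 0;
	unsigned int i;

	for (i = 1; i <= iterations; i++) {
		u64 start = ktime_get_ns();

		/* ... trigger the event under test and wait for it here ... */

		total_ns += ktime_get_ns() - start;
		udelay(10);

		/* Let this CPU schedule periodically to keep the watchdog happy. */
		if (!(i % 10000))
			msleep(1);
	}

	return iterations ? total_ns / iterations : 0;
}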
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,7 +26,6 @@ #include #include -#include #include #include #include @@ -35,7 +34,7 @@ #include #include #include - +#include /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ @@ -184,90 +183,109 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) } #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags) { - int ret = 0; + int err = 0; u32 timeline_flags = TLSTREAM_ENABLED | flags; - struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_timeline *timeline; + int rcode; - if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { - int rcode; + if (WARN_ON(!kbdev) || WARN_ON(flags & ~BASE_TLSTREAM_FLAGS_MASK)) + return -EINVAL; + + timeline = kbdev->timeline; + if (WARN_ON(!timeline)) + return -EFAULT; + + if (atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) + return -EBUSY; #if MALI_USE_CSF - if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { - ret = kbase_csf_tl_reader_start( - &timeline->csf_tl_reader, kbdev); - if (ret) { - atomic_set(timeline->timeline_flags, 0); - return ret; - } - } -#endif - ret = anon_inode_getfd( - "[mali_tlstream]", - &kbasep_tlstream_fops, - timeline, - O_RDONLY | O_CLOEXEC); - if (ret < 0) { + if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { + err = kbase_csf_tl_reader_start(&timeline->csf_tl_reader, kbdev); + if (err) { atomic_set(timeline->timeline_flags, 0); -#if MALI_USE_CSF - kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); -#endif - return ret; + return err; } + } +#endif - /* Reset and initialize header streams. */ - kbase_tlstream_reset( - &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); + /* Reset and initialize header streams. */ + kbase_tlstream_reset(&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); - timeline->obj_header_btc = obj_desc_header_size; - timeline->aux_header_btc = aux_desc_header_size; + timeline->obj_header_btc = obj_desc_header_size; + timeline->aux_header_btc = aux_desc_header_size; #if !MALI_USE_CSF - /* If job dumping is enabled, readjust the software event's - * timeout as the default value of 3 seconds is often - * insufficient. - */ - if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { - dev_info(kbdev->dev, - "Job dumping is enabled, readjusting the software event's timeout\n"); - atomic_set(&kbdev->js_data.soft_job_timeout_ms, - 1800000); - } + /* If job dumping is enabled, readjust the software event's + * timeout as the default value of 3 seconds is often + * insufficient. + */ + if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { + dev_info(kbdev->dev, + "Job dumping is enabled, readjusting the software event's timeout\n"); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); + } #endif /* !MALI_USE_CSF */ - /* Summary stream was cleared during acquire. - * Create static timeline objects that will be - * read by client. - */ - kbase_create_timeline_objects(kbdev); + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. + */ + kbase_create_timeline_objects(kbdev); #ifdef CONFIG_MALI_BIFROST_DEVFREQ - /* Devfreq target tracepoints are only fired when the target - * changes, so we won't know the current target unless we - * send it now. 
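/*
 * Illustrative sketch (hypothetical demo_* names, not the kbase code):
 * kbase_timeline_acquire() above gates ownership with a single
 * atomic_cmpxchg() on timeline_flags, so a second client gets -EBUSY until
 * the first one releases. The pattern in isolation, assuming any non-zero
 * flags value marks the stream as owned:
 */
#include <linux/atomic.h>
#include <linux/errno.h>

static atomic_t demo_flags = ATOMIC_INIT(0);

static int demo_acquire(int flags)
{
	/* Succeeds only if demo_flags was still 0 (unowned). */
	if (atomic_cmpxchg(&demo_flags, 0, flags))
		return -EBUSY;

	/* ... set up streams here; on failure roll back with demo_release() ... */
	return 0;
}

static void demo_release(void)
{
	atomic_set(&demo_flags, 0);
}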
- */ - kbase_tlstream_current_devfreq_target(kbdev); + /* Devfreq target tracepoints are only fired when the target + * changes, so we won't know the current target unless we + * send it now. + */ + kbase_tlstream_current_devfreq_target(kbdev); #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ - /* Start the autoflush timer. - * We must do this after creating timeline objects to ensure we - * don't auto-flush the streams which will be reset during the - * summarization process. - */ - atomic_set(&timeline->autoflush_timer_active, 1); - rcode = mod_timer(&timeline->autoflush_timer, - jiffies + - msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); - } else { - ret = -EBUSY; - } + /* Start the autoflush timer. + * We must do this after creating timeline objects to ensure we + * don't auto-flush the streams which will be reset during the + * summarization process. + */ + atomic_set(&timeline->autoflush_timer_active, 1); + rcode = mod_timer(&timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); - if (ret >= 0) - timeline->last_acquire_time = ktime_get_raw(); + timeline->last_acquire_time = ktime_get_raw(); - return ret; + return err; +} + +void kbase_timeline_release(struct kbase_timeline *timeline) +{ + ktime_t elapsed_time; + s64 elapsed_time_ms, time_to_sleep; + + if (WARN_ON(!timeline) || WARN_ON(!atomic_read(timeline->timeline_flags))) + return; + + /* Get the amount of time passed since the timeline was acquired and ensure + * we sleep for long enough such that it has been at least + * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. + * This prevents userspace from spamming acquire and release too quickly. + */ + elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); + elapsed_time_ms = ktime_to_ms(elapsed_time); + time_to_sleep = (elapsed_time_ms < 0 ? TIMELINE_HYSTERESIS_TIMEOUT_MS : + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + if (time_to_sleep > 0) + msleep_interruptible(time_to_sleep); + +#if MALI_USE_CSF + kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); +#endif + + /* Stop autoflush timer before releasing access to streams. 
*/ + atomic_set(&timeline->autoflush_timer_active, 0); + del_timer_sync(&timeline->autoflush_timer); + + atomic_set(timeline->timeline_flags, 0); } int kbase_timeline_streams_flush(struct kbase_timeline *timeline) @@ -275,11 +293,17 @@ int kbase_timeline_streams_flush(struct kbase_timeline *timeline) enum tl_stream_type stype; bool has_bytes = false; size_t nbytes = 0; -#if MALI_USE_CSF - int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); - if (ret > 0) - has_bytes = true; + if (WARN_ON(!timeline)) + return -EINVAL; + +#if MALI_USE_CSF + { + int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); + + if (ret > 0) + has_bytes = true; + } #endif for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h index 96a4b181a285..62be6c64c850 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline.h @@ -117,4 +117,12 @@ void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); #endif /* MALI_UNIT_TEST */ +/** + * kbase_timeline_io_debugfs_init - Add a debugfs entry for reading timeline stream data + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + */ +void kbase_timeline_io_debugfs_init(struct kbase_device *kbdev); + #endif /* _KBASE_TIMELINE_H */ diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index af8b3d8c8c35..644d69bc209d 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -24,9 +24,11 @@ #include "mali_kbase_tracepoints.h" #include "mali_kbase_timeline.h" -#include +#include + #include #include +#include /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, @@ -36,15 +38,6 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -/* The timeline stream file operations structure. */ -const struct file_operations kbasep_tlstream_fops = { - .owner = THIS_MODULE, - .release = kbasep_timeline_io_release, - .read = kbasep_timeline_io_read, - .poll = kbasep_timeline_io_poll, - .fsync = kbasep_timeline_io_fsync, -}; - /** * kbasep_timeline_io_packet_pending - check timeline streams for pending * packets @@ -290,7 +283,8 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, * @filp: Pointer to file structure * @wait: Pointer to poll table * - * Return: POLLIN if data can be read without blocking, otherwise zero + * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, + * otherwise zero, or EPOLLHUP | EPOLLERR on error. 
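/*
 * Illustrative sketch (hypothetical demo_* names, not the kbase code): the
 * poll handler below reports readiness with __poll_t masks -
 * EPOLLIN | EPOLLRDNORM when data can be read, EPOLLHUP | EPOLLERR for a
 * broken file - rather than returning a negative errno. A minimal .poll
 * callback with the same shape:
 */
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

struct demo_stream {
	wait_queue_head_t event_queue;
	bool data_ready;
};

static __poll_t demo_poll(struct file *filp, poll_table *wait)
{
	struct demo_stream *s = filp->private_data;

	if (!s)
		return EPOLLHUP | EPOLLERR;

	/* Register interest first, then re-check the condition. */
	poll_wait(filp, &s->event_queue, wait);

	if (s->data_ready)
		return EPOLLIN | EPOLLRDNORM;

	return 0;
}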
*/ static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) { @@ -302,20 +296,91 @@ static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) KBASE_DEBUG_ASSERT(wait); if (WARN_ON(!filp->private_data)) - return (__poll_t)-EFAULT; + return EPOLLHUP | EPOLLERR; timeline = (struct kbase_timeline *)filp->private_data; /* If there are header bytes to copy, read will not block */ if (kbasep_timeline_has_header_data(timeline)) - return POLLIN; + return EPOLLIN | EPOLLRDNORM; poll_wait(filp, &timeline->event_queue, wait); if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) - return POLLIN; - return 0; + return EPOLLIN | EPOLLRDNORM; + + return (__poll_t)0; } +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +{ + /* The timeline stream file operations structure. */ + static const struct file_operations kbasep_tlstream_fops = { + .owner = THIS_MODULE, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, + .fsync = kbasep_timeline_io_fsync, + }; + int err; + + if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK)) + return -EINVAL; + + err = kbase_timeline_acquire(kbdev, flags); + if (err) + return err; + + err = anon_inode_getfd("[mali_tlstream]", &kbasep_tlstream_fops, kbdev->timeline, + O_RDONLY | O_CLOEXEC); + if (err < 0) + kbase_timeline_release(kbdev->timeline); + + return err; +} + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static int kbasep_timeline_io_open(struct inode *in, struct file *file) +{ + struct kbase_device *const kbdev = in->i_private; + + if (WARN_ON(!kbdev)) + return -EFAULT; + + file->private_data = kbdev->timeline; + return kbase_timeline_acquire(kbdev, BASE_TLSTREAM_FLAGS_MASK & + ~BASE_TLSTREAM_JOB_DUMPING_ENABLED); +} + +void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ + static const struct file_operations kbasep_tlstream_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_timeline_io_open, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, + .fsync = kbasep_timeline_io_fsync, + }; + struct dentry *file; + + if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return; + + file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_tlstream_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) + dev_warn(kbdev->dev, "Unable to create timeline debugfs entry"); +} +#else +/* + * Stub function for when debugfs is disabled + */ +void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev) +{ +} +#endif + /** * kbasep_timeline_io_release - release timeline stream descriptor * @inode: Pointer to inode structure @@ -325,55 +390,18 @@ static __poll_t kbasep_timeline_io_poll(struct file *filp, poll_table *wait) */ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) { - struct kbase_timeline *timeline; - ktime_t elapsed_time; - s64 elapsed_time_ms, time_to_sleep; - - KBASE_DEBUG_ASSERT(inode); - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(filp->private_data); - CSTD_UNUSED(inode); - timeline = (struct kbase_timeline *)filp->private_data; - - /* Get the amount of time passed since the timeline was acquired and ensure - * we sleep for long enough such that it has been at least - * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. - * This prevents userspace from spamming acquire and release too quickly. 
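/*
 * Illustrative sketch (hypothetical demo_* names and constant, not the kbase
 * code): the release path enforces a minimum interval between acquire and
 * release by sleeping for whatever remains of the hysteresis window, so
 * userspace cannot spin the stream open and closed at high frequency. The
 * timing arithmetic in isolation, with DEMO_HYSTERESIS_MS standing in for
 * the driver's timeout constant:
 */
#include <linux/delay.h>
#include <linux/ktime.h>

#define DEMO_HYSTERESIS_MS 1000

static void demo_rate_limited_release(ktime_t last_acquire_time)
{
	s64 elapsed_ms = ktime_to_ms(ktime_sub(ktime_get_raw(), last_acquire_time));
	s64 time_to_sleep;

	/* A negative elapsed time (clock anomaly) falls back to the full window. */
	time_to_sleep = (elapsed_ms < 0) ? DEMO_HYSTERESIS_MS :
					   DEMO_HYSTERESIS_MS - elapsed_ms;
	if (time_to_sleep > 0)
		msleep_interruptible(time_to_sleep);

	/* ... then stop timers and clear the ownership flag ... */
}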
- */ - elapsed_time = ktime_sub(ktime_get_raw(), timeline->last_acquire_time); - elapsed_time_ms = ktime_to_ms(elapsed_time); - time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, - TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); - if (time_to_sleep > 0) - msleep(time_to_sleep); - -#if MALI_USE_CSF - kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); -#endif - - /* Stop autoflush timer before releasing access to streams. */ - atomic_set(&timeline->autoflush_timer_active, 0); - del_timer_sync(&timeline->autoflush_timer); - - atomic_set(timeline->timeline_flags, 0); + kbase_timeline_release(filp->private_data); return 0; } static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { - struct kbase_timeline *timeline; - CSTD_UNUSED(start); CSTD_UNUSED(end); CSTD_UNUSED(datasync); - if (WARN_ON(!filp->private_data)) - return -EFAULT; - - timeline = (struct kbase_timeline *)filp->private_data; - - return kbase_timeline_streams_flush(timeline); + return kbase_timeline_streams_flush(filp->private_data); } diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h index bf2c3855434a..de30bccc7cca 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_priv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,7 +51,7 @@ * @event_queue: Timeline stream event queue * @bytes_collected: Number of bytes read by user * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags - * otherwise. See kbase_timeline_io_acquire(). + * otherwise. See kbase_timeline_acquire(). * @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header * @last_acquire_time: The time at which timeline was last acquired. @@ -77,8 +77,27 @@ struct kbase_timeline { #endif }; -extern const struct file_operations kbasep_tlstream_fops; - void kbase_create_timeline_objects(struct kbase_device *kbdev); +/** + * kbase_timeline_acquire - acquire timeline for a userspace client. + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @flags: Timeline stream flags + * + * Each timeline instance can be acquired by only one userspace client at a time. + * + * Return: Zero on success, error number on failure (e.g. if already acquired). + */ +int kbase_timeline_acquire(struct kbase_device *kbdev, u32 flags); + +/** + * kbase_timeline_release - release timeline for a userspace client. + * @timeline: Timeline instance to be stopped. It must be previously acquired + * with kbase_timeline_acquire(). + * + * Releasing the timeline instance allows it to be acquired by another userspace client. 
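/*
 * Illustrative sketch (hypothetical demo_* names, not the kbase code): with
 * acquire and release split into their own helpers, the ioctl path reduces
 * to "acquire, wrap the object in an anonymous fd, release again if fd
 * creation fails". The ordering, stripped to its skeleton:
 */
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/module.h>

static int demo_acquire_stream(void)  { return 0; }	/* stand-in: 0 or -errno */
static void demo_release_stream(void) { }		/* stand-in */

static const struct file_operations demo_stream_fops = {
	.owner = THIS_MODULE,
	/* .read / .poll / .release / .fsync would go here */
};

static int demo_io_acquire(void *stream)
{
	int err = demo_acquire_stream();

	if (err)
		return err;

	/* Returns the new fd (>= 0) on success, a negative errno otherwise. */
	err = anon_inode_getfd("[demo_stream]", &demo_stream_fops, stream,
			       O_RDONLY | O_CLOEXEC);
	if (err < 0)
		demo_release_stream();	/* undo the acquire on failure */

	return err;
}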
+ */ +void kbase_timeline_release(struct kbase_timeline *timeline); + #endif /* _KBASE_TIMELINE_PRIV_H */ diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index 3ac78503ce1f..fd0d0c01adde 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -100,14 +100,14 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, - KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, - KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, @@ -416,14 +416,6 @@ enum tl_msg_id_obj { "KCPU Queue enqueues Unmap Import ignoring reference count", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ - "KCPU Queue enqueues Error Barrier", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ - "KCPU Queue enqueues Group Suspend", \ - "@ppI", \ - "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Begin array of KCPU Queue enqueues JIT Alloc", \ "@p", \ @@ -448,6 +440,14 @@ enum tl_msg_id_obj { "End array of KCPU Queue enqueues JIT Free", \ "@p", \ "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ + "KCPU Queue enqueues Error Barrier", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ + "KCPU Queue enqueues Group Suspend", \ + "@ppI", \ + "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ "KCPU Queue starts a Signal on Fence", \ "@p", \ @@ -465,15 +465,15 @@ enum tl_msg_id_obj { "@pI", \ "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ - "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ + "KCPU Queue starts a Wait on Cross Queue Sync Object", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ - "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \ + "KCPU Queue ends a Wait on Cross Queue Sync Object", \ "@pI", \ "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ - "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ + "KCPU Queue executes a Set on Cross Queue Sync Object", \ "@pI", \ "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ @@ -2540,60 +2540,6 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( - struct kbase_tlstream *stream, - const void *kcpu_queue -) -{ - const u32 msg_id = 
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *group_suspend_buf, - u32 gpu_cmdq_grp_handle -) -{ - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - + sizeof(group_suspend_buf) - + sizeof(gpu_cmdq_grp_handle) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - pos = kbasep_serialize_bytes(buffer, - pos, &group_suspend_buf, sizeof(group_suspend_buf)); - pos = kbasep_serialize_bytes(buffer, - pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue @@ -2772,6 +2718,60 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( + struct kbase_tlstream *stream, + const void *kcpu_queue +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *group_suspend_buf, + u32 gpu_cmdq_grp_handle +) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(group_suspend_buf) + + sizeof(gpu_cmdq_grp_handle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &group_suspend_buf, sizeof(group_suspend_buf)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( struct kbase_tlstream *stream, const 
void *kcpu_queue diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index cb1e63ef56f5..be0c62edecd7 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -77,7 +77,7 @@ extern const size_t aux_desc_header_size; #define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP #define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED -#define TLSTREAM_ENABLED (1 << 31) +#define TLSTREAM_ENABLED (1u << 31) void __kbase_tlstream_tl_new_ctx( struct kbase_tlstream *stream, @@ -496,18 +496,6 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( u64 map_import_buf_gpu_addr ); -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( - struct kbase_tlstream *stream, - const void *kcpu_queue -); - -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( - struct kbase_tlstream *stream, - const void *kcpu_queue, - const void *group_suspend_buf, - u32 gpu_cmdq_grp_handle -); - void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue @@ -548,6 +536,18 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( const void *kcpu_queue ); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( + struct kbase_tlstream *stream, + const void *kcpu_queue +); + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *group_suspend_buf, + u32 gpu_cmdq_grp_handle +); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( struct kbase_tlstream *stream, const void *kcpu_queue @@ -2493,68 +2493,6 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#if MALI_USE_CSF -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ - kbdev, \ - kcpu_queue \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_flags); \ - if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ - __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue \ - ); \ - } while (0) -#else -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ - kbdev, \ - kcpu_queue \ - ) \ - do { } while (0) -#endif /* MALI_USE_CSF */ - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @group_suspend_buf: Pointer to the suspend buffer structure - * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace - */ -#if MALI_USE_CSF -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ - kbdev, \ - kcpu_queue, \ - group_suspend_buf, \ - gpu_cmdq_grp_handle \ - ) \ - do { \ - int enabled = atomic_read(&kbdev->timeline_flags); \ - if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ - __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ - __TL_DISPATCH_STREAM(kbdev, obj), \ - kcpu_queue, \ - group_suspend_buf, \ - gpu_cmdq_grp_handle \ - ); \ - } while (0) -#else -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ - kbdev, \ - kcpu_queue, \ - group_suspend_buf, \ - gpu_cmdq_grp_handle \ - ) \ - do { } while (0) -#endif /* MALI_USE_CSF */ - /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - 
Begin array of KCPU Queue enqueues JIT Alloc * @@ -2757,6 +2695,68 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @group_suspend_buf: Pointer to the suspend buffer structure + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ + kbdev, \ + kcpu_queue, \ + group_suspend_buf, \ + gpu_cmdq_grp_handle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, \ + group_suspend_buf, \ + gpu_cmdq_grp_handle \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ + kbdev, \ + kcpu_queue, \ + group_suspend_buf, \ + gpu_cmdq_grp_handle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - KCPU Queue starts a Signal on Fence * @@ -2874,7 +2874,7 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on an array of Cross Queue Sync Objects + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on Cross Queue Sync Object * * @kbdev: Kbase device * @kcpu_queue: KCPU queue @@ -2901,7 +2901,7 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on an array of Cross Queue Sync Objects + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on Cross Queue Sync Object * * @kbdev: Kbase device * @kcpu_queue: KCPU queue @@ -2932,7 +2932,7 @@ struct kbase_tlstream; #endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on an array of Cross Queue Sync Objects + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on Cross Queue Sync Object * * @kbdev: Kbase device * @kcpu_queue: KCPU queue diff --git a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h b/include/linux/mali_arbiter_interface.h similarity index 99% rename from drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h rename to include/linux/mali_arbiter_interface.h index a0ca1ccddcc9..8e675ec2ad3b 100644 --- a/drivers/gpu/arm/bifrost/arbiter/mali_kbase_arbiter_interface.h +++ b/include/linux/mali_arbiter_interface.h @@ -41,7 +41,7 @@ * 4 - Added max_config support * 5 - Added GPU clock frequency reporting support from arbiter */ -#define 
MALI_KBASE_ARBITER_INTERFACE_VERSION 5 +#define MALI_ARBITER_INTERFACE_VERSION 5 /** * DOC: NO_FREQ is used in case platform doesn't support reporting frequency diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h index c4667803b361..786e3b995f29 100644 --- a/include/linux/memory_group_manager.h +++ b/include/linux/memory_group_manager.h @@ -43,6 +43,8 @@ struct memory_group_manager_import_data; * @mgm_free_page: Callback to free physical memory in a group * @mgm_get_import_memory_id: Callback to get the group ID for imported memory * @mgm_update_gpu_pte: Callback to modify a GPU page table entry + * @mgm_pte_to_original_pte: Callback to get the original PTE entry as given + * to mgm_update_gpu_pte * @mgm_vmf_insert_pfn_prot: Callback to map a physical memory page for the CPU */ struct memory_group_manager_ops { @@ -120,7 +122,8 @@ struct memory_group_manager_ops { * This function allows the memory group manager to modify a GPU page * table entry before it is stored by the kbase module (controller * driver). It may set certain bits in the page table entry attributes - * or in the physical address, based on the physical memory group ID. + * or modify the physical address, based on the physical memory group ID + * and/or additional data in struct memory_group_manager_device. * * Return: A modified GPU page table entry to be stored in a page table. */ @@ -128,6 +131,17 @@ struct memory_group_manager_ops { int group_id, int mmu_level, u64 pte); /* + * mgm_pte_to_original_pte - Undo any modification done during mgm_update_gpu_pte() + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @group_id: A physical memory group ID. The meaning of this is + * defined by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the page table entry in @ate. + * @pte: The page table entry to restore the original representation for, + * in LPAE or AArch64 format (depending on the driver's configuration). + * * Undo any modifications done during mgm_update_gpu_pte(). * This function allows getting back the original PTE entry as given * to mgm_update_gpu_pte(). diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index a8e08742069d..d0a09985c5ca 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -28,4 +28,20 @@ typedef unsigned int __poll_t; #endif +#ifndef EPOLLHUP +#define EPOLLHUP POLLHUP +#endif + +#ifndef EPOLLERR +#define EPOLLERR POLLERR +#endif + +#ifndef EPOLLIN +#define EPOLLIN POLLIN +#endif + +#ifndef EPOLLRDNORM +#define EPOLLRDNORM POLLRDNORM +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index 3b02350c08bf..d9813c055809 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -573,6 +573,7 @@ struct base_csf_notification { * is a bitpattern where a set bit indicates that the format is supported. * Before using a texture format, it is recommended that the corresponding * bit be checked. + * @paddings: Padding bytes. * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. 
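/*
 * Illustrative note (hypothetical demo_* struct, not the kbase UAPI): the new
 * "paddings" members above make the alignment hole before a __u64 explicit,
 * so 32-bit and 64-bit userspace agree on the structure layout instead of
 * relying on compiler-inserted padding. A sketch with a compile-time check:
 */
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/build_bug.h>

struct demo_props {
	__u32 first;
	__u32 features[3];	/* 16 bytes so far */
	__u32 counter_size;	/* 20 bytes: the next __u64 needs 4 bytes of pad */
	__u8  paddings[4];	/* state the hole explicitly */
	__u64 available_memory_size;
};

/* The 64-bit field must land on an 8-byte boundary on every ABI. */
static_assert(offsetof(struct demo_props, available_memory_size) % 8 == 0);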
* It is unlikely that a client will be able to allocate all of this memory * for their own purposes, but this at least provides an upper bound on the @@ -590,6 +591,7 @@ struct mali_base_gpu_core_props { __u32 gpu_freq_khz_max; __u32 log2_program_counter_size; __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u8 paddings[4]; __u64 gpu_available_memory_size; }; diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h new file mode 100644 index 000000000000..f49ab0036fc3 --- /dev/null +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_errors_dumpfault.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_ +#define _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_ + +/** + * enum dumpfault_error_type - Enumeration to define errors to be dumped + * + * @DF_NO_ERROR: No pending error + * @DF_CSG_SUSPEND_TIMEOUT: CSG suspension timeout + * @DF_CSG_TERMINATE_TIMEOUT: CSG group termination timeout + * @DF_CSG_START_TIMEOUT: CSG start timeout + * @DF_CSG_RESUME_TIMEOUT: CSG resume timeout + * @DF_CSG_EP_CFG_TIMEOUT: CSG end point configuration timeout + * @DF_CSG_STATUS_UPDATE_TIMEOUT: CSG status update timeout + * @DF_PROGRESS_TIMER_TIMEOUT: Progress timer timeout + * @DF_FW_INTERNAL_ERROR: Firmware internal error + * @DF_CS_FATAL: CS fatal error + * @DF_CS_FAULT: CS fault error + * @DF_FENCE_WAIT_TIMEOUT: Fence wait timeout + * @DF_PROTECTED_MODE_EXIT_TIMEOUT: P.mode exit timeout + * @DF_PROTECTED_MODE_ENTRY_FAILURE: P.mode entrance failure + * @DF_PING_REQUEST_TIMEOUT: Ping request timeout + * @DF_CORE_DOWNSCALE_REQUEST_TIMEOUT: DCS downscale request timeout + * @DF_TILER_OOM: Tiler Out-of-memory error + * @DF_GPU_PAGE_FAULT: GPU page fault + * @DF_BUS_FAULT: MMU BUS Fault + * @DF_GPU_PROTECTED_FAULT: GPU P.mode fault + * @DF_AS_ACTIVE_STUCK: AS active stuck + * @DF_GPU_SOFT_RESET_FAILURE: GPU soft reset falure + * + * This is used for kbase to notify error type of an event whereby + * user space client will dump relevant debugging information via debugfs. + * @DF_NO_ERROR is used to indicate no pending fault, thus the client will + * be blocked on reading debugfs file till a fault happens. 
+ */ +enum dumpfault_error_type { + DF_NO_ERROR = 0, + DF_CSG_SUSPEND_TIMEOUT, + DF_CSG_TERMINATE_TIMEOUT, + DF_CSG_START_TIMEOUT, + DF_CSG_RESUME_TIMEOUT, + DF_CSG_EP_CFG_TIMEOUT, + DF_CSG_STATUS_UPDATE_TIMEOUT, + DF_PROGRESS_TIMER_TIMEOUT, + DF_FW_INTERNAL_ERROR, + DF_CS_FATAL, + DF_CS_FAULT, + DF_FENCE_WAIT_TIMEOUT, + DF_PROTECTED_MODE_EXIT_TIMEOUT, + DF_PROTECTED_MODE_ENTRY_FAILURE, + DF_PING_REQUEST_TIMEOUT, + DF_CORE_DOWNSCALE_REQUEST_TIMEOUT, + DF_TILER_OOM, + DF_GPU_PAGE_FAULT, + DF_BUS_FAULT, + DF_GPU_PROTECTED_FAULT, + DF_AS_ACTIVE_STUCK, + DF_GPU_SOFT_RESET_FAILURE, +}; + +#endif /* _UAPI_KBASE_CSF_ERRORS_DUMPFAULT_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index db7252605f06..d9a1867e13c3 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -60,10 +60,22 @@ * - Dummy model (no mali) backend will now clear HWC values after each sample * 1.12: * - Added support for incremental rendering flag in CSG create call + * 1.13: + * - Added ioctl to query a register of USER page. + * 1.14: + * - Added support for passing down the buffer descriptor VA in tiler heap init + * 1.15: + * - Enable new sync_wait GE condition + * 1.16: + * - Remove legacy definitions: + * - base_jit_alloc_info_10_2 + * - base_jit_alloc_info_11_5 + * - kbase_ioctl_mem_jit_init_10_2 + * - kbase_ioctl_mem_jit_init_11_5 */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 12 +#define BASE_UK_VERSION_MINOR 16 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -271,9 +283,9 @@ union kbase_ioctl_cs_queue_group_create { __u8 csi_handlers; __u8 padding[2]; /** - * @in.reserved: Reserved + * @in.dvs_buf: buffer for deferred vertex shader */ - __u64 reserved; + __u64 dvs_buf; } in; struct { __u8 group_handle; @@ -361,6 +373,7 @@ struct kbase_ioctl_kcpu_queue_enqueue { * allowed. * @in.group_id: Group ID to be used for physical allocations. * @in.padding: Padding + * @in.buf_desc_va: Buffer descriptor GPU VA for tiler heap reclaims. * @out: Output parameters * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up * for the heap. @@ -376,6 +389,7 @@ union kbase_ioctl_cs_tiler_heap_init { __u16 target_in_flight; __u8 group_id; __u8 padding; + __u64 buf_desc_va; } in; struct { __u64 gpu_heap_va; @@ -386,6 +400,43 @@ union kbase_ioctl_cs_tiler_heap_init { #define KBASE_IOCTL_CS_TILER_HEAP_INIT \ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) +/** + * union kbase_ioctl_cs_tiler_heap_init_1_13 - Initialize chunked tiler memory heap, + * earlier version upto 1.13 + * @in: Input parameters + * @in.chunk_size: Size of each chunk. + * @in.initial_chunks: Initial number of chunks that heap will be created with. + * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. + * @in.target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. + * @in.group_id: Group ID to be used for physical allocations. + * @in.padding: Padding + * @out: Output parameters + * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up + * for the heap. + * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, + * actually points to the header of heap chunk and not to + * the low address of free memory in the chunk. 
+ */ +union kbase_ioctl_cs_tiler_heap_init_1_13 { + struct { + __u32 chunk_size; + __u32 initial_chunks; + __u32 max_chunks; + __u16 target_in_flight; + __u8 group_id; + __u8 padding; + } in; + struct { + __u64 gpu_heap_va; + __u64 first_chunk_va; + } out; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_INIT_1_13 \ + _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init_1_13) + /** * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap * instance @@ -487,6 +538,29 @@ union kbase_ioctl_mem_alloc_ex { #define KBASE_IOCTL_MEM_ALLOC_EX _IOWR(KBASE_IOCTL_TYPE, 59, union kbase_ioctl_mem_alloc_ex) +/** + * union kbase_ioctl_read_user_page - Read a register of USER page + * + * @in: Input parameters. + * @in.offset: Register offset in USER page. + * @in.padding: Padding to round up to a multiple of 8 bytes, must be zero. + * @out: Output parameters. + * @out.val_lo: Value of 32bit register or the 1st half of 64bit register to be read. + * @out.val_hi: Value of the 2nd half of 64bit register to be read. + */ +union kbase_ioctl_read_user_page { + struct { + __u32 offset; + __u32 padding; + } in; + struct { + __u32 val_lo; + __u32 val_hi; + } out; +}; + +#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page) + /*************** * test ioctls * ***************/ diff --git a/drivers/base/arm/dma_buf_lock/src/build.bp b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h similarity index 67% rename from drivers/base/arm/dma_buf_lock/src/build.bp rename to include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h index dc49c0feb44d..75ae6a1a5409 100644 --- a/drivers/base/arm/dma_buf_lock/src/build.bp +++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,19 +19,12 @@ * */ -bob_kernel_module { - name: "dma_buf_lock", - defaults: [ - "kernel_defaults" - ], - srcs: [ - "Kbuild", - "dma_buf_lock.c", - "dma_buf_lock.h", - ], - enabled: false, - dma_buf_lock: { - kbuild_options: ["CONFIG_DMA_BUF_LOCK=y"], - enabled: true, - }, -} +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ + +/* IPA control registers */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) +#define STATUS 0x004 /* (RO) Status register */ + +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h index f46638947953..87f849d28c93 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/include/uapi/gpu/arm/bifrost/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
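/*
 * Illustrative note (hypothetical DEMO_* names, not the kbase UAPI):
 * KBASE_IOCTL_CS_TILER_HEAP_INIT and the retained _1_13 variant above share
 * ioctl number 48, yet remain distinct commands because _IOWR() also encodes
 * the argument size, and the newer union grew a buf_desc_va field. A sketch
 * of that property:
 */
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/build_bug.h>

struct demo_heap_init_old { __u32 chunk_size; __u32 max_chunks; };
struct demo_heap_init_new { __u32 chunk_size; __u32 max_chunks; __u64 buf_desc_va; };

#define DEMO_IOCTL_TYPE 0x80	/* stand-in for the real ioctl type byte */
#define DEMO_HEAP_INIT_OLD _IOWR(DEMO_IOCTL_TYPE, 48, struct demo_heap_init_old)
#define DEMO_HEAP_INIT_NEW _IOWR(DEMO_IOCTL_TYPE, 48, struct demo_heap_init_new)

/* Same direction, type and number, but different sizes => different commands. */
static_assert(DEMO_HEAP_INIT_OLD != DEMO_HEAP_INIT_NEW);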
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,8 +23,24 @@ #define _UAPI_KBASE_GPU_REGMAP_JM_H_ /* GPU control registers */ -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest - * clean-and-invalidate operation - */ + +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ + +/* Job control registers */ + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ #endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index 1a99e56b0910..1f34d99830fe 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -126,6 +126,8 @@ #define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) #define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) #define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) +#define GPU_ID2_PRODUCT_TTIX GPU_ID2_MODEL_MAKE(12, 0) +#define GPU_ID2_PRODUCT_LTIX GPU_ID2_MODEL_MAKE(12, 1) /** * GPU_ID_MAKE - Helper macro to generate GPU_ID using id, major, minor, status diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h index deca665df030..cdfcf8d3150e 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,13 +22,70 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_H_ #define _UAPI_KBASE_GPU_REGMAP_H_ -#if !MALI_USE_CSF +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) + +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ + /* MMU control registers */ + #define MEMORY_MANAGEMENT_BASE 0x2000 + #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + #define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 + #endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h index ae43908b9360..1a3098d6cad8 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h @@ -1024,6 +1024,7 @@ struct base_dump_cpu_gpu_counters { * is a bitpattern where a set bit indicates that the format is supported. * Before using a texture format, it is recommended that the corresponding * bit be checked. + * @paddings_1: Padding bytes. * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. 
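/*
 * Illustrative sketch (DEMO_* copies of the values above, not new defines for
 * the driver): the MMU register macros compose an address from the block
 * base, a 0x40-byte per-address-space stride ((n) << 6) and the register
 * offset. Worked example for AS_COMMAND of address space 3:
 */
#include <linux/build_bug.h>

#define DEMO_MMU_BASE		0x2000
#define DEMO_MMU_REG(r)		(DEMO_MMU_BASE + (r))
#define DEMO_MMU_AS0		0x400
#define DEMO_MMU_AS_REG(n, r)	(DEMO_MMU_REG(DEMO_MMU_AS0 + ((n) << 6)) + (r))
#define DEMO_AS_COMMAND		0x18

/* 0x2000 + 0x400 + 3 * 0x40 + 0x18 = 0x24D8 */
static_assert(DEMO_MMU_AS_REG(3, DEMO_AS_COMMAND) == 0x24D8);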
diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
index ae43908b9360..1a3098d6cad8 100644
--- a/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_base_jm_kernel.h
@@ -1024,6 +1024,7 @@ struct base_dump_cpu_gpu_counters {
  * is a bitpattern where a set bit indicates that the format is supported.
  * Before using a texture format, it is recommended that the corresponding
  * bit be checked.
+ * @paddings_1: Padding bytes.
  * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
  * It is unlikely that a client will be able to allocate all of this memory
  * for their own purposes, but this at least provides an upper bound on the
@@ -1034,6 +1035,7 @@ struct base_dump_cpu_gpu_counters {
  * @num_exec_engines: The number of execution engines. Only valid for tGOX
  * (Bifrost) GPUs, where GPU_HAS_REG_CORE_FEATURES is defined. Otherwise,
  * this is always 0.
+ * @paddings_2: Padding bytes.
  */
 struct mali_base_gpu_core_props {
 	__u32 product_id;
@@ -1044,8 +1046,10 @@ struct mali_base_gpu_core_props {
 	__u32 gpu_freq_khz_max;
 	__u32 log2_program_counter_size;
 	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	__u8 paddings_1[4];
 	__u64 gpu_available_memory_size;
 	__u8 num_exec_engines;
+	__u8 paddings_2[7];
 };
 
 #endif /* _UAPI_BASE_JM_KERNEL_H_ */
diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
index 20d931adc9b8..9c7553ff2bd2 100644
--- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h
@@ -127,9 +127,15 @@
  * - First release of new HW performance counters interface.
  * 11.35:
  * - Dummy model (no mali) backend will now clear HWC values after each sample
+ * 11.36:
+ * - Remove legacy definitions:
+ *   - base_jit_alloc_info_10_2
+ *   - base_jit_alloc_info_11_5
+ *   - kbase_ioctl_mem_jit_init_10_2
+ *   - kbase_ioctl_mem_jit_init_11_5
  */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 35
+#define BASE_UK_VERSION_MINOR 36
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility between
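Because the 11.36 bump above removes uAPI symbols rather than adding any, an out-of-tree user-space client may want to confirm the negotiated interface version before assuming the legacy JIT-init ioctls still exist. The sketch below is a hypothetical illustration only: the include paths, the major/minor field names on struct kbase_ioctl_version_check, and the write-back behaviour of KBASE_IOCTL_VERSION_CHECK are assumptions about this uapi tree, not something this patch defines.

#include <stdio.h>
#include <sys/ioctl.h>

#include "mali_kbase_ioctl.h"          /* assumed include path for these uapi headers */
#include "jm/mali_kbase_jm_ioctl.h"

/* Returns non-zero if the legacy *_10_2 / *_11_5 JIT-init ioctls might still
 * be present, i.e. the driver negotiated a version older than 11.36. */
static int legacy_jit_init_possible(int kbase_fd)
{
	struct kbase_ioctl_version_check vc = {
		.major = BASE_UK_VERSION_MAJOR,   /* field names assumed */
		.minor = BASE_UK_VERSION_MINOR,
	};

	if (ioctl(kbase_fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0) {
		perror("KBASE_IOCTL_VERSION_CHECK");
		return 0;
	}

	/* Assumes the driver writes the negotiated version back into vc. */
	return vc.major == 11 && vc.minor < 36;
}

From 11.36 onward a client should use KBASE_IOCTL_MEM_JIT_INIT unconditionally; a helper like the one above is only useful for deciding whether an old fallback path is worth keeping at all.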
diff --git a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
index 6adbd81bcc70..e6cac0eb2a1a 100644
--- a/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
+++ b/include/uapi/gpu/arm/bifrost/mali_base_kernel.h
@@ -197,55 +197,6 @@ struct base_mem_aliasing_info {
  */
 #define BASE_JIT_ALLOC_COUNT (255)
 
-/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
- *
- * jit_version is 1
- *
- * Due to the lack of padding specified, user clients between 32 and 64-bit
- * may have assumed a different size of the struct
- *
- * An array of structures was not supported
- */
-struct base_jit_alloc_info_10_2 {
-	__u64 gpu_alloc_addr;
-	__u64 va_pages;
-	__u64 commit_pages;
-	__u64 extension;
-	__u8 id;
-};
-
-/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
- * to 11.19
- *
- * This structure had a number of modifications during and after kernel driver
- * version 11.5, but remains size-compatible throughout its version history, and
- * with earlier variants compatible with future variants by requiring
- * zero-initialization to the unused space in the structure.
- *
- * jit_version is 2
- *
- * Kernel driver version history:
- * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
- *       must be zero. Kbase minor version was not incremented, so some
- *       versions of 11.5 do not have this change.
- * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
- *       minor version not incremented)
- * 11.6: Added 'flags', replacing 1 padding byte
- * 11.10: Arrays of this structure are supported
- */
-struct base_jit_alloc_info_11_5 {
-	__u64 gpu_alloc_addr;
-	__u64 va_pages;
-	__u64 commit_pages;
-	__u64 extension;
-	__u8 id;
-	__u8 bin_id;
-	__u8 max_allocations;
-	__u8 flags;
-	__u8 padding[2];
-	__u16 usage_id;
-};
-
 /**
  * struct base_jit_alloc_info - Structure which describes a JIT allocation
  *                              request.
@@ -275,16 +226,6 @@ struct base_jit_alloc_info_11_5 {
  * @heap_info_gpu_addr: Pointer to an object in GPU memory describing
  *                      the actual usage of the region.
  *
- * jit_version is 3.
- *
- * When modifications are made to this structure, it is still compatible with
- * jit_version 3 when: a) the size is unchanged, and b) new members only
- * replace the padding bytes.
- *
- * Previous jit_version history:
- * jit_version == 1, refer to &base_jit_alloc_info_10_2
- * jit_version == 2, refer to &base_jit_alloc_info_11_5
- *
 * Kbase version history:
 * 11.20: added @heap_info_gpu_addr
 */
diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
index 42d93ba4c150..962decc10efc 100644
--- a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h
@@ -221,6 +221,7 @@ struct prfcnt_enum_sample_info {
 
 /**
  * struct prfcnt_enum_item - Performance counter enumeration item.
+ * @padding: Padding bytes.
  * @hdr: Header describing the type of item in the list.
  * @u: Structure containing discriptor for enumeration item type.
  * @u.block_counter: Performance counter block descriptor.
@@ -229,6 +230,7 @@ struct prfcnt_enum_sample_info {
  */
 struct prfcnt_enum_item {
 	struct prfcnt_item_header hdr;
+	__u8 padding[4];
 	/** union u - union of block_counter and request */
 	union {
 		struct prfcnt_enum_block_counter block_counter;
@@ -305,6 +307,7 @@ struct prfcnt_request_scope {
 
 /**
  * struct prfcnt_request_item - Performance counter request item.
+ * @padding: Padding bytes.
  * @hdr: Header describing the type of item in the list.
  * @u: Structure containing descriptor for request type.
  * @u.req_mode: Mode request descriptor.
@@ -313,6 +316,7 @@ struct prfcnt_request_scope {
  */
 struct prfcnt_request_item {
 	struct prfcnt_item_header hdr;
+	__u8 padding[4];
 	/** union u - union on req_mode and req_enable */
 	union {
 		struct prfcnt_request_mode req_mode;
@@ -417,6 +421,7 @@ struct prfcnt_block_metadata {
 
 /**
  * struct prfcnt_metadata - Performance counter metadata item.
+ * @padding: Padding bytes.
  * @hdr: Header describing the type of item in the list.
  * @u: Structure containing descriptor for metadata type.
  * @u.sample_md: Counter sample data metadata descriptor.
@@ -425,6 +430,7 @@ struct prfcnt_block_metadata {
  */
 struct prfcnt_metadata {
 	struct prfcnt_item_header hdr;
+	__u8 padding[4];
 	union {
 		struct prfcnt_sample_metadata sample_md;
 		struct prfcnt_clock_metadata clock_md;
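The new padding[4] members above all serve the same purpose: they pin the union that follows the item header to a fixed, 8-byte-aligned offset so that 32-bit and 64-bit clients agree on the layout. The sketch below demonstrates the idea with stand-in types; the 4-byte size assumed for the header mirror is an illustration only, not something shown in this patch.

#include <stddef.h>
#include <stdint.h>

/* Stand-in for prfcnt_item_header; its real contents are not visible in this
 * hunk, so a 4-byte header is assumed purely for illustration. */
struct ex_item_header {
	uint16_t item_type;
	uint16_t item_version;
};

struct ex_item {
	struct ex_item_header hdr;
	uint8_t padding[4];     /* mirrors the new explicit padding */
	union {
		uint64_t word;  /* stands in for the 64-bit members of the real unions */
	} u;
};

/* With the explicit padding, the union starts at offset 8 on every common ABI
 * rather than wherever the compiler's default alignment rules would put it. */
_Static_assert(offsetof(struct ex_item, u) == 8, "union must start at offset 8");

The same reasoning applies to the paddings_1/paddings_2 members added to struct mali_base_gpu_core_props earlier in this series: explicit padding keeps a shared user/kernel structure's size and member offsets identical across ABIs.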
diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
index e691aea47b40..63bf48b603ef 100644
--- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h
@@ -218,52 +218,6 @@ struct kbase_ioctl_get_ddk_version {
 #define KBASE_IOCTL_GET_DDK_VERSION \
 	_IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
 
-/**
- * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
- *                                        allocator (between kernel driver
- *                                        version 10.2--11.4)
- * @va_pages: Number of VA pages to reserve for JIT
- *
- * Note that depending on the VA size of the application and GPU, the value
- * specified in @va_pages may be ignored.
- *
- * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
- * backwards compatibility.
- */
-struct kbase_ioctl_mem_jit_init_10_2 {
-	__u64 va_pages;
-};
-
-#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
-	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
-
-/**
- * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
- *                                        allocator (between kernel driver
- *                                        version 11.5--11.19)
- * @va_pages: Number of VA pages to reserve for JIT
- * @max_allocations: Maximum number of concurrent allocations
- * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
- * @group_id: Group ID to be used for physical allocations
- * @padding: Currently unused, must be zero
- *
- * Note that depending on the VA size of the application and GPU, the value
- * specified in @va_pages may be ignored.
- *
- * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
- * backwards compatibility.
- */
-struct kbase_ioctl_mem_jit_init_11_5 {
-	__u64 va_pages;
-	__u8 max_allocations;
-	__u8 trim_level;
-	__u8 group_id;
-	__u8 padding[5];
-};
-
-#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
-	_IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
-
 /**
  * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
  *                                   allocator
diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
similarity index 88%
rename from drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
rename to include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
index c2fb3f572f2d..329845005341 100644
--- a/drivers/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/include/uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -23,8 +23,8 @@
  * DOC: Header file for the size of the buffer to accumulate the histogram report text in
  */
 
-#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
-#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#ifndef _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
 
 /**
  * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report text
@@ -32,5 +32,4 @@
  */
 #define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 54) + 56))
 
-#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
-
+#endif /*_UAPI_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
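Since the renamed header now lives under the uapi tree, it may be worth spelling out what KBASE_MEM_PROFILE_MAX_BUF_SIZE actually evaluates to: 64 + ((80 + 56 * 64) * 54) + 56 = 64 + (3664 * 54) + 56 = 197976 bytes, a little over 193 KiB. The compile-time check below restates the arithmetic locally so it stands alone; the EX_* name is a local stand-in, not part of the header.

#include <stddef.h>

/* Local restatement of the macro from the renamed header, kept here only so
 * the check compiles on its own. */
#define EX_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 54) + 56))

_Static_assert(EX_MEM_PROFILE_MAX_BUF_SIZE == 197976,
	       "unexpected mem profile buffer size");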