vkd3d: Fix invalid atomic behaviour in the view cache linked list.

The list suffers from the ABA problem, where comparison with the head succeeds but the head's `next` pointer has changed. Occurs in Cyberpunk 2077, on NVIDIA at least.
Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/297
2025-04-13 05:43:18 -07:00 · 2023-08-09 00:48:19 +10:00 · 2023-08-23 22:50:33 +02:00
parent 4f2e07a45d
commit 3ca2259807
3 changed files with 60 additions and 19 deletions
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@ -2435,21 +2435,25 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
 static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size)
 {
-    cache->head = NULL;
+    memset(cache, 0, sizeof(*cache));
    cache->size = size;
 }
 static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache)
 {
    union d3d12_desc_object u;
    unsigned int i;
    void *next;
-    for (u.object = cache->head; u.object; u.object = next)
+    for (i = 0; i < ARRAY_SIZE(cache->heads); ++i)
    {
        for (u.object = cache->heads[i].head; u.object; u.object = next)
        {
            next = u.header->next;
            vkd3d_free(u.object);
        }
    }
 }
 /* ID3D12Device */
 static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface)
--- a/libs/vkd3d/resource.c
+++ b/libs/vkd3d/resource.c
@ -2282,37 +2282,66 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource)
    return d3d12_resource_decref(impl_from_ID3D12Resource(resource));
 }
-/* Objects are cached so that vkd3d_view_incref() can safely check the refcount
+#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1)
- * of an object freed by another thread. */
+
 /* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an
 * object freed by another thread. This could be implemented as a single atomic linked
 * list, but it requires handling the ABA problem, which brings issues with cross-platform
 * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */
 static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache)
 {
    union d3d12_desc_object u;
-    void *next;
+    unsigned int i;
-    do
+    STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK));
    i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
    for (;;)
    {
-        u.object = cache->head;
+        if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
-        if (!u.object)
+        {
-            return vkd3d_malloc(cache->size);
+            if ((u.object = cache->heads[i].head))
-        next = u.header->next;
+            {
-    }
+                vkd3d_atomic_decrement(&cache->free_count);
-    while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next));
+                cache->heads[i].head = u.header->next;
-
+                vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
                return u.object;
            }
            vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
        }
        /* Keeping a free count avoids uncertainty over when this loop should terminate,
         * which could result in excess allocations gradually increasing without limit. */
        if (cache->free_count < ARRAY_SIZE(cache->heads))
            return vkd3d_malloc(cache->size);
        i = (i + 1) & HEAD_INDEX_MASK;
    }
 }
 static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object)
 {
    union d3d12_desc_object u = {object};
    unsigned int i;
    void *head;
-    do
+    /* Using the same index as above may result in a somewhat uneven distribution,
     * but the main objective is to avoid costly spinlock contention. */
    i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
    for (;;)
    {
-        head = cache->head;
+        if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
            break;
        i = (i + 1) & HEAD_INDEX_MASK;
    }
    head = cache->heads[i].head;
    u.header->next = head;
    cache->heads[i].head = u.object;
    vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
    vkd3d_atomic_increment(&cache->free_count);
 }
-    while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object));
+
-}
+#undef HEAD_INDEX_MASK
 static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device)
 {
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@ -1690,9 +1690,17 @@ struct vkd3d_uav_clear_state
 HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
 void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
 struct desc_object_cache_head
 {
    void *head;
    unsigned int spinlock;
 };
 struct vkd3d_desc_object_cache
 {
-    void * volatile head;
+    struct desc_object_cache_head heads[16];
    unsigned int next_index;
    unsigned int free_count;
    size_t size;
 };