vkd3d: Fix invalid atomic behaviour in the view cache linked list.

The list suffers from the ABA problem, where comparison with the head
succeeds but the head's `next` pointer has changed. Occurs in Cyberpunk
2077, on NVIDIA at least.
This commit is contained in:
Conor McCarthy 2023-08-09 00:48:19 +10:00 committed by Alexandre Julliard
parent 4f2e07a45d
commit 3ca2259807
Notes: Alexandre Julliard 2023-08-23 22:50:33 +02:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/297
3 changed files with 60 additions and 19 deletions

View File

@ -2435,21 +2435,25 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size)
{ {
cache->head = NULL; memset(cache, 0, sizeof(*cache));
cache->size = size; cache->size = size;
} }
static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache) static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache)
{ {
union d3d12_desc_object u; union d3d12_desc_object u;
unsigned int i;
void *next; void *next;
for (u.object = cache->head; u.object; u.object = next) for (i = 0; i < ARRAY_SIZE(cache->heads); ++i)
{
for (u.object = cache->heads[i].head; u.object; u.object = next)
{ {
next = u.header->next; next = u.header->next;
vkd3d_free(u.object); vkd3d_free(u.object);
} }
} }
}
/* ID3D12Device */ /* ID3D12Device */
static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface) static inline struct d3d12_device *impl_from_ID3D12Device(ID3D12Device *iface)

View File

@ -2282,37 +2282,66 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource)
return d3d12_resource_decref(impl_from_ID3D12Resource(resource)); return d3d12_resource_decref(impl_from_ID3D12Resource(resource));
} }
/* Objects are cached so that vkd3d_view_incref() can safely check the refcount #define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1)
* of an object freed by another thread. */
/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an
* object freed by another thread. This could be implemented as a single atomic linked
* list, but it requires handling the ABA problem, which brings issues with cross-platform
* support, compiler support, and non-universal x86-64 support for 128-bit CAS. */
static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache) static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache)
{ {
union d3d12_desc_object u; union d3d12_desc_object u;
void *next; unsigned int i;
do STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK));
i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
for (;;)
{ {
u.object = cache->head; if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
if (!u.object) {
return vkd3d_malloc(cache->size); if ((u.object = cache->heads[i].head))
next = u.header->next; {
} vkd3d_atomic_decrement(&cache->free_count);
while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, u.object, next)); cache->heads[i].head = u.header->next;
vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
return u.object; return u.object;
} }
vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
}
/* Keeping a free count avoids uncertainty over when this loop should terminate,
* which could result in excess allocations gradually increasing without limit. */
if (cache->free_count < ARRAY_SIZE(cache->heads))
return vkd3d_malloc(cache->size);
i = (i + 1) & HEAD_INDEX_MASK;
}
}
static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object) static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object)
{ {
union d3d12_desc_object u = {object}; union d3d12_desc_object u = {object};
unsigned int i;
void *head; void *head;
do /* Using the same index as above may result in a somewhat uneven distribution,
* but the main objective is to avoid costly spinlock contention. */
i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
for (;;)
{ {
head = cache->head; if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
break;
i = (i + 1) & HEAD_INDEX_MASK;
}
head = cache->heads[i].head;
u.header->next = head; u.header->next = head;
cache->heads[i].head = u.object;
vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
vkd3d_atomic_increment(&cache->free_count);
} }
while (!vkd3d_atomic_compare_exchange_pointer(&cache->head, head, u.object));
} #undef HEAD_INDEX_MASK
static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device) static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device)
{ {

View File

@ -1690,9 +1690,17 @@ struct vkd3d_uav_clear_state
HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device); void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
/* One cache bucket: a singly linked free list guarded by a per-bucket
 * spinlock (0 = unlocked, 1 = locked; toggled with atomic CAS/exchange). */
struct desc_object_cache_head
{
    void *head;
    unsigned int spinlock;
};

/* Multi-bucket free-object cache. Splitting the former single atomic list
 * into 16 spinlocked buckets sidesteps the ABA problem while keeping
 * contention low; next_index round-robins bucket selection and free_count
 * bounds allocation when all buckets appear empty. */
struct vkd3d_desc_object_cache
{
    struct desc_object_cache_head heads[16];
    unsigned int next_index;
    unsigned int free_count;
    size_t size;
};