From 4b2ad3bf49e77c6cb0e541329ac851d054e6f649 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Mon, 7 Jun 2021 16:26:18 -0500 Subject: [PATCH] ntdll: Reimplement Win32 futexes on top of thread-ID alerts. Signed-off-by: Zebediah Figura Signed-off-by: Zebediah Figura --- dlls/ntdll/sync.c | 173 ++++++++++++++++++++++++++++++++++++++- dlls/ntdll/unix/loader.c | 3 - dlls/ntdll/unix/sync.c | 162 ------------------------------------ dlls/ntdll/unixlib.h | 6 +- 4 files changed, 171 insertions(+), 173 deletions(-) diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c index bfb30661864..adc03c9ff7a 100644 --- a/dlls/ntdll/sync.c +++ b/dlls/ntdll/sync.c @@ -34,11 +34,18 @@ #include "windef.h" #include "winternl.h" #include "wine/debug.h" +#include "wine/list.h" #include "ntdll_misc.h" WINE_DEFAULT_DEBUG_CHANNEL(sync); WINE_DECLARE_DEBUG_CHANNEL(relay); +static const char *debugstr_timeout( const LARGE_INTEGER *timeout ) +{ + if (!timeout) return "(infinite)"; + return wine_dbgstr_longlong( timeout->QuadPart ); +} + /****************************************************************** * RtlRunOnceInitialize (NTDLL.@) */ @@ -863,13 +870,111 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable, return status; } +/* RtlWaitOnAddress() and RtlWakeAddress*(), hereafter referred to as "Win32 + * futexes", offer futex-like semantics with a variable set of address sizes, + * but are limited to a single process. They are also fair—the documentation + * specifies this, and tests bear it out. + * + * On Windows they are implemented using NtAlertThreadByThreadId and + * NtWaitForAlertByThreadId, which manipulate a single flag (similar to a + * manual-reset event) per thread. This can be tested by attempting to wake a + * thread waiting in RtlWaitOnAddress() via NtAlertThreadByThreadId. 
+ */ + +struct futex_entry +{ + struct list entry; + const void *addr; + DWORD tid; +}; + +struct futex_queue +{ + struct list queue; + LONG lock; +}; + +static struct futex_queue futex_queues[256]; + +static struct futex_queue *get_futex_queue( const void *addr ) +{ + ULONG_PTR val = (ULONG_PTR)addr; + + return &futex_queues[(val >> 4) % ARRAY_SIZE(futex_queues)]; +} + +static void spin_wrlock( LONG *lock ) +{ + while (InterlockedCompareExchange( lock, -1, 0 )) + YieldProcessor(); +} + +static void spin_wrunlock( LONG *lock ) +{ + InterlockedExchange( lock, 0 ); +} + +static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size ) +{ + switch (size) + { + case 1: + return (*(const UCHAR *)addr == *(const UCHAR *)cmp); + case 2: + return (*(const USHORT *)addr == *(const USHORT *)cmp); + case 4: + return (*(const ULONG *)addr == *(const ULONG *)cmp); + case 8: + return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp); + } + + return FALSE; +} + /*********************************************************************** * RtlWaitOnAddress (NTDLL.@) */ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size, const LARGE_INTEGER *timeout ) { - return unix_funcs->RtlWaitOnAddress( addr, cmp, size, timeout ); + struct futex_queue *queue = get_futex_queue( addr ); + struct futex_entry entry; + NTSTATUS ret; + + TRACE("addr %p cmp %p size %#Ix timeout %s\n", addr, cmp, size, debugstr_timeout( timeout )); + + if (size != 1 && size != 2 && size != 4 && size != 8) + return STATUS_INVALID_PARAMETER; + + entry.addr = addr; + entry.tid = GetCurrentThreadId(); + + spin_wrlock( &queue->lock ); + + if (!compare_addr( addr, cmp, size )) + { + spin_wrunlock( &queue->lock ); + return STATUS_SUCCESS; + } + + if (!queue->queue.next) + list_init( &queue->queue ); + list_add_tail( &queue->queue, &entry.entry ); + + spin_wrunlock( &queue->lock ); + + ret = NtWaitForAlertByThreadId( NULL, timeout ); + + spin_wrlock( &queue->lock ); + /* We may have already 
been removed by a call to RtlWakeAddressSingle(). */ + if (entry.addr) + list_remove( &entry.entry ); + spin_wrunlock( &queue->lock ); + + TRACE("returning %#x\n", ret); + + if (ret == STATUS_ALERTED) ret = STATUS_SUCCESS; + return ret; } /*********************************************************************** @@ -877,7 +982,37 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size */ void WINAPI RtlWakeAddressAll( const void *addr ) { - return unix_funcs->RtlWakeAddressAll( addr ); + struct futex_queue *queue = get_futex_queue( addr ); + unsigned int count = 0, i; + struct futex_entry *entry; + DWORD tids[256]; + + TRACE("%p\n", addr); + + if (!addr) return; + + spin_wrlock( &queue->lock ); + + if (!queue->queue.next) + list_init(&queue->queue); + + LIST_FOR_EACH_ENTRY( entry, &queue->queue, struct futex_entry, entry ) + { + if (entry->addr == addr) + { + /* Try to buffer wakes, so that we don't make a system call while + * holding a spinlock. */ + if (count < ARRAY_SIZE(tids)) + tids[count++] = entry->tid; + else + NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)entry->tid ); + } + } + + spin_wrunlock( &queue->lock ); + + for (i = 0; i < count; ++i) + NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)tids[i] ); } /*********************************************************************** @@ -885,5 +1020,37 @@ void WINAPI RtlWakeAddressAll( const void *addr ) */ void WINAPI RtlWakeAddressSingle( const void *addr ) { - return unix_funcs->RtlWakeAddressSingle( addr ); + struct futex_queue *queue = get_futex_queue( addr ); + struct futex_entry *entry; + DWORD tid = 0; + + TRACE("%p\n", addr); + + if (!addr) return; + + spin_wrlock( &queue->lock ); + + if (!queue->queue.next) + list_init(&queue->queue); + + LIST_FOR_EACH_ENTRY( entry, &queue->queue, struct futex_entry, entry ) + { + if (entry->addr == addr) + { + /* Try to buffer wakes, so that we don't make a system call while + * holding a spinlock. 
*/ + tid = entry->tid; + + /* Remove this entry from the queue, so that a simultaneous call to + * RtlWakeAddressSingle() will not also wake it—two simultaneous + * calls must wake at least two waiters if they exist. */ + entry->addr = NULL; + list_remove( &entry->entry ); + break; + } + } + + spin_wrunlock( &queue->lock ); + + if (tid) NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)tid ); } diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c index a411b01a389..59bc978022c 100644 --- a/dlls/ntdll/unix/loader.c +++ b/dlls/ntdll/unix/loader.c @@ -2146,9 +2146,6 @@ static struct unix_funcs unix_funcs = NtCurrentTeb, #endif RtlGetSystemTimePrecise, - RtlWaitOnAddress, - RtlWakeAddressAll, - RtlWakeAddressSingle, fast_RtlpWaitForCriticalSection, fast_RtlpUnWaitCriticalSection, fast_RtlDeleteCriticalSection, diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index ce78c11fd4b..c37dbc5e96e 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -74,10 +74,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(sync); HANDLE keyed_event = 0; -static const LARGE_INTEGER zero_timeout; - -static pthread_mutex_t addr_mutex = PTHREAD_MUTEX_INITIALIZER; - static const char *debugstr_timeout( const LARGE_INTEGER *timeout ) { if (!timeout) return "(infinite)"; @@ -187,24 +183,6 @@ static void timespec_from_timeout( struct timespec *timespec, const LARGE_INTEGE #endif -static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size ) -{ - switch (size) - { - case 1: - return (*(const UCHAR *)addr == *(const UCHAR *)cmp); - case 2: - return (*(const USHORT *)addr == *(const USHORT *)cmp); - case 4: - return (*(const ULONG *)addr == *(const ULONG *)cmp); - case 8: - return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp); - } - - return FALSE; -} - - /* create a struct security_descriptor and contained information in one contiguous piece of memory */ NTSTATUS alloc_object_attributes( const OBJECT_ATTRIBUTES *attr, struct object_attributes **ret, data_size_t *ret_len 
) @@ -3035,71 +3013,6 @@ NTSTATUS CDECL fast_RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable, return STATUS_SUCCESS; } - -/* We can't map addresses to futex directly, because an application can wait on - * 8 bytes, and we can't pass all 8 as the compare value to futex(). Instead we - * map all addresses to a small fixed table of futexes. This may result in - * spurious wakes, but the application is already expected to handle those. */ - -static int addr_futex_table[256]; - -static inline int *hash_addr( const void *addr ) -{ - ULONG_PTR val = (ULONG_PTR)addr; - - return &addr_futex_table[(val >> 2) & 255]; -} - -static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - int *futex; - int val; - struct timespec timespec; - int ret; - - if (!use_futexes()) - return STATUS_NOT_IMPLEMENTED; - - futex = hash_addr( addr ); - - /* We must read the previous value of the futex before checking the value - * of the address being waited on. That way, if we receive a wake between - * now and waiting on the futex, we know that val will have changed. - * Use an atomic load so that memory accesses are ordered between this read - * and the increment below. 
*/ - val = InterlockedCompareExchange( futex, 0, 0 ); - if (!compare_addr( addr, cmp, size )) - return STATUS_SUCCESS; - - if (timeout) - { - timespec_from_timeout( &timespec, timeout ); - ret = futex_wait( futex, val, &timespec ); - } - else - ret = futex_wait( futex, val, NULL ); - - if (ret == -1 && errno == ETIMEDOUT) - return STATUS_TIMEOUT; - return STATUS_SUCCESS; -} - -static inline NTSTATUS fast_wake_addr( const void *addr ) -{ - int *futex; - - if (!use_futexes()) - return STATUS_NOT_IMPLEMENTED; - - futex = hash_addr( addr ); - - InterlockedIncrement( futex ); - - futex_wake( futex, INT_MAX ); - return STATUS_SUCCESS; -} - #else NTSTATUS CDECL fast_RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock ) @@ -3142,79 +3055,4 @@ NTSTATUS CDECL fast_wait_cv( RTL_CONDITION_VARIABLE *variable, const void *value return STATUS_NOT_IMPLEMENTED; } -static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - return STATUS_NOT_IMPLEMENTED; -} - -static inline NTSTATUS fast_wake_addr( const void *addr ) -{ - return STATUS_NOT_IMPLEMENTED; -} - #endif - - -/*********************************************************************** - * RtlWaitOnAddress (NTDLL.@) - */ -NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - select_op_t select_op; - NTSTATUS ret; - timeout_t abs_timeout = timeout ? 
timeout->QuadPart : TIMEOUT_INFINITE; - - if (size != 1 && size != 2 && size != 4 && size != 8) - return STATUS_INVALID_PARAMETER; - - if ((ret = fast_wait_addr( addr, cmp, size, timeout )) != STATUS_NOT_IMPLEMENTED) - return ret; - - mutex_lock( &addr_mutex ); - if (!compare_addr( addr, cmp, size )) - { - mutex_unlock( &addr_mutex ); - return STATUS_SUCCESS; - } - - if (abs_timeout < 0) - { - LARGE_INTEGER now; - - NtQueryPerformanceCounter( &now, NULL ); - abs_timeout -= now.QuadPart; - } - - select_op.keyed_event.op = SELECT_KEYED_EVENT_WAIT; - select_op.keyed_event.handle = wine_server_obj_handle( keyed_event ); - select_op.keyed_event.key = wine_server_client_ptr( addr ); - - return server_select( &select_op, sizeof(select_op.keyed_event), SELECT_INTERRUPTIBLE, - abs_timeout, NULL, &addr_mutex, NULL ); -} - -/*********************************************************************** - * RtlWakeAddressAll (NTDLL.@) - */ -void WINAPI RtlWakeAddressAll( const void *addr ) -{ - if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return; - - mutex_lock( &addr_mutex ); - while (NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ) == STATUS_SUCCESS) {} - mutex_unlock( &addr_mutex ); -} - -/*********************************************************************** - * RtlWakeAddressSingle (NTDLL.@) - */ -void WINAPI RtlWakeAddressSingle( const void *addr ) -{ - if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return; - - mutex_lock( &addr_mutex ); - NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ); - mutex_unlock( &addr_mutex ); -} diff --git a/dlls/ntdll/unixlib.h b/dlls/ntdll/unixlib.h index 62030d91cdb..ded05c40e17 100644 --- a/dlls/ntdll/unixlib.h +++ b/dlls/ntdll/unixlib.h @@ -26,7 +26,7 @@ struct _DISPATCHER_CONTEXT; /* increment this when you change the function table */ -#define NTDLL_UNIXLIB_VERSION 128 +#define NTDLL_UNIXLIB_VERSION 129 struct unix_funcs { @@ -37,10 +37,6 @@ struct unix_funcs /* other Win32 API functions */ LONGLONG (WINAPI 
*RtlGetSystemTimePrecise)(void); - NTSTATUS (WINAPI *RtlWaitOnAddress)( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ); - void (WINAPI *RtlWakeAddressAll)( const void *addr ); - void (WINAPI *RtlWakeAddressSingle)( const void *addr ); /* fast locks */ NTSTATUS (CDECL *fast_RtlpWaitForCriticalSection)( RTL_CRITICAL_SECTION *crit, int timeout ); -- 2.33.0