From 4c3b3a58f64e3bf023778edb8a8543a2a3289070 Mon Sep 17 00:00:00 2001 From: Zebediah Figura Date: Mon, 2 Nov 2020 20:24:07 -0600 Subject: [PATCH] ntdll: Reimplement Win32 futexes on top of thread-ID alerts. Signed-off-by: Zebediah Figura --- dlls/ntdll/sync.c | 158 +++++++++++++++++++++++++++++++++++++- dlls/ntdll/unix/loader.c | 3 - dlls/ntdll/unix/sync.c | 162 --------------------------------------- dlls/ntdll/unixlib.h | 6 +- 4 files changed, 156 insertions(+), 173 deletions(-) diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c index f1263ae33fd..348f260c3b0 100644 --- a/dlls/ntdll/sync.c +++ b/dlls/ntdll/sync.c @@ -36,6 +36,13 @@ #include "wine/debug.h" #include "ntdll_misc.h" +WINE_DEFAULT_DEBUG_CHANNEL(sync); + +static const char *debugstr_timeout( const LARGE_INTEGER *timeout ) +{ + if (!timeout) return "(infinite)"; + return wine_dbgstr_longlong( timeout->QuadPart ); +} /****************************************************************** * RtlRunOnceInitialize (NTDLL.@) @@ -530,13 +537,111 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable, return status; } + +#define FUTEX_ADDR_BLOCK_SIZE (65536 / sizeof(void *)) +static void **futex_addr_blocks[4096]; + +static unsigned int tid_to_index( DWORD tid, unsigned int *block_idx ) +{ + unsigned int idx = (tid >> 2) - 1; + *block_idx = idx / FUTEX_ADDR_BLOCK_SIZE; + return idx % FUTEX_ADDR_BLOCK_SIZE; +} + +static HANDLE index_to_tid( unsigned int block_idx, unsigned int idx ) +{ + return (HANDLE)((((block_idx * FUTEX_ADDR_BLOCK_SIZE) + idx) + 1) << 2); +} + +static void **get_futex_entry( DWORD tid ) +{ + unsigned int block_idx, idx = tid_to_index( tid, &block_idx ); + + if (block_idx > ARRAY_SIZE(futex_addr_blocks)) + { + FIXME( "tid %#x is too high\n", tid ); + return NULL; + } + + if (!futex_addr_blocks[block_idx]) + { + SIZE_T size = FUTEX_ADDR_BLOCK_SIZE * sizeof(void *); + void *ptr = NULL; + + if (NtAllocateVirtualMemory( NtCurrentProcess(), &ptr, 0, &size, MEM_COMMIT, PAGE_READWRITE )) + return NULL; + if (InterlockedCompareExchangePointer( (void **)&futex_addr_blocks[block_idx], ptr, NULL )) + NtFreeVirtualMemory( NtCurrentProcess(), &ptr, &size, MEM_RELEASE ); /* someone beat us to it */ + } + + return &futex_addr_blocks[block_idx][idx % FUTEX_ADDR_BLOCK_SIZE]; +} + +static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size ) +{ + switch (size) + { + case 1: + return (*(const UCHAR *)addr == *(const UCHAR *)cmp); + case 2: + return (*(const USHORT *)addr == *(const USHORT *)cmp); + case 4: + return (*(const ULONG *)addr == *(const ULONG *)cmp); + case 8: + return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp); + } + + return FALSE; +} + /*********************************************************************** * RtlWaitOnAddress (NTDLL.@) */ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size, const LARGE_INTEGER *timeout ) { - return unix_funcs->RtlWaitOnAddress( addr, cmp, size, timeout ); + void **entry = get_futex_entry( GetCurrentThreadId() ); + NTSTATUS ret; + + TRACE("addr %p cmp %p size %#Ix timeout %s\n", addr, cmp, size, debugstr_timeout( timeout )); + + if (size != 1 && size != 2 && size != 4 && size != 8) + return STATUS_INVALID_PARAMETER; + + if (!entry) return STATUS_NO_MEMORY; + + InterlockedExchangePointer( entry, (void *)addr ); + + /* Ensure that the compare-and-swap above is ordered before the comparison + * below. This barrier is paired with another in RtlWakeByAddress*(). + * + * In more detail, given the following sequence: + * + * Thread A Thread B + * ----------------------------------------------------------------- + * RtlWaitOnAddress( &val ); val = 1; + * queue thread RtlWakeByAddress( &val ); + * MemoryBarrier(); <---- paired with ----> MemoryBarrier(); + * compare_addr( &val ); if (thread is queued) + * + * We must ensure that the thread is queued [through the above + * InterlockedExchangePointer()] before reading "val", and that writes to + * "val" by the application happen before we check for queued threads. + * Otherwise, thread A can deadlock: "val" may appear unchanged, while + * thread B observed that thread A was not queued. + */ + MemoryBarrier(); + + if (!compare_addr( addr, cmp, size )) + { + InterlockedExchangePointer( entry, NULL ); + return STATUS_SUCCESS; + } + + ret = NtWaitForAlertByThreadId( NULL, timeout ); + InterlockedExchangePointer( entry, NULL ); + if (ret == STATUS_ALERTED) ret = STATUS_SUCCESS; + return ret; } /*********************************************************************** @@ -544,7 +649,29 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size */ void WINAPI RtlWakeAddressAll( const void *addr ) { - return unix_funcs->RtlWakeAddressAll( addr ); + unsigned int i, j; + void **block; + + TRACE("%p\n", addr); + + if (!addr) return; + + /* Ensure that memory stores to "addr" are ordered before reading the + * array below. Paired with another barrier in RtlWaitOnAddress() [q.v.]. + */ + MemoryBarrier(); + + for (i = 0; i < ARRAY_SIZE(futex_addr_blocks); ++i) + { + block = futex_addr_blocks[i]; + if (!block) continue; + + for (j = 0; j < FUTEX_ADDR_BLOCK_SIZE; ++j) + { + if (block[j] == addr) + NtAlertThreadByThreadId( index_to_tid( i, j ) ); + } + } } /*********************************************************************** @@ -552,5 +679,30 @@ void WINAPI RtlWakeAddressAll( const void *addr ) */ void WINAPI RtlWakeAddressSingle( const void *addr ) { - return unix_funcs->RtlWakeAddressSingle( addr ); + unsigned int i, j; + void **block; + + TRACE("%p\n", addr); + + if (!addr) return; + + /* Ensure that memory stores to "addr" are ordered before reading the + * array below. Paired with another barrier in RtlWaitOnAddress() [q.v.]. + */ + MemoryBarrier(); + + for (i = 0; i < ARRAY_SIZE(futex_addr_blocks); ++i) + { + block = futex_addr_blocks[i]; + if (!block) continue; + + for (j = 0; j < FUTEX_ADDR_BLOCK_SIZE; ++j) + { + if (block[j] == addr && InterlockedCompareExchangePointer( &block[j], NULL, (void *)addr ) == addr) + { + NtAlertThreadByThreadId( index_to_tid( i, j ) ); + return; + } + } + } } diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c index ff49cd5f26b..6626e02139b 100644 --- a/dlls/ntdll/unix/loader.c +++ b/dlls/ntdll/unix/loader.c @@ -1811,9 +1811,6 @@ static struct unix_funcs unix_funcs = #endif DbgUiIssueRemoteBreakin, RtlGetSystemTimePrecise, - RtlWaitOnAddress, - RtlWakeAddressAll, - RtlWakeAddressSingle, fast_RtlpWaitForCriticalSection, fast_RtlpUnWaitCriticalSection, fast_RtlDeleteCriticalSection, diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c index 48960b5cb83..d8663e47ee6 100644 --- a/dlls/ntdll/unix/sync.c +++ b/dlls/ntdll/unix/sync.c @@ -80,10 +80,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(sync); HANDLE keyed_event = 0; -static const LARGE_INTEGER zero_timeout; - -static pthread_mutex_t addr_mutex = PTHREAD_MUTEX_INITIALIZER; - static const char *debugstr_timeout( const LARGE_INTEGER *timeout ) { if (!timeout) return "(infinite)"; @@ -193,24 +189,6 @@ static void timespec_from_timeout( struct timespec *timespec, const LARGE_INTEGE #endif -static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size ) -{ - switch (size) - { - case 1: - return (*(const UCHAR *)addr == *(const UCHAR *)cmp); - case 2: - return (*(const USHORT *)addr == *(const USHORT *)cmp); - case 4: - return (*(const ULONG *)addr == *(const ULONG *)cmp); - case 8: - return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp); - } - - return FALSE; -} - - /* create a struct security_descriptor and contained information in one contiguous piece of memory */ NTSTATUS alloc_object_attributes( const OBJECT_ATTRIBUTES *attr, struct object_attributes **ret, data_size_t *ret_len ) @@ -2924,71 +2902,6 @@ NTSTATUS CDECL fast_RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable, return STATUS_SUCCESS; } - -/* We can't map addresses to futex directly, because an application can wait on - * 8 bytes, and we can't pass all 8 as the compare value to futex(). Instead we - * map all addresses to a small fixed table of futexes. This may result in - * spurious wakes, but the application is already expected to handle those. */ - -static int addr_futex_table[256]; - -static inline int *hash_addr( const void *addr ) -{ - ULONG_PTR val = (ULONG_PTR)addr; - - return &addr_futex_table[(val >> 2) & 255]; -} - -static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - int *futex; - int val; - struct timespec timespec; - int ret; - - if (!use_futexes()) - return STATUS_NOT_IMPLEMENTED; - - futex = hash_addr( addr ); - - /* We must read the previous value of the futex before checking the value - * of the address being waited on. That way, if we receive a wake between - * now and waiting on the futex, we know that val will have changed. - * Use an atomic load so that memory accesses are ordered between this read - * and the increment below. */ - val = InterlockedCompareExchange( futex, 0, 0 ); - if (!compare_addr( addr, cmp, size )) - return STATUS_SUCCESS; - - if (timeout) - { - timespec_from_timeout( ×pec, timeout ); - ret = futex_wait( futex, val, ×pec ); - } - else - ret = futex_wait( futex, val, NULL ); - - if (ret == -1 && errno == ETIMEDOUT) - return STATUS_TIMEOUT; - return STATUS_SUCCESS; -} - -static inline NTSTATUS fast_wake_addr( const void *addr ) -{ - int *futex; - - if (!use_futexes()) - return STATUS_NOT_IMPLEMENTED; - - futex = hash_addr( addr ); - - InterlockedIncrement( futex ); - - futex_wake( futex, INT_MAX ); - return STATUS_SUCCESS; -} - #else NTSTATUS CDECL fast_RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock ) @@ -3031,79 +2944,4 @@ NTSTATUS CDECL fast_wait_cv( RTL_CONDITION_VARIABLE *variable, const void *value return STATUS_NOT_IMPLEMENTED; } -static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - return STATUS_NOT_IMPLEMENTED; -} - -static inline NTSTATUS fast_wake_addr( const void *addr ) -{ - return STATUS_NOT_IMPLEMENTED; -} - #endif - - -/*********************************************************************** - * RtlWaitOnAddress (NTDLL.@) - */ -NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ) -{ - select_op_t select_op; - NTSTATUS ret; - timeout_t abs_timeout = timeout ? timeout->QuadPart : TIMEOUT_INFINITE; - - if (size != 1 && size != 2 && size != 4 && size != 8) - return STATUS_INVALID_PARAMETER; - - if ((ret = fast_wait_addr( addr, cmp, size, timeout )) != STATUS_NOT_IMPLEMENTED) - return ret; - - mutex_lock( &addr_mutex ); - if (!compare_addr( addr, cmp, size )) - { - mutex_unlock( &addr_mutex ); - return STATUS_SUCCESS; - } - - if (abs_timeout < 0) - { - LARGE_INTEGER now; - - NtQueryPerformanceCounter( &now, NULL ); - abs_timeout -= now.QuadPart; - } - - select_op.keyed_event.op = SELECT_KEYED_EVENT_WAIT; - select_op.keyed_event.handle = wine_server_obj_handle( keyed_event ); - select_op.keyed_event.key = wine_server_client_ptr( addr ); - - return server_select( &select_op, sizeof(select_op.keyed_event), SELECT_INTERRUPTIBLE, - abs_timeout, NULL, &addr_mutex, NULL ); -} - -/*********************************************************************** - * RtlWakeAddressAll (NTDLL.@) - */ -void WINAPI RtlWakeAddressAll( const void *addr ) -{ - if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return; - - mutex_lock( &addr_mutex ); - while (NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ) == STATUS_SUCCESS) {} - mutex_unlock( &addr_mutex ); -} - -/*********************************************************************** - * RtlWakeAddressSingle (NTDLL.@) - */ -void WINAPI RtlWakeAddressSingle( const void *addr ) -{ - if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return; - - mutex_lock( &addr_mutex ); - NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ); - mutex_unlock( &addr_mutex ); -} diff --git a/dlls/ntdll/unixlib.h b/dlls/ntdll/unixlib.h index d0f2f4ed508..f5462125874 100644 --- a/dlls/ntdll/unixlib.h +++ b/dlls/ntdll/unixlib.h @@ -26,7 +26,7 @@ struct _DISPATCHER_CONTEXT; /* increment this when you change the function table */ -#define NTDLL_UNIXLIB_VERSION 121 +#define NTDLL_UNIXLIB_VERSION 122 struct unix_funcs { @@ -38,10 +38,6 @@ struct unix_funcs /* other Win32 API functions */ NTSTATUS (WINAPI *DbgUiIssueRemoteBreakin)( HANDLE process ); LONGLONG (WINAPI *RtlGetSystemTimePrecise)(void); - NTSTATUS (WINAPI *RtlWaitOnAddress)( const void *addr, const void *cmp, SIZE_T size, - const LARGE_INTEGER *timeout ); - void (WINAPI *RtlWakeAddressAll)( const void *addr ); - void (WINAPI *RtlWakeAddressSingle)( const void *addr ); /* fast locks */ NTSTATUS (CDECL *fast_RtlpWaitForCriticalSection)( RTL_CRITICAL_SECTION *crit, int timeout ); -- 2.30.2