wine-staging/patches/ntdll-NtAlertThreadByThreadId/0007-ntdll-Reimplement-Win32-futexes-on-top-of-thread-ID-.patch

441 lines
14 KiB
Diff
Raw Normal View History

From 4c99070bbd7dd897a6e27bd94e8f50206b3ad49c Mon Sep 17 00:00:00 2001
From: Zebediah Figura <z.figura12@gmail.com>
Date: Mon, 2 Nov 2020 20:24:07 -0600
Subject: [PATCH] ntdll: Reimplement Win32 futexes on top of thread-ID alerts.
Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
---
dlls/ntdll/sync.c | 158 +++++++++++++++++++++++++++++++++++++-
dlls/ntdll/unix/loader.c | 3 -
dlls/ntdll/unix/sync.c | 162 ---------------------------------------
dlls/ntdll/unixlib.h | 6 +-
4 files changed, 156 insertions(+), 173 deletions(-)
diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index f1263ae33fd..348f260c3b0 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -36,6 +36,13 @@
#include "wine/debug.h"
#include "ntdll_misc.h"
+WINE_DEFAULT_DEBUG_CHANNEL(sync);
+
+static const char *debugstr_timeout( const LARGE_INTEGER *timeout )
+{
+ if (!timeout) return "(infinite)";
+ return wine_dbgstr_longlong( timeout->QuadPart );
+}
/******************************************************************
* RtlRunOnceInitialize (NTDLL.@)
@@ -530,13 +537,111 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable,
return status;
}
+
+#define FUTEX_ADDR_BLOCK_SIZE (65536 / sizeof(void *))
+static void **futex_addr_blocks[4096];
+
+static unsigned int tid_to_index( DWORD tid, unsigned int *block_idx )
+{
+ unsigned int idx = (tid >> 2) - 1;
+ *block_idx = idx / FUTEX_ADDR_BLOCK_SIZE;
+ return idx % FUTEX_ADDR_BLOCK_SIZE;
+}
+
+static HANDLE index_to_tid( unsigned int block_idx, unsigned int idx )
+{
+ return (HANDLE)((((block_idx * FUTEX_ADDR_BLOCK_SIZE) + idx) + 1) << 2);
+}
+
+static void **get_futex_entry( DWORD tid )
+{
+ unsigned int block_idx, idx = tid_to_index( tid, &block_idx );
+
+ if (block_idx > ARRAY_SIZE(futex_addr_blocks))
+ {
+ FIXME( "tid %#x is too high\n", tid );
+ return NULL;
+ }
+
+ if (!futex_addr_blocks[block_idx])
+ {
+ SIZE_T size = FUTEX_ADDR_BLOCK_SIZE * sizeof(void *);
+ void *ptr = NULL;
+
+ if (NtAllocateVirtualMemory( NtCurrentProcess(), &ptr, 0, &size, MEM_COMMIT, PAGE_READWRITE ))
+ return NULL;
+ if (InterlockedCompareExchangePointer( (void **)&futex_addr_blocks[block_idx], ptr, NULL ))
+ NtFreeVirtualMemory( NtCurrentProcess(), &ptr, &size, MEM_RELEASE ); /* someone beat us to it */
+ }
+
+ return &futex_addr_blocks[block_idx][idx % FUTEX_ADDR_BLOCK_SIZE];
+}
+
+static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size )
+{
+ switch (size)
+ {
+ case 1:
+ return (*(const UCHAR *)addr == *(const UCHAR *)cmp);
+ case 2:
+ return (*(const USHORT *)addr == *(const USHORT *)cmp);
+ case 4:
+ return (*(const ULONG *)addr == *(const ULONG *)cmp);
+ case 8:
+ return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp);
+ }
+
+ return FALSE;
+}
+
/***********************************************************************
* RtlWaitOnAddress (NTDLL.@)
*/
NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size,
const LARGE_INTEGER *timeout )
{
- return unix_funcs->RtlWaitOnAddress( addr, cmp, size, timeout );
+ void **entry = get_futex_entry( GetCurrentThreadId() );
+ NTSTATUS ret;
+
+ TRACE("addr %p cmp %p size %#Ix timeout %s\n", addr, cmp, size, debugstr_timeout( timeout ));
+
+ if (size != 1 && size != 2 && size != 4 && size != 8)
+ return STATUS_INVALID_PARAMETER;
+
+ if (!entry) return STATUS_NO_MEMORY;
+
+ InterlockedExchangePointer( entry, (void *)addr );
+
+ /* Ensure that the compare-and-swap above is ordered before the comparison
+ * below. This barrier is paired with another in RtlWakeByAddress*().
+ *
+ * In more detail, given the following sequence:
+ *
+ * Thread A Thread B
+ * -----------------------------------------------------------------
+ * RtlWaitOnAddress( &val ); val = 1;
+ * queue thread RtlWakeByAddress( &val );
+ * MemoryBarrier(); <---- paired with ----> MemoryBarrier();
+ * compare_addr( &val ); if (thread is queued)
+ *
+ * We must ensure that the thread is queued [through the above
+ * InterlockedExchangePointer()] before reading "val", and that writes to
+ * "val" by the application happen before we check for queued threads.
+ * Otherwise, thread A can deadlock: "val" may appear unchanged, while
+ * thread B observed that thread A was not queued.
+ */
+ MemoryBarrier();
+
+ if (!compare_addr( addr, cmp, size ))
+ {
+ InterlockedExchangePointer( entry, NULL );
+ return STATUS_SUCCESS;
+ }
+
+ ret = NtWaitForAlertByThreadId( NULL, timeout );
+ InterlockedExchangePointer( entry, NULL );
+ if (ret == STATUS_ALERTED) ret = STATUS_SUCCESS;
+ return ret;
}
/***********************************************************************
@@ -544,7 +649,29 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size
*/
void WINAPI RtlWakeAddressAll( const void *addr )
{
- return unix_funcs->RtlWakeAddressAll( addr );
+ unsigned int i, j;
+ void **block;
+
+ TRACE("%p\n", addr);
+
+ if (!addr) return;
+
+ /* Ensure that memory stores to "addr" are ordered before reading the
+ * array below. Paired with another barrier in RtlWaitOnAddress() [q.v.].
+ */
+ MemoryBarrier();
+
+ for (i = 0; i < ARRAY_SIZE(futex_addr_blocks); ++i)
+ {
+ block = futex_addr_blocks[i];
+ if (!block) continue;
+
+ for (j = 0; j < FUTEX_ADDR_BLOCK_SIZE; ++j)
+ {
+ if (block[j] == addr)
+ NtAlertThreadByThreadId( index_to_tid( i, j ) );
+ }
+ }
}
/***********************************************************************
@@ -552,5 +679,30 @@ void WINAPI RtlWakeAddressAll( const void *addr )
*/
void WINAPI RtlWakeAddressSingle( const void *addr )
{
- return unix_funcs->RtlWakeAddressSingle( addr );
+ unsigned int i, j;
+ void **block;
+
+ TRACE("%p\n", addr);
+
+ if (!addr) return;
+
+ /* Ensure that memory stores to "addr" are ordered before reading the
+ * array below. Paired with another barrier in RtlWaitOnAddress() [q.v.].
+ */
+ MemoryBarrier();
+
+ for (i = 0; i < ARRAY_SIZE(futex_addr_blocks); ++i)
+ {
+ block = futex_addr_blocks[i];
+ if (!block) continue;
+
+ for (j = 0; j < FUTEX_ADDR_BLOCK_SIZE; ++j)
+ {
+ if (block[j] == addr && InterlockedCompareExchangePointer( &block[j], NULL, (void *)addr ) == addr)
+ {
+ NtAlertThreadByThreadId( index_to_tid( i, j ) );
+ return;
+ }
+ }
+ }
}
diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c
index e4c062cea98..3a4a943c65f 100644
--- a/dlls/ntdll/unix/loader.c
+++ b/dlls/ntdll/unix/loader.c
@@ -1816,9 +1816,6 @@ static struct unix_funcs unix_funcs =
#endif
DbgUiIssueRemoteBreakin,
RtlGetSystemTimePrecise,
- RtlWaitOnAddress,
- RtlWakeAddressAll,
- RtlWakeAddressSingle,
fast_RtlpWaitForCriticalSection,
fast_RtlpUnWaitCriticalSection,
fast_RtlDeleteCriticalSection,
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c
index 4fa890e5acc..8394a1b8601 100644
--- a/dlls/ntdll/unix/sync.c
+++ b/dlls/ntdll/unix/sync.c
@@ -80,10 +80,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(sync);
HANDLE keyed_event = 0;
-static const LARGE_INTEGER zero_timeout;
-
-static pthread_mutex_t addr_mutex = PTHREAD_MUTEX_INITIALIZER;
-
static const char *debugstr_timeout( const LARGE_INTEGER *timeout )
{
if (!timeout) return "(infinite)";
@@ -193,24 +189,6 @@ static void timespec_from_timeout( struct timespec *timespec, const LARGE_INTEGE
#endif
-static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size )
-{
- switch (size)
- {
- case 1:
- return (*(const UCHAR *)addr == *(const UCHAR *)cmp);
- case 2:
- return (*(const USHORT *)addr == *(const USHORT *)cmp);
- case 4:
- return (*(const ULONG *)addr == *(const ULONG *)cmp);
- case 8:
- return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp);
- }
-
- return FALSE;
-}
-
-
/* create a struct security_descriptor and contained information in one contiguous piece of memory */
NTSTATUS alloc_object_attributes( const OBJECT_ATTRIBUTES *attr, struct object_attributes **ret,
data_size_t *ret_len )
@@ -2911,71 +2889,6 @@ NTSTATUS CDECL fast_RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable,
return STATUS_SUCCESS;
}
-
-/* We can't map addresses to futex directly, because an application can wait on
- * 8 bytes, and we can't pass all 8 as the compare value to futex(). Instead we
- * map all addresses to a small fixed table of futexes. This may result in
- * spurious wakes, but the application is already expected to handle those. */
-
-static int addr_futex_table[256];
-
-static inline int *hash_addr( const void *addr )
-{
- ULONG_PTR val = (ULONG_PTR)addr;
-
- return &addr_futex_table[(val >> 2) & 255];
-}
-
-static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size,
- const LARGE_INTEGER *timeout )
-{
- int *futex;
- int val;
- struct timespec timespec;
- int ret;
-
- if (!use_futexes())
- return STATUS_NOT_IMPLEMENTED;
-
- futex = hash_addr( addr );
-
- /* We must read the previous value of the futex before checking the value
- * of the address being waited on. That way, if we receive a wake between
- * now and waiting on the futex, we know that val will have changed.
- * Use an atomic load so that memory accesses are ordered between this read
- * and the increment below. */
- val = InterlockedCompareExchange( futex, 0, 0 );
- if (!compare_addr( addr, cmp, size ))
- return STATUS_SUCCESS;
-
- if (timeout)
- {
- timespec_from_timeout( &timespec, timeout );
- ret = futex_wait( futex, val, &timespec );
- }
- else
- ret = futex_wait( futex, val, NULL );
-
- if (ret == -1 && errno == ETIMEDOUT)
- return STATUS_TIMEOUT;
- return STATUS_SUCCESS;
-}
-
-static inline NTSTATUS fast_wake_addr( const void *addr )
-{
- int *futex;
-
- if (!use_futexes())
- return STATUS_NOT_IMPLEMENTED;
-
- futex = hash_addr( addr );
-
- InterlockedIncrement( futex );
-
- futex_wake( futex, INT_MAX );
- return STATUS_SUCCESS;
-}
-
#else
NTSTATUS CDECL fast_RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
@@ -3018,79 +2931,4 @@ NTSTATUS CDECL fast_wait_cv( RTL_CONDITION_VARIABLE *variable, const void *value
return STATUS_NOT_IMPLEMENTED;
}
-static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size,
- const LARGE_INTEGER *timeout )
-{
- return STATUS_NOT_IMPLEMENTED;
-}
-
-static inline NTSTATUS fast_wake_addr( const void *addr )
-{
- return STATUS_NOT_IMPLEMENTED;
-}
-
#endif
-
-
-/***********************************************************************
- * RtlWaitOnAddress (NTDLL.@)
- */
-NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size,
- const LARGE_INTEGER *timeout )
-{
- select_op_t select_op;
- NTSTATUS ret;
- timeout_t abs_timeout = timeout ? timeout->QuadPart : TIMEOUT_INFINITE;
-
- if (size != 1 && size != 2 && size != 4 && size != 8)
- return STATUS_INVALID_PARAMETER;
-
- if ((ret = fast_wait_addr( addr, cmp, size, timeout )) != STATUS_NOT_IMPLEMENTED)
- return ret;
-
- mutex_lock( &addr_mutex );
- if (!compare_addr( addr, cmp, size ))
- {
- mutex_unlock( &addr_mutex );
- return STATUS_SUCCESS;
- }
-
- if (abs_timeout < 0)
- {
- LARGE_INTEGER now;
-
- NtQueryPerformanceCounter( &now, NULL );
- abs_timeout -= now.QuadPart;
- }
-
- select_op.keyed_event.op = SELECT_KEYED_EVENT_WAIT;
- select_op.keyed_event.handle = wine_server_obj_handle( keyed_event );
- select_op.keyed_event.key = wine_server_client_ptr( addr );
-
- return server_select( &select_op, sizeof(select_op.keyed_event), SELECT_INTERRUPTIBLE,
- abs_timeout, NULL, &addr_mutex, NULL );
-}
-
-/***********************************************************************
- * RtlWakeAddressAll (NTDLL.@)
- */
-void WINAPI RtlWakeAddressAll( const void *addr )
-{
- if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return;
-
- mutex_lock( &addr_mutex );
- while (NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ) == STATUS_SUCCESS) {}
- mutex_unlock( &addr_mutex );
-}
-
-/***********************************************************************
- * RtlWakeAddressSingle (NTDLL.@)
- */
-void WINAPI RtlWakeAddressSingle( const void *addr )
-{
- if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return;
-
- mutex_lock( &addr_mutex );
- NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout );
- mutex_unlock( &addr_mutex );
-}
diff --git a/dlls/ntdll/unixlib.h b/dlls/ntdll/unixlib.h
index 04ae8230b1a..21e34842778 100644
--- a/dlls/ntdll/unixlib.h
+++ b/dlls/ntdll/unixlib.h
@@ -26,7 +26,7 @@
struct _DISPATCHER_CONTEXT;
/* increment this when you change the function table */
-#define NTDLL_UNIXLIB_VERSION 120
+#define NTDLL_UNIXLIB_VERSION 121
struct unix_funcs
{
@@ -38,10 +38,6 @@ struct unix_funcs
/* other Win32 API functions */
NTSTATUS (WINAPI *DbgUiIssueRemoteBreakin)( HANDLE process );
LONGLONG (WINAPI *RtlGetSystemTimePrecise)(void);
- NTSTATUS (WINAPI *RtlWaitOnAddress)( const void *addr, const void *cmp, SIZE_T size,
- const LARGE_INTEGER *timeout );
- void (WINAPI *RtlWakeAddressAll)( const void *addr );
- void (WINAPI *RtlWakeAddressSingle)( const void *addr );
/* fast locks */
NTSTATUS (CDECL *fast_RtlpWaitForCriticalSection)( RTL_CRITICAL_SECTION *crit, int timeout );
--
2.30.2