Properly wrap CUDA stream callbacks by forwarding them to a separate worker thread.

This commit is contained in:
Sebastian Lackner 2015-01-22 01:05:36 +01:00
parent f64437a901
commit fbe41916cb
3 changed files with 212 additions and 0 deletions

1
debian/changelog vendored
View File

@ -4,6 +4,7 @@ wine-staging (1.7.35) UNRELEASED; urgency=low
* Automatically enable fallback method to apply patches when running from inside of a git subdirectory.
* Synchronize CSMT patchset with https://github.com/stefand/wine.
* Several improvements to make nvcuvid (CUDA video decoding) better compatible with x86_64.
* Properly wrap CUDA stream callbacks by forwarding them to a separate worker thread.
* Added patch to quote program name in ShellExecute[Ex] when it contains spaces.
* Added patch to implement support for DDS file format in D3DXSaveTextureToFileInMemory.
* Added patch to avoid appending duplicate NULL character when importing keys with regedit.

View File

@ -0,0 +1,209 @@
From 4cb6fdc37124222d6e661d8db89fc832f98da9cb Mon Sep 17 00:00:00 2001
From: Sebastian Lackner <sebastian@fds-team.de>
Date: Thu, 22 Jan 2015 01:02:53 +0100
Subject: nvcuda: Properly wrap stream callbacks by forwarding them to a worker
thread.
---
dlls/nvcuda/Makefile.in | 1 +
dlls/nvcuda/nvcuda.c | 136 ++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 122 insertions(+), 15 deletions(-)
diff --git a/dlls/nvcuda/Makefile.in b/dlls/nvcuda/Makefile.in
index 6322fb2..98541b1 100644
--- a/dlls/nvcuda/Makefile.in
+++ b/dlls/nvcuda/Makefile.in
@@ -1,4 +1,5 @@
MODULE = nvcuda.dll
+EXTRALIBS = $(PTHREAD_LIBS)
C_SRCS = \
nvcuda.c \
diff --git a/dlls/nvcuda/nvcuda.c b/dlls/nvcuda/nvcuda.c
index 9fefc28..8b356fd 100644
--- a/dlls/nvcuda/nvcuda.c
+++ b/dlls/nvcuda/nvcuda.c
@@ -21,11 +21,17 @@
#include "wine/port.h"
#include <stdarg.h>
+#include <assert.h>
+
+#ifdef HAVE_PTHREAD_H
+#include <pthread.h>
+#endif
#include "windef.h"
#include "winbase.h"
#include "wine/library.h"
#include "wine/debug.h"
+#include "wine/list.h"
#include "wine/wgl.h"
#include "cuda.h"
#include "nvcuda.h"
@@ -39,6 +45,30 @@
WINE_DEFAULT_DEBUG_CHANNEL(nvcuda);
+struct stream_callback_entry /* one application callback handed from the native CUDA callback to the worker thread */
+{
+ struct list entry; /* link in the stream_callbacks queue */
+ enum
+ {
+ STREAM_CALLBACK_ABANDONED, /* registration failed: the worker only frees the entry */
+ STREAM_CALLBACK_PENDING, /* queued: the worker still has to run the callback */
+ STREAM_CALLBACK_EXECUTED /* callback returned: the waiting wrapper may free the entry */
+ } status;
+ void (WINAPI *callback)(CUstream hStream, CUresult status, void *userData); /* application-supplied callback */
+ struct
+ {
+ CUstream stream;
+ CUresult status;
+ void *userdata;
+ } args; /* arguments forwarded to callback */
+};
+
+static struct list stream_callbacks = LIST_INIT( stream_callbacks ); /* queue drained by the worker thread */
+static pthread_mutex_t stream_callback_mutex = PTHREAD_MUTEX_INITIALIZER; /* guards the queue and num_stream_callbacks */
+static pthread_cond_t stream_callback_request = PTHREAD_COND_INITIALIZER; /* signalled whenever an entry is queued */
+static pthread_cond_t stream_callback_reply = PTHREAD_COND_INITIALIZER; /* broadcast after a callback has been executed */
+static LONG num_stream_callbacks; /* entries the worker still has to consume; file-local like the rest of this state */
+
static CUresult (*pcuArray3DCreate)(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
static CUresult (*pcuArray3DCreate_v2)(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
static CUresult (*pcuArray3DGetDescriptor)(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
@@ -1793,40 +1823,116 @@ CUresult WINAPI wine_cuPointerSetAttribute(const void *value, CUpointer_attribut
return pcuPointerSetAttribute(value, attribute, ptr);
}
-struct stream_callback
+static DWORD WINAPI stream_callback_worker_thread(LPVOID parameter) /* drains stream_callbacks; returns 0 once no callbacks are outstanding */
{
- void (WINAPI *callback)(CUstream hStream, CUresult status, void *userData);
- void *userData;
-};
+ struct stream_callback_entry *wrapper;
+ struct list *ptr;
+ pthread_mutex_lock(&stream_callback_mutex); /* held whenever the queue or the counter is touched */
+
+ for (;;)
+ {
+ while ((ptr = list_head(&stream_callbacks)))
+ {
+ wrapper = LIST_ENTRY(ptr, struct stream_callback_entry, entry);
+ list_remove(&wrapper->entry);
+
+ switch (wrapper->status)
+ {
+ case STREAM_CALLBACK_ABANDONED: /* nobody waits for this entry, so the worker owns and frees it */
+ free(wrapper);
+ break;
+
+ case STREAM_CALLBACK_PENDING:
+ pthread_mutex_unlock(&stream_callback_mutex); /* do not hold the lock while application code runs */
+
+ TRACE("calling stream callback %p(%p, %d, %p)\n", wrapper->callback,
+ wrapper->args.stream, wrapper->args.status, wrapper->args.userdata);
+ wrapper->callback(wrapper->args.stream, wrapper->args.status, wrapper->args.userdata);
+ TRACE("stream callback %p returned\n", wrapper->callback);
+
+ pthread_mutex_lock(&stream_callback_mutex); /* re-take the lock first: the status is the waiter's predicate */
+ wrapper->status = STREAM_CALLBACK_EXECUTED; /* changed under the mutex so a waiter cannot test it and then miss the wakeup */
+ pthread_cond_broadcast(&stream_callback_reply); /* wrapper must not be touched after this; the waiter frees it */
+ break;
+
+ default:
+ assert(0); /* never reached */
+ }
+
+ if (!--num_stream_callbacks) /* last outstanding entry consumed: let the thread exit */
+ goto end;
+ }
+
+ pthread_cond_wait(&stream_callback_request, &stream_callback_mutex); /* queue empty: sleep until an entry is queued */
+ }
+
+end:
+ pthread_mutex_unlock(&stream_callback_mutex);
+ return 0;
+}
static void stream_callback_wrapper(CUstream hStream, CUresult status, void *userData)
{
- struct stream_callback *wrapper = userData;
- TRACE("(%p, %d, %p)\n", hStream, status, userData);
+ struct stream_callback_entry *wrapper = userData; /* entry allocated by wine_cuStreamAddCallback */
+ wrapper->status = STREAM_CALLBACK_PENDING; /* set before the entry is queued, i.e. before the worker can see it */
+ wrapper->args.stream = hStream;
+ wrapper->args.status = status;
+ pthread_mutex_lock(&stream_callback_mutex);
- TRACE("calling stream callback %p(%p, %d, %p)\n", wrapper->callback, hStream, status, wrapper->userData);
- wrapper->callback(hStream, status, wrapper->userData);
- TRACE("stream callback %p returned\n", wrapper->callback);
+ list_add_tail(&stream_callbacks, &wrapper->entry); /* hand the entry over to the worker thread */
+ pthread_cond_signal(&stream_callback_request);
+ while (wrapper->status == STREAM_CALLBACK_PENDING) /* block until the worker has run the application callback */
+ pthread_cond_wait(&stream_callback_reply, &stream_callback_mutex);
- HeapFree( GetProcessHeap(), 0, wrapper );
+ pthread_mutex_unlock(&stream_callback_mutex);
+ free(wrapper); /* the worker leaves executed entries allocated; they are released here */
}
CUresult WINAPI wine_cuStreamAddCallback(CUstream hStream, void *callback, void *userData, unsigned int flags)
{
- struct stream_callback *wrapper;
+ struct stream_callback_entry *wrapper;
CUresult ret;
TRACE("(%p, %p, %p, %u)\n", hStream, callback, userData, flags);
- wrapper = HeapAlloc( GetProcessHeap(), 0, sizeof(*wrapper) );
+ wrapper = malloc(sizeof(*wrapper)); /* released with free() by stream_callback_wrapper, the worker, or below on failure */
if (!wrapper)
return CUDA_ERROR_OUT_OF_MEMORY;
+ wrapper->callback = callback;
+ wrapper->args.userdata = userData; /* stream and status are filled in later by stream_callback_wrapper */
- wrapper->callback = callback;
- wrapper->userData = userData;
+ /* spawn a new worker thread if necessary */
+ pthread_mutex_lock(&stream_callback_mutex);
+ if (!num_stream_callbacks++) /* counter was zero: the worker exits at zero, so start one */
+ {
+ HANDLE thread = CreateThread(NULL, 0, stream_callback_worker_thread, NULL, 0, NULL);
+ if (!thread)
+ {
+ num_stream_callbacks--; /* undo the increment made above */
+ pthread_mutex_unlock(&stream_callback_mutex);
+ free(wrapper);
+ return CUDA_ERROR_OUT_OF_MEMORY; /* FIXME */
+ }
+ CloseHandle(thread); /* the handle is not needed; the thread is never joined here */
+ }
+ pthread_mutex_unlock(&stream_callback_mutex);
ret = pcuStreamAddCallback(hStream, stream_callback_wrapper, wrapper, flags);
- if (ret) HeapFree( GetProcessHeap(), 0, wrapper );
+ if (ret) /* registration failed: stream_callback_wrapper will not be called for this entry */
+ {
+ pthread_mutex_lock(&stream_callback_mutex);
+ if (num_stream_callbacks == 1) /* only this entry is outstanding: the worker must consume it to reach zero and exit */
+ {
+ wrapper->status = STREAM_CALLBACK_ABANDONED; /* tells the worker to free the entry without calling it */
+ list_add_tail(&stream_callbacks, &wrapper->entry);
+ pthread_cond_signal(&stream_callback_request);
+ wrapper = NULL; /* ownership moved to the worker; free(NULL) below is a no-op */
+ }
+ else num_stream_callbacks--; /* other entries remain outstanding, so just drop this one from the count */
+ pthread_mutex_unlock(&stream_callback_mutex);
+ free(wrapper);
+ }
+
return ret;
}
--
2.2.1

View File

@ -2204,6 +2204,7 @@ if test "$enable_nvcuda_CUDA_Support" -eq 1; then
patch_apply nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch
patch_apply nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch
patch_apply nvcuda-CUDA_Support/0006-nvcuda-Emulate-two-d3d9-initialization-functions.patch
patch_apply nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch
(
echo '+ { "Sebastian Lackner", "include: Add cuda.h.h.", 1 },';
echo '+ { "Sebastian Lackner", "nvcuda: Add stub dll.", 1 },';
@ -2211,6 +2212,7 @@ if test "$enable_nvcuda_CUDA_Support" -eq 1; then
echo '+ { "Sebastian Lackner", "nvcuda: Implement new functions added in CUDA 6.5.", 1 },';
echo '+ { "Michael Müller", "nvcuda: Properly wrap undocumented '\''ContextStorage'\'' interface and add tests.", 1 },';
echo '+ { "Michael Müller", "nvcuda: Emulate two d3d9 initialization functions.", 1 },';
echo '+ { "Sebastian Lackner", "nvcuda: Properly wrap stream callbacks by forwarding them to a worker thread.", 1 },';
) >> "$patchlist"
fi