From fbe41916cb50c584a744412c2845f44da81ae1af Mon Sep 17 00:00:00 2001 From: Sebastian Lackner Date: Thu, 22 Jan 2015 01:05:36 +0100 Subject: [PATCH] Properly wrap CUDA stream callbacks by forwarding them to a separate worker thread. --- debian/changelog | 1 + ...wrap-stream-callbacks-by-forwarding-.patch | 209 ++++++++++++++++++ patches/patchinstall.sh | 2 + 3 files changed, 212 insertions(+) create mode 100644 patches/nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch diff --git a/debian/changelog b/debian/changelog index bd47f86b..8539cd53 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,7 @@ wine-staging (1.7.35) UNRELEASED; urgency=low * Automatically enable fallback method to apply patches when running from inside of a git subdirectory. * Synchronize CSMT patchset with https://github.com/stefand/wine. * Several improvements to make nvcuvid (CUDA video decoding) better compatible with x86_64. + * Properly wrap CUDA stream callbacks by forwarding them to a separate worker thread. * Added patch to quote program name in ShellExecute[Ex] when it contains spaces. * Added patch to implement support for DDS file format in D3DXSaveTextureToFileInMemory. * Added patch to avoid appending duplicate NULL character when importing keys with regedit. diff --git a/patches/nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch b/patches/nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch new file mode 100644 index 00000000..c444334c --- /dev/null +++ b/patches/nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch @@ -0,0 +1,209 @@ +From 4cb6fdc37124222d6e661d8db89fc832f98da9cb Mon Sep 17 00:00:00 2001 +From: Sebastian Lackner +Date: Thu, 22 Jan 2015 01:02:53 +0100 +Subject: nvcuda: Properly wrap stream callbacks by forwarding them to a worker + thread. 
+ +--- + dlls/nvcuda/Makefile.in | 1 + + dlls/nvcuda/nvcuda.c | 136 ++++++++++++++++++++++++++++++++++++++++++------ + 2 files changed, 122 insertions(+), 15 deletions(-) + +diff --git a/dlls/nvcuda/Makefile.in b/dlls/nvcuda/Makefile.in +index 6322fb2..98541b1 100644 +--- a/dlls/nvcuda/Makefile.in ++++ b/dlls/nvcuda/Makefile.in +@@ -1,4 +1,5 @@ + MODULE = nvcuda.dll ++EXTRALIBS = $(PTHREAD_LIBS) + + C_SRCS = \ + nvcuda.c \ +diff --git a/dlls/nvcuda/nvcuda.c b/dlls/nvcuda/nvcuda.c +index 9fefc28..8b356fd 100644 +--- a/dlls/nvcuda/nvcuda.c ++++ b/dlls/nvcuda/nvcuda.c +@@ -21,11 +21,17 @@ + #include "wine/port.h" + + #include <stdarg.h> ++#include <assert.h> ++ ++#ifdef HAVE_PTHREAD_H ++#include <pthread.h> ++#endif + + #include "windef.h" + #include "winbase.h" + #include "wine/library.h" + #include "wine/debug.h" ++#include "wine/list.h" + #include "wine/wgl.h" + #include "cuda.h" + #include "nvcuda.h" +@@ -39,6 +45,30 @@ + + WINE_DEFAULT_DEBUG_CHANNEL(nvcuda); + ++struct stream_callback_entry /* one application callback registered through wine_cuStreamAddCallback */ ++{ ++ struct list entry; /* link in stream_callbacks */ ++ enum ++ { ++ STREAM_CALLBACK_ABANDONED, /* registration failed; the worker thread only frees the entry */ ++ STREAM_CALLBACK_PENDING, /* queued by stream_callback_wrapper, not yet run */ ++ STREAM_CALLBACK_EXECUTED /* the worker thread has run the callback */ ++ } status; ++ void (WINAPI *callback)(CUstream hStream, CUresult status, void *userData); /* application callback, invoked on the worker thread */ ++ struct ++ { ++ CUstream stream; ++ CUresult status; ++ void *userdata; ++ } args; /* arguments forwarded to callback */ ++}; ++ ++static struct list stream_callbacks = LIST_INIT( stream_callbacks ); /* entries waiting for the worker thread */ ++static pthread_mutex_t stream_callback_mutex = PTHREAD_MUTEX_INITIALIZER; /* guards stream_callbacks and num_stream_callbacks */ ++static pthread_cond_t stream_callback_request = PTHREAD_COND_INITIALIZER; /* signalled when an entry is queued */ ++static pthread_cond_t stream_callback_reply = PTHREAD_COND_INITIALIZER; /* broadcast when a callback has returned */ ++static LONG num_stream_callbacks; /* outstanding registrations; the worker thread exits when this drops to zero */ ++ + static CUresult (*pcuArray3DCreate)(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); + static CUresult (*pcuArray3DCreate_v2)(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); + static CUresult (*pcuArray3DGetDescriptor)(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray); +@@ -1793,40 +1823,116 @@ CUresult WINAPI 
wine_cuPointerSetAttribute(const void *value, CUpointer_attribut + return pcuPointerSetAttribute(value, attribute, ptr); + } + +-struct stream_callback ++static DWORD WINAPI stream_callback_worker_thread(LPVOID parameter) /* worker thread: runs queued stream callbacks; parameter is unused; returns 0 once num_stream_callbacks reaches zero */ + { +- void (WINAPI *callback)(CUstream hStream, CUresult status, void *userData); +- void *userData; +-}; ++ struct stream_callback_entry *wrapper; ++ struct list *ptr; ++ pthread_mutex_lock(&stream_callback_mutex); ++ ++ for (;;) ++ { ++ while ((ptr = list_head(&stream_callbacks))) ++ { ++ wrapper = LIST_ENTRY(ptr, struct stream_callback_entry, entry); ++ list_remove(&wrapper->entry); ++ ++ switch (wrapper->status) ++ { ++ case STREAM_CALLBACK_ABANDONED: /* registration failed, nobody waits for this entry */ ++ free(wrapper); ++ break; ++ ++ case STREAM_CALLBACK_PENDING: ++ pthread_mutex_unlock(&stream_callback_mutex); /* do not hold the lock while application code runs */ ++ ++ TRACE("calling stream callback %p(%p, %d, %p)\n", wrapper->callback, ++ wrapper->args.stream, wrapper->args.status, wrapper->args.userdata); ++ wrapper->callback(wrapper->args.stream, wrapper->args.status, wrapper->args.userdata); ++ TRACE("stream callback %p returned\n", wrapper->callback); ++ ++ pthread_mutex_lock(&stream_callback_mutex); /* lock before publishing: the waiter tests status under this mutex, so an unlocked update could land between its test and its pthread_cond_wait and the wakeup would be lost */ ++ wrapper->status = STREAM_CALLBACK_EXECUTED; /* the waiter frees the entry after seeing this; wrapper is not touched again here */ ++ pthread_cond_broadcast(&stream_callback_reply); ++ break; ++ ++ default: ++ assert(0); /* never reached */ ++ } ++ ++ if (!--num_stream_callbacks) /* last outstanding registration handled: let the thread exit */ ++ goto end; ++ } ++ ++ pthread_cond_wait(&stream_callback_request, &stream_callback_mutex); ++ } ++ ++end: ++ pthread_mutex_unlock(&stream_callback_mutex); ++ return 0; ++} + + static void stream_callback_wrapper(CUstream hStream, CUresult status, void *userData) + { +- struct stream_callback *wrapper = userData; +- TRACE("(%p, %d, %p)\n", hStream, status, userData); ++ struct stream_callback_entry *wrapper = userData; ++ wrapper->status = STREAM_CALLBACK_PENDING; ++ wrapper->args.stream = hStream; ++ wrapper->args.status = status; ++ pthread_mutex_lock(&stream_callback_mutex); + +- TRACE("calling stream callback %p(%p, %d, %p)\n", 
wrapper->callback, hStream, status, wrapper->userData); +- wrapper->callback(hStream, status, wrapper->userData); +- TRACE("stream callback %p returned\n", wrapper->callback); ++ list_add_tail(&stream_callbacks, &wrapper->entry); /* hand the entry to the worker thread */ ++ pthread_cond_signal(&stream_callback_request); ++ while (wrapper->status == STREAM_CALLBACK_PENDING) /* re-test after every wakeup: the reply condition is shared by all waiters */ ++ pthread_cond_wait(&stream_callback_reply, &stream_callback_mutex); + +- HeapFree( GetProcessHeap(), 0, wrapper ); ++ pthread_mutex_unlock(&stream_callback_mutex); ++ free(wrapper); /* allocated in wine_cuStreamAddCallback */ + } + + CUresult WINAPI wine_cuStreamAddCallback(CUstream hStream, void *callback, void *userData, unsigned int flags) + { +- struct stream_callback *wrapper; ++ struct stream_callback_entry *wrapper; + CUresult ret; + + TRACE("(%p, %p, %p, %u)\n", hStream, callback, userData, flags); + +- wrapper = HeapAlloc( GetProcessHeap(), 0, sizeof(*wrapper) ); ++ wrapper = malloc(sizeof(*wrapper)); /* freed by stream_callback_wrapper, by the worker thread if abandoned, or below on failure */ + if (!wrapper) + return CUDA_ERROR_OUT_OF_MEMORY; ++ wrapper->callback = callback; ++ wrapper->args.userdata = userData; + +- wrapper->callback = callback; +- wrapper->userData = userData; ++ /* spawn a new worker thread if necessary */ ++ pthread_mutex_lock(&stream_callback_mutex); ++ if (!num_stream_callbacks++) /* counter was zero, so a worker thread has to be started */ ++ { ++ HANDLE thread = CreateThread(NULL, 0, stream_callback_worker_thread, NULL, 0, NULL); ++ if (!thread) ++ { ++ num_stream_callbacks--; /* undo the increment above */ ++ pthread_mutex_unlock(&stream_callback_mutex); ++ free(wrapper); ++ return CUDA_ERROR_OUT_OF_MEMORY; /* FIXME */ ++ } ++ CloseHandle(thread); /* only the handle is dropped; the thread is not waited for */ ++ } ++ pthread_mutex_unlock(&stream_callback_mutex); + + ret = pcuStreamAddCallback(hStream, stream_callback_wrapper, wrapper, flags); +- if (ret) HeapFree( GetProcessHeap(), 0, wrapper ); ++ if (ret) ++ { ++ pthread_mutex_lock(&stream_callback_mutex); ++ if (num_stream_callbacks == 1) /* only this registration is outstanding: queue it as abandoned so the worker frees it, drops the counter and exits */ ++ { ++ wrapper->status = STREAM_CALLBACK_ABANDONED; ++ list_add_tail(&stream_callbacks, &wrapper->entry); ++ pthread_cond_signal(&stream_callback_request); ++ wrapper = NULL; /* the worker now owns the entry; free(NULL) below is a no-op */ ++ } ++ else 
num_stream_callbacks--; /* other registrations remain, so the worker keeps running */ ++ pthread_mutex_unlock(&stream_callback_mutex); ++ free(wrapper); ++ } ++ + return ret; + } + +-- +2.2.1 + diff --git a/patches/patchinstall.sh b/patches/patchinstall.sh index 533e983e..40f6ba52 100755 --- a/patches/patchinstall.sh +++ b/patches/patchinstall.sh @@ -2204,6 +2204,7 @@ if test "$enable_nvcuda_CUDA_Support" -eq 1; then patch_apply nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch patch_apply nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch patch_apply nvcuda-CUDA_Support/0006-nvcuda-Emulate-two-d3d9-initialization-functions.patch + patch_apply nvcuda-CUDA_Support/0007-nvcuda-Properly-wrap-stream-callbacks-by-forwarding-.patch ( echo '+ { "Sebastian Lackner", "include: Add cuda.h.h.", 1 },'; echo '+ { "Sebastian Lackner", "nvcuda: Add stub dll.", 1 },'; @@ -2211,6 +2212,7 @@ if test "$enable_nvcuda_CUDA_Support" -eq 1; then echo '+ { "Sebastian Lackner", "nvcuda: Implement new functions added in CUDA 6.5.", 1 },'; echo '+ { "Michael Müller", "nvcuda: Properly wrap undocumented '\''ContextStorage'\'' interface and add tests.", 1 },'; echo '+ { "Michael Müller", "nvcuda: Emulate two d3d9 initialization functions.", 1 },'; + echo '+ { "Sebastian Lackner", "nvcuda: Properly wrap stream callbacks by forwarding them to a worker thread.", 1 },'; ) >> "$patchlist" fi