From f64437a901854108267b3c5b0d55598b99bf05eb Mon Sep 17 00:00:00 2001 From: Sebastian Lackner Date: Wed, 21 Jan 2015 03:09:54 +0100 Subject: [PATCH] nvcuda-CUDA_Support: Update patchset with various improvements. * Add static to a couple of functions * TLS callbacks should be declared as CDECL * Do not unlock the critical section while processing TLS callbacks --- .../0003-nvcuda-First-implementation.patch | 76 +++++++++---------- ...ment-new-functions-added-in-CUDA-6.5.patch | 4 +- ...wrap-undocumented-ContextStorage-int.patch | 28 +++---- patches/patchinstall.sh | 2 +- 4 files changed, 54 insertions(+), 56 deletions(-) diff --git a/patches/nvcuda-CUDA_Support/0003-nvcuda-First-implementation.patch b/patches/nvcuda-CUDA_Support/0003-nvcuda-First-implementation.patch index e414d3b0..31ffc17b 100644 --- a/patches/nvcuda-CUDA_Support/0003-nvcuda-First-implementation.patch +++ b/patches/nvcuda-CUDA_Support/0003-nvcuda-First-implementation.patch @@ -1,28 +1,32 @@ -From 81fe2ed75ea9e0f282c496f0c8183a4e316fb78d Mon Sep 17 00:00:00 2001 +From 3a96e7ad09c6ad0ad31313634688c5c374d5d89f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20M=C3=BCller?= Date: Sat, 3 Jan 2015 03:39:11 +0100 -Subject: nvcuda: First implementation. +Subject: nvcuda: First implementation. (rev 2) +Changes by Sebastian Lackner : +* Add static to a couple of functions +* TLS callbacks should be declared as CDECL +* Do not unlock the critical section while processing TLS callbacks --- configure.ac | 1 + dlls/nvcuda/Makefile.in | 3 +- - dlls/nvcuda/internal.c | 521 ++++++++++ + dlls/nvcuda/internal.c | 515 ++++++++++ dlls/nvcuda/nvcuda.c | 2097 ++++++++++++++++++++++++++++++++++++++++- dlls/nvcuda/nvcuda.h | 29 + dlls/nvcuda/nvcuda.spec | 492 +++++----- dlls/nvcuda/tests/Makefile.in | 4 + dlls/nvcuda/tests/nvcuda.c | 168 ++++ - 8 files changed, 3063 insertions(+), 252 deletions(-) + 8 files changed, 3057 insertions(+), 252 deletions(-) create mode 100755 dlls/nvcuda/internal.c create mode 100644 dlls/nvcuda/nvcuda.h create mode 100644 dlls/nvcuda/tests/Makefile.in create mode 100644 dlls/nvcuda/tests/nvcuda.c diff --git a/configure.ac b/configure.ac -index a4248d0..73e67bb 100644 +index 48bd58c..dc03595 100644 --- a/configure.ac +++ b/configure.ac -@@ -3126,6 +3126,7 @@ WINE_CONFIG_DLL(ntoskrnl.exe,,[implib]) +@@ -3127,6 +3127,7 @@ WINE_CONFIG_DLL(ntoskrnl.exe,,[implib]) WINE_CONFIG_DLL(ntprint) WINE_CONFIG_TEST(dlls/ntprint/tests) WINE_CONFIG_DLL(nvcuda) @@ -45,10 +49,10 @@ index 4b33278..6322fb2 100644 RC_SRCS = nvcuda.rc diff --git a/dlls/nvcuda/internal.c b/dlls/nvcuda/internal.c new file mode 100755 -index 0000000..4e733eb +index 0000000..611e310 --- /dev/null +++ b/dlls/nvcuda/internal.c -@@ -0,0 +1,521 @@ +@@ -0,0 +1,515 @@ +/* + * Copyright (C) 2014-2015 Michael Müller + * Copyright (C) 2014-2015 Sebastian Lackner @@ -87,7 +91,7 @@ index 0000000..4e733eb +struct tls_callback_entry +{ + struct list entry; -+ void (*callback)(DWORD, void *); ++ void (CDECL *callback)(DWORD, void *); + void *userdata; + ULONG count; +}; @@ -105,7 +109,6 @@ index 0000000..4e733eb + +void cuda_process_tls_callbacks(DWORD reason) +{ -+ struct tls_callback_entry *to_free = NULL; + struct list *ptr; + + TRACE("(%d)\n", reason); @@ -119,24 +122,19 @@ index 0000000..4e733eb + { + struct tls_callback_entry *callback = LIST_ENTRY( ptr, struct tls_callback_entry, entry ); + callback->count++; -+ LeaveCriticalSection( &tls_callback_section ); -+ HeapFree( GetProcessHeap(), 0, to_free ); -+ to_free = NULL; + + TRACE("calling handler %p(0, %p)\n", callback->callback, callback->userdata); + callback->callback(0, callback->userdata); + TRACE("handler %p returned\n", callback->callback); + -+ EnterCriticalSection( &tls_callback_section ); + ptr = list_next( &tls_callbacks, ptr ); + if (!--callback->count) /* removed during execution */ + { + list_remove( &callback->entry ); -+ to_free = callback; ++ HeapFree( GetProcessHeap(), 0, callback ); + } + } + LeaveCriticalSection( &tls_callback_section ); -+ HeapFree( GetProcessHeap(), 0, to_free ); +} + +static const CUuuid UUID_Unknown1 = {{0x6B, 0xD5, 0xFB, 0x6C, 0x5B, 0xF4, 0xE7, 0x4A, @@ -261,31 +259,31 @@ index 0000000..4e733eb +} *Unknown5_orig = NULL; + + -+void* WINAPI Unknown1_func0_relay(void *param0, void *param1) ++static void* WINAPI Unknown1_func0_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown1_orig->func0(param0, param1); +} + -+void* WINAPI Unknown1_func1_relay(void *param0, void *param1) ++static void* WINAPI Unknown1_func1_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown1_orig->func1(param0, param1); +} + -+void* WINAPI Unknown1_func2_relay(void *param0, void *param1) ++static void* WINAPI Unknown1_func2_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown1_orig->func2(param0, param1); +} + -+void* WINAPI Unknown1_func3_relay(void *param0, void *param1) ++static void* WINAPI Unknown1_func3_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown1_orig->func3(param0, param1); +} + -+void* WINAPI Unknown1_func4_relay(void *param0) ++static void* WINAPI Unknown1_func4_relay(void *param0) +{ + TRACE("(%p)\n", param0); + return Unknown1_orig->func4(param0); @@ -301,37 +299,37 @@ index 0000000..4e733eb + Unknown1_func4_relay, +}; + -+void* WINAPI Unknown2_func0_relay(void *param0, void *param1) ++static void* WINAPI Unknown2_func0_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown2_orig->func0(param0, param1); +} + -+void* WINAPI Unknown2_func1_relay(void *param0, void *param1) ++static void* WINAPI Unknown2_func1_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown2_orig->func1(param0, param1); +} + -+void* WINAPI Unknown2_func2_relay(void *param0, void *param1, void *param2) ++static void* WINAPI Unknown2_func2_relay(void *param0, void *param1, void *param2) +{ + TRACE("(%p, %p, %p)\n", param0, param1, param2); + return Unknown2_orig->func2(param0, param1, param2); +} + -+void* WINAPI Unknown2_func3_relay(void *param0, void *param1) ++static void* WINAPI Unknown2_func3_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown2_orig->func3(param0, param1); +} + -+void* WINAPI Unknown2_func4_relay(void *param0, void *param1) ++static void* WINAPI Unknown2_func4_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown2_orig->func4(param0, param1); +} + -+void* WINAPI Unknown2_func5_relay(void *param0, void *param1) ++static void* WINAPI Unknown2_func5_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown2_orig->func5(param0, param1); @@ -348,38 +346,38 @@ index 0000000..4e733eb + Unknown2_func5_relay, +}; + -+void* WINAPI Unknown3_func0_relay(void *param0) ++static void* WINAPI Unknown3_func0_relay(void *param0) +{ + TRACE("(%p)\n", param0); + return Unknown3_orig->func0(param0); +} + -+void* WINAPI Unknown3_func1_relay(void *param0) ++static void* WINAPI Unknown3_func1_relay(void *param0) +{ + TRACE("(%p)\n", param0); + return Unknown3_orig->func1(param0); +} + -+struct Unknown3_table Unknown3_Impl = ++static struct Unknown3_table Unknown3_Impl = +{ + sizeof(struct Unknown3_table), + Unknown3_func0_relay, + Unknown3_func1_relay, +}; + -+void* WINAPI Unknown4_func0_relay(void *param0, void *param1, void *param2, void *param3) ++static void* WINAPI Unknown4_func0_relay(void *param0, void *param1, void *param2, void *param3) +{ + TRACE("(%p, %p, %p, %p)\n", param0, param1, param2, param3); + return Unknown4_orig->func0(param0, param1, param2, param3); +} + -+void* WINAPI Unknown4_func1_relay(void *param0, void *param1) ++static void* WINAPI Unknown4_func1_relay(void *param0, void *param1) +{ + TRACE("(%p, %p)\n", param0, param1); + return Unknown4_orig->func1(param0, param1); +} + -+void* WINAPI Unknown4_func2_relay(void *param0, void *param1, void *param2) ++static void* WINAPI Unknown4_func2_relay(void *param0, void *param1, void *param2) +{ + TRACE("(%p, %p, %p)\n", param0, param1, param2); + return Unknown4_orig->func2(param0, param1, param2); @@ -392,7 +390,7 @@ index 0000000..4e733eb + Unknown4_func2_relay, +}; + -+void* WINAPI Unknown5_func0_relay(void *param0, void *param1, void *param2) ++static void* WINAPI Unknown5_func0_relay(void *param0, void *param1, void *param2) +{ + TRACE("(%p, %p, %p)\n", param0, param1, param2); + return Unknown5_orig->func0(param0, param1, param2); @@ -404,7 +402,7 @@ index 0000000..4e733eb + Unknown5_func0_relay, +}; + -+CUresult WINAPI TlsNotifyInterface_Set(void **handle, void *callback, void *userdata) ++static CUresult WINAPI TlsNotifyInterface_Set(void **handle, void *callback, void *userdata) +{ + struct tls_callback_entry *new_entry; + @@ -426,7 +424,7 @@ index 0000000..4e733eb + return CUDA_SUCCESS; +} + -+CUresult WINAPI TlsNotifyInterface_Remove(void *handle, void *param1) ++static CUresult WINAPI TlsNotifyInterface_Remove(void *handle, void *param1) +{ + CUresult ret = CUDA_ERROR_INVALID_VALUE; + struct tls_callback_entry *to_free = NULL; @@ -571,7 +569,7 @@ index 0000000..4e733eb + return CUDA_ERROR_UNKNOWN; +} diff --git a/dlls/nvcuda/nvcuda.c b/dlls/nvcuda/nvcuda.c -index 8b7eb9c..5b9eed6 100644 +index 8b7eb9c..3ef7c35 100644 --- a/dlls/nvcuda/nvcuda.c +++ b/dlls/nvcuda/nvcuda.c @@ -1,4 +1,5 @@ @@ -3251,7 +3249,7 @@ index 0000000..81f9290 + nvcuda.c diff --git a/dlls/nvcuda/tests/nvcuda.c b/dlls/nvcuda/tests/nvcuda.c new file mode 100644 -index 0000000..3b00c5d +index 0000000..fc8f300 --- /dev/null +++ b/dlls/nvcuda/tests/nvcuda.c @@ -0,0 +1,168 @@ @@ -3329,7 +3327,7 @@ index 0000000..3b00c5d + DWORD threadid; +}; + -+static void tls_callback_test(DWORD reason, void *data) ++static void CDECL tls_callback_test(DWORD reason, void *data) +{ + struct tls_test_data *test_data = data; + trace("reason: %d, data: %p\n", reason, data); diff --git a/patches/nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch b/patches/nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch index ec168f2d..3aed7567 100644 --- a/patches/nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch +++ b/patches/nvcuda-CUDA_Support/0004-nvcuda-Implement-new-functions-added-in-CUDA-6.5.patch @@ -1,4 +1,4 @@ -From 4d10b5ffaadd216525cfef2d9c832a68268d8614 Mon Sep 17 00:00:00 2001 +From e08627884ed59ebbac09a1182428718ed0ec1cbe Mon Sep 17 00:00:00 2001 From: Sebastian Lackner Date: Fri, 9 Jan 2015 04:39:49 +0100 Subject: nvcuda: Implement new functions added in CUDA 6.5. @@ -10,7 +10,7 @@ Subject: nvcuda: Implement new functions added in CUDA 6.5. 3 files changed, 125 insertions(+) diff --git a/dlls/nvcuda/nvcuda.c b/dlls/nvcuda/nvcuda.c -index 5b9eed6..bd39547 100644 +index 3ef7c35..3118c49 100644 --- a/dlls/nvcuda/nvcuda.c +++ b/dlls/nvcuda/nvcuda.c @@ -293,6 +293,20 @@ static CUresult (*pcuTexRefSetMipmapLevelBias)(CUtexref hTexRef, float bias); diff --git a/patches/nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch b/patches/nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch index cb2cd5cb..21ddfe01 100644 --- a/patches/nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch +++ b/patches/nvcuda-CUDA_Support/0005-nvcuda-Properly-wrap-undocumented-ContextStorage-int.patch @@ -1,4 +1,4 @@ -From 9890ed7c2dcde12d86180d9c65d398d43a2cffca Mon Sep 17 00:00:00 2001 +From 39568b59352627db2c0ec1235c71cfcba05141a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20M=C3=BCller?= Date: Fri, 9 Jan 2015 07:42:32 +0100 Subject: nvcuda: Properly wrap undocumented 'ContextStorage' interface and add @@ -11,10 +11,10 @@ Subject: nvcuda: Properly wrap undocumented 'ContextStorage' interface and add 3 files changed, 319 insertions(+), 33 deletions(-) diff --git a/dlls/nvcuda/internal.c b/dlls/nvcuda/internal.c -index 4e733eb..4ba42b3 100755 +index 611e310..8a2dffd 100755 --- a/dlls/nvcuda/internal.c +++ b/dlls/nvcuda/internal.c -@@ -94,7 +94,7 @@ static const CUuuid UUID_Unknown2 = {{0xA0, 0x94, 0x79, 0x8C, +@@ -88,7 +88,7 @@ static const CUuuid UUID_Unknown2 = {{0xA0, 0x94, 0x79, 0x8C, 0x93, 0xF2, 0x08, 0x00, 0x20, 0x0C, 0x0A, 0x66}}; static const CUuuid UUID_Unknown3 = {{0x42, 0xD8, 0x5A, 0x81, 0x23, 0xF6, 0xCB, 0x47, 0x82, 0x98, 0xF6, 0xE7, 0x8A, 0x3A, 0xEC, 0xDC}}; @@ -23,7 +23,7 @@ index 4e733eb..4ba42b3 100755 0xA8, 0xC3, 0x68, 0xF3, 0x55, 0xD8, 0x95, 0x93}}; static const CUuuid UUID_Unknown5 = {{0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72, 0x86}}; -@@ -170,20 +170,20 @@ static const struct +@@ -164,20 +164,20 @@ static const struct } *Unknown3_orig = NULL; /* @@ -53,11 +53,11 @@ index 4e733eb..4ba42b3 100755 /* * TlsNotifyInterface -@@ -316,29 +316,80 @@ struct Unknown3_table Unknown3_Impl = +@@ -310,29 +310,80 @@ static struct Unknown3_table Unknown3_Impl = Unknown3_func1_relay, }; --void* WINAPI Unknown4_func0_relay(void *param0, void *param1, void *param2, void *param3) +-static void* WINAPI Unknown4_func0_relay(void *param0, void *param1, void *param2, void *param3) +struct context_storage { - TRACE("(%p, %p, %p, %p)\n", param0, param1, param2, param3); @@ -83,8 +83,8 @@ index 4e733eb..4ba42b3 100755 + HeapFree( GetProcessHeap(), 0, storage ); } --void* WINAPI Unknown4_func1_relay(void *param0, void *param1) -+CUresult WINAPI ContextStorage_Set(CUcontext ctx, void *key, void *value, void *callback) +-static void* WINAPI Unknown4_func1_relay(void *param0, void *param1) ++static CUresult WINAPI ContextStorage_Set(CUcontext ctx, void *key, void *value, void *callback) { - TRACE("(%p, %p)\n", param0, param1); - return Unknown4_orig->func1(param0, param1); @@ -105,8 +105,8 @@ index 4e733eb..4ba42b3 100755 + return ret; } --void* WINAPI Unknown4_func2_relay(void *param0, void *param1, void *param2) -+CUresult WINAPI ContextStorage_Remove(CUcontext ctx, void *key) +-static void* WINAPI Unknown4_func2_relay(void *param0, void *param1, void *param2) ++static CUresult WINAPI ContextStorage_Remove(CUcontext ctx, void *key) { - TRACE("(%p, %p, %p)\n", param0, param1, param2); - return Unknown4_orig->func2(param0, param1, param2); @@ -123,7 +123,7 @@ index 4e733eb..4ba42b3 100755 + return ContextStorage_orig->Remove(ctx, key); +} + -+CUresult WINAPI ContextStorage_Get(void **value, CUcontext ctx, void *key) ++static CUresult WINAPI ContextStorage_Get(void **value, CUcontext ctx, void *key) +{ + struct context_storage *storage; + CUresult ret; @@ -146,8 +146,8 @@ index 4e733eb..4ba42b3 100755 + ContextStorage_Get, }; - void* WINAPI Unknown5_func0_relay(void *param0, void *param1, void *param2) -@@ -486,15 +537,15 @@ CUresult cuda_get_table(const void **table, const CUuuid *uuid, const void *orig + static void* WINAPI Unknown5_func0_relay(void *param0, void *param1, void *param2) +@@ -480,15 +531,15 @@ CUresult cuda_get_table(const void **table, const CUuuid *uuid, const void *orig *table = (void *)&Unknown3_Impl; return CUDA_SUCCESS; } @@ -167,7 +167,7 @@ index 4e733eb..4ba42b3 100755 } else if (cuda_equal_uuid(uuid, &UUID_Unknown5)) diff --git a/dlls/nvcuda/tests/nvcuda.c b/dlls/nvcuda/tests/nvcuda.c -index 3b00c5d..93aaa49 100644 +index fc8f300..9c30c37 100644 --- a/dlls/nvcuda/tests/nvcuda.c +++ b/dlls/nvcuda/tests/nvcuda.c @@ -27,10 +27,24 @@ diff --git a/patches/patchinstall.sh b/patches/patchinstall.sh index f2344b16..533e983e 100755 --- a/patches/patchinstall.sh +++ b/patches/patchinstall.sh @@ -2207,7 +2207,7 @@ if test "$enable_nvcuda_CUDA_Support" -eq 1; then ( echo '+ { "Sebastian Lackner", "include: Add cuda.h.h.", 1 },'; echo '+ { "Sebastian Lackner", "nvcuda: Add stub dll.", 1 },'; - echo '+ { "Michael Müller", "nvcuda: First implementation.", 1 },'; + echo '+ { "Michael Müller", "nvcuda: First implementation.", 2 },'; echo '+ { "Sebastian Lackner", "nvcuda: Implement new functions added in CUDA 6.5.", 1 },'; echo '+ { "Michael Müller", "nvcuda: Properly wrap undocumented '\''ContextStorage'\'' interface and add tests.", 1 },'; echo '+ { "Michael Müller", "nvcuda: Emulate two d3d9 initialization functions.", 1 },';