From 3bf7fe4763b3c5ee0b1fb7369f5d7c1008de4567 Mon Sep 17 00:00:00 2001 From: Sebastian Lackner Date: Sat, 22 Jul 2017 17:26:59 +0200 Subject: [PATCH] Added patch to improve heap allocation performance. --- patches/ntdll-Heap_FreeLists/definition | 1 + ...elper-function-to-delete-free-blocks.patch | 88 +++ ...-Improve-heap-allocation-performance.patch | 551 ++++++++++++++++++ patches/ntdll-Heap_Improvements/definition | 2 + patches/patchinstall.sh | 31 +- 5 files changed, 665 insertions(+), 8 deletions(-) create mode 100644 patches/ntdll-Heap_Improvements/0001-ntdll-Add-helper-function-to-delete-free-blocks.patch create mode 100644 patches/ntdll-Heap_Improvements/0002-ntdll-Improve-heap-allocation-performance.patch create mode 100644 patches/ntdll-Heap_Improvements/definition diff --git a/patches/ntdll-Heap_FreeLists/definition b/patches/ntdll-Heap_FreeLists/definition index 44a7106e..b8149695 100644 --- a/patches/ntdll-Heap_FreeLists/definition +++ b/patches/ntdll-Heap_FreeLists/definition @@ -1 +1,2 @@ Fixes: Improvement for heap allocation performance +Disabled: true diff --git a/patches/ntdll-Heap_Improvements/0001-ntdll-Add-helper-function-to-delete-free-blocks.patch b/patches/ntdll-Heap_Improvements/0001-ntdll-Add-helper-function-to-delete-free-blocks.patch new file mode 100644 index 00000000..aa9ca107 --- /dev/null +++ b/patches/ntdll-Heap_Improvements/0001-ntdll-Add-helper-function-to-delete-free-blocks.patch @@ -0,0 +1,88 @@ +From e46de2c7f78d1b8b336868276a23f4ca1d191599 Mon Sep 17 00:00:00 2001 +From: Sebastian Lackner +Date: Sat, 22 Jul 2017 06:41:53 +0200 +Subject: ntdll: Add helper function to delete free blocks. + +--- + dlls/ntdll/heap.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c +index f928ebff500..dc45a688925 100644 +--- a/dlls/ntdll/heap.c ++++ b/dlls/ntdll/heap.c +@@ -485,6 +485,17 @@ static inline void HEAP_InsertFreeBlock( HEAP *heap, ARENA_FREE *pArena, BOOL la + + + /*********************************************************************** ++ * HEAP_DeleteFreeBlock ++ * ++ * Delete a free block from the free list. ++ */ ++static inline void HEAP_DeleteFreeBlock( HEAP *heap, ARENA_FREE *pArena ) ++{ ++ list_remove( &pArena->entry ); ++} ++ ++ ++/*********************************************************************** + * HEAP_FindSubHeap + * Find the sub-heap containing a given address. + * +@@ -592,7 +603,7 @@ static void HEAP_CreateFreeBlock( SUBHEAP *subheap, void *ptr, SIZE_T size ) + { + /* Remove the next arena from the free list */ + ARENA_FREE *pNext = (ARENA_FREE *)((char *)ptr + size); +- list_remove( &pNext->entry ); ++ HEAP_DeleteFreeBlock( subheap->heap, pNext ); + size += (pNext->size & ARENA_SIZE_MASK) + sizeof(*pNext); + mark_block_free( pNext, sizeof(ARENA_FREE), flags ); + } +@@ -647,7 +658,7 @@ static void HEAP_MakeInUseBlockFree( SUBHEAP *subheap, ARENA_INUSE *pArena ) + pFree = *((ARENA_FREE **)pArena - 1); + size += (pFree->size & ARENA_SIZE_MASK) + sizeof(ARENA_FREE); + /* Remove it from the free list */ +- list_remove( &pFree->entry ); ++ HEAP_DeleteFreeBlock( heap, pFree ); + } + else pFree = (ARENA_FREE *)pArena; + +@@ -667,7 +678,7 @@ static void HEAP_MakeInUseBlockFree( SUBHEAP *subheap, ARENA_INUSE *pArena ) + + size = 0; + /* Remove the free block from the list */ +- list_remove( &pFree->entry ); ++ HEAP_DeleteFreeBlock( heap, pFree ); + /* Remove the subheap from the list */ + list_remove( &subheap->entry ); + /* Free the memory */ +@@ -1697,7 +1708,7 @@ PVOID WINAPI RtlAllocateHeap( HANDLE heap, ULONG flags, SIZE_T size ) + + /* Remove the arena from the free list */ + +- list_remove( &pArena->entry ); ++ HEAP_DeleteFreeBlock( heapPtr, pArena ); + + /* Build the in-use arena */ + +@@ -1854,7 +1865,7 @@ PVOID WINAPI RtlReAllocateHeap( HANDLE heap, ULONG flags, PVOID ptr, SIZE_T size + { + /* The next block is free and large enough */ + ARENA_FREE *pFree = (ARENA_FREE *)pNext; +- list_remove( &pFree->entry ); ++ HEAP_DeleteFreeBlock( heapPtr, pFree ); + pArena->size += (pFree->size & ARENA_SIZE_MASK) + sizeof(*pFree); + if (!HEAP_Commit( subheap, pArena, rounded_size )) goto oom; + notify_realloc( pArena + 1, oldActualSize, size ); +@@ -1872,7 +1883,7 @@ PVOID WINAPI RtlReAllocateHeap( HANDLE heap, ULONG flags, PVOID ptr, SIZE_T size + + /* Build the in-use arena */ + +- list_remove( &pNew->entry ); ++ HEAP_DeleteFreeBlock( heapPtr, pNew ); + pInUse = (ARENA_INUSE *)pNew; + pInUse->size = (pInUse->size & ~ARENA_FLAG_FREE) + + sizeof(ARENA_FREE) - sizeof(ARENA_INUSE); +-- +2.13.1 + diff --git a/patches/ntdll-Heap_Improvements/0002-ntdll-Improve-heap-allocation-performance.patch b/patches/ntdll-Heap_Improvements/0002-ntdll-Improve-heap-allocation-performance.patch new file mode 100644 index 00000000..c7432cc5 --- /dev/null +++ b/patches/ntdll-Heap_Improvements/0002-ntdll-Improve-heap-allocation-performance.patch @@ -0,0 +1,551 @@ +From ced9d6d9ab67f54a59ee6a6726e002e50d91c38c Mon Sep 17 00:00:00 2001 +From: Sebastian Lackner +Date: Sat, 22 Jul 2017 07:21:45 +0200 +Subject: ntdll: Improve heap allocation performance. (v2) + +--- + configure.ac | 9 ++ + dlls/ntdll/heap.c | 312 +++++++++++++++++++++++++++++++++++++++--------------- + 2 files changed, 234 insertions(+), 87 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 533f1e49342..560a58d6977 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -2650,6 +2650,15 @@ then + AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.]) + fi + ++dnl Check for __builtin_ctzl ++AC_CACHE_CHECK([for __builtin_ctzl], ac_cv_have___builtin_ctzl, ++ AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_ctzl(1)]])], ++ [ac_cv_have___builtin_ctzl="yes"], [ac_cv_have___builtin_ctzl="no"])) ++if test "$ac_cv_have___builtin_ctzl" = "yes" ++then ++ AC_DEFINE(HAVE___BUILTIN_CTZL, 1, [Define to 1 if you have the `__builtin_ctzl' built-in function.]) ++fi ++ + dnl Check for __builtin_popcount + AC_CACHE_CHECK([for __builtin_popcount], ac_cv_have___builtin_popcount, + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_popcount(1)]])], +diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c +index 64236204477..f4ddd7bd68a 100644 +--- a/dlls/ntdll/heap.c ++++ b/dlls/ntdll/heap.c +@@ -3,6 +3,7 @@ + * + * Copyright 1996 Alexandre Julliard + * Copyright 1998 Ulrich Weigand ++ * Copyright 2017 Sebastian Lackner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -45,6 +46,7 @@ + #include "ntdll_misc.h" + #include "wine/library.h" + #include "wine/list.h" ++#include "wine/rbtree.h" + #include "wine/debug.h" + #include "wine/server.h" + +@@ -66,7 +68,11 @@ typedef struct tagARENA_FREE + { + DWORD size; /* Block size; must be the first field */ + DWORD magic; /* Magic number */ +- struct list entry; /* Entry in free list */ ++ union ++ { ++ struct list list; /* Entry in free list */ ++ struct wine_rb_entry tree; /* Entry in free tree */ ++ } entry; + } ARENA_FREE; + + typedef struct +@@ -79,9 +85,11 @@ typedef struct + DWORD magic; /* these must remain at the end of the structure */ + } ARENA_LARGE; + +-#define ARENA_FLAG_FREE 0x00000001 /* flags OR'ed with arena size */ +-#define ARENA_FLAG_PREV_FREE 0x00000002 +-#define ARENA_SIZE_MASK (~3) ++#define ARENA_FLAG_FREE_LIST 0x00000001 /* flags OR'ed with arena size */ ++#define ARENA_FLAG_FREE_TREE 0x00000002 ++#define ARENA_FLAG_FREE (ARENA_FLAG_FREE_LIST | ARENA_FLAG_FREE_TREE) ++#define ARENA_FLAG_PREV_FREE 0x00000004 ++#define ARENA_SIZE_MASK (~7) + #define ARENA_LARGE_SIZE 0xfedcba90 /* magic value for 'size' field in large blocks */ + + /* Value for arena 'magic' field */ +@@ -99,6 +107,8 @@ typedef struct + #define LARGE_ALIGNMENT 16 /* large blocks have stricter alignment */ + #define ARENA_OFFSET (ALIGNMENT - sizeof(ARENA_INUSE)) + ++C_ASSERT( (sizeof(ARENA_INUSE) & ~ARENA_SIZE_MASK) == 0 ); ++C_ASSERT( (sizeof(ARENA_FREE) & ~ARENA_SIZE_MASK) == 0 ); + C_ASSERT( sizeof(ARENA_LARGE) % LARGE_ALIGNMENT == 0 ); + + #define ROUND_SIZE(size) ((((size) + ALIGNMENT - 1) & ~(ALIGNMENT-1)) + ARENA_OFFSET) +@@ -107,8 +117,7 @@ C_ASSERT( sizeof(ARENA_LARGE) % LARGE_ALIGNMENT == 0 ); + #define NOISY 0 /* Report all errors */ + + /* minimum data size (without arenas) of an allocated block */ +-/* make sure that it's larger than a free list entry */ +-#define HEAP_MIN_DATA_SIZE ROUND_SIZE(2 * sizeof(struct list)) ++#define HEAP_MIN_DATA_SIZE ROUND_SIZE(sizeof(ARENA_FREE) + sizeof(ARENA_FREE*) - sizeof(ARENA_INUSE)) + /* minimum size that must remain to shrink an allocated block */ + #define HEAP_MIN_SHRINK_SIZE (HEAP_MIN_DATA_SIZE+sizeof(ARENA_FREE)) + /* minimum size to start allocating large blocks */ +@@ -116,19 +125,14 @@ C_ASSERT( sizeof(ARENA_LARGE) % LARGE_ALIGNMENT == 0 ); + /* extra size to add at the end of block for tail checking */ + #define HEAP_TAIL_EXTRA_SIZE(flags) \ + ((flags & HEAP_TAIL_CHECKING_ENABLED) || RUNNING_ON_VALGRIND ? ALIGNMENT : 0) +- +-/* Max size of the blocks on the free lists */ +-static const SIZE_T HEAP_freeListSizes[] = +-{ +- 0x10, 0x20, 0x30, 0x40, 0x60, 0x80, 0x100, 0x200, 0x400, 0x1000, ~0UL +-}; +-#define HEAP_NB_FREE_LISTS (sizeof(HEAP_freeListSizes)/sizeof(HEAP_freeListSizes[0])) +- +-typedef union +-{ +- ARENA_FREE arena; +- void *alignment[4]; +-} FREE_LIST_ENTRY; ++/* size of the blocks on the free lists */ ++#define HEAP_FREELIST_SIZE(index) \ ++ ((DWORD)(((index) * ALIGNMENT) + HEAP_MIN_DATA_SIZE + sizeof(ARENA_INUSE))) ++/* returns index for a given block size */ ++#define HEAP_SIZE_TO_FREELIST_INDEX(size) \ ++ (((size) - HEAP_MIN_DATA_SIZE - sizeof(ARENA_INUSE)) / ALIGNMENT) ++/* number of free lists */ ++#define HEAP_NB_FREE_LISTS 128 + + struct tagHEAP; + +@@ -161,9 +165,17 @@ typedef struct tagHEAP + DWORD pending_pos; /* Position in pending free requests ring */ + ARENA_INUSE **pending_free; /* Ring buffer for pending free requests */ + RTL_CRITICAL_SECTION critSection; /* Critical section for serialization */ +- FREE_LIST_ENTRY *freeList; /* Free lists */ ++ struct list *freeList; /* Free lists */ ++ struct wine_rb_tree freeTree; /* Free tree */ ++ unsigned long freeMask[HEAP_NB_FREE_LISTS / (8 * sizeof(unsigned long))]; + } HEAP; + ++#define HEAP_FREEMASK_BLOCK (8 * sizeof(unsigned long)) ++#define HEAP_FREEMASK_INDEX(x) ((x) / HEAP_FREEMASK_BLOCK) ++#define HEAP_FREEMASK_BIT(x) (1UL << ((x) & (HEAP_FREEMASK_BLOCK - 1))) ++ ++C_ASSERT( HEAP_NB_FREE_LISTS % HEAP_FREEMASK_BLOCK == 0 ); ++ + #define HEAP_MAGIC ((DWORD)('H' | ('E'<<8) | ('A'<<16) | ('P'<<24))) + + #define HEAP_DEF_SIZE 0x110000 /* Default heap size = 1Mb + 64Kb */ +@@ -180,6 +192,30 @@ static HEAP *processHeap; /* main process heap */ + + static BOOL HEAP_IsRealArena( HEAP *heapPtr, DWORD flags, LPCVOID block, BOOL quiet ); + ++/* get arena size for an rb tree entry */ ++static inline DWORD get_arena_size( const struct wine_rb_entry *entry ) ++{ ++ ARENA_FREE *arena = WINE_RB_ENTRY_VALUE( entry, ARENA_FREE, entry.tree ); ++ return (arena->size & ARENA_SIZE_MASK); ++} ++ ++/* return number of trailing 0-bits in x */ ++static inline int ctzl(unsigned long x) ++{ ++#ifdef HAVE___BUILTIN_CTZL ++ return __builtin_ctzl(x); ++#else ++ int c = 1; ++ if (!(x & 0xffffffff)) { x >>= 32; c += 32; } ++ if (!(x & 0x0000ffff)) { x >>= 16; c += 16; } ++ if (!(x & 0x000000ff)) { x >>= 8; c += 8; } ++ if (!(x & 0x0000000f)) { x >>= 4; c += 4; } ++ if (!(x & 0x00000003)) { x >>= 2; c += 2; } ++ c -= (x & 0x00000001); ++ return c; ++#endif ++} ++ + /* mark a block of memory as free for debugging purposes */ + static inline void mark_block_free( void *ptr, SIZE_T size, DWORD flags ) + { +@@ -301,17 +337,6 @@ static void subheap_notify_free_all(SUBHEAP const *subheap) + #endif + } + +-/* locate a free list entry of the appropriate size */ +-/* size is the size of the whole block including the arena header */ +-static inline unsigned int get_freelist_index( SIZE_T size ) +-{ +- unsigned int i; +- +- size -= sizeof(ARENA_FREE); +- for (i = 0; i < HEAP_NB_FREE_LISTS - 1; i++) if (size <= HEAP_freeListSizes[i]) break; +- return i; +-} +- + /* get the memory protection type to use for a given heap */ + static inline ULONG get_protection_type( DWORD flags ) + { +@@ -339,12 +364,31 @@ static void HEAP_Dump( HEAP *heap ) + DPRINTF( "Next: %p Sub-heaps:", LIST_ENTRY( heap->entry.next, HEAP, entry ) ); + LIST_FOR_EACH_ENTRY( subheap, &heap->subheap_list, SUBHEAP, entry ) DPRINTF( " %p", subheap ); + +- DPRINTF( "\nFree lists:\n Block Stat Size Id\n" ); ++ DPRINTF( "\nFree lists:\n" ); + for (i = 0; i < HEAP_NB_FREE_LISTS; i++) +- DPRINTF( "%p free %08lx prev=%p next=%p\n", +- &heap->freeList[i].arena, HEAP_freeListSizes[i], +- LIST_ENTRY( heap->freeList[i].arena.entry.prev, ARENA_FREE, entry ), +- LIST_ENTRY( heap->freeList[i].arena.entry.next, ARENA_FREE, entry )); ++ { ++ BOOL empty = !(heap->freeMask[ HEAP_FREEMASK_INDEX( i ) ] & HEAP_FREEMASK_BIT( i )); ++ ++ DPRINTF( "free %08x: ", HEAP_FREELIST_SIZE( i ) ); ++ if (!empty && !list_empty( &heap->freeList[i] )) ++ { ++ DPRINTF( "head=%p tail=%p\n", ++ LIST_ENTRY( heap->freeList[i].next, ARENA_FREE, entry.list ), ++ LIST_ENTRY( heap->freeList[i].prev, ARENA_FREE, entry.list )); ++ } ++ else if (empty && list_empty( &heap->freeList[i] )) ++ { ++ DPRINTF( "(empty)\n" ); ++ } ++ else ++ { ++ DPRINTF( "(corrupted)\n" ); ++ } ++ } ++ ++ DPRINTF( "free %08x: root=%p\n", ++ HEAP_FREELIST_SIZE( HEAP_NB_FREE_LISTS ), ++ heap->freeTree.root ? LIST_ENTRY( heap->freeTree.root, ARENA_FREE, entry.tree ) : NULL); + + LIST_FOR_EACH_ENTRY( subheap, &heap->subheap_list, SUBHEAP, entry ) + { +@@ -359,11 +403,32 @@ static void HEAP_Dump( HEAP *heap ) + if (*(DWORD *)ptr & ARENA_FLAG_FREE) + { + ARENA_FREE *pArena = (ARENA_FREE *)ptr; +- DPRINTF( "%p %08x free %08x prev=%p next=%p\n", +- pArena, pArena->magic, +- pArena->size & ARENA_SIZE_MASK, +- LIST_ENTRY( pArena->entry.prev, ARENA_FREE, entry ), +- LIST_ENTRY( pArena->entry.next, ARENA_FREE, entry ) ); ++ if ((pArena->size & ARENA_FLAG_FREE) == ARENA_FLAG_FREE_LIST) ++ { ++ DPRINTF( "%p %08x free %08x prev=%p next=%p\n", ++ pArena, pArena->magic, pArena->size & ARENA_SIZE_MASK, ++ LIST_ENTRY( pArena->entry.list.prev, ARENA_FREE, entry.list ), ++ LIST_ENTRY( pArena->entry.list.next, ARENA_FREE, entry.list ) ); ++ } ++ else if ((pArena->size & ARENA_FLAG_FREE) == ARENA_FLAG_FREE_TREE) ++ { ++ ARENA_FREE *parent = NULL, *left = NULL, *right = NULL; ++ ++ if (pArena->entry.tree.parent) ++ parent = WINE_RB_ENTRY_VALUE( pArena->entry.tree.parent, ARENA_FREE, entry.tree ); ++ if (pArena->entry.tree.left) ++ left = WINE_RB_ENTRY_VALUE( pArena->entry.tree.left, ARENA_FREE, entry.tree ); ++ if (pArena->entry.tree.right) ++ right = WINE_RB_ENTRY_VALUE( pArena->entry.tree.right, ARENA_FREE, entry.tree ); ++ ++ DPRINTF( "%p %08x free %08x parent=%p left=%p right=%p\n", ++ pArena, pArena->magic, pArena->size & ARENA_SIZE_MASK, parent, left, right ); ++ } ++ else ++ { ++ DPRINTF( "%p %08x free %08x corrupted\n", ++ pArena, pArena->magic, pArena->size & ARENA_SIZE_MASK ); ++ } + ptr += sizeof(*pArena) + (pArena->size & ARENA_SIZE_MASK); + arenaSize += sizeof(ARENA_FREE); + freeSize += pArena->size & ARENA_SIZE_MASK; +@@ -471,20 +536,19 @@ static HEAP *HEAP_GetPtr( + */ + static inline void HEAP_InsertFreeBlock( HEAP *heap, ARENA_FREE *pArena, BOOL last ) + { +- FREE_LIST_ENTRY *pEntry = heap->freeList + get_freelist_index( pArena->size + sizeof(*pArena) ); +- if (last) ++ SIZE_T index = HEAP_SIZE_TO_FREELIST_INDEX( pArena->size + sizeof(*pArena) ); ++ ++ if (index < HEAP_NB_FREE_LISTS) + { +- /* insert at end of free list, i.e. before the next free list entry */ +- pEntry++; +- if (pEntry == &heap->freeList[HEAP_NB_FREE_LISTS]) pEntry = heap->freeList; +- list_add_before( &pEntry->arena.entry, &pArena->entry ); ++ list_add_tail( &heap->freeList[index], &pArena->entry.list ); ++ heap->freeMask[ HEAP_FREEMASK_INDEX( index ) ] |= HEAP_FREEMASK_BIT( index ); ++ pArena->size |= ARENA_FLAG_FREE_LIST; + } + else + { +- /* insert at head of free list */ +- list_add_after( &pEntry->arena.entry, &pArena->entry ); ++ wine_rb_put( &heap->freeTree, &pArena->size, &pArena->entry.tree ); ++ pArena->size |= ARENA_FLAG_FREE_TREE; + } +- pArena->size |= ARENA_FLAG_FREE; + } + + +@@ -495,7 +559,19 @@ static inline void HEAP_InsertFreeBlock( HEAP *heap, ARENA_FREE *pArena, BOOL la + */ + static inline void HEAP_DeleteFreeBlock( HEAP *heap, ARENA_FREE *pArena ) + { +- list_remove( &pArena->entry ); ++ if ((pArena->size & ARENA_FLAG_FREE) == ARENA_FLAG_FREE_LIST) ++ { ++ if (pArena->entry.list.prev == pArena->entry.list.next) ++ { ++ SIZE_T index = HEAP_SIZE_TO_FREELIST_INDEX( (pArena->size & ARENA_SIZE_MASK) + sizeof(*pArena) ); ++ heap->freeMask[ HEAP_FREEMASK_INDEX( index ) ] &= ~HEAP_FREEMASK_BIT( index ); ++ } ++ list_remove( &pArena->entry.list ); ++ } ++ else ++ { ++ wine_rb_remove( &heap->freeTree, &pArena->entry.tree ); ++ } + } + + +@@ -878,6 +954,15 @@ static BOOL validate_large_arena( HEAP *heap, const ARENA_LARGE *arena, BOOL qui + } + + ++static inline int arena_free_compare( const void *key, const struct wine_rb_entry *entry ) ++{ ++ DWORD arena_size = get_arena_size( entry ); ++ if (*(DWORD *)key > arena_size) return 1; ++ else if (*(DWORD *)key < arena_size) return -1; ++ else return entry->left ? 1 : -1; ++} ++ ++ + /*********************************************************************** + * HEAP_CreateSubHeap + */ +@@ -885,7 +970,6 @@ static SUBHEAP *HEAP_CreateSubHeap( HEAP *heap, LPVOID address, DWORD flags, + SIZE_T commitSize, SIZE_T totalSize ) + { + SUBHEAP *subheap; +- FREE_LIST_ENTRY *pEntry; + unsigned int i; + + if (!address) +@@ -946,17 +1030,21 @@ static SUBHEAP *HEAP_CreateSubHeap( HEAP *heap, LPVOID address, DWORD flags, + subheap->headerSize = ROUND_SIZE( sizeof(HEAP) ); + list_add_head( &heap->subheap_list, &subheap->entry ); + +- /* Build the free lists */ ++ /* Initialize the free tree */ + +- heap->freeList = (FREE_LIST_ENTRY *)((char *)heap + subheap->headerSize); +- subheap->headerSize += HEAP_NB_FREE_LISTS * sizeof(FREE_LIST_ENTRY); +- list_init( &heap->freeList[0].arena.entry ); +- for (i = 0, pEntry = heap->freeList; i < HEAP_NB_FREE_LISTS; i++, pEntry++) +- { +- pEntry->arena.size = 0 | ARENA_FLAG_FREE; +- pEntry->arena.magic = ARENA_FREE_MAGIC; +- if (i) list_add_after( &pEntry[-1].arena.entry, &pEntry->arena.entry ); +- } ++ heap->freeList = (struct list *)((char *)heap + subheap->headerSize); ++ subheap->headerSize += HEAP_NB_FREE_LISTS * sizeof(struct list); ++ for (i = 0; i < HEAP_NB_FREE_LISTS; i++) ++ list_init( &heap->freeList[i] ); ++ ++ /* Initialize the free tree */ ++ ++ wine_rb_init( &heap->freeTree, arena_free_compare ); ++ ++ /* Initialize the free mask */ ++ ++ for (i = 0; i < sizeof(heap->freeMask) / sizeof(heap->freeMask[0]); i++) ++ heap->freeMask[i] = 0; + + /* Initialize critical section */ + +@@ -999,6 +1087,34 @@ static SUBHEAP *HEAP_CreateSubHeap( HEAP *heap, LPVOID address, DWORD flags, + } + + ++/* helper function for HEAP_FindFreeBlock */ ++static struct wine_rb_entry *find_free_block( struct wine_rb_entry *entry, DWORD arena_size ) ++{ ++ for (;;) ++ { ++ if (!entry) return NULL; ++ if (get_arena_size( entry ) >= arena_size) break; ++ entry = entry->right; ++ } ++ ++ for (;;) ++ { ++ if (!entry->left) return entry; ++ if (get_arena_size( entry->left ) < arena_size) break; ++ entry = entry->left; ++ } ++ ++ if (entry->left->right) ++ { ++ struct wine_rb_entry *ret; ++ if ((ret = find_free_block( entry->left->right, arena_size ))) ++ return ret; ++ } ++ ++ return entry; ++} ++ ++ + /*********************************************************************** + * HEAP_FindFreeBlock + * +@@ -1008,26 +1124,39 @@ static SUBHEAP *HEAP_CreateSubHeap( HEAP *heap, LPVOID address, DWORD flags, + static ARENA_FREE *HEAP_FindFreeBlock( HEAP *heap, SIZE_T size, + SUBHEAP **ppSubHeap ) + { ++ struct wine_rb_entry *ptr; ++ unsigned long mask; ++ ARENA_FREE *arena; + SUBHEAP *subheap; +- struct list *ptr; + SIZE_T total_size; +- FREE_LIST_ENTRY *pEntry = heap->freeList + get_freelist_index( size + sizeof(ARENA_INUSE) ); ++ SIZE_T index = HEAP_SIZE_TO_FREELIST_INDEX( size + sizeof(ARENA_INUSE) ); + +- /* Find a suitable free list, and in it find a block large enough */ ++ /* Find a suitable block from the free list */ + +- ptr = &pEntry->arena.entry; +- while ((ptr = list_next( &heap->freeList[0].arena.entry, ptr ))) ++ while (index < HEAP_NB_FREE_LISTS) + { +- ARENA_FREE *pArena = LIST_ENTRY( ptr, ARENA_FREE, entry ); +- SIZE_T arena_size = (pArena->size & ARENA_SIZE_MASK) + +- sizeof(ARENA_FREE) - sizeof(ARENA_INUSE); +- if (arena_size >= size) ++ mask = heap->freeMask[ HEAP_FREEMASK_INDEX( index ) ] & ~(HEAP_FREEMASK_BIT( index ) - 1); ++ if (mask) + { +- subheap = HEAP_FindSubHeap( heap, pArena ); +- if (!HEAP_Commit( subheap, (ARENA_INUSE *)pArena, size )) return NULL; ++ index = (index & ~(HEAP_FREEMASK_BLOCK - 1)) | ctzl( mask ); ++ arena = LIST_ENTRY( heap->freeList[index].next, ARENA_FREE, entry.list ); ++ subheap = HEAP_FindSubHeap( heap, arena ); ++ if (!HEAP_Commit( subheap, (ARENA_INUSE *)arena, size )) return NULL; + *ppSubHeap = subheap; +- return pArena; ++ return arena; + } ++ index = (index + HEAP_FREEMASK_BLOCK) & ~(HEAP_FREEMASK_BLOCK - 1); ++ } ++ ++ /* Find a suitable block from the free tree */ ++ ++ if ((ptr = find_free_block( heap->freeTree.root, size + sizeof(ARENA_INUSE) - sizeof(ARENA_FREE) ))) ++ { ++ arena = WINE_RB_ENTRY_VALUE( ptr, ARENA_FREE, entry.tree ); ++ subheap = HEAP_FindSubHeap( heap, arena ); ++ if (!HEAP_Commit( subheap, (ARENA_INUSE *)arena, size )) return NULL; ++ *ppSubHeap = subheap; ++ return arena; + } + + /* If no block was found, attempt to grow the heap */ +@@ -1115,13 +1244,10 @@ void *grow_virtual_heap( HANDLE handle, SIZE_T *size ) + */ + static BOOL HEAP_IsValidArenaPtr( const HEAP *heap, const ARENA_FREE *ptr ) + { +- unsigned int i; + const SUBHEAP *subheap = HEAP_FindSubHeap( heap, ptr ); + if (!subheap) return FALSE; + if ((const char *)ptr >= (const char *)subheap->base + subheap->headerSize) return TRUE; + if (subheap != &heap->subheap) return FALSE; +- for (i = 0; i < HEAP_NB_FREE_LISTS; i++) +- if (ptr == &heap->freeList[i].arena) return TRUE; + return FALSE; + } + +@@ -1133,7 +1259,7 @@ static BOOL HEAP_ValidateFreeArena( SUBHEAP *subheap, ARENA_FREE *pArena ) + { + DWORD flags = subheap->heap->flags; + SIZE_T size; +- ARENA_FREE *prev, *next; ++ ARENA_FREE *prev = NULL, *next = NULL; + char *heapEnd = (char *)subheap->base + subheap->size; + + /* Check for unaligned pointers */ +@@ -1150,7 +1276,8 @@ static BOOL HEAP_ValidateFreeArena( SUBHEAP *subheap, ARENA_FREE *pArena ) + return FALSE; + } + /* Check size flags */ +- if (!(pArena->size & ARENA_FLAG_FREE) || ++ if (((pArena->size & ARENA_FLAG_FREE) != ARENA_FLAG_FREE_LIST && ++ (pArena->size & ARENA_FLAG_FREE) != ARENA_FLAG_FREE_TREE) || + (pArena->size & ARENA_FLAG_PREV_FREE)) + { + ERR("Heap %p: bad flags %08x for free arena %p\n", +@@ -1164,34 +1291,45 @@ static BOOL HEAP_ValidateFreeArena( SUBHEAP *subheap, ARENA_FREE *pArena ) + ERR("Heap %p: bad size %08lx for free arena %p\n", subheap->heap, size, pArena ); + return FALSE; + } ++ if ((pArena->size & ARENA_FLAG_FREE) == ARENA_FLAG_FREE_LIST) ++ { ++ struct list *list = &subheap->heap->freeList[ HEAP_SIZE_TO_FREELIST_INDEX( size + sizeof(*pArena) ) ]; ++ if (pArena->entry.list.prev != list) ++ prev = LIST_ENTRY( pArena->entry.list.prev, ARENA_FREE, entry.list ); ++ if (pArena->entry.list.next != list) ++ next = LIST_ENTRY( pArena->entry.list.next, ARENA_FREE, entry.list ); ++ } ++ else ++ { ++ if (pArena->entry.tree.left) ++ prev = WINE_RB_ENTRY_VALUE( pArena->entry.tree.left, ARENA_FREE, entry.tree ); ++ if (pArena->entry.tree.right) ++ next = WINE_RB_ENTRY_VALUE( pArena->entry.tree.right, ARENA_FREE, entry.tree ); ++ } + /* Check that next pointer is valid */ +- next = LIST_ENTRY( pArena->entry.next, ARENA_FREE, entry ); +- if (!HEAP_IsValidArenaPtr( subheap->heap, next )) ++ if (next && !HEAP_IsValidArenaPtr( subheap->heap, next )) + { + ERR("Heap %p: bad next ptr %p for arena %p\n", + subheap->heap, next, pArena ); + return FALSE; + } + /* Check that next arena is free */ +- if (!(next->size & ARENA_FLAG_FREE) || (next->magic != ARENA_FREE_MAGIC)) ++ if (next && (!(next->size & ARENA_FLAG_FREE) || (next->magic != ARENA_FREE_MAGIC))) + { + ERR("Heap %p: next arena %p invalid for %p\n", + subheap->heap, next, pArena ); + return FALSE; + } + /* Check that prev pointer is valid */ +- prev = LIST_ENTRY( pArena->entry.prev, ARENA_FREE, entry ); +- if (!HEAP_IsValidArenaPtr( subheap->heap, prev )) ++ if (prev && !HEAP_IsValidArenaPtr( subheap->heap, prev )) + { + ERR("Heap %p: bad prev ptr %p for arena %p\n", + subheap->heap, prev, pArena ); + return FALSE; + } + /* Check that prev arena is free */ +- if (!(prev->size & ARENA_FLAG_FREE) || (prev->magic != ARENA_FREE_MAGIC)) ++ if (prev && (!(prev->size & ARENA_FLAG_FREE) || (prev->magic != ARENA_FREE_MAGIC))) + { +- /* this often means that the prev arena got overwritten +- * by a memory write before that prev arena */ + ERR("Heap %p: prev arena %p invalid for %p\n", + subheap->heap, prev, pArena ); + return FALSE; +-- +2.13.1 + diff --git a/patches/ntdll-Heap_Improvements/definition b/patches/ntdll-Heap_Improvements/definition new file mode 100644 index 00000000..26a829c0 --- /dev/null +++ b/patches/ntdll-Heap_Improvements/definition @@ -0,0 +1,2 @@ +Fixes: [43224] Improvement for heap allocation performance +Depends: ntdll-Grow_Virtual_Heap diff --git a/patches/patchinstall.sh b/patches/patchinstall.sh index 942e1202..7594fd6c 100755 --- a/patches/patchinstall.sh +++ b/patches/patchinstall.sh @@ -244,7 +244,7 @@ patch_enable_all () enable_ntdll_Fix_Alignment="$1" enable_ntdll_Grow_Virtual_Heap="$1" enable_ntdll_HashLinks="$1" - enable_ntdll_Heap_FreeLists="$1" + enable_ntdll_Heap_Improvements="$1" enable_ntdll_Hide_Wine_Exports="$1" enable_ntdll_Interrupt_0x2e="$1" enable_ntdll_Junction_Points="$1" @@ -993,8 +993,8 @@ patch_enable () ntdll-HashLinks) enable_ntdll_HashLinks="$2" ;; - ntdll-Heap_FreeLists) - enable_ntdll_Heap_FreeLists="$2" + ntdll-Heap_Improvements) + enable_ntdll_Heap_Improvements="$2" ;; ntdll-Hide_Wine_Exports) enable_ntdll_Hide_Wine_Exports="$2" @@ -2530,6 +2530,13 @@ if test "$enable_ntdll_NtQueryEaFile" -eq 1; then enable_kernel32_SetFileCompletionNotificationModes=1 fi +if test "$enable_ntdll_Heap_Improvements" -eq 1; then + if test "$enable_ntdll_Grow_Virtual_Heap" -gt 1; then + abort "Patchset ntdll-Grow_Virtual_Heap disabled, but ntdll-Heap_Improvements depends on that." + fi + enable_ntdll_Grow_Virtual_Heap=1 +fi + if test "$enable_ntdll_HashLinks" -eq 1; then if test "$enable_ntdll_CLI_Images" -gt 1; then abort "Patchset ntdll-CLI_Images disabled, but ntdll-HashLinks depends on that." @@ -6060,15 +6067,23 @@ if test "$enable_ntdll_HashLinks" -eq 1; then ) >> "$patchlist" fi -# Patchset ntdll-Heap_FreeLists +# Patchset ntdll-Heap_Improvements +# | +# | This patchset has the following (direct or indirect) dependencies: +# | * ntdll-Grow_Virtual_Heap +# | +# | This patchset fixes the following Wine bugs: +# | * [#43224] Improvement for heap allocation performance # | # | Modified files: -# | * dlls/ntdll/heap.c +# | * configure.ac, dlls/ntdll/heap.c # | -if test "$enable_ntdll_Heap_FreeLists" -eq 1; then - patch_apply ntdll-Heap_FreeLists/0001-ntdll-Improve-heap-allocation-performance-by-using-m.patch +if test "$enable_ntdll_Heap_Improvements" -eq 1; then + patch_apply ntdll-Heap_Improvements/0001-ntdll-Add-helper-function-to-delete-free-blocks.patch + patch_apply ntdll-Heap_Improvements/0002-ntdll-Improve-heap-allocation-performance.patch ( - printf '%s\n' '+ { "Steaphan Greene", "ntdll: Improve heap allocation performance by using more fine-grained free lists.", 1 },'; + printf '%s\n' '+ { "Sebastian Lackner", "ntdll: Add helper function to delete free blocks.", 1 },'; + printf '%s\n' '+ { "Sebastian Lackner", "ntdll: Improve heap allocation performance.", 2 },'; ) >> "$patchlist" fi