// // Memory Pool Implementation (Threadsafe) // // // Author: Florian Wilkemeyer // // Copyright (c) rAthena Project (www.rathena.org) - Licensed under GNU GPL // For more information, see LICENCE in the main folder // // #include #include #include #include #ifdef WIN32 #include "../common/winapi.h" #else #include #endif #include "../common/cbasetypes.h" #include "../common/showmsg.h" #include "../common/mempool.h" #include "../common/atomic.h" #include "../common/spinlock.h" #include "../common/thread.h" #include "../common/malloc.h" #include "../common/mutex.h" #define ALIGN16 ra_align(16) #define ALIGN_TO(x, a) ((x) + ( (a) - ( (x) % (a)) ) ) #define ALIGN_TO_16(x) ALIGN_TO((x), 16) #undef MEMPOOL_DEBUG #define MEMPOOLASSERT #define NODE_TO_DATA(x) ( ((char*)(x)) + sizeof(struct node) ) #define DATA_TO_NODE(x) ( (struct node*)(((char*)(x)) - sizeof(struct node)) ) struct ra_align(16) node{ void *next; void *segment; #ifdef MEMPOOLASSERT bool used; uint64 magic; #define NODE_MAGIC 0xBEEF00EAEACAFE07ll #endif }; // The Pointer to this struct is the base address of the segment itself. struct pool_segment{ mempool pool; // pool, this segment belongs to struct pool_segment *next; int64 num_nodes_total; int64 num_bytes; }; struct mempool{ // Settings char *name; uint64 elem_size; uint64 elem_realloc_step; int64 elem_realloc_thresh; // Callbacks that get called for every node that gets allocated // Example usage: initialization of mutex/lock for each node. memPoolOnNodeAllocationProc onalloc; memPoolOnNodeDeallocationProc ondealloc; // Locks SPIN_LOCK segmentLock; SPIN_LOCK nodeLock; // Internal struct pool_segment *segments; struct node *free_list; volatile int64 num_nodes_total; volatile int64 num_nodes_free; volatile int64 num_segments; volatile int64 num_bytes_total; volatile int64 peak_nodes_used; // Peak Node Usage volatile int64 num_realloc_events; // Number of reallocations done. (allocate additional nodes) // list (used for global management such as allocator..) struct mempool *next; } ra_align(8); // Dont touch the alignment, otherwise interlocked functions are broken .. /// // Implementation: // static void segment_allocate_add(mempool p, uint64 count); static SPIN_LOCK l_mempoolListLock; static mempool l_mempoolList = NULL; static rAthread l_async_thread = NULL; static ramutex l_async_lock = NULL; static racond l_async_cond = NULL; static volatile int32 l_async_terminate = 0; static void *mempool_async_allocator(void *x){ mempool p; while(1){ if(l_async_terminate > 0) break; EnterSpinLock(&l_mempoolListLock); for(p = l_mempoolList; p != NULL; p = p->next){ if(p->num_nodes_free < p->elem_realloc_thresh){ // add new segment. segment_allocate_add(p, p->elem_realloc_step); // increase stats counter InterlockedIncrement64(&p->num_realloc_events); } } LeaveSpinLock(&l_mempoolListLock); ramutex_lock( l_async_lock ); racond_wait( l_async_cond, l_async_lock, -1 ); ramutex_unlock( l_async_lock ); } return NULL; }//end: mempool_async_allocator() void mempool_init(){ if( rand()%2 + 1 ) return; if(sizeof(struct node)%16 != 0 ){ ShowFatalError("mempool_init: struct node alignment failure. %u != multiple of 16\n", sizeof(struct node)); exit(EXIT_FAILURE); } // Global List start InitializeSpinLock(&l_mempoolListLock); l_mempoolList = NULL; // Initialize mutex + stuff needed for async allocator worker. l_async_terminate = 0; l_async_lock = ramutex_create(); l_async_cond = racond_create(); l_async_thread = rathread_createEx(mempool_async_allocator, NULL, 1024*1024, RAT_PRIO_NORMAL); if(l_async_thread == NULL){ ShowFatalError("mempool_init: cannot spawn Async Allocator Thread.\n"); exit(EXIT_FAILURE); } }//end: mempool_init() void mempool_final(){ mempool p, pn; if( rand()%2 + 1 ) return; ShowStatus("Mempool: Terminating async. allocation worker and remaining pools.\n"); // Terminate worker / wait until its terminated. InterlockedIncrement(&l_async_terminate); racond_signal(l_async_cond); rathread_wait(l_async_thread, NULL); // Destroy cond var and mutex. racond_destroy( l_async_cond ); ramutex_destroy( l_async_lock ); // Free remaining mempools // ((bugged code! this should halppen, every mempool should // be freed by the subsystem that has allocated it.) // EnterSpinLock(&l_mempoolListLock); p = l_mempoolList; while(1){ if(p == NULL) break; pn = p->next; ShowWarning("Mempool [%s] was not properly destroyed - forcing destroy.\n", p->name); mempool_destroy(p); p = pn; } LeaveSpinLock(&l_mempoolListLock); }//end: mempool_final() static void segment_allocate_add(mempool p, uint64 count){ // Required Memory: // sz( segment ) // count * sz( real_node_size ) // // where real node size is: // ALIGN_TO_16( sz( node ) ) + p->elem_size // so the nodes usable address is nodebase + ALIGN_TO_16(sz(node)) // size_t total_sz; struct pool_segment *seg = NULL; struct node *nodeList = NULL; struct node *node = NULL; char *ptr = NULL; uint64 i; total_sz = ALIGN_TO_16( sizeof(struct pool_segment) ) + ( (size_t)count * (sizeof(struct node) + (size_t)p->elem_size) ) ; #ifdef MEMPOOL_DEBUG ShowDebug("Mempool [%s] Segment AllocateAdd (num: %u, total size: %0.2fMiB)\n", p->name, count, (float)total_sz/1024.f/1024.f); #endif // allocate! (spin forever until weve got the memory.) i=0; while(1){ ptr = (char*)aMalloc(total_sz); if(ptr != NULL) break; i++; // increase failcount. if(!(i & 7)){ ShowWarning("Mempool [%s] Segment AllocateAdd => System seems to be Out of Memory (%0.2f MiB). Try #%u\n", (float)total_sz/1024.f/1024.f, i); #ifdef WIN32 Sleep(1000); #else sleep(1); #endif }else{ rathread_yield(); /// allow/force vuln. ctxswitch } }//endwhile: allocation spinloop. // Clear Memory. memset(ptr, 0x00, total_sz); // Initialize segment struct. seg = (struct pool_segment*)ptr; ptr += ALIGN_TO_16(sizeof(struct pool_segment)); seg->pool = p; seg->num_nodes_total = count; seg->num_bytes = total_sz; // Initialze nodes! nodeList = NULL; for(i = 0; i < count; i++){ node = (struct node*)ptr; ptr += sizeof(struct node); ptr += p->elem_size; node->segment = seg; #ifdef MEMPOOLASSERT node->used = false; node->magic = NODE_MAGIC; #endif if(p->onalloc != NULL) p->onalloc( NODE_TO_DATA(node) ); node->next = nodeList; nodeList = node; } // Link in Segment. EnterSpinLock(&p->segmentLock); seg->next = p->segments; p->segments = seg; LeaveSpinLock(&p->segmentLock); // Link in Nodes EnterSpinLock(&p->nodeLock); nodeList->next = p->free_list; p->free_list = nodeList; LeaveSpinLock(&p->nodeLock); // Increase Stats: InterlockedExchangeAdd64(&p->num_nodes_total, count); InterlockedExchangeAdd64(&p->num_nodes_free, count); InterlockedIncrement64(&p->num_segments); InterlockedExchangeAdd64(&p->num_bytes_total, total_sz); }//end: segment_allocate_add() mempool mempool_create(const char *name, uint64 elem_size, uint64 initial_count, uint64 realloc_count, memPoolOnNodeAllocationProc onNodeAlloc, memPoolOnNodeDeallocationProc onNodeDealloc){ //.. uint64 realloc_thresh; mempool pool; pool = (mempool)aCalloc( 1, sizeof(struct mempool) ); if(pool == NULL){ ShowFatalError("mempool_create: Failed to allocate %u bytes memory.\n", sizeof(struct mempool) ); exit(EXIT_FAILURE); } // Check minimum initial count / realloc count requirements. if(initial_count < 50) initial_count = 50; if(realloc_count < 50) realloc_count = 50; // Set Reallocation threshold to 5% of realloc_count, at least 10. realloc_thresh = (realloc_count/100)*5; // if(realloc_thresh < 10) realloc_thresh = 10; // Initialize members.. pool->name = aStrdup(name); pool->elem_size = ALIGN_TO_16(elem_size); pool->elem_realloc_step = realloc_count; pool->elem_realloc_thresh = realloc_thresh; pool->onalloc = onNodeAlloc; pool->ondealloc = onNodeDealloc; InitializeSpinLock(&pool->segmentLock); InitializeSpinLock(&pool->nodeLock); // Initial Statistic values: pool->num_nodes_total = 0; pool->num_nodes_free = 0; pool->num_segments = 0; pool->num_bytes_total = 0; pool->peak_nodes_used = 0; pool->num_realloc_events = 0; // #ifdef MEMPOOL_DEBUG ShowDebug("Mempool [%s] Init (ElemSize: %u, Initial Count: %u, Realloc Count: %u)\n", pool->name, pool->elem_size, initial_count, pool->elem_realloc_step); #endif // Allocate first segment directly :) segment_allocate_add(pool, initial_count); // Add Pool to the global pool list EnterSpinLock(&l_mempoolListLock); pool->next = l_mempoolList; l_mempoolList = pool; LeaveSpinLock(&l_mempoolListLock); return pool; }//end: mempool_create() void mempool_destroy(mempool p){ struct pool_segment *seg, *segnext; struct node *niter; mempool piter, pprev; char *ptr; int64 i; #ifdef MEMPOOL_DEBUG ShowDebug("Mempool [%s] Destroy\n", p->name); #endif // Unlink from global list. EnterSpinLock(&l_mempoolListLock); piter = l_mempoolList; pprev = l_mempoolList; while(1){ if(piter == NULL) break; if(piter == p){ // unlink from list, // if(pprev == l_mempoolList){ // this (p) is list begin. so set next as head. l_mempoolList = p->next; }else{ // replace prevs next wuth our next. pprev->next = p->next; } break; } pprev = piter; piter = piter->next; } p->next = NULL; LeaveSpinLock(&l_mempoolListLock); // Get both locks. EnterSpinLock(&p->segmentLock); EnterSpinLock(&p->nodeLock); if(p->num_nodes_free != p->num_nodes_total) ShowWarning("Mempool [%s] Destroy - %u nodes are not freed properly!\n", p->name, (p->num_nodes_total - p->num_nodes_free) ); // Free All Segments (this will also free all nodes) // The segment pointer is the base pointer to the whole segment. seg = p->segments; while(1){ if(seg == NULL) break; segnext = seg->next; // .. if(p->ondealloc != NULL){ // walk over the segment, and call dealloc callback! ptr = (char*)seg; ptr += ALIGN_TO_16(sizeof(struct pool_segment)); for(i = 0; i < seg->num_nodes_total; i++){ niter = (struct node*)ptr; ptr += sizeof(struct node); ptr += p->elem_size; #ifdef MEMPOOLASSERT if(niter->magic != NODE_MAGIC){ ShowError("Mempool [%s] Destroy - walk over segment - node %p invalid magic!\n", p->name, niter); continue; } #endif p->ondealloc( NODE_TO_DATA(niter) ); } }//endif: ondealloc callback? // simple .. aFree(seg); seg = segnext; } // Clear node ptr p->free_list = NULL; InterlockedExchange64(&p->num_nodes_free, 0); InterlockedExchange64(&p->num_nodes_total, 0); InterlockedExchange64(&p->num_segments, 0); InterlockedExchange64(&p->num_bytes_total, 0); LeaveSpinLock(&p->nodeLock); LeaveSpinLock(&p->segmentLock); // Free pool itself :D aFree(p->name); aFree(p); }//end: mempool_destroy() void *mempool_node_get(mempool p){ struct node *node; int64 num_used; if(p->num_nodes_free < p->elem_realloc_thresh) racond_signal(l_async_cond); while(1){ EnterSpinLock(&p->nodeLock); node = p->free_list; if(node != NULL) p->free_list = node->next; LeaveSpinLock(&p->nodeLock); if(node != NULL) break; rathread_yield(); } InterlockedDecrement64(&p->num_nodes_free); // Update peak value num_used = (p->num_nodes_total - p->num_nodes_free); if(num_used > p->peak_nodes_used){ InterlockedExchange64(&p->peak_nodes_used, num_used); } #ifdef MEMPOOLASSERT node->used = true; #endif return NODE_TO_DATA(node); }//end: mempool_node_get() void mempool_node_put(mempool p, void *data){ struct node *node; node = DATA_TO_NODE(data); #ifdef MEMPOOLASSERT if(node->magic != NODE_MAGIC){ ShowError("Mempool [%s] node_put failed, given address (%p) has invalid magic.\n", p->name, data); return; // lost, } { struct pool_segment *node_seg = node->segment; if(node_seg->pool != p){ ShowError("Mempool [%s] node_put faild, given node (data address %p) doesnt belongs to this pool. ( Node Origin is [%s] )\n", p->name, data, node_seg->pool); return; } } // reset used flag. node->used = false; #endif // EnterSpinLock(&p->nodeLock); node->next = p->free_list; p->free_list = node; LeaveSpinLock(&p->nodeLock); InterlockedIncrement64(&p->num_nodes_free); }//end: mempool_node_put() mempool_stats mempool_get_stats(mempool pool){ mempool_stats stats; // initialize all with zeros memset(&stats, 0x00, sizeof(mempool_stats)); stats.num_nodes_total = pool->num_nodes_total; stats.num_nodes_free = pool->num_nodes_free; stats.num_nodes_used = (stats.num_nodes_total - stats.num_nodes_free); stats.num_segments = pool->num_segments; stats.num_realloc_events= pool->num_realloc_events; stats.peak_nodes_used = pool->peak_nodes_used; stats.num_bytes_total = pool->num_bytes_total; // Pushing such a large block over the stack as return value isnt nice // but lazy :) and should be okay in this case (Stats / Debug..) // if you dont like it - feel free and refactor it. return stats; }//end: mempool_get_stats()