adt - improved concurrent fixed memory pool policy to send buffers from thread cache to central cache

master
Francisco 4 years ago committed by Francisco Paisana
parent eb38ff43ab
commit 60cd7e6cfe

@ -21,25 +21,26 @@ namespace srsran {
/**
* Concurrent fixed size memory pool made of blocks of equal size
* Each worker keeps a separate thread-local memory block cache that uses for fast allocation/deallocation.
* When this cache gets depleted, the worker tries to obtain blocks from a shared memory block cache
* Note: This pool does not implement stealing of blocks between workers, so it is possible that a worker can't allocate
* while another worker still has blocks in its own cache. This situation is avoided by upper bounding the
* size of each worker cache
* Note2: Taking into account the usage of thread_local, this class is made a singleton
* Each worker keeps a separate thread-local memory block cache that it uses for fast allocation/deallocation.
* When this cache gets depleted, the worker tries to obtain blocks from a central memory block cache.
* When accessing a thread local cache, no locks are required.
* Since there is no stealing of blocks between workers, it is possible that a worker can't allocate while another
* worker still has blocks in its own cache. To minimize the impact of this event, an upper bound is place on a worker
* thread cache size. Once a worker reaches that upper bound, it sends half of its stored blocks to the central cache.
* Note: Taking into account the usage of thread_local, this class is made a singleton
* Note2: No considerations were made regarding false sharing between threads. It is assumed that the blocks are big
* enough to fill a cache line.
* @tparam NofObjects number of objects in the pool
* @tparam ObjSize object size
*/
template <size_t ObjSize, bool DebugSanitizeAddress = false>
class concurrent_fixed_memory_pool
{
static_assert(ObjSize > 256, "This pool is particularly designed for large objects");
static_assert(ObjSize > 256, "This pool is particularly designed for large objects.");
using pool_type = concurrent_fixed_memory_pool<ObjSize, DebugSanitizeAddress>;
struct obj_storage_t {
typename std::aligned_storage<ObjSize, alignof(detail::max_alignment_t)>::type buffer;
std::thread::id worker_id;
explicit obj_storage_t(std::thread::id id_) : worker_id(id_) {}
};
const static size_t batch_steal_size = 16;
@ -47,17 +48,17 @@ class concurrent_fixed_memory_pool
// ctor only accessible from singleton get_instance()
explicit concurrent_fixed_memory_pool(size_t nof_objects_)
{
srsran_assert(nof_objects_ > 0, "A positive pool size must be provided");
srsran_assert(nof_objects_ > batch_steal_size, "A positive pool size must be provided");
std::lock_guard<std::mutex> lock(mutex);
allocated_blocks.resize(nof_objects_);
for (std::unique_ptr<obj_storage_t>& b : allocated_blocks) {
b.reset(new obj_storage_t(std::this_thread::get_id()));
b.reset(new obj_storage_t());
srsran_assert(b.get() != nullptr, "Failed to instantiate fixed memory pool");
shared_mem_cache.push(static_cast<void*>(b.get()));
central_mem_cache.push(static_cast<void*>(b.get()));
}
max_objs_per_cache = allocated_blocks.size() / 16;
max_objs_per_cache = max_objs_per_cache < batch_steal_size ? batch_steal_size : max_objs_per_cache;
local_growth_thres = allocated_blocks.size() / 16;
local_growth_thres = local_growth_thres < batch_steal_size ? batch_steal_size : local_growth_thres;
}
public:
@ -89,9 +90,9 @@ public:
if (node == nullptr) {
// fill the thread local cache enough for this and next allocations
std::array<void*, batch_steal_size> popped_blocks;
size_t n = shared_mem_cache.try_pop(popped_blocks);
size_t n = central_mem_cache.try_pop(popped_blocks);
for (size_t i = 0; i < n; ++i) {
new (popped_blocks[i]) obj_storage_t(worker_ctxt->id);
new (popped_blocks[i]) obj_storage_t();
worker_ctxt->cache.push(static_cast<void*>(popped_blocks[i]));
}
node = worker_ctxt->cache.try_pop();
@ -116,19 +117,17 @@ public:
srsran_assert(std::any_of(allocated_blocks.begin(),
allocated_blocks.end(),
[block_ptr](const std::unique_ptr<obj_storage_t>& b) { return b.get() == block_ptr; }),
"Error deallocating block with address 0x%lx.",
"Error deallocating block with address 0x%lx",
(long unsigned)block_ptr);
}
if (block_ptr->worker_id != worker_ctxt->id or worker_ctxt->cache.size() >= max_objs_per_cache) {
// if block was allocated in a different thread or local cache reached max capacity, send block to shared
// container
shared_mem_cache.push(static_cast<void*>(block_ptr));
return;
}
// push to local memory block cache
worker_ctxt->cache.push(static_cast<void*>(p));
if (worker_ctxt->cache.size() >= local_growth_thres) {
// if local cache reached max capacity, send half of the blocks to central cache
central_mem_cache.steal_blocks(worker_ctxt->cache, worker_ctxt->cache.size() / 2);
}
}
void enable_logger(bool enabled)
@ -143,8 +142,16 @@ public:
void print_all_buffers()
{
std::lock_guard<std::mutex> lock(mutex);
printf("There are %zd/%zd buffers in shared block container.\n", shared_mem_cache.size(), allocated_blocks.size());
auto* worker = get_worker_cache();
size_t tot_blocks = 0;
{
std::lock_guard<std::mutex> lock(mutex);
tot_blocks = allocated_blocks.size();
}
printf("There are %zd/%zd buffers in shared block container. This thread contains %zd in its local cache\n",
central_mem_cache.size(),
tot_blocks,
worker->cache.size());
}
private:
@ -155,9 +162,8 @@ private:
worker_ctxt() : id(std::this_thread::get_id()) {}
~worker_ctxt()
{
while (not cache.is_empty()) {
pool_type::get_instance()->shared_mem_cache.push(cache.try_pop());
}
mutexed_memblock_cache& central_cache = pool_type::get_instance()->central_mem_cache;
central_cache.steal_blocks(cache, cache.size());
}
};
@ -178,10 +184,10 @@ private:
}
}
size_t max_objs_per_cache = 0;
size_t local_growth_thres = 0;
srslog::basic_logger* logger = nullptr;
mutexed_memblock_cache shared_mem_cache;
mutexed_memblock_cache central_mem_cache;
std::mutex mutex;
std::vector<std::unique_ptr<obj_storage_t> > allocated_blocks;
};

@ -106,6 +106,14 @@ public:
stack.push(block);
}
void steal_blocks(memblock_cache& other, size_t max_n) noexcept
{
std::lock_guard<std::mutex> lock(mutex);
for (size_t i = 0; i < max_n and not other.is_empty(); ++i) {
stack.push(other.try_pop());
}
}
uint8_t* try_pop() noexcept
{
std::lock_guard<std::mutex> lock(mutex);

Loading…
Cancel
Save