From 67fb6128adf8fc03d429393e98f0982b42a40e64 Mon Sep 17 00:00:00 2001
From: Zhang_Xinfeng
Date: Wed, 27 Dec 2023 09:59:29 +0800
Subject: [PATCH 06/12] add INTEL MEDIA ALLOC MODE to specify the memory
 alignment

This key is used to allocate bigger pages.
The env variable INTEL_MEDIA_ALLOC_MODE can also be used; the feature is not
enabled by default.
mode 0 is the default mode
mode 1: sizes < 1M align to 64K
mode 2: sizes > 1M && <= 3M align to 1M, > 3M align to 2M
mode 3 combines mode 1 and mode 2

Upstream-Status: Backport [https://github.com/intel/media-driver/commit/765dd939dcc5562d18cca18e5eda505bda952797]

Signed-off-by: Lim, Siew Hoon
---
 .../linux/common/os/i915/mos_bufmgr.c        | 75 +++++++++++++++----
 .../common/os/i915_production/mos_bufmgr.c   |  1 +
 .../common/os/mos_context_specific_next.cpp  | 21 +++++-
 .../common/os/mos_user_setting_specific.cpp  |  7 ++
 4 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/media_softlet/linux/common/os/i915/mos_bufmgr.c b/media_softlet/linux/common/os/i915/mos_bufmgr.c
index c0b3ba027..b623d0129 100644
--- a/media_softlet/linux/common/os/i915/mos_bufmgr.c
+++ b/media_softlet/linux/common/os/i915/mos_bufmgr.c
@@ -138,7 +138,7 @@ struct mos_bufmgr_gem {
     int exec_count;
 
     /** Array of lists of cached gem objects of power-of-two sizes */
-    struct mos_gem_bo_bucket cache_bucket[14 * 4];
+    struct mos_gem_bo_bucket cache_bucket[64];
     int num_buckets;
     time_t time;
 
@@ -3852,9 +3852,9 @@ add_bucket(struct mos_bufmgr_gem *bufmgr_gem, int size)
 }
 
 static void
-init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem)
+init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem, uint8_t alloc_mode)
 {
-    unsigned long size, cache_max_size = 64 * 1024 * 1024;
+    unsigned long size, cache_max_size = 64 * 1024 * 1024, unit_size;
 
     /* OK, so power of two buckets was too wasteful of memory.
      * Give 3 other sizes between each power of two, to hopefully
@@ -3864,17 +3864,63 @@ init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem)
      * width/height alignment and rounding of sizes to pages will
      * get us useful cache hit rates anyway)
      */
-    add_bucket(bufmgr_gem, 4096);
-    add_bucket(bufmgr_gem, 4096 * 2);
-    add_bucket(bufmgr_gem, 4096 * 3);
+    /* alloc_mode 0 is default alloc_mode
+     * alloc_mode 1 rounding up to 64K for all < 1M
+     * alloc_mode 2 rounding up to 2M for size > 1M
+     * alloc_mode 3 rounding up to 2M for size > 1M and 64K for size <= 1M */
+    if( alloc_mode > 3 )
+        alloc_mode = 0;
+
+    if ( 0 == alloc_mode || 2 == alloc_mode)
+    {
+        // < 1M normal alloc_mode
+        add_bucket(bufmgr_gem, 4096);
+        add_bucket(bufmgr_gem, 4096 * 2);
+        add_bucket(bufmgr_gem, 4096 * 3);
+        /* Initialize the linked lists for BO reuse cache. */
+        for (size = 4 * 4096; size < 1024 * 1024; size *= 2) {
+            add_bucket(bufmgr_gem, size);
+            add_bucket(bufmgr_gem, size + size * 1 / 4);
+            add_bucket(bufmgr_gem, size + size * 2 / 4);
+            add_bucket(bufmgr_gem, size + size * 3 / 4);
+        }
 
-    /* Initialize the linked lists for BO reuse cache. */
-    for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
-        add_bucket(bufmgr_gem, size);
+        add_bucket(bufmgr_gem, 1024 * 1024);
+    }
+    if (1 == alloc_mode || 3 == alloc_mode)
+    {
+        // < 1M 64k alignment
+        unit_size = 64 * 1024;
+        for (size = unit_size; size <= 1024 * 1024; size += unit_size)
+        {
+            add_bucket(bufmgr_gem, size);
+        }
+    }
+    if( 0 == alloc_mode || 1 == alloc_mode)
+    {
+        //> 1M is normal alloc_mode
+        add_bucket(bufmgr_gem, 1280 * 1024);
+        add_bucket(bufmgr_gem, 1536 * 1024);
+        add_bucket(bufmgr_gem, 1792 * 1024);
+
+        for (size = 2 * 1024 * 1024; size < cache_max_size; size *= 2) {
+            add_bucket(bufmgr_gem, size);
+            add_bucket(bufmgr_gem, size + size * 1 / 4);
+            add_bucket(bufmgr_gem, size + size * 2 / 4);
+            add_bucket(bufmgr_gem, size + size * 3 / 4);
+        }
+    }
+    if( 2 == alloc_mode || 3 == alloc_mode)
+    {
+        //> 1M rolling to 2M
+        unit_size = 2 * 1024 * 1024;
+        add_bucket(bufmgr_gem, unit_size);
+        add_bucket(bufmgr_gem, 3 * 1024 * 1024);
 
-        add_bucket(bufmgr_gem, size + size * 1 / 4);
-        add_bucket(bufmgr_gem, size + size * 2 / 4);
-        add_bucket(bufmgr_gem, size + size * 3 / 4);
+        for (size = 4 * 1024 * 1024; size <= cache_max_size; size += unit_size)
+        {
+            add_bucket(bufmgr_gem, size);
+        }
     }
 }
 
@@ -5100,6 +5146,7 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
     struct drm_i915_gem_get_aperture aperture;
     drm_i915_getparam_t gp;
     int ret, tmp;
+    uint8_t alloc_mode;
     bool exec2 = false;
 
     pthread_mutex_lock(&bufmgr_list_mutex);
@@ -5352,10 +5399,12 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
      *
      * Every 4 was too few for the blender benchmark.
      */
+    alloc_mode = (uint8_t)(batch_size & 0xff);
+    batch_size &= 0xffffff00;
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
     DRMINITLISTHEAD(&bufmgr_gem->named);
-    init_cache_buckets(bufmgr_gem);
+    init_cache_buckets(bufmgr_gem,alloc_mode);
 
     DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
 
diff --git a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
index 90b5685b1..b3574f7d3 100644
--- a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
+++ b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
@@ -5403,6 +5403,7 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
      *
      * Every 4 was too few for the blender benchmark.
      */
+    batch_size &= 0xffffff00;
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
     DRMINITLISTHEAD(&bufmgr_gem->named);
diff --git a/media_softlet/linux/common/os/mos_context_specific_next.cpp b/media_softlet/linux/common/os/mos_context_specific_next.cpp
index 9e9e3ff7e..543e262d1 100644
--- a/media_softlet/linux/common/os/mos_context_specific_next.cpp
+++ b/media_softlet/linux/common/os/mos_context_specific_next.cpp
@@ -64,6 +64,7 @@ MOS_STATUS OsContextSpecificNext::Init(DDI_DEVICE_CONTEXT ddiDriverContext)
     uint32_t iDeviceId = 0;
     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
     uint32_t value = 0;
+    uint32_t mode = 0;
     MediaUserSettingSharedPtr userSettingPtr = nullptr;
 
     MOS_OS_FUNCTION_ENTER;
@@ -89,7 +90,25 @@ MOS_STATUS OsContextSpecificNext::Init(DDI_DEVICE_CONTEXT ddiDriverContext)
 
     userSettingPtr = MosInterface::MosGetUserSettingInstance(osDriverContext);
 
-    m_bufmgr = mos_bufmgr_gem_init(m_fd, BATCH_BUFFER_SIZE, &m_deviceType);
+    mode = BATCH_BUFFER_SIZE;
+    ReadUserSetting(
+        userSettingPtr,
+        value,
+        "INTEL MEDIA ALLOC MODE",
+        MediaUserSetting::Group::Device);
+
+    if (value)
+    {
+        mode |= (value & 0x000000ff);
+    }
+    value = 0;
+    /* no need to set batch buffer size after switch to softpin
+     * keep it, just for test during relocation to softpin transition
+     * now , it could be a debug method , but is actually useless
+     * so it is safe to reuse the lowest 8bit to convey addtional information
+     * more suitable solution is deleting it , or add additional parameter*/
+
+    m_bufmgr = mos_bufmgr_gem_init(m_fd, (int)mode, &m_deviceType);
     if (nullptr == m_bufmgr)
     {
         MOS_OS_ASSERTMESSAGE("Not able to allocate buffer manager, fd=0x%d", m_fd);
diff --git a/media_softlet/linux/common/os/mos_user_setting_specific.cpp b/media_softlet/linux/common/os/mos_user_setting_specific.cpp
index 6be8b4298..caed584f4 100644
--- a/media_softlet/linux/common/os/mos_user_setting_specific.cpp
+++ b/media_softlet/linux/common/os/mos_user_setting_specific.cpp
@@ -52,5 +52,12 @@ MOS_STATUS MosUserSetting::InitMosUserSettingSpecific(MediaUserSettingSharedPtr
         0,
         true); //"Enable VM Bind."
 
+    DeclareUserSettingKey(
+        userSettingPtr,
+        "INTEL MEDIA ALLOC MODE",
+        MediaUserSetting::Group::Device,
+        0,
+        false); //
+
     return MOS_STATUS_SUCCESS;
 }
-- 
2.40.1
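
Reviewer note (not part of the patch, illustration only): the bucket layout that the patched init_cache_buckets() builds for each INTEL MEDIA ALLOC MODE value, and the size a request ends up rounded to, can be sanity-checked with the standalone sketch below. It copies the bucket-population logic of the new code; build_buckets(), pick_bucket() and the sample request sizes are hypothetical helpers made up for this example, not driver APIs. The closing comment mirrors how the patch carries the mode in the low 8 bits of the batch_size argument to mos_bufmgr_gem_init().

/* Standalone illustration -- not driver code. */
#include <stdio.h>
#include <stdint.h>

#define MAX_BUCKETS 64
#define CACHE_MAX   (64UL * 1024 * 1024)

static unsigned long buckets[MAX_BUCKETS];
static int nbuckets;

static void add_bucket(unsigned long size)
{
    if (nbuckets < MAX_BUCKETS)
        buckets[nbuckets++] = size;
}

/* Rebuild the bucket sizes the patched init_cache_buckets() creates. */
static void build_buckets(uint8_t alloc_mode)
{
    unsigned long size, unit_size;

    nbuckets = 0;
    if (alloc_mode > 3)
        alloc_mode = 0;                             /* out-of-range falls back to default */

    if (alloc_mode == 0 || alloc_mode == 2) {       /* <= 1M: original 4K-based buckets */
        add_bucket(4096);
        add_bucket(4096 * 2);
        add_bucket(4096 * 3);
        for (size = 4 * 4096; size < 1024 * 1024; size *= 2) {
            add_bucket(size);
            add_bucket(size + size * 1 / 4);
            add_bucket(size + size * 2 / 4);
            add_bucket(size + size * 3 / 4);
        }
        add_bucket(1024 * 1024);
    }
    if (alloc_mode == 1 || alloc_mode == 3)         /* <= 1M: 64K steps */
        for (size = 64 * 1024; size <= 1024 * 1024; size += 64 * 1024)
            add_bucket(size);
    if (alloc_mode == 0 || alloc_mode == 1) {       /* > 1M: original buckets */
        add_bucket(1280 * 1024);
        add_bucket(1536 * 1024);
        add_bucket(1792 * 1024);
        for (size = 2 * 1024 * 1024; size < CACHE_MAX; size *= 2) {
            add_bucket(size);
            add_bucket(size + size * 1 / 4);
            add_bucket(size + size * 2 / 4);
            add_bucket(size + size * 3 / 4);
        }
    }
    if (alloc_mode == 2 || alloc_mode == 3) {       /* > 1M: 2M and 3M, then 2M steps */
        unit_size = 2 * 1024 * 1024;
        add_bucket(unit_size);
        add_bucket(3 * 1024 * 1024);
        for (size = 4 * 1024 * 1024; size <= CACHE_MAX; size += unit_size)
            add_bucket(size);
    }
}

/* Smallest cached bucket that can hold the request (0 = no bucket, direct allocation). */
static unsigned long pick_bucket(unsigned long request)
{
    for (int i = 0; i < nbuckets; i++)
        if (buckets[i] >= request)
            return buckets[i];
    return 0;
}

int main(void)
{
    const unsigned long requests[] =
        { 70 * 1024, 900 * 1024, 1536 * 1024, 5 * 1024 * 1024 };

    for (uint8_t mode = 0; mode <= 3; mode++) {
        build_buckets(mode);
        printf("mode %u:", mode);
        for (unsigned i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
            printf("  %lu -> %lu", requests[i], pick_bucket(requests[i]));
        printf("\n");
    }
    /* The driver itself passes the mode in the low byte of batch_size, e.g.
     *   mos_bufmgr_gem_init(fd, BATCH_BUFFER_SIZE | alloc_mode, ...);
     * and mos_bufmgr_gem_init_i915() unpacks it again with (batch_size & 0xff). */
    return 0;
}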