From 67fb6128adf8fc03d429393e98f0982b42a40e64 Mon Sep 17 00:00:00 2001
From: Zhang_Xinfeng
Date: Wed, 27 Dec 2023 09:59:29 +0800
Subject: [PATCH 06/12] add INTEL MEDIA ALLOC MODE to specify the memory
 alignment

This key is used to allocate bigger pages.
The env variable INTEL_MEDIA_ALLOC_MODE can also be used; the feature is not
enabled by default.
mode 0 is the default mode
mode 1: sizes < 1M align to 64K
mode 2: sizes > 1M && <= 3M align to 1M, > 3M align to 2M
mode 3 combines mode 1 and mode 2

Upstream-Status: Backport [https://github.com/intel/media-driver/commit/765dd939dcc5562d18cca18e5eda505bda952797]

Signed-off-by: Lim, Siew Hoon
---
 .../linux/common/os/i915/mos_bufmgr.c        | 75 +++++++++++++++----
 .../common/os/i915_production/mos_bufmgr.c   |  1 +
 .../common/os/mos_context_specific_next.cpp  | 21 +++++-
 .../common/os/mos_user_setting_specific.cpp  |  7 ++
 4 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/media_softlet/linux/common/os/i915/mos_bufmgr.c b/media_softlet/linux/common/os/i915/mos_bufmgr.c
index c0b3ba027..b623d0129 100644
--- a/media_softlet/linux/common/os/i915/mos_bufmgr.c
+++ b/media_softlet/linux/common/os/i915/mos_bufmgr.c
@@ -138,7 +138,7 @@ struct mos_bufmgr_gem {
     int exec_count;
 
     /** Array of lists of cached gem objects of power-of-two sizes */
-    struct mos_gem_bo_bucket cache_bucket[14 * 4];
+    struct mos_gem_bo_bucket cache_bucket[64];
     int num_buckets;
     time_t time;
 
@@ -3852,9 +3852,9 @@ add_bucket(struct mos_bufmgr_gem *bufmgr_gem, int size)
 }
 
 static void
-init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem)
+init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem, uint8_t alloc_mode)
 {
-    unsigned long size, cache_max_size = 64 * 1024 * 1024;
+    unsigned long size, cache_max_size = 64 * 1024 * 1024, unit_size;
 
     /* OK, so power of two buckets was too wasteful of memory.
      * Give 3 other sizes between each power of two, to hopefully
@@ -3864,17 +3864,63 @@ init_cache_buckets(struct mos_bufmgr_gem *bufmgr_gem)
      * width/height alignment and rounding of sizes to pages will
      * get us useful cache hit rates anyway)
      */
-    add_bucket(bufmgr_gem, 4096);
-    add_bucket(bufmgr_gem, 4096 * 2);
-    add_bucket(bufmgr_gem, 4096 * 3);
+    /* alloc_mode 0 is default alloc_mode
+     * alloc_mode 1 rounding up to 64K for all < 1M
+     * alloc_mode 2 rounding up to 2M for size > 1M
+     * alloc_mode 3 rounding up to 2M for size > 1M and 64K for size <= 1M */
+    if( alloc_mode > 3 )
+        alloc_mode = 0;
+
+    if ( 0 == alloc_mode || 2 == alloc_mode)
+    {
+        // < 1M normal alloc_mode
+        add_bucket(bufmgr_gem, 4096);
+        add_bucket(bufmgr_gem, 4096 * 2);
+        add_bucket(bufmgr_gem, 4096 * 3);
+        /* Initialize the linked lists for BO reuse cache. */
+        for (size = 4 * 4096; size < 1024 * 1024; size *= 2) {
+            add_bucket(bufmgr_gem, size);
+            add_bucket(bufmgr_gem, size + size * 1 / 4);
+            add_bucket(bufmgr_gem, size + size * 2 / 4);
+            add_bucket(bufmgr_gem, size + size * 3 / 4);
+        }
 
-    /* Initialize the linked lists for BO reuse cache. */
-    for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
-        add_bucket(bufmgr_gem, size);
+        add_bucket(bufmgr_gem, 1024 * 1024);
+    }
+    if (1 == alloc_mode || 3 == alloc_mode)
+    {
+        // < 1M 64k alignment
+        unit_size = 64 * 1024;
+        for (size = unit_size; size <= 1024 * 1024; size += unit_size)
+        {
+            add_bucket(bufmgr_gem, size);
+        }
+    }
+    if( 0 == alloc_mode || 1 == alloc_mode)
+    {
+        //> 1M is normal alloc_mode
+        add_bucket(bufmgr_gem, 1280 * 1024);
+        add_bucket(bufmgr_gem, 1536 * 1024);
+        add_bucket(bufmgr_gem, 1792 * 1024);
+
+        for (size = 2 * 1024 * 1024; size < cache_max_size; size *= 2) {
+            add_bucket(bufmgr_gem, size);
+            add_bucket(bufmgr_gem, size + size * 1 / 4);
+            add_bucket(bufmgr_gem, size + size * 2 / 4);
+            add_bucket(bufmgr_gem, size + size * 3 / 4);
+        }
+    }
+    if( 2 == alloc_mode || 3 == alloc_mode)
+    {
+        //> 1M rolling to 2M
+        unit_size = 2 * 1024 * 1024;
+        add_bucket(bufmgr_gem, unit_size);
+        add_bucket(bufmgr_gem, 3 * 1024 * 1024);
 
-        add_bucket(bufmgr_gem, size + size * 1 / 4);
-        add_bucket(bufmgr_gem, size + size * 2 / 4);
-        add_bucket(bufmgr_gem, size + size * 3 / 4);
+        for (size = 4 * 1024 * 1024; size <= cache_max_size; size += unit_size)
+        {
+            add_bucket(bufmgr_gem, size);
+        }
     }
 }
 
@@ -5100,6 +5146,7 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
     struct drm_i915_gem_get_aperture aperture;
     drm_i915_getparam_t gp;
     int ret, tmp;
+    uint8_t alloc_mode;
     bool exec2 = false;
 
     pthread_mutex_lock(&bufmgr_list_mutex);
@@ -5352,10 +5399,12 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
      *
      * Every 4 was too few for the blender benchmark.
      */
+    alloc_mode = (uint8_t)(batch_size & 0xff);
+    batch_size &= 0xffffff00;
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
     DRMINITLISTHEAD(&bufmgr_gem->named);
-    init_cache_buckets(bufmgr_gem);
+    init_cache_buckets(bufmgr_gem,alloc_mode);
 
     DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
 
diff --git a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
index 90b5685b1..b3574f7d3 100644
--- a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
+++ b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c
@@ -5403,6 +5403,7 @@ mos_bufmgr_gem_init_i915(int fd, int batch_size)
      *
      * Every 4 was too few for the blender benchmark.
      */
+    batch_size &= 0xffffff00;
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
     DRMINITLISTHEAD(&bufmgr_gem->named);
diff --git a/media_softlet/linux/common/os/mos_context_specific_next.cpp b/media_softlet/linux/common/os/mos_context_specific_next.cpp
index 9e9e3ff7e..543e262d1 100644
--- a/media_softlet/linux/common/os/mos_context_specific_next.cpp
+++ b/media_softlet/linux/common/os/mos_context_specific_next.cpp
@@ -64,6 +64,7 @@ MOS_STATUS OsContextSpecificNext::Init(DDI_DEVICE_CONTEXT ddiDriverContext)
     uint32_t iDeviceId = 0;
     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
     uint32_t value = 0;
+    uint32_t mode = 0;
     MediaUserSettingSharedPtr userSettingPtr = nullptr;
 
     MOS_OS_FUNCTION_ENTER;
@@ -89,7 +90,25 @@ MOS_STATUS OsContextSpecificNext::Init(DDI_DEVICE_CONTEXT ddiDriverContext)
 
     userSettingPtr = MosInterface::MosGetUserSettingInstance(osDriverContext);
 
-    m_bufmgr = mos_bufmgr_gem_init(m_fd, BATCH_BUFFER_SIZE, &m_deviceType);
+    mode = BATCH_BUFFER_SIZE;
+    ReadUserSetting(
+        userSettingPtr,
+        value,
+        "INTEL MEDIA ALLOC MODE",
+        MediaUserSetting::Group::Device);
+
+    if (value)
+    {
+        mode |= (value & 0x000000ff);
+    }
+    value = 0;
+    /* no need to set batch buffer size after switch to softpin
+     * keep it, just for test during relocation to softpin transition
+     * now , it could be a debug method , but is actually useless
+     * so it is safe to reuse the lowest 8bit to convey addtional information
+     * more suitable solution is deleting it , or add additional parameter*/
+
+    m_bufmgr = mos_bufmgr_gem_init(m_fd, (int)mode, &m_deviceType);
     if (nullptr == m_bufmgr)
     {
         MOS_OS_ASSERTMESSAGE("Not able to allocate buffer manager, fd=0x%d", m_fd);
diff --git a/media_softlet/linux/common/os/mos_user_setting_specific.cpp b/media_softlet/linux/common/os/mos_user_setting_specific.cpp
index 6be8b4298..caed584f4 100644
--- a/media_softlet/linux/common/os/mos_user_setting_specific.cpp
+++ b/media_softlet/linux/common/os/mos_user_setting_specific.cpp
@@ -52,5 +52,12 @@ MOS_STATUS MosUserSetting::InitMosUserSettingSpecific(MediaUserSettingSharedPtr
         0,
         true); //"Enable VM Bind."
 
+    DeclareUserSettingKey(
+        userSettingPtr,
+        "INTEL MEDIA ALLOC MODE",
+        MediaUserSetting::Group::Device,
+        0,
+        false); //
+
     return MOS_STATUS_SUCCESS;
 }
-- 
2.40.1
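
Reviewer note (not part of the patch, illustration only): the bucket layout that the patched init_cache_buckets() builds for each INTEL MEDIA ALLOC MODE value, and the size a request ends up rounded to, can be sanity-checked with the standalone sketch below. It copies the bucket-population logic of the new code; build_buckets(), pick_bucket() and the sample request sizes are hypothetical helpers made up for this example, not driver APIs. The closing comment mirrors how the patch carries the mode in the low 8 bits of the batch_size argument to mos_bufmgr_gem_init().

/* Standalone illustration -- not driver code. */
#include <stdio.h>
#include <stdint.h>

#define MAX_BUCKETS 64
#define CACHE_MAX   (64UL * 1024 * 1024)

static unsigned long buckets[MAX_BUCKETS];
static int nbuckets;

static void add_bucket(unsigned long size)
{
    if (nbuckets < MAX_BUCKETS)
        buckets[nbuckets++] = size;
}

/* Rebuild the bucket sizes the patched init_cache_buckets() creates. */
static void build_buckets(uint8_t alloc_mode)
{
    unsigned long size, unit_size;

    nbuckets = 0;
    if (alloc_mode > 3)
        alloc_mode = 0;                             /* out-of-range falls back to default */

    if (alloc_mode == 0 || alloc_mode == 2) {       /* <= 1M: original 4K-based buckets */
        add_bucket(4096);
        add_bucket(4096 * 2);
        add_bucket(4096 * 3);
        for (size = 4 * 4096; size < 1024 * 1024; size *= 2) {
            add_bucket(size);
            add_bucket(size + size * 1 / 4);
            add_bucket(size + size * 2 / 4);
            add_bucket(size + size * 3 / 4);
        }
        add_bucket(1024 * 1024);
    }
    if (alloc_mode == 1 || alloc_mode == 3)         /* <= 1M: 64K steps */
        for (size = 64 * 1024; size <= 1024 * 1024; size += 64 * 1024)
            add_bucket(size);
    if (alloc_mode == 0 || alloc_mode == 1) {       /* > 1M: original buckets */
        add_bucket(1280 * 1024);
        add_bucket(1536 * 1024);
        add_bucket(1792 * 1024);
        for (size = 2 * 1024 * 1024; size < CACHE_MAX; size *= 2) {
            add_bucket(size);
            add_bucket(size + size * 1 / 4);
            add_bucket(size + size * 2 / 4);
            add_bucket(size + size * 3 / 4);
        }
    }
    if (alloc_mode == 2 || alloc_mode == 3) {       /* > 1M: 2M and 3M, then 2M steps */
        unit_size = 2 * 1024 * 1024;
        add_bucket(unit_size);
        add_bucket(3 * 1024 * 1024);
        for (size = 4 * 1024 * 1024; size <= CACHE_MAX; size += unit_size)
            add_bucket(size);
    }
}

/* Smallest cached bucket that can hold the request (0 = no bucket, direct allocation). */
static unsigned long pick_bucket(unsigned long request)
{
    for (int i = 0; i < nbuckets; i++)
        if (buckets[i] >= request)
            return buckets[i];
    return 0;
}

int main(void)
{
    const unsigned long requests[] =
        { 70 * 1024, 900 * 1024, 1536 * 1024, 5 * 1024 * 1024 };

    for (uint8_t mode = 0; mode <= 3; mode++) {
        build_buckets(mode);
        printf("mode %u:", mode);
        for (unsigned i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
            printf("  %lu -> %lu", requests[i], pick_bucket(requests[i]));
        printf("\n");
    }
    /* The driver itself passes the mode in the low byte of batch_size, e.g.
     *   mos_bufmgr_gem_init(fd, BATCH_BUFFER_SIZE | alloc_mode, ...);
     * and mos_bufmgr_gem_init_i915() unpacks it again with (batch_size & 0xff). */
    return 0;
}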