radeonsi: pin the winsys thread to the requested L3 cache (v2)
author: Marek Olšák <marek.olsak@amd.com>
Thu, 6 Sep 2018 03:13:56 +0000 (23:13 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Fri, 7 Sep 2018 20:03:36 +0000 (16:03 -0400)
v2: rebase

Reviewed-by: Brian Paul <brianp@vmware.com>
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c

index 99a793f90284712679727ae90b564a3e73ccd1d1..bb732ab314b17d2df8d87b7e3b44d34a02c467bd 100644 (file)
@@ -257,6 +257,14 @@ struct radeon_winsys {
     void (*query_info)(struct radeon_winsys *ws,
                        struct radeon_info *info);
 
+    /**
+     * A hint for the winsys that it should pin its execution threads to
+     * a group of cores sharing a specific L3 cache if the CPU has multiple
+     * L3 caches. This is needed for good multithreading performance on
+     * AMD Zen CPUs.
+     */
+    void (*pin_threads_to_L3_cache)(struct radeon_winsys *ws, unsigned cache);
+
     /**************************************************************************
      * Buffer management. Buffer attributes are mostly fixed over its lifetime.
      *
index c259c260550c73aadf746d18ff065996f34c8f3d..a5088adcf2411242644498482ab3e731a5c9c462 100644 (file)
@@ -346,6 +346,20 @@ static void si_set_log_context(struct pipe_context *ctx,
                u_log_add_auto_logger(log, si_auto_log_cs, sctx);
 }
 
+static void si_set_context_param(struct pipe_context *ctx,
+                                enum pipe_context_param param,
+                                unsigned value)
+{
+       struct radeon_winsys *ws = ((struct si_context *)ctx)->ws;
+       /* Only the L3 pinning param is handled; 'value' is passed to the winsys. */
+       switch (param) {
+       case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE:
+               ws->pin_threads_to_L3_cache(ws, value);
+               break;
+       default:; /* every other param is silently ignored */
+       }
+}
+
 static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                               unsigned flags)
 {
@@ -366,6 +380,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        sctx->b.emit_string_marker = si_emit_string_marker;
        sctx->b.set_debug_callback = si_set_debug_callback;
        sctx->b.set_log_context = si_set_log_context;
+       sctx->b.set_context_param = si_set_context_param;
        sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
        sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
 
index dcbc075e3c5e1326714812cd77b646cc2db929a1..f32bbd9d08658795b24018b44e1301e7094985ab 100644 (file)
@@ -30,6 +30,7 @@
 #include "amdgpu_cs.h"
 #include "amdgpu_public.h"
 
+#include "util/u_cpu_detect.h"
 #include "util/u_hash_table.h"
 #include "util/hash_table.h"
 #include "util/xmlconfig.h"
@@ -235,6 +236,14 @@ static const char* amdgpu_get_chip_name(struct radeon_winsys *ws)
    return amdgpu_get_marketing_name(dev);
 }
 
+static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
+                                           unsigned cache)
+{ /* NOTE(review): no util_queue_is_initialized() guard, unlike radeon - confirm */
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+   /* Pins only cs_queue.threads[0]; any other queue threads are untouched. */
+   util_pin_thread_to_L3(ws->cs_queue.threads[0], cache,
+                         util_cpu_caps.cores_per_L3);
+}
 
 PUBLIC struct radeon_winsys *
 amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
@@ -314,6 +323,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
    ws->base.query_value = amdgpu_query_value;
    ws->base.read_registers = amdgpu_read_registers;
    ws->base.get_chip_name = amdgpu_get_chip_name;
+   ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
 
    amdgpu_bo_init_functions(ws);
    amdgpu_cs_init_functions(ws);
index f8702e7c6015b2ea15a6335cf65dd093abdc72ad..19472a50ce159d2d91813310349384293a750f2d 100644 (file)
@@ -29,6 +29,7 @@
 #include "radeon_drm_cs.h"
 #include "radeon_drm_public.h"
 
+#include "util/u_cpu_detect.h"
 #include "util/u_memory.h"
 #include "util/u_hash_table.h"
 
@@ -797,6 +798,17 @@ static int handle_compare(void *key1, void *key2)
     return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
 }
 
+static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
+                                           unsigned cache)
+{
+    struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
+    /* No-op unless cs_queue was initialized; then only threads[0] is pinned. */
+    if (util_queue_is_initialized(&rws->cs_queue)) {
+        util_pin_thread_to_L3(rws->cs_queue.threads[0], cache,
+                              util_cpu_caps.cores_per_L3);
+    }
+}
+
 PUBLIC struct radeon_winsys *
 radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
                         radeon_screen_create_t screen_create)
@@ -864,6 +876,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
     ws->base.unref = radeon_winsys_unref;
     ws->base.destroy = radeon_winsys_destroy;
     ws->base.query_info = radeon_query_info;
+    ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
     ws->base.cs_request_feature = radeon_cs_request_feature;
     ws->base.query_value = radeon_query_value;
     ws->base.read_registers = radeon_read_registers;