swr: knob overrides for Intel Xeon Phi
author Tim Rowley <timothy.o.rowley@intel.com>
Tue, 17 Oct 2017 20:11:19 +0000 (15:11 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Thu, 19 Oct 2017 18:10:55 +0000 (13:10 -0500)
The Xeon Phi (KNL) architecture benefits from having more threads and more work outstanding.

Patch by Jan Zielinski.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/swr_context.cpp
src/gallium/drivers/swr/swr_context.h
src/gallium/drivers/swr/swr_loader.cpp
src/gallium/drivers/swr/swr_scratch.cpp
src/gallium/drivers/swr/swr_screen.h

index 34d9a259fee139d6c362b3f1ced0015247a757dc..b61720cd3000fcca25e1457e0103fcca49342d8e 100644 (file)
@@ -39,6 +39,7 @@
 
 #include "api.h"
 #include "backend.h"
+#include "knobs.h"
 
 static struct pipe_surface *
 swr_create_surface(struct pipe_context *pipe,
@@ -483,6 +484,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
    ctx->blendJIT =
       new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
 
+   ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
+
    SWR_CREATECONTEXT_INFO createInfo;
    memset(&createInfo, 0, sizeof(createInfo));
    createInfo.privateStateSize = sizeof(swr_draw_context);
@@ -491,6 +494,30 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
    createInfo.pfnClearTile = swr_StoreHotTileClear;
    createInfo.pfnUpdateStats = swr_UpdateStats;
    createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
+
+   SWR_THREADING_INFO threadingInfo {0};
+
+   threadingInfo.MAX_WORKER_THREADS        = KNOB_MAX_WORKER_THREADS;
+   threadingInfo.MAX_NUMA_NODES            = KNOB_MAX_NUMA_NODES;
+   threadingInfo.MAX_CORES_PER_NUMA_NODE   = KNOB_MAX_CORES_PER_NUMA_NODE;
+   threadingInfo.MAX_THREADS_PER_CORE      = KNOB_MAX_THREADS_PER_CORE;
+   threadingInfo.SINGLE_THREADED           = KNOB_SINGLE_THREADED;
+
+   // Use non-standard settings for KNL
+   if (swr_screen(p_screen)->is_knl)
+   {
+      if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE"))
+         threadingInfo.MAX_THREADS_PER_CORE  = 2;
+
+      if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT"))
+      {
+         ctx->max_draws_in_flight = 2048;
+         createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight;
+      }
+   }
+
+   createInfo.pThreadInfo = &threadingInfo;
+
    ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo);
 
    ctx->api.pfnSwrInit();
index 8bed78f869a5d0c9223986467b9f8642bb7aa689..5c280ee365ca75f6f4a9aff9287d5da593f77ebc 100644 (file)
@@ -173,6 +173,8 @@ struct swr_context {
    unsigned dirty; /**< Mask of SWR_NEW_x flags */
 
    SWR_INTERFACE api;
+
+   uint32_t max_draws_in_flight;
 };
 
 static INLINE struct swr_context *
index e205fe2d7ef7212d17200e6518d7aa94553e8e88..9d6f918e3491bb55621d885f12573c54dca0777f 100644 (file)
@@ -38,11 +38,14 @@ swr_create_screen(struct sw_winsys *winsys)
 
    util_cpu_detect();
 
+   bool is_knl = false;
+
    if (!strlen(filename) &&
        util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
 #if HAVE_SWR_KNL
       fprintf(stderr, "KNL ");
       sprintf(filename, "%s%s%s", UTIL_DL_PREFIX, "swrKNL", UTIL_DL_EXT);
+      is_knl = true;
 #else
       fprintf(stderr, "KNL (not built) ");
 #endif
@@ -99,6 +102,7 @@ swr_create_screen(struct sw_winsys *winsys)
 
    struct pipe_screen *screen = swr_create_screen_internal(winsys);
    swr_screen(screen)->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
+   swr_screen(screen)->is_knl = is_knl;
 
    return screen;
 }
index d298a48dc0bbe61fd18d87ec55456125d805f230..8afe73c30e4d4661adb21afa970981f74574e024 100644 (file)
@@ -45,7 +45,7 @@ swr_copy_to_scratch_space(struct swr_context *ctx,
       ptr = ctx->api.pfnSwrAllocDrawContextMemory(ctx->swrContext, size, 4);
    } else {
       /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
-      unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT;
+      uint32_t max_size_in_flight = size * ctx->max_draws_in_flight;
 
       /* Need to grow space */
       if (max_size_in_flight > space->current_size) {
index 1c4e3315836dc0a59dfb5cb2cdabf322bd1a7d4e..81b1a18b028cb69df838dcf1cf0efac3f0d8dd4a 100644 (file)
@@ -54,6 +54,9 @@ struct swr_screen {
 #endif
 
    PFNSwrGetInterface pfnSwrGetInterface;
+
+   /* Do we run on Xeon Phi? */
+   bool is_knl;
 };
 
 static INLINE struct swr_screen *