From bfda35c8dd4bc602a3b174377dfea92319438e2b Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 17 Oct 2017 15:11:19 -0500 Subject: [PATCH] swr: knob overrides for Intel Xeon Phi Architecture benefits from having more threads/work outstanding. Patch by Jan Zielinski. Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/swr_context.cpp | 27 +++++++++++++++++++++++++ src/gallium/drivers/swr/swr_context.h | 2 ++ src/gallium/drivers/swr/swr_loader.cpp | 4 ++++ src/gallium/drivers/swr/swr_scratch.cpp | 2 +- src/gallium/drivers/swr/swr_screen.h | 3 +++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 34d9a259fee..b61720cd300 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -39,6 +39,7 @@ #include "api.h" #include "backend.h" +#include "knobs.h" static struct pipe_surface * swr_create_surface(struct pipe_context *pipe, @@ -483,6 +484,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) ctx->blendJIT = new std::unordered_map; + ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT; + SWR_CREATECONTEXT_INFO createInfo; memset(&createInfo, 0, sizeof(createInfo)); createInfo.privateStateSize = sizeof(swr_draw_context); @@ -491,6 +494,30 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; + + SWR_THREADING_INFO threadingInfo {0}; + + threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; + threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; + threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; + threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; + threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; + + // Use non-standard settings for KNL + if (swr_screen(p_screen)->is_knl) + { + if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE")) + threadingInfo.MAX_THREADS_PER_CORE = 2; + + if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT")) + { + ctx->max_draws_in_flight = 2048; + createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight; + } + } + + createInfo.pThreadInfo = &threadingInfo; + ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo); ctx->api.pfnSwrInit(); diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 8bed78f869a..5c280ee365c 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -173,6 +173,8 @@ struct swr_context { unsigned dirty; /**< Mask of SWR_NEW_x flags */ SWR_INTERFACE api; + + uint32_t max_draws_in_flight; }; static INLINE struct swr_context * diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp index e205fe2d7ef..9d6f918e349 100644 --- a/src/gallium/drivers/swr/swr_loader.cpp +++ b/src/gallium/drivers/swr/swr_loader.cpp @@ -38,11 +38,14 @@ swr_create_screen(struct sw_winsys *winsys) util_cpu_detect(); + bool is_knl = false; + if (!strlen(filename) && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) { #if HAVE_SWR_KNL fprintf(stderr, "KNL "); sprintf(filename, "%s%s%s", UTIL_DL_PREFIX, "swrKNL", UTIL_DL_EXT); + is_knl = true; #else fprintf(stderr, "KNL (not built) "); #endif @@ -99,6 +102,7 @@ swr_create_screen(struct sw_winsys *winsys) struct pipe_screen *screen = swr_create_screen_internal(winsys); swr_screen(screen)->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc; + swr_screen(screen)->is_knl = is_knl; return screen; } diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp index d298a48dc0b..8afe73c30e4 100644 --- a/src/gallium/drivers/swr/swr_scratch.cpp +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -45,7 +45,7 @@ swr_copy_to_scratch_space(struct swr_context *ctx, ptr = ctx->api.pfnSwrAllocDrawContextMemory(ctx->swrContext, size, 4); } else { /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */ - unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT; + uint32_t max_size_in_flight = size * ctx->max_draws_in_flight; /* Need to grow space */ if (max_size_in_flight > space->current_size) { diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h index 1c4e3315836..81b1a18b028 100644 --- a/src/gallium/drivers/swr/swr_screen.h +++ b/src/gallium/drivers/swr/swr_screen.h @@ -54,6 +54,9 @@ struct swr_screen { #endif PFNSwrGetInterface pfnSwrGetInterface; + + /* Do we run on Xeon Phi? */ + bool is_knl; }; static INLINE struct swr_screen * -- 2.30.2