From: Jordan Justen Date: Fri, 12 Jun 2015 02:17:03 +0000 (-0700) Subject: i965/cs: Initialize GPGPU Thread Count X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=320089dbd63de3ac1bd3d42ee8cec41837486d8c;p=mesa.git i965/cs: Initialize GPGPU Thread Count This field should always be set for gen8. In the bdw PRM, Volume 2d: Command Reference: Structures under INTERFACE_DESCRIPTOR_DATA, DWORD 6, Bits 9:0, Number of Threads in GPGPU Thread Group: "This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption." In the HSW PRM, the it doesn't mention that it must always be set, but it should not hurt. Reported-by: Kristian Høgsberg Signed-off-by: Jordan Justen Reviewed-by: Ben Widawsky --- diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 4c5082c82c4..d61bba002c4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -291,6 +291,17 @@ brw_cs_precompile(struct gl_context *ctx, } +static unsigned +get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) +{ + const unsigned simd_size = cs_prog_data->simd_size; + unsigned group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + + return (group_size + simd_size - 1) / simd_size; +} + + static void brw_upload_cs_state(struct brw_context *brw) { @@ -316,6 +327,8 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned threads = get_cs_thread_count(cs_prog_data); + uint32_t dwords = brw->gen < 8 ? 8 : 9; BEGIN_BATCH(dwords); OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); @@ -365,6 +378,13 @@ brw_upload_cs_state(struct brw_context *brw) desc[dw++] = 0; desc[dw++] = 0; desc[dw++] = stage_state->bind_bo_offset; + desc[dw++] = 0; + const uint32_t media_threads = + brw->gen >= 8 ? + SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : + SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); + assert(threads <= brw->max_cs_threads); + desc[dw++] = media_threads; BEGIN_BATCH(4); OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 19489aba5be..b1a1c11b3ae 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2518,6 +2518,11 @@ enum brw_wm_barycentric_interp_mode { # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 +/* GEN7 DW5, GEN8+ DW6 */ +# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_WALKER 0x7105 /* GEN8+ DW2 */