From: Jason Ekstrand Date: Tue, 10 May 2016 21:57:15 +0000 (-0700) Subject: blorp: Add initial state setup support for SIMD8 dispatch X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=353eadb1703c2ac43200642ef329e8d2b5db9cea;p=mesa.git blorp: Add initial state setup support for SIMD8 dispatch Reviewed-by: Topi Pohjolainen --- diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 40c02e26773..c7a95f3c1d0 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -137,7 +137,11 @@ brw_blorp_compute_tile_offsets(const struct brw_blorp_surface_info *info, void brw_blorp_prog_data_init(struct brw_blorp_prog_data *prog_data) { - prog_data->first_curbe_grf = 0; + prog_data->dispatch_8 = false; + prog_data->dispatch_16 = true; + prog_data->first_curbe_grf_0 = 0; + prog_data->first_curbe_grf_2 = 0; + prog_data->ksp_offset_2 = 0; prog_data->persample_msaa_dispatch = false; prog_data->nr_params = BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 95e0e45ce39..b00e5cc3730 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -208,7 +208,13 @@ static const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS = struct brw_blorp_prog_data { - unsigned int first_curbe_grf; + bool dispatch_8; + bool dispatch_16; + + uint8_t first_curbe_grf_0; + uint8_t first_curbe_grf_2; + + uint32_t ksp_offset_2; /** * True if the WM program should be run in MSDISPMODE_PERSAMPLE with more diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index b263720f73d..ca20015190e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -778,7 +778,7 @@ brw_blorp_blit_program::alloc_regs() int reg = 0; this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); - prog_data.first_curbe_grf = reg; + prog_data.first_curbe_grf_0 = reg; alloc_push_const_regs(reg); reg += BRW_BLORP_NUM_PUSH_CONST_REGS; for (unsigned i = 0; i < ARRAY_SIZE(texture_data); ++i) { diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp index f1eb693fd27..ae7286220e1 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp @@ -86,7 +86,7 @@ brw_blorp_const_color_program::brw_blorp_const_color_program( clear_rgba(), base_mrf(0) { - prog_data.first_curbe_grf = 0; + prog_data.first_curbe_grf_0 = 0; prog_data.persample_msaa_dispatch = false; brw_init_codegen(brw->intelScreen->devinfo, &func, mem_ctx); } @@ -145,7 +145,7 @@ brw_blorp_const_color_program::alloc_regs() this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW); - prog_data.first_curbe_grf = reg; + prog_data.first_curbe_grf_0 = reg; clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F); reg += BRW_BLORP_NUM_PUSH_CONST_REGS; diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c index fdd82869612..b2a2d699549 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.c +++ b/src/mesa/drivers/dri/i965/gen6_blorp.c @@ -619,7 +619,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, const struct brw_blorp_params *params) { const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw2, dw4, dw5, dw6; + uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2; /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be * nonzero to prevent the GPU from hanging. While the documentation doesn't @@ -630,7 +630,7 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, * configure the WM state whether or not there is a WM program. */ - dw2 = dw4 = dw5 = dw6 = 0; + dw2 = dw4 = dw5 = dw6 = ksp0 = ksp2 = 0; switch (params->hiz_op) { case GEN6_HIZ_OP_DEPTH_CLEAR: dw4 |= GEN6_WM_DEPTH_CLEAR; @@ -652,9 +652,18 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */ if (params->wm_prog_data) { - dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0; - dw5 |= GEN6_WM_16_DISPATCH_ENABLE; dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */ + + dw4 |= prog_data->first_curbe_grf_0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0; + dw4 |= prog_data->first_curbe_grf_2 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2; + + ksp0 = params->wm_prog_kernel; + ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; + + if (params->wm_prog_data->dispatch_8) + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; + if (params->wm_prog_data->dispatch_16) + dw5 |= GEN6_WM_16_DISPATCH_ENABLE; } if (params->src.mt) { @@ -675,14 +684,14 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_BATCH(params->wm_prog_kernel); + OUT_BATCH(ksp0); OUT_BATCH(dw2); OUT_BATCH(0); /* No scratch needed */ OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); - OUT_BATCH(0); /* No other programs */ - OUT_BATCH(0); /* No other programs */ + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(ksp2); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index e2e6072410c..ea5f4c84479 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -526,26 +526,33 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, const struct brw_blorp_params *params) { const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw2, dw4, dw5; + uint32_t dw2, dw4, dw5, ksp0, ksp2; const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - dw2 = dw4 = dw5 = 0; + dw2 = dw4 = dw5 = ksp0 = ksp2 = 0; dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; - /* If there's a WM program, we need to do 16-pixel dispatch since that's - * what the program is compiled for. If there isn't, then it shouldn't - * matter because no program is actually being run. However, the hardware - * gets angry if we don't enable at least one dispatch mode, so just enable - * 16-pixel dispatch unconditionally. - */ - dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - if (brw->is_haswell) dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ if (params->wm_prog_data) { dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; - dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + + dw5 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + dw5 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; + + ksp0 = params->wm_prog_kernel; + ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; + + if (params->wm_prog_data->dispatch_8) + dw4 |= GEN7_PS_8_DISPATCH_ENABLE; + if (params->wm_prog_data->dispatch_16) + dw4 |= GEN7_PS_16_DISPATCH_ENABLE; + } else { + /* The hardware gets angry if we don't enable at least one dispatch + * mode, so just enable 16-pixel dispatch if we don't have a program. + */ + dw4 |= GEN7_PS_16_DISPATCH_ENABLE; } if (params->src.mt) @@ -555,13 +562,13 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_BATCH(params->wm_prog_kernel); + OUT_BATCH(ksp0); OUT_BATCH(dw2); OUT_BATCH(0); OUT_BATCH(dw4); OUT_BATCH(dw5); - OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(ksp2); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c index 05ef54c4d0a..eefdc93d0df 100644 --- a/src/mesa/drivers/dri/i965/gen8_blorp.c +++ b/src/mesa/drivers/dri/i965/gen8_blorp.c @@ -372,13 +372,11 @@ gen8_blorp_emit_ps_config(struct brw_context *brw, const struct brw_blorp_params *params) { const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw3, dw5, dw6, dw7; + uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2; - dw3 = dw5 = dw6 = dw7 = 0; + dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0; dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; - dw6 |= GEN7_PS_16_DISPATCH_ENABLE; - if (params->src.mt) { dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */ @@ -387,7 +385,16 @@ gen8_blorp_emit_ps_config(struct brw_context *brw, } dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE; - dw7 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; + + if (params->wm_prog_data->dispatch_8) + dw6 |= GEN7_PS_8_DISPATCH_ENABLE; + if (params->wm_prog_data->dispatch_16) + dw6 |= GEN7_PS_16_DISPATCH_ENABLE; + + ksp0 = params->wm_prog_kernel; + ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; * it implicitly scales for different GT levels (which have some # of PSDs). @@ -404,16 +411,16 @@ gen8_blorp_emit_ps_config(struct brw_context *brw, BEGIN_BATCH(12); OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); - OUT_BATCH(params->wm_prog_kernel); + OUT_BATCH(ksp0); OUT_BATCH(0); OUT_BATCH(dw3); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(dw6); OUT_BATCH(dw7); + OUT_BATCH(0); /* kernel 1 pointer */ OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(ksp2); OUT_BATCH(0); ADVANCE_BATCH(); }