Right now we decide which kernels to use and which GRF start offsets to
use in one place, but emit the kernel start pointers later. How the 8-,
16- and 32-wide kernels map to the kernel start pointers follows the same
logic as which GRF start offsets to use, so let's figure out both in one
place.
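
To make the mapping concrete, here is a minimal standalone sketch of the
kernel-pointer selection this patch centralizes. The type and function names
(prog_data, pick_kernel_pointers) are illustrative stand-ins rather than the
real brw types, only the 8- and 16-wide cases visible in this diff are shown,
and the min_inv_per_frag condition is simplified from the emit-time check the
patch removes:

    #include <stdint.h>
    #include <stdio.h>

    /* Reduced stand-in for brw_wm_prog_data: only the fields the
     * kernel-pointer mapping needs. */
    struct prog_data {
       uint32_t prog_offset;    /* start of the 8-wide kernel */
       uint32_t prog_offset_16; /* 16-wide kernel, relative to prog_offset
                                 * (0 if no 16-wide kernel was compiled) */
    };

    static void
    pick_kernel_pointers(const struct prog_data *p, int min_inv_per_frag,
                         uint32_t *ksp0, uint32_t *ksp2)
    {
       *ksp2 = 0;
       if (p->prog_offset_16 && min_inv_per_frag == 1) {
          /* Both dispatch widths enabled: 8-wide kernel in slot 0,
           * 16-wide kernel in slot 2. */
          *ksp0 = p->prog_offset;
          *ksp2 = p->prog_offset + p->prog_offset_16;
       } else if (p->prog_offset_16) {
          /* 16-wide only: it moves into slot 0. */
          *ksp0 = p->prog_offset + p->prog_offset_16;
       } else {
          /* 8-wide only. */
          *ksp0 = p->prog_offset;
       }
    }

    int
    main(void)
    {
       struct prog_data p = { 0x100, 0x80 };
       uint32_t ksp0, ksp2;
       pick_kernel_pointers(&p, 1, &ksp0, &ksp2);
       printf("ksp0 0x%x ksp2 0x%x\n",
              (unsigned) ksp0, (unsigned) ksp2); /* 0x100, 0x180 */
       return 0;
    }

With ksp0/ksp2 decided next to the GRF start offsets, the batch emission
below reduces to OUT_BATCH(ksp0) and OUT_BATCH(ksp2).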
Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
struct gl_context *ctx = &brw->ctx;
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
- uint32_t dw2, dw4, dw5, dw6;
+ uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
/* _NEW_BUFFERS */
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
- dw2 = dw4 = dw5 = dw6 = 0;
+ dw2 = dw4 = dw5 = dw6 = ksp2 = 0;
dw4 |= GEN6_WM_STATISTICS_ENABLE;
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
+ ksp0 = brw->wm.base.prog_offset;
+ ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+ } else {
dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+ ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+ }
}
else {
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+ ksp0 = brw->wm.base.prog_offset;
}
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
- if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
- else
- OUT_BATCH(brw->wm.base.prog_offset);
+ OUT_BATCH(ksp0);
OUT_BATCH(dw2);
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.base.scratch_bo,
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* kernel 1 pointer */
- /* kernel 2 pointer */
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+ OUT_BATCH(ksp2); /* kernel 2 pointer */
upload_ps_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- uint32_t dw2, dw4, dw5;
+ uint32_t dw2, dw4, dw5, ksp0, ksp2;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
- dw2 = dw4 = dw5 = 0;
+ dw2 = dw4 = dw5 = ksp2 = 0;
dw2 |=
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
+ ksp0 = brw->wm.base.prog_offset;
+ ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+ } else {
dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+ ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
+ }
}
else {
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+ ksp0 = brw->wm.base.prog_offset;
}
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
- if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1)
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
- else
- OUT_BATCH(brw->wm.base.prog_offset);
+ OUT_BATCH(ksp0);
OUT_BATCH(dw2);
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.base.scratch_bo,
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0); /* kernel 1 pointer */
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+ OUT_BATCH(ksp2);
upload_ps_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
+ uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
/* Initialize the execution mask with VMask. Otherwise, derivatives are
* incorrect for subspans where some of the pixels are unlit. We believe
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
+ ksp0 = brw->wm.base.prog_offset;
+ ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
} else {
dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+
+ ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
}
} else {
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+ ksp0 = brw->wm.base.prog_offset;
}
BEGIN_BATCH(12);
OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
- if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > 1)
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
- else
- OUT_BATCH(brw->wm.base.prog_offset);
+ OUT_BATCH(ksp0);
OUT_BATCH(0);
OUT_BATCH(dw3);
if (brw->wm.prog_data->total_scratch) {
OUT_BATCH(dw7);
OUT_BATCH(0); /* kernel 1 pointer */
OUT_BATCH(0);
- OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16);
+ OUT_BATCH(ksp2);
OUT_BATCH(0);
ADVANCE_BATCH();
}