From f5a690cb68d69c0279ab95ecb2d188ede13ada03 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 21 Aug 2012 15:24:14 -0700 Subject: [PATCH] i965: Split sampler count variable to be per-stage. Currently, we only have a single sampler state table shared among all stages, so we just copy wm.sampler_count into vs.sampler_count. In the future, each shader stage will have its own SAMPLER_STATE table, at which point we'll need these separate sampler counts. Signed-off-by: Kenneth Graunke Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_context.h | 5 ++++- src/mesa/drivers/dri/i965/brw_vs_state.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 12 +++++++----- src/mesa/drivers/dri/i965/brw_wm_state.c | 6 +++--- src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sampler_state.c | 12 +++++++----- src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- 9 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 419cf174092..d7c3472fa2e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1066,7 +1066,6 @@ struct brw_context /** SAMPLER_STATE count and offset */ struct { - GLuint count; uint32_t offset; } sampler; @@ -1110,6 +1109,8 @@ struct brw_context uint32_t bind_bo_offset; uint32_t surf_offset[BRW_MAX_VS_SURFACES]; + + uint32_t sampler_count; } vs; struct { @@ -1183,6 +1184,8 @@ struct brw_context uint32_t bind_bo_offset; uint32_t surf_offset[BRW_MAX_WM_SURFACES]; + uint32_t sampler_count; + struct { struct ra_regs *regs; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index ddaf914f103..13aabac43d9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -142,7 +142,7 @@ brw_upload_vs_unit(struct brw_context *brw) vs->vs5.sampler_count = 0; /* hardware requirement */ else { /* CACHE_NEW_SAMPLER */ - vs->vs5.sampler_count = (brw->sampler.count + 3) / 4; + vs->vs5.sampler_count = (brw->vs.sampler_count + 3) / 4; } @@ -155,7 +155,7 @@ brw_upload_vs_unit(struct brw_context *brw) /* Set the sampler state pointer, and its reloc */ - if (brw->sampler.count) { + if (brw->vs.sampler_count) { vs->vs5.sampler_state_pointer = (brw->batch.bo->offset + brw->sampler.offset) >> 5; drm_intel_bo_emit_reloc(brw->batch.bo, diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5457671b19f..40a6d5be1d6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -377,17 +377,19 @@ brw_upload_samplers(struct brw_context *brw) /* ARB programs use the texture unit number as the sampler index, so we * need to find the highest unit used. A bit-count will not work. */ - brw->sampler.count = _mesa_fls(SamplersUsed); + brw->wm.sampler_count = _mesa_fls(SamplersUsed); + /* Currently we only use one sampler state table. Mirror the count. */ + brw->vs.sampler_count = brw->wm.sampler_count; - if (brw->sampler.count == 0) + if (brw->wm.sampler_count == 0) return; samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, - brw->sampler.count * sizeof(*samplers), + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->sampler.offset); - memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); + memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); - for (unsigned s = 0; s < brw->sampler.count; s++) { + for (unsigned s = 0; s < brw->wm.sampler_count; s++) { if (SamplersUsed & (1 << s)) { const unsigned unit = (fs->SamplersUsed & (1 << s)) ? fs->SamplerUnits[s] : vs->SamplerUnits[s]; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 631f351699b..106d6287bf2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -144,10 +144,10 @@ brw_upload_wm_unit(struct brw_context *brw) wm->wm4.sampler_count = 0; /* hardware requirement */ else { /* CACHE_NEW_SAMPLER */ - wm->wm4.sampler_count = (brw->sampler.count + 1) / 4; + wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; } - if (brw->sampler.count) { + if (brw->wm.sampler_count) { /* reloc */ wm->wm4.sampler_state_pointer = (brw->batch.bo->offset + brw->sampler.offset) >> 5; @@ -225,7 +225,7 @@ brw_upload_wm_unit(struct brw_context *brw) } /* Emit sampler state relocation */ - if (brw->sampler.count != 0) { + if (brw->wm.sampler_count != 0) { drm_intel_bo_emit_reloc(brw->batch.bo, brw->wm.state_offset + offsetof(struct brw_wm_unit_state, wm4), diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index da20713e93f..4af7cda180a 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -149,7 +149,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_BATCH(brw->vs.prog_offset); OUT_BATCH(floating_point_mode | - ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); + ((ALIGN(brw->vs.sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); if (brw->vs.prog_data->base.total_scratch) { OUT_RELOC(brw->vs.scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 24c96ccc1a1..e2867855212 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -140,7 +140,7 @@ upload_wm_state(struct brw_context *brw) dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; /* CACHE_NEW_SAMPLER */ - dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; + dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; dw4 |= (brw->wm.prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 7ce58ce2a4a..f09c6b3ffe6 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -195,17 +195,19 @@ gen7_upload_samplers(struct brw_context *brw) GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; - brw->sampler.count = _mesa_fls(SamplersUsed); + brw->wm.sampler_count = _mesa_fls(SamplersUsed); + /* Currently we only use one sampler state table. Mirror the count. */ + brw->vs.sampler_count = brw->wm.sampler_count; - if (brw->sampler.count == 0) + if (brw->wm.sampler_count == 0) return; samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, - brw->sampler.count * sizeof(*samplers), + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->sampler.offset); - memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); + memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); - for (unsigned s = 0; s < brw->sampler.count; s++) { + for (unsigned s = 0; s < brw->wm.sampler_count; s++) { if (SamplersUsed & (1 << s)) { const unsigned unit = (fs->SamplersUsed & (1 << s)) ? fs->SamplerUnits[s] : vs->SamplerUnits[s]; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0340da4ba12..634bd95a873 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -89,7 +89,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_BATCH(brw->vs.prog_offset); OUT_BATCH(floating_point_mode | - ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); + ((ALIGN(brw->vs.sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); if (brw->vs.prog_data->base.total_scratch) { OUT_RELOC(brw->vs.scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 1bc6e2ec231..d079a52f315 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -162,7 +162,7 @@ upload_ps_state(struct brw_context *brw) dw2 = dw4 = dw5 = 0; /* CACHE_NEW_SAMPLER */ - dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; + dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Use ALT floating point mode for ARB fragment programs, because they * require 0^0 == 1. Even though _CurrentFragmentProgram is used for -- 2.30.2