From: Eric Anholt Date: Fri, 22 Apr 2011 22:30:42 +0000 (-0700) Subject: i965: Move sampler state to state streaming. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5c742ea1ee0cea031cb99651155d0c7521f42b4e;p=mesa.git i965: Move sampler state to state streaming. Overall, across this series since the last set of numbers, gen6 3DMMES taiji performance has dropped 0.8% +/- 0.3% (n=15), probably due to the increased reissuing of state from some of the state objects that otherwise never changed, and increased occurrence of the per-batch overhead as we've increased how much we put in the batch BO without increasing the batch BO's size. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b61a6ff9bc3..748a7503833 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -311,7 +311,6 @@ enum brw_cache_id { BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, BRW_SAMPLER, BRW_WM_UNIT, BRW_SF_PROG, @@ -384,7 +383,6 @@ struct brw_tracked_state { #define CACHE_NEW_CC_VP (1<intel.ctx; int i; - if (!brw->wm.sampler_bo) { - fprintf(stderr, "WM_SAMPLER: NULL\n"); - return; - } - - drm_intel_bo_map(brw->wm.sampler_bo, GL_FALSE); + drm_intel_bo_map(intel->batch.bo, GL_FALSE); for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { unsigned int offset; uint32_t sdc_offset; @@ -161,9 +156,11 @@ static void dump_wm_sampler_state(struct brw_context *brw) continue; } - offset = brw->wm.sampler_bo->offset + - i * sizeof(struct brw_sampler_state); - samp = (struct brw_sampler_state *)(brw->wm.sampler_bo->virtual + + offset = (intel->batch.bo->offset + + brw->wm.sampler_offset + + i * sizeof(struct brw_sampler_state)); + samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + + brw->wm.sampler_offset + i * sizeof(struct brw_sampler_state)); sprintf(name, "WM SAMP%d", i); @@ -174,10 +171,10 @@ static void dump_wm_sampler_state(struct brw_context *brw) sprintf(name, " WM SDC%d", i); - drm_intel_bo_map(brw->wm.sdc_bo[i], GL_FALSE); - sdc_offset = brw->wm.sdc_bo[i]->offset; + sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = brw->wm.sdc_bo[i]->virtual; + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + brw->wm.sdc_offset[i]); state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); @@ -191,15 +188,15 @@ static void dump_wm_sampler_state(struct brw_context *brw) state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); } else { - struct brw_sampler_default_color *sdc = brw->wm.sdc_bo[i]->virtual; + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + brw->wm.sdc_offset[i]); state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); } - drm_intel_bo_unmap(brw->wm.sdc_bo[i]); } - drm_intel_bo_unmap(brw->wm.sampler_bo); + drm_intel_bo_unmap(intel->batch.bo); } static void dump_sf_viewport_state(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index de67114e024..e1485772149 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -317,7 +317,6 @@ static struct dirty_bit_map cache_bits[] = { DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), - DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), DEFINE_BIT(CACHE_NEW_SAMPLER), DEFINE_BIT(CACHE_NEW_WM_UNIT), DEFINE_BIT(CACHE_NEW_SF_PROG), diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 7add92aab43..36c1cc8bba8 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -85,9 +85,6 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->clip.state_bo); dri_bo_release(&brw->sf.prog_bo); dri_bo_release(&brw->sf.state_bo); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) - dri_bo_release(&brw->wm.sdc_bo[i]); - dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.const_bo); dri_bo_release(&brw->cc.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 9ceac5adf05..c24494b645f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -66,91 +66,93 @@ static GLuint translate_wrap_mode( GLenum wrap ) } } -static drm_intel_bo *upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static void +upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, + int unit) { struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; + float color[4]; + + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + color[0] = sampler->BorderColor.f[0]; + color[1] = sampler->BorderColor.f[0]; + color[2] = sampler->BorderColor.f[0]; + color[3] = sampler->BorderColor.f[0]; + } else { + color[0] = sampler->BorderColor.f[0]; + color[1] = sampler->BorderColor.f[1]; + color[2] = sampler->BorderColor.f[2]; + color[3] = sampler->BorderColor.f[3]; + } if (intel->gen >= 5) { - struct gen5_sampler_default_color sdc; - - memset(&sdc, 0, sizeof(sdc)); - - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); - UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); - - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); - UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); - - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); - UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); - - sdc.hf[0] = _mesa_float_to_half(color[0]); - sdc.hf[1] = _mesa_float_to_half(color[1]); - sdc.hf[2] = _mesa_float_to_half(color[2]); - sdc.hf[3] = _mesa_float_to_half(color[3]); - - sdc.b[0] = sdc.s[0] >> 8; - sdc.b[1] = sdc.s[1] >> 8; - sdc.b[2] = sdc.s[2] >> 8; - sdc.b[3] = sdc.s[3] >> 8; - - sdc.f[0] = color[0]; - sdc.f[1] = color[1]; - sdc.f[2] = color[2]; - sdc.f[3] = color[3]; - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); - } else { - struct brw_sampler_default_color sdc; + struct gen5_sampler_default_color *sdc; - COPY_4V(sdc.color, color); + sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); - } -} + memset(sdc, 0, sizeof(*sdc)); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]); -struct wm_sampler_key { - int sampler_count; + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]); - struct wm_sampler_entry { - GLenum tex_target; - GLenum wrap_r, wrap_s, wrap_t; - float maxlod, minlod; - float lod_bias; - float max_aniso; - GLenum minfilter, magfilter; - GLenum comparemode, comparefunc; + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]); - /** If target is cubemap, take context setting. - */ - GLboolean seamless_cube_map; - } sampler[BRW_MAX_TEX_UNIT]; -}; + sdc->hf[0] = _mesa_float_to_half(color[0]); + sdc->hf[1] = _mesa_float_to_half(color[1]); + sdc->hf[2] = _mesa_float_to_half(color[2]); + sdc->hf[3] = _mesa_float_to_half(color[3]); + + sdc->b[0] = sdc->s[0] >> 8; + sdc->b[1] = sdc->s[1] >> 8; + sdc->b[2] = sdc->s[2] >> 8; + sdc->b[3] = sdc->s[3] >> 8; + + sdc->f[0] = color[0]; + sdc->f[1] = color[1]; + sdc->f[2] = color[2]; + sdc->f[3] = color[3]; + } else { + struct brw_sampler_default_color *sdc; + + sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + + COPY_4V(sdc->color, color); + } +} /** * Sets the sampler state for a single unit based off of the sampler key * entry. */ static void brw_update_sampler_state(struct brw_context *brw, - struct wm_sampler_entry *key, - drm_intel_bo *sdc_bo, + int unit, struct brw_sampler_state *sampler) { struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit); - memset(sampler, 0, sizeof(*sampler)); - - switch (key->minfilter) { + switch (gl_sampler->MinFilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; @@ -181,17 +183,17 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Set Anisotropy: */ - if (key->max_aniso > 1.0) { + if (gl_sampler->MaxAnisotropy > 1.0) { sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + if (gl_sampler->MaxAnisotropy > 2.0) { + sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16); } } else { - switch (key->magfilter) { + switch (gl_sampler->MagFilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; break; @@ -203,9 +205,9 @@ static void brw_update_sampler_state(struct brw_context *brw, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR); + sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS); + sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT); if (intel->gen >= 6 && sampler->ss0.min_filter != sampler->ss0.mag_filter) @@ -214,9 +216,10 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Cube-maps on 965 and later must use the same wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ - if (key->tex_target == GL_TEXTURE_CUBE_MAP) { - if (key->seamless_cube_map && - (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + if (ctx->Texture.CubeMapSeamless && + (gl_sampler->MinFilter != GL_NEAREST || + gl_sampler->MagFilter != GL_NEAREST)) { sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; @@ -225,7 +228,7 @@ static void brw_update_sampler_state(struct brw_context *brw, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; } - } else if (key->tex_target == GL_TEXTURE_1D) { + } else if (texObj->Target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. * Override the wrap_t value here to GL_REPEAT to keep @@ -237,18 +240,19 @@ static void brw_update_sampler_state(struct brw_context *brw, /* Set shadow function: */ - if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { /* Shadowing is "enabled" by emitting a particular sampler * message (sample_c). So need to recompile WM program when * shadow comparison is enabled on each/any texture unit. */ sampler->ss0.shadow_function = - intel_translate_shadow_compare_func(key->comparefunc); + intel_translate_shadow_compare_func(gl_sampler->CompareFunc); } /* Set LOD bias: */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias + + gl_sampler->LodBias, -16, 15), 6); sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ @@ -262,151 +266,63 @@ static void brw_update_sampler_state(struct brw_context *brw, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6); - sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6); - - sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ -} + sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6); + sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6); + upload_default_color(brw, gl_sampler, unit); -/** Sets up the cache key for sampler state for all texture units */ -static void -brw_wm_sampler_populate_key(struct brw_context *brw, - struct wm_sampler_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - int unit; - char *last_entry_end = ((char*)&key->sampler_count) + - sizeof(key->sampler_count); - - key->sampler_count = 0; - - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - struct wm_sampler_entry *entry = &key->sampler[unit]; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct gl_texture_image *firstImage = - texObj->Image[0][texObj->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - memset(last_entry_end, 0, - (char*)entry - last_entry_end + sizeof(*entry)); - last_entry_end = ((char*)entry) + sizeof(*entry); - - entry->tex_target = texObj->Target; - - entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? ctx->Texture.CubeMapSeamless : GL_FALSE; - - entry->wrap_r = sampler->WrapR; - entry->wrap_s = sampler->WrapS; - entry->wrap_t = sampler->WrapT; - - entry->maxlod = sampler->MaxLod; - entry->minlod = sampler->MinLod; - entry->lod_bias = texUnit->LodBias + sampler->LodBias; - entry->max_aniso = sampler->MaxAnisotropy; - entry->minfilter = sampler->MinFilter; - entry->magfilter = sampler->MagFilter; - entry->comparemode = sampler->CompareMode; - entry->comparefunc = sampler->CompareFunc; - - drm_intel_bo_unreference(brw->wm.sdc_bo[unit]); - if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - float bordercolor[4] = { - sampler->BorderColor.f[0], - sampler->BorderColor.f[0], - sampler->BorderColor.f[0], - sampler->BorderColor.f[0] - }; - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. - */ - brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); - } else { - brw->wm.sdc_bo[unit] = upload_default_color(brw, - sampler->BorderColor.f); - } - key->sampler_count = unit + 1; - } - } - struct wm_sampler_entry *entry = &key->sampler[key->sampler_count]; - memset(last_entry_end, 0, (char*)entry - last_entry_end); + /* reloc */ + sampler->ss2.default_color_pointer = (intel->batch.bo->offset + + brw->wm.sdc_offset[unit]) >> 5; + + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.sampler_offset + + unit * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + intel->batch.bo, brw->wm.sdc_offset[unit], + I915_GEM_DOMAIN_SAMPLER, 0); } + /* All samplers must be uploaded in a single contiguous array, which * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -static void upload_wm_samplers( struct brw_context *brw ) +static void +prepare_wm_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - struct wm_sampler_key key; - int i, sampler_key_size; + struct brw_sampler_state *samplers; + int i; - brw_wm_sampler_populate_key(brw, &key); - - if (brw->wm.sampler_count != key.sampler_count) { - brw->wm.sampler_count = key.sampler_count; - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + brw->wm.sampler_count = 0; + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + brw->wm.sampler_count = i + 1; } - drm_intel_bo_unreference(brw->wm.sampler_bo); - brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) return; - /* Only include the populated portion of the key in the search. */ - sampler_key_size = offsetof(struct wm_sampler_key, - sampler[key.sampler_count]); - brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sampler_key_size, - brw->wm.sdc_bo, key.sampler_count, - NULL); - - /* If we didnt find it in the cache, compute the state and put it in the - * cache. - */ - if (brw->wm.sampler_bo == NULL) { - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - memset(sampler, 0, sizeof(sampler)); - for (i = 0; i < key.sampler_count; i++) { - if (brw->wm.sdc_bo[i] == NULL) - continue; + samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + 32, &brw->wm.sampler_offset); + memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); - brw_update_sampler_state(brw, &key.sampler[i], brw->wm.sdc_bo[i], - &sampler[i]); - } - - brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sampler_key_size, - brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler)); - - /* Emit SDC relocations */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) - continue; - - drm_intel_bo_emit_reloc(brw->wm.sampler_bo, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i], 0, - I915_GEM_DOMAIN_SAMPLER, 0); - } + for (i = 0; i < brw->wm.sampler_count; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + brw_update_sampler_state(brw, i, &samplers[i]); } + + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } const struct brw_tracked_state brw_wm_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0 }, - .prepare = upload_wm_samplers, + .prepare = prepare_wm_samplers, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index d7faf490cfa..a91ae511b7f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -131,9 +131,10 @@ brw_prepare_wm_unit(struct brw_context *brw) wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; } - if (brw->wm.sampler_bo != NULL) { + if (brw->wm.sampler_count) { /* reloc */ - wm->wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + wm->wm4.sampler_state_pointer = (intel->batch.bo->offset + + brw->wm.sampler_offset) >> 5; } else { wm->wm4.sampler_state_pointer = 0; } @@ -244,8 +245,9 @@ brw_prepare_wm_unit(struct brw_context *brw) drm_intel_bo_emit_reloc(intel->batch.bo, brw->wm.state_offset + offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo, (wm->wm4.stats_enable | - (wm->wm4.sampler_count << 2)), + intel->batch.bo, (brw->wm.sampler_offset | + wm->wm4.stats_enable | + (wm->wm4.sampler_count << 2)), I915_GEM_DOMAIN_INSTRUCTION, 0); } diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index f65c651bdff..969780ac1df 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -43,27 +43,20 @@ upload_sampler_state_pointers(struct brw_context *brw) (4 - 2)); OUT_BATCH(0); /* VS */ OUT_BATCH(0); /* GS */ - if (brw->wm.sampler_bo) - OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + if (brw->wm.sampler_count) + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->wm.sampler_offset); else OUT_BATCH(0); ADVANCE_BATCH(); } - -static void -prepare_sampler_state_pointers(struct brw_context *brw) -{ - brw_add_validated_bo(brw, brw->wm.sampler_bo); -} - const struct brw_tracked_state gen6_sampler_state = { .dirty = { .mesa = 0, .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_SAMPLER }, - .prepare = prepare_sampler_state_pointers, .emit = upload_sampler_state_pointers, };