From 7ad26b0030f6b14e6ec069eafdec6faf75e8007c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 11 Jun 2010 13:21:59 -0700 Subject: [PATCH] i965: Stream out CC unit state. before: [ # ] backend test min(s) median(s) stddev. count [ 0] gl firefox-talos-gfx 31.791 32.287 1.11% 6/6 after: [ 0] gl firefox-talos-gfx 31.198 31.675 0.96% 6/6 --- src/mesa/drivers/dri/i965/brw_cc.c | 216 ++++++--------------- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- 3 files changed, 66 insertions(+), 155 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 2b5efdd336b..cfce5d31405 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -58,27 +58,6 @@ brw_update_cc_vp(struct brw_context *brw) brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv)); } -struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; -}; - /** * Modify blend function to force destination alpha to 1.0 * @@ -101,136 +80,83 @@ fix_xRGB_alpha(GLenum function) return function; } -static void -cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void prepare_cc_unit(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - - memset(key, 0, sizeof(*key)); - - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - - /* If the renderbuffer is XRGB, we have to frob the blend function to - * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA - * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) { - key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); - key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); - key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); - key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); - } - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + brw_add_validated_bo(brw, brw->cc.vp_bo); } /** * Creates the state cache entry for the given CC unit key. */ -static drm_intel_bo * -cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static void upload_cc_unit(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_unit_state cc; - drm_intel_bo *bo; + void *map; memset(&cc, 0, sizeof(cc)); /* _NEW_STENCIL */ - if (key->stencil) { + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + cc.cc0.stencil_enable = 1; cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); + intel_translate_compare_func(ctx->Stencil.Function[0]); cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[0]); cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]); cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]); + cc.cc1.stencil_ref = ctx->Stencil.Ref[0]; + cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0]; + cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0]; - if (key->stencil_two_side) { + if (ctx->Stencil._TestTwoSide) { cc.cc0.bf_stencil_enable = 1; cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); + intel_translate_compare_func(ctx->Stencil.Function[back]); cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.FailFunc[back]); cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]); cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]); + cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back]; + cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; + cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; } /* Not really sure about this: */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) + if (ctx->Stencil.WriteMask[0] || + (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back])) cc.cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { + if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) { cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; + cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp); + } else if (ctx->Color.BlendEnabled) { + GLenum eqRGB = ctx->Color.BlendEquationRGB; + GLenum eqA = ctx->Color.BlendEquationA; + GLenum srcRGB = ctx->Color.BlendSrcRGB; + GLenum dstRGB = ctx->Color.BlendDstRGB; + GLenum srcA = ctx->Color.BlendSrcA; + GLenum dstA = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) { + srcRGB = fix_xRGB_alpha(srcRGB); + srcA = fix_xRGB_alpha(srcA); + dstRGB = fix_xRGB_alpha(dstRGB); + dstA = fix_xRGB_alpha(dstA); + } if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -254,25 +180,27 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) eqA != eqRGB); } - if (key->alpha_enabled) { + if (ctx->Color.AlphaEnabled) { cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_func = + intel_translate_compare_func(ctx->Color.AlphaFunc); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef); } - if (key->dither) { + if (ctx->Color.DitherFlag) { cc.cc5.dither_enable = 1; cc.cc6.y_dither_offset = 0; cc.cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ - if (key->depth_test) { + if (ctx->Depth.Test) { cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; + cc.cc2.depth_test_function = + intel_translate_compare_func(ctx->Depth.Func); + cc.cc2.depth_write_enable = ctx->Depth.Mask; } /* CACHE_NEW_CC_VP */ @@ -281,43 +209,25 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - &brw->cc.vp_bo, 1, - &cc, sizeof(cc)); + map = brw_state_batch(brw, sizeof(cc), 64, + &brw->cc.state_bo, &brw->cc.state_offset); + memcpy(map, &cc, sizeof(cc)); + brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; /* Emit CC viewport relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_cc_unit_state, cc4), + drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset + + offsetof(struct brw_cc_unit_state, + cc4)), brw->cc.vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0); - - return bo; -} - -static void prepare_cc_unit( struct brw_context *brw ) -{ - struct brw_cc_unit_key key; - - cc_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->cc.state_bo); - brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, - &key, sizeof(key), - &brw->cc.vp_bo, 1, - NULL); - - if (brw->cc.state_bo == NULL) - brw->cc.state_bo = cc_unit_create_from_key(brw, &key); } const struct brw_tracked_state brw_cc_unit = { .dirty = { .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_CC_VP }, .prepare = prepare_cc_unit, + .emit = upload_cc_unit, }; - - - diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7fbffbeb030..cc4e6638e8b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -669,6 +669,7 @@ struct brw_context drm_intel_bo *color_calc_state_bo; drm_intel_bo *state_bo; + uint32_t state_offset; } cc; struct { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b68b5af0106..572175f463e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -185,7 +185,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->cc.state_offset); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; @@ -199,7 +200,6 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->clip.state_bo); brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); - brw_add_validated_bo(brw, brw->cc.state_bo); } static void upload_psp_urb_cbs(struct brw_context *brw ) -- 2.30.2