From: Marek Olšák Date: Mon, 5 Apr 2010 04:26:11 +0000 (+0200) Subject: r300g: add fallback for back stencil reference value and masks for r3xx-r4xx X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a955f86b31304a1a0f35faa0e0861e920354e23b;p=mesa.git r300g: add fallback for back stencil reference value and masks for r3xx-r4xx This splits rendering into two passes when front and back stencil reference value, value mask, or write mask don't match. The advantages of doing it in the driver instead of in st are: * SWTCL is executed just once and the resulting vertex buffer is reused in the second pass. * Lower driver overhead due to the fallback being very close to the actual draw emission with minimum state change. --- diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 1e1c7161eca..46fdf087942 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -175,6 +175,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_draw_range_elements; + + if (r300screen->caps.is_r500) { + r300->emit_draw_arrays_immediate = r500_emit_draw_arrays_immediate; + r300->emit_draw_arrays = r500_emit_draw_arrays; + r300->emit_draw_elements = r500_emit_draw_elements; + } else { + r300->emit_draw_arrays_immediate = r300_emit_draw_arrays_immediate; + r300->emit_draw_arrays = r300_emit_draw_arrays; + r300->emit_draw_elements = r300_emit_draw_elements; + } } else { r300->context.draw_arrays = r300_swtcl_draw_arrays; r300->context.draw_elements = r300_draw_elements; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 351bd2cdaee..9d7e9d1226d 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -79,6 +79,11 @@ struct r300_dsa_state { uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + + /* Whether a two-sided stencil is enabled. */ + boolean two_sided; + /* Whether a fallback should be used for a two-sided stencil ref value. */ + boolean stencil_ref_bf_fallback; }; struct r300_rs_state { @@ -290,6 +295,21 @@ struct r300_context { /* Parent class */ struct pipe_context context; + /* Emission of drawing packets. */ + void (*emit_draw_arrays_immediate)( + struct r300_context *r300, + unsigned mode, unsigned start, unsigned count); + + void (*emit_draw_arrays)( + struct r300_context *r300, + unsigned mode, unsigned count); + + void (*emit_draw_elements)( + struct r300_context *r300, struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count); + + /* The interface to the windowing system, etc. */ struct r300_winsys_screen *rws; /* Screen. */ @@ -382,6 +402,9 @@ struct r300_context { boolean scissor_enabled; /* Whether rendering is conditional and should be skipped. */ boolean skip_rendering; + /* Whether the two-sided stencil ref value is different for front and + * back faces, and fallback should be used for r3xx-r4xx. */ + boolean stencil_ref_bf_fallback; /* upload managers */ struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c2b13235a16..09355569fb8 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -162,10 +162,15 @@ static boolean immd_is_good_idea(struct r300_context *r300, return TRUE; } -static void r300_emit_draw_arrays_immediate(struct r300_context *r300, - unsigned mode, - unsigned start, - unsigned count) +/***************************************************************************** + * The emission of draw packets for r500. Older GPUs may use these functions * + * after resolving fallback issues (e.g. stencil ref two-sided). * + ****************************************************************************/ + +void r500_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; @@ -252,9 +257,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, } } -static void r300_emit_draw_arrays(struct r300_context *r300, - unsigned mode, - unsigned count) +void r500_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count) { #if defined(ENABLE_ALT_NUM_VERTS) boolean alt_num_verts = count > 65535; @@ -282,14 +287,14 @@ static void r300_emit_draw_arrays(struct r300_context *r300, END_CS; } -static void r300_emit_draw_elements(struct r300_context *r300, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +void r500_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { uint32_t count_dwords; uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); @@ -347,6 +352,104 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } +/***************************************************************************** + * The emission of draw packets for r300 which take care of the two-sided * + * stencil ref fallback and call r500's functions. * + ****************************************************************************/ + +/* Set drawing for front faces. */ +static void r300_begin_stencil_ref_fallback(struct r300_context *r300) +{ + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_BACK); + END_CS; +} + +/* Set drawing for back faces. */ +static void r300_switch_stencil_ref_side(struct r300_context *r300) +{ + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + CS_LOCALS(r300); + + BEGIN_CS(4); + OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_FRONT); + OUT_CS_REG(R300_ZB_STENCILREFMASK, + dsa->stencil_ref_bf | r300->stencil_ref.ref_value[1]); + END_CS; +} + +/* Restore the original state. */ +static void r300_end_stencil_ref_fallback(struct r300_context *r300) +{ + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + CS_LOCALS(r300); + + BEGIN_CS(4); + OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode); + OUT_CS_REG(R300_ZB_STENCILREFMASK, + dsa->stencil_ref_mask | r300->stencil_ref.ref_value[0]); + END_CS; +} + +void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + if (!r300->stencil_ref_bf_fallback) { + r500_emit_draw_arrays_immediate(r300, mode, start, count); + } else { + r300_begin_stencil_ref_fallback(r300); + r500_emit_draw_arrays_immediate(r300, mode, start, count); + r300_switch_stencil_ref_side(r300); + r500_emit_draw_arrays_immediate(r300, mode, start, count); + r300_end_stencil_ref_fallback(r300); + } +} + +void r300_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count) +{ + if (!r300->stencil_ref_bf_fallback) { + r500_emit_draw_arrays(r300, mode, count); + } else { + r300_begin_stencil_ref_fallback(r300); + r500_emit_draw_arrays(r300, mode, count); + r300_switch_stencil_ref_side(r300); + r500_emit_draw_arrays(r300, mode, count); + r300_end_stencil_ref_fallback(r300); + } +} + +void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + if (!r300->stencil_ref_bf_fallback) { + r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, + maxIndex, mode, start, count); + } else { + r300_begin_stencil_ref_fallback(r300); + r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, + maxIndex, mode, start, count); + r300_switch_stencil_ref_side(r300); + r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, + maxIndex, mode, start, count); + r300_end_stencil_ref_fallback(r300); + } +} + static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned start, @@ -457,13 +560,13 @@ void r300_draw_range_elements(struct pipe_context* pipe, u_upload_flush(r300->upload_vb); u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { - r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, - maxIndex, mode, start, count); + r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex, + maxIndex, mode, start, count); } else { do { short_count = MIN2(count, 65534); - r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, - maxIndex, mode, start, short_count); + r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex, + maxIndex, mode, start, short_count); start += short_count; count -= short_count; @@ -518,7 +621,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); if (immd_is_good_idea(r300, count)) { - r300_emit_draw_arrays_immediate(r300, mode, start, count); + r300->emit_draw_arrays_immediate(r300, mode, start, count); } else { /* Make sure there are at least 128 spare dwords in the command buffer. * (most of it being consumed by emit_aos) */ @@ -528,12 +631,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, if (alt_num_verts || count <= 65535) { r300_emit_aos(r300, start); - r300_emit_draw_arrays(r300, mode, count); + r300->emit_draw_arrays(r300, mode, count); } else { do { short_count = MIN2(count, 65535); r300_emit_aos(r300, start); - r300_emit_draw_arrays(r300, mode, short_count); + r300->emit_draw_arrays(r300, mode, short_count); start += short_count; count -= short_count; @@ -746,9 +849,9 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, return TRUE; } -static void r300_render_draw_arrays(struct vbuf_render* render, - unsigned start, - unsigned count) +static void r500_render_draw_arrays(struct vbuf_render* render, + unsigned start, + unsigned count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; @@ -768,9 +871,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render, END_CS; } -static void r300_render_draw(struct vbuf_render* render, - const ushort* indices, - uint count) +static void r500_render_draw(struct vbuf_render* render, + const ushort* indices, + uint count) { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; @@ -796,6 +899,40 @@ static void r300_render_draw(struct vbuf_render* render, END_CS; } +static void r300_render_draw_arrays(struct vbuf_render* render, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_render(render)->r300; + + if (!r300->stencil_ref_bf_fallback) { + r500_render_draw_arrays(render, start, count); + } else { + r300_begin_stencil_ref_fallback(r300); + r500_render_draw_arrays(render, start, count); + r300_switch_stencil_ref_side(r300); + r500_render_draw_arrays(render, start, count); + r300_end_stencil_ref_fallback(r300); + } +} + +static void r300_render_draw(struct vbuf_render* render, + const ushort* indices, + uint count) +{ + struct r300_context* r300 = r300_render(render)->r300; + + if (!r300->stencil_ref_bf_fallback) { + r500_render_draw(render, indices, count); + } else { + r300_begin_stencil_ref_fallback(r300); + r500_render_draw(render, indices, count); + r300_switch_stencil_ref_side(r300); + r500_render_draw(render, indices, count); + r300_end_stencil_ref_fallback(r300); + } +} + static void r300_render_destroy(struct vbuf_render* render) { FREE(render); @@ -816,8 +953,13 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300) r300render->base.map_vertices = r300_render_map_vertices; r300render->base.unmap_vertices = r300_render_unmap_vertices; r300render->base.set_primitive = r300_render_set_primitive; - r300render->base.draw = r300_render_draw; - r300render->base.draw_arrays = r300_render_draw_arrays; + if (r300->screen->caps.is_r500) { + r300render->base.draw = r500_render_draw; + r300render->base.draw_arrays = r500_render_draw_arrays; + } else { + r300render->base.draw = r300_render_draw; + r300render->base.draw_arrays = r300_render_draw_arrays; + } r300render->base.release_vertices = r300_render_release_vertices; r300render->base.destroy = r300_render_destroy; diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index 27b5e6a9630..870e1fb53d1 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,6 +25,42 @@ uint32_t r300_translate_primitive(unsigned prim); +void r500_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count); + +void r500_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count); + +void r500_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_emit_draw_arrays_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count); + +void r300_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count); + +void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2adb3e7221b..2309f353268 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -444,6 +444,8 @@ static void* (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT); if (state->stencil[1].enabled) { + dsa->two_sided = TRUE; + dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK; dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->stencil[1].func) << @@ -455,14 +457,16 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - if (caps->is_r500) - { + dsa->stencil_ref_bf = + (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); + + if (caps->is_r500) { dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; - dsa->stencil_ref_bf = - (state->stencil[1].valuemask << - R300_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << - R300_STENCILWRITEMASK_SHIFT); + } else { + dsa->stencil_ref_bf_fallback = + (state->stencil[0].valuemask != state->stencil[1].valuemask || + state->stencil[0].writemask != state->stencil[1].writemask); } } } @@ -483,13 +487,33 @@ static void* return (void*)dsa; } +static void r300_update_stencil_ref_fallback_status(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + if (r300->screen->caps.is_r500) { + return; + } + + r300->stencil_ref_bf_fallback = + dsa->stencil_ref_bf_fallback || + (dsa->two_sided && + r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); +} + /* Bind DSA state. */ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + if (!state) { + return; + } + UPDATE_STATE(state, r300->dsa_state); + + r300_update_stencil_ref_fallback_status(r300); } /* Free DSA state. */ @@ -503,8 +527,11 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, const struct pipe_stencil_ref* sr) { struct r300_context* r300 = r300_context(pipe); + r300->stencil_ref = *sr; r300->dsa_state.dirty = TRUE; + + r300_update_stencil_ref_fallback_status(r300); } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */