r300g: add fallback for back stencil reference value and masks for r3xx-r4xx
author Marek Olšák <maraeo@gmail.com>
Mon, 5 Apr 2010 04:26:11 +0000 (06:26 +0200)
committer Marek Olšák <maraeo@gmail.com>
Mon, 5 Apr 2010 05:09:46 +0000 (07:09 +0200)
This splits rendering into two passes when the front and back stencil
reference values, value masks, or write masks don't match.

The advantages of doing it in the driver instead of in the state tracker (st) are:
* SWTCL is executed just once and the resulting vertex buffer is reused
  in the second pass.
* Lower driver overhead due to the fallback being very close to
  the actual draw emission with minimum state change.

src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_render.h
src/gallium/drivers/r300/r300_state.c

index 1e1c7161ecaaa6106f73d994890076f83ad136b9..46fdf087942999953921040e3f5e5849e941430d 100644 (file)
@@ -175,6 +175,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->context.draw_arrays = r300_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
         r300->context.draw_range_elements = r300_draw_range_elements;
+
+        if (r300screen->caps.is_r500) {
+            r300->emit_draw_arrays_immediate = r500_emit_draw_arrays_immediate;
+            r300->emit_draw_arrays = r500_emit_draw_arrays;
+            r300->emit_draw_elements = r500_emit_draw_elements;
+        } else {
+            r300->emit_draw_arrays_immediate = r300_emit_draw_arrays_immediate;
+            r300->emit_draw_arrays = r300_emit_draw_arrays;
+            r300->emit_draw_elements = r300_emit_draw_elements;
+        }
     } else {
         r300->context.draw_arrays = r300_swtcl_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
index 351bd2cdaeeaac11dc1c80c61de4202c607d2dc3..9d7e9d1226dcefd359dbb7e7020742f289444983 100644 (file)
@@ -79,6 +79,11 @@ struct r300_dsa_state {
     uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
     uint32_t stencil_ref_mask;  /* R300_ZB_STENCILREFMASK: 0x4f08 */
     uint32_t stencil_ref_bf;    /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+
+    /* Whether a two-sided stencil is enabled. */
+    boolean two_sided;
+    /* Whether the fallback is needed because front and back stencil masks differ. */
+    boolean stencil_ref_bf_fallback;
 };
 
 struct r300_rs_state {
@@ -290,6 +295,21 @@ struct r300_context {
     /* Parent class */
     struct pipe_context context;
 
+    /* Emission of drawing packets. */
+    void (*emit_draw_arrays_immediate)(
+            struct r300_context *r300,
+            unsigned mode, unsigned start, unsigned count);
+
+    void (*emit_draw_arrays)(
+            struct r300_context *r300,
+            unsigned mode, unsigned count);
+
+    void (*emit_draw_elements)(
+            struct r300_context *r300, struct pipe_buffer* indexBuffer,
+            unsigned indexSize, unsigned minIndex, unsigned maxIndex,
+            unsigned mode, unsigned start, unsigned count);
+
+
     /* The interface to the windowing system, etc. */
     struct r300_winsys_screen *rws;
     /* Screen. */
@@ -382,6 +402,9 @@ struct r300_context {
     boolean scissor_enabled;
     /* Whether rendering is conditional and should be skipped. */
     boolean skip_rendering;
+    /* Whether the two-sided stencil ref value is different for front and
+     * back faces, and fallback should be used for r3xx-r4xx. */
+    boolean stencil_ref_bf_fallback;
     /* upload managers */
     struct u_upload_mgr *upload_vb;
     struct u_upload_mgr *upload_ib;
index c2b13235a165702af53a0a2366397e6f95172dcd..09355569fb89f0e5bb08d40d2b254948165713c7 100644 (file)
@@ -162,10 +162,15 @@ static boolean immd_is_good_idea(struct r300_context *r300,
     return TRUE;
 }
 
-static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
-                                            unsigned mode,
-                                            unsigned start,
-                                            unsigned count)
+/*****************************************************************************
+ * The emission of draw packets for r500. Older GPUs may use these functions *
+ * after resolving fallback issues (e.g. stencil ref two-sided).             *
+ ****************************************************************************/
+
+void r500_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count)
 {
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
@@ -252,9 +257,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     }
 }
 
-static void r300_emit_draw_arrays(struct r300_context *r300,
-                                  unsigned mode,
-                                  unsigned count)
+void r500_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count)
 {
 #if defined(ENABLE_ALT_NUM_VERTS)
     boolean alt_num_verts = count > 65535;
@@ -282,14 +287,14 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
     END_CS;
 }
 
-static void r300_emit_draw_elements(struct r300_context *r300,
-                                    struct pipe_buffer* indexBuffer,
-                                    unsigned indexSize,
-                                    unsigned minIndex,
-                                    unsigned maxIndex,
-                                    unsigned mode,
-                                    unsigned start,
-                                    unsigned count)
+void r500_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count)
 {
     uint32_t count_dwords;
     uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
@@ -347,6 +352,104 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     END_CS;
 }
 
+/*****************************************************************************
+ * The emission of draw packets for r300 which take care of the two-sided    *
+ * stencil ref fallback and call r500's functions.                           *
+ ****************************************************************************/
+
+/* Set drawing for front faces: add back-face culling to the bound cull mode. */
+static void r300_begin_stencil_ref_fallback(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(2); /* a single register write follows */
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_BACK);
+    END_CS;
+}
+
+/* Set drawing for back faces: cull front faces, load back ref value and masks. */
+static void r300_switch_stencil_ref_side(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(4); /* two register writes follow */
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_FRONT);
+    OUT_CS_REG(R300_ZB_STENCILREFMASK,
+               dsa->stencil_ref_bf | r300->stencil_ref.ref_value[1]); /* back masks + back ref */
+    END_CS;
+}
+
+/* Restore the original state: the bound cull mode and the front ref value and masks. */
+static void r300_end_stencil_ref_fallback(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(4); /* two register writes follow */
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode);
+    OUT_CS_REG(R300_ZB_STENCILREFMASK,
+               dsa->stencil_ref_mask | r300->stencil_ref.ref_value[0]); /* front masks + front ref */
+    END_CS;
+}
+
+void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count)
+{ /* Runs the r500 emitter once, or twice (front then back faces) under the fallback. */
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300); /* pass 1: front faces */
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300_switch_stencil_ref_side(r300); /* pass 2: back faces */
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300_end_stencil_ref_fallback(r300); /* restore cull mode and front ref/masks */
+    }
+}
+
+void r300_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count)
+{ /* Runs the r500 emitter once, or twice (front then back faces) under the fallback. */
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_arrays(r300, mode, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300); /* pass 1: front faces */
+        r500_emit_draw_arrays(r300, mode, count);
+        r300_switch_stencil_ref_side(r300); /* pass 2: back faces */
+        r500_emit_draw_arrays(r300, mode, count);
+        r300_end_stencil_ref_fallback(r300); /* restore cull mode and front ref/masks */
+    }
+}
+
+void r300_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count)
+{ /* Runs the r500 emitter once, or twice (front then back faces) under the fallback. */
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300); /* pass 1: front faces */
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+        r300_switch_stencil_ref_side(r300); /* pass 2: back faces */
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+        r300_end_stencil_ref_fallback(r300); /* restore cull mode and front ref/masks */
+    }
+}
+
 static void r300_shorten_ubyte_elts(struct r300_context* r300,
                                     struct pipe_buffer** elts,
                                     unsigned start,
@@ -457,13 +560,13 @@ void r300_draw_range_elements(struct pipe_context* pipe,
     u_upload_flush(r300->upload_vb);
     u_upload_flush(r300->upload_ib);
     if (alt_num_verts || count <= 65535) {
-        r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
-                                maxIndex, mode, start, count);
+        r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                 maxIndex, mode, start, count);
     } else {
         do {
             short_count = MIN2(count, 65534);
-            r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
-                                    maxIndex, mode, start, short_count);
+            r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                      maxIndex, mode, start, short_count);
 
             start += short_count;
             count -= short_count;
@@ -518,7 +621,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
     r300_update_derived_state(r300);
 
     if (immd_is_good_idea(r300, count)) {
-        r300_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300->emit_draw_arrays_immediate(r300, mode, start, count);
     } else {
         /* Make sure there are at least 128 spare dwords in the command buffer.
          * (most of it being consumed by emit_aos) */
@@ -528,12 +631,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 
         if (alt_num_verts || count <= 65535) {
             r300_emit_aos(r300, start);
-            r300_emit_draw_arrays(r300, mode, count);
+            r300->emit_draw_arrays(r300, mode, count);
         } else {
             do {
                 short_count = MIN2(count, 65535);
                 r300_emit_aos(r300, start);
-                r300_emit_draw_arrays(r300, mode, short_count);
+                r300->emit_draw_arrays(r300, mode, short_count);
 
                 start += short_count;
                 count -= short_count;
@@ -746,9 +849,9 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
     return TRUE;
 }
 
-static void r300_render_draw_arrays(struct vbuf_render* render,
-                                          unsigned start,
-                                          unsigned count)
+static void r500_render_draw_arrays(struct vbuf_render* render,
+                                    unsigned start,
+                                    unsigned count)
 {
     struct r300_render* r300render = r300_render(render);
     struct r300_context* r300 = r300render->r300;
@@ -768,9 +871,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
     END_CS;
 }
 
-static void r300_render_draw(struct vbuf_render* render,
-                                   const ushort* indices,
-                                   uint count)
+static void r500_render_draw(struct vbuf_render* render,
+                             const ushort* indices,
+                             uint count)
 {
     struct r300_render* r300render = r300_render(render);
     struct r300_context* r300 = r300render->r300;
@@ -796,6 +899,40 @@ static void r300_render_draw(struct vbuf_render* render,
     END_CS;
 }
 
+static void r300_render_draw_arrays(struct vbuf_render* render,
+                                    unsigned start,
+                                    unsigned count)
+{ /* vbuf_render draw_arrays hook for non-r500: one pass, or two under the fallback. */
+    struct r300_context* r300 = r300_render(render)->r300;
+
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_render_draw_arrays(render, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300); /* pass 1: front faces */
+        r500_render_draw_arrays(render, start, count);
+        r300_switch_stencil_ref_side(r300); /* pass 2: back faces */
+        r500_render_draw_arrays(render, start, count);
+        r300_end_stencil_ref_fallback(r300); /* restore cull mode and front ref/masks */
+    }
+}
+
+static void r300_render_draw(struct vbuf_render* render,
+                             const ushort* indices,
+                             uint count)
+{ /* vbuf_render draw hook for non-r500: one pass, or two under the fallback. */
+    struct r300_context* r300 = r300_render(render)->r300;
+
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_render_draw(render, indices, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300); /* pass 1: front faces */
+        r500_render_draw(render, indices, count);
+        r300_switch_stencil_ref_side(r300); /* pass 2: back faces */
+        r500_render_draw(render, indices, count);
+        r300_end_stencil_ref_fallback(r300); /* restore cull mode and front ref/masks */
+    }
+}
+
 static void r300_render_destroy(struct vbuf_render* render)
 {
     FREE(render);
@@ -816,8 +953,13 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
     r300render->base.map_vertices = r300_render_map_vertices;
     r300render->base.unmap_vertices = r300_render_unmap_vertices;
     r300render->base.set_primitive = r300_render_set_primitive;
-    r300render->base.draw = r300_render_draw;
-    r300render->base.draw_arrays = r300_render_draw_arrays;
+    if (r300->screen->caps.is_r500) {
+        r300render->base.draw = r500_render_draw;
+        r300render->base.draw_arrays = r500_render_draw_arrays;
+    } else {
+        r300render->base.draw = r300_render_draw;
+        r300render->base.draw_arrays = r300_render_draw_arrays;
+    }
     r300render->base.release_vertices = r300_render_release_vertices;
     r300render->base.destroy = r300_render_destroy;
 
index 27b5e6a9630b0a79b2cc92376134e4b408a84c20..870e1fb53d17f3a66e81a540d0c158093d9e6faa 100644 (file)
 
 uint32_t r300_translate_primitive(unsigned prim);
 
+void r500_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count);
+
+void r500_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count);
+
+void r500_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count);
+
+void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count);
+
+void r300_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count);
+
+void r300_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count);
+
 void r300_draw_range_elements(struct pipe_context* pipe,
                               struct pipe_buffer* indexBuffer,
                               unsigned indexSize,
index 2adb3e7221b4a907c2d8e10bc6f65cc5a88ddb1d..2309f35326817f85d2000cf4d8f6280c9183bc9c 100644 (file)
@@ -444,6 +444,8 @@ static void*
                 (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
 
         if (state->stencil[1].enabled) {
+            dsa->two_sided = TRUE;
+
             dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
             dsa->z_stencil_control |=
             (r300_translate_depth_stencil_function(state->stencil[1].func) <<
@@ -455,14 +457,16 @@ static void*
             (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
                 R300_S_BACK_ZFAIL_OP_SHIFT);
 
-            if (caps->is_r500)
-            {
+            dsa->stencil_ref_bf =
+                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
+                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+
+            if (caps->is_r500) {
                 dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
-                dsa->stencil_ref_bf =
-                    (state->stencil[1].valuemask <<
-                    R300_STENCILMASK_SHIFT) |
-                    (state->stencil[1].writemask <<
-                    R300_STENCILWRITEMASK_SHIFT);
+            } else {
+                dsa->stencil_ref_bf_fallback =
+                  (state->stencil[0].valuemask != state->stencil[1].valuemask ||
+                   state->stencil[0].writemask != state->stencil[1].writemask);
             }
         }
     }
@@ -483,13 +487,33 @@ static void*
     return (void*)dsa;
 }
 
+static void r300_update_stencil_ref_fallback_status(struct r300_context *r300)
+{ /* Recompute whether non-r500 draws must be split into front/back passes. */
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+    if (!dsa || r300->screen->caps.is_r500) { /* no DSA bound (set_stencil_ref may run first), or r500 */
+        return;
+    }
+
+    r300->stencil_ref_bf_fallback =
+        dsa->stencil_ref_bf_fallback || /* front/back value or write masks differ */
+        (dsa->two_sided && /* ref values only matter with two-sided stencil */
+         r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
+}
+
 /* Bind DSA state. */
 static void r300_bind_dsa_state(struct pipe_context* pipe,
                                 void* state)
 {
     struct r300_context* r300 = r300_context(pipe);
 
+    if (!state) { /* ignore a NULL bind; the update below dereferences the bound DSA */
+        return;
+    }
+
     UPDATE_STATE(state, r300->dsa_state);
+
+    r300_update_stencil_ref_fallback_status(r300); /* fallback flag depends on the bound DSA */
 }
 
 /* Free DSA state. */
@@ -503,8 +527,11 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
                                  const struct pipe_stencil_ref* sr)
 {
     struct r300_context* r300 = r300_context(pipe);
+
     r300->stencil_ref = *sr;
     r300->dsa_state.dirty = TRUE;
+
+    r300_update_stencil_ref_fallback_status(r300);
 }
 
 /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */