r600g: avoid unnecessary shader exports v2
author Vadim Girlin <vadimgirlin@gmail.com>
Tue, 26 Jun 2012 18:47:27 +0000 (22:47 +0400)
committer Vadim Girlin <vadimgirlin@gmail.com>
Tue, 26 Jun 2012 22:06:55 +0000 (02:06 +0400)
In some cases a TGSI shader has more color outputs than the number of
color buffers (CBs), so it seems we need to limit the number of color
exports. This requires different shader variants depending on nr_cbufs,
but on the other hand we are doing fewer exports, which are very costly.

v2: fix various piglit regressions

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 4b82b06f17777e347a381a34c33283bfb97f9c7d..895d8a2c4d4bb986fd04912e5c70d2548fa27d10 100644 (file)
@@ -2646,18 +2646,14 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
                db_shader_control |= S_02880C_KILL_ENABLE(1);
 
        exports_ps = 0;
-       num_cout = 0;
        for (i = 0; i < rshader->noutput; i++) {
                if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
                    rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
                        exports_ps |= 1;
-               else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
-                       if (rshader->fs_write_all)
-                               num_cout = rshader->nr_cbufs;
-                       else
-                               num_cout++;
-               }
        }
+
+       num_cout = rshader->nr_ps_color_exports;
+
        exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
        if (!exports_ps) {
                /* always at least export 1 component per pixel */
index 5d194e325f7d5ffb520c239cfb95e3c07e1413dc..380fd660c297264201c833bbf6a0a3aa8ae711a7 100644 (file)
@@ -198,10 +198,7 @@ struct r600_pipe_shader_selector {
        /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
        unsigned        type;
 
-       /* 1 on evergreen+ when the shader contains
-        * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
-        * Used to determine whether we need to include nr_cbufs in the key */
-       unsigned        eg_fs_write_all;
+       unsigned        nr_ps_max_color_exports;
 };
 
 struct r600_pipe_shader {
index f690c10fd7c37bdbd586b0208cd7e9dfa2540ddd..32d5a78758f21fd08ddcc7791b90a4bc4f4602d7 100644 (file)
@@ -800,6 +800,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                                ctx->cv_output = i;
                                break;
                        }
+               } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                       switch (d->Semantic.Name) {
+                       case TGSI_SEMANTIC_COLOR:
+                               ctx->shader->nr_ps_max_color_exports++;
+                               break;
+                       }
                }
                break;
        case TGSI_FILE_CONSTANT:
@@ -1152,8 +1158,10 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
        ctx.colors_used = 0;
        ctx.clip_vertex_write = 0;
 
+       shader->nr_ps_color_exports = 0;
+       shader->nr_ps_max_color_exports = 0;
+
        shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
-       shader->nr_cbufs = rctx->nr_cbufs;
 
        /* register allocations */
        /* Values [0,127] correspond to GPR[0..127].
@@ -1288,6 +1296,9 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
                }
        }
 
+       if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
+               shader->nr_ps_max_color_exports = 8;
+
        if (ctx.fragcoord_input >= 0) {
                if (ctx.bc->chip_class == CAYMAN) {
                        for (j = 0 ; j < 4; j++) {
@@ -1527,10 +1538,17 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
                        break;
                case TGSI_PROCESSOR_FRAGMENT:
                        if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+                               /* never export more colors than the number of CBs */
+                               if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
+                                       /* skip export */
+                                       j--;
+                                       continue;
+                               }
                                output[j].array_base = next_pixel_base++;
                                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               shader->nr_ps_color_exports++;
                                if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
-                                       for (k = 1; k < shader->nr_cbufs; k++) {
+                                       for (k = 1; k < rctx->nr_cbufs; k++) {
                                                j++;
                                                memset(&output[j], 0, sizeof(struct r600_bytecode_output));
                                                output[j].gpr = shader->output[i].gpr;
@@ -1544,6 +1562,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
                                                output[j].array_base = next_pixel_base++;
                                                output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
                                                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                               shader->nr_ps_color_exports++;
                                        }
                                }
                        } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
@@ -1594,7 +1613,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh
        }
 
        /* add fake pixel export */
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) {
+       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
                memset(&output[j], 0, sizeof(struct r600_bytecode_output));
                output[j].gpr = 0;
                output[j].elem_size = 3;
index 2d35e770070b574d4f710596d509721ffce48053..eb0bbf6ebb77ee9ae2a9dcd53272400f4d2605e4 100644 (file)
@@ -49,7 +49,12 @@ struct r600_shader {
        boolean                 fs_write_all;
        boolean                 vs_prohibit_ucps;
        boolean                 two_side;
-       unsigned                nr_cbufs;
+       /* Number of color outputs in the TGSI shader,
+        * sometimes it could be higher than nr_cbufs (bug?).
+        * Also with writes_all property on eg+ it will be set to max CB number */
+       unsigned                nr_ps_max_color_exports;
+       /* Real number of ps color exports compiled in the bytecode */
+       unsigned                nr_ps_color_exports;
        /* bit n is set if the shader writes gl_ClipDistance[n] */
        unsigned                clip_dist_write;
        /* flag is set if the shader writes VS_OUT_MISC_VEC (e.g. for PSIZE) */
index 295453a5aef2122fea54a132284c8404b32d2f99..d621aee920173db124e41e046c3e385b7c2c5a7c 100644 (file)
@@ -1644,6 +1644,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 
        /* build states */
        rctx->have_depth_fb = 0;
+       rctx->nr_cbufs = state->nr_cbufs;
+
        for (int i = 0; i < state->nr_cbufs; i++) {
                r600_cb(rctx, rstate, state, i);
        }
index 879dcc50edb1b373a596e531402d50c78a958135..6050fa049724528f2adfd369164e9d70016deb25 100644 (file)
@@ -437,9 +437,8 @@ static INLINE unsigned r600_shader_selector_key(struct pipe_context * ctx,
        unsigned key;
 
        if (sel->type == PIPE_SHADER_FRAGMENT) {
-               key = rctx->two_side;
-               if (sel->eg_fs_write_all)
-                       key |= rctx->nr_cbufs << 1;
+               key = rctx->two_side |
+                               MIN2(sel->nr_ps_max_color_exports, rctx->nr_cbufs + rctx->dual_src_blend) << 1;
        } else
                key = 0;
 
@@ -494,14 +493,12 @@ static int r600_shader_select(struct pipe_context *ctx,
                        return r;
                }
 
-               /* We don't know the value of eg_fs_write_all property until we built
-                * at least one variant, so we may need to recompute the key (include
-                * rctx->nr_cbufs) after building first variant. */
+               /* We don't know the value of nr_ps_max_color_exports until we built
+                * at least one variant, so we may need to recompute the key after
+                * building first variant. */
                if (sel->type == PIPE_SHADER_FRAGMENT &&
-                               sel->num_shaders == 0 &&
-                               rctx->chip_class >= EVERGREEN &&
-                               shader->shader.fs_write_all) {
-                       sel->eg_fs_write_all = 1;
+                               sel->num_shaders == 0) {
+                       sel->nr_ps_max_color_exports = shader->shader.nr_ps_max_color_exports;
                        key = r600_shader_selector_key(ctx, sel);
                }
 
@@ -515,9 +512,6 @@ static int r600_shader_select(struct pipe_context *ctx,
        shader->next_variant = sel->current;
        sel->current = shader;
 
-       /* Moved from r600_bind_ps_shader, different shader variants
-        * may use different number of GPRs, so we need to update it. */
-       /* FIXME: we never did it after rebuilding the shaders, is it required? */
        if (rctx->chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) {
                r600_adjust_gprs(rctx);
        }