r600g/sb: Enable SB for geometry shaders
author Glenn Kennard <glenn.kennard@gmail.com>
Tue, 7 Apr 2015 01:00:20 +0000 (03:00 +0200)
committer Dave Airlie <airlied@redhat.com>
Tue, 7 Apr 2015 22:18:35 +0000 (08:18 +1000)
Add SV_GEOMETRY_EMIT special variable type to track the
implicit dependencies between CUT/EMIT_VERTEX/MEM_RING
instructions so GCM/scheduler doesn't reorder them.

Mark emit instructions as unkillable so DCE doesn't eat them.

Enable only for evergreen/cayman, as enabling SB on R6xx/R7xx
causes a few unexplained GS piglit regressions.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600_isa.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/sb/sb_bc_dump.cpp
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/r600/sb/sb_core.cpp
src/gallium/drivers/r600/sb/sb_dump.cpp
src/gallium/drivers/r600/sb/sb_ir.h
src/gallium/drivers/r600/sb/sb_ra_init.cpp
src/gallium/drivers/r600/sb/sb_sched.cpp
src/gallium/drivers/r600/sb/sb_valtable.cpp

index ec3f7028a980909844575f92cb7116f8680d1a8a..381f06d592441c7360f6027100816ba4eea0b15d 100644 (file)
@@ -641,7 +641,7 @@ static const struct cf_op_info cf_op_table[] = {
 
                {"MEM_SCRATCH",                   { 0x24, 0x24, 0x50, 0x50 },  CF_MEM  },
                {"MEM_REDUCT",                    { 0x25, 0x25,   -1,   -1 },  CF_MEM  },
-               {"MEM_RING",                      { 0x26, 0x26, 0x52, 0x52 },  CF_MEM  },
+               {"MEM_RING",                      { 0x26, 0x26, 0x52, 0x52 },  CF_MEM | CF_EMIT },
 
                {"EXPORT",                        { 0x27, 0x27, 0x53, 0x53 },  CF_EXP  },
                {"EXPORT_DONE",                   { 0x28, 0x28, 0x54, 0x54 },  CF_EXP  },
@@ -649,9 +649,9 @@ static const struct cf_op_info cf_op_table[] = {
                {"MEM_EXPORT",                    {   -1, 0x3A, 0x55, 0x55 },  CF_MEM  },
                {"MEM_RAT",                       {   -1,   -1, 0x56, 0x56 },  CF_MEM | CF_RAT },
                {"MEM_RAT_NOCACHE",               {   -1,   -1, 0x57, 0x57 },  CF_MEM | CF_RAT },
-               {"MEM_RING1",                     {   -1,   -1, 0x58, 0x58 },  CF_MEM  },
-               {"MEM_RING2",                     {   -1,   -1, 0x59, 0x59 },  CF_MEM  },
-               {"MEM_RING3",                     {   -1,   -1, 0x5A, 0x5A },  CF_MEM  },
+               {"MEM_RING1",                     {   -1,   -1, 0x58, 0x58 },  CF_MEM | CF_EMIT },
+               {"MEM_RING2",                     {   -1,   -1, 0x59, 0x59 },  CF_MEM | CF_EMIT },
+               {"MEM_RING3",                     {   -1,   -1, 0x5A, 0x5A },  CF_MEM | CF_EMIT },
                {"MEM_MEM_COMBINED",              {   -1,   -1, 0x5B, 0x5B },  CF_MEM  },
                {"MEM_RAT_COMBINED_NOCACHE",      {   -1,   -1, 0x5C, 0x5C },  CF_MEM | CF_RAT },
                {"MEM_RAT_COMBINED",              {   -1,   -1,   -1, 0x5D },  CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */
index ec754002eccdc79b43fcefa8dc75cc524d484ac3..87b6e6e06ecc85d878963637fc2e02013ae4152d 100644 (file)
@@ -159,8 +159,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                goto error;
        }
 
-       /* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
-       use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+    /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */
+    if (rctx->b.chip_class <= R700) {
+           use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+    }
        /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
        use_sb &= !shader->shader.uses_index_registers;
 
@@ -1141,6 +1143,8 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
                for (i = 0; i < 3; i++) {
                        treg[i] = r600_get_temp(ctx);
                }
+               r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
+
                t2 = r600_get_temp(ctx);
                for (i = 0; i < 3; i++) {
                        memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -1935,9 +1939,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
        }
 
+       shader->max_arrays = 0;
+       shader->num_arrays = 0;
        if (indirect_gprs) {
-               shader->max_arrays = 0;
-               shader->num_arrays = 0;
 
                if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
                        r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
index 6f6a57e2647f91aecf107f4eeb0500b1aa644373..5232782791d02e096f7c83faf61672af4f1f91dc 100644 (file)
@@ -448,7 +448,7 @@ void bc_dump::dump(fetch_node& n) {
                        s << " FWQ";
                if (ctx.is_egcm() && n.bc.resource_index_mode)
                        s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode;
-               if (ctx.is_egcm() && n.bc.resource_index_mode)
+               if (ctx.is_egcm() && n.bc.sampler_index_mode)
                        s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode;
 
                s << " UCF:" << n.bc.use_const_fields
index 08b7d77f1a40fb2cc5b26f60bff33141b90f9ac4..8c2cd1460e53e81db4340e2797588ba3c327cd43 100644 (file)
@@ -290,7 +290,7 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
                value *d = n->dst.empty() ? NULL : n->dst[0];
 
                if (d && d->is_special_reg()) {
-                       assert(n->bc.op_ptr->flags & AF_MOVA);
+                       assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
                        d = NULL;
                }
 
index 08e7f5c36c3f3200f2b9239dbe8e0d4dc84e74ab..4879c036f9f2df586c5dd361cc6d790a59be006d 100644 (file)
@@ -135,6 +135,16 @@ int bc_parser::parse_decls() {
                }
        }
 
+       // GS inputs can add indirect addressing
+       if (sh->target == TARGET_GS) {
+               if (pshader->num_arrays) {
+                       for (unsigned i = 0; i < pshader->num_arrays; ++i) {
+                               r600_shader_array &a = pshader->arrays[i];
+                               sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
+                       }
+               }
+       }
+
        if (sh->target == TARGET_VS || sh->target == TARGET_ES)
                sh->add_input(0, 1, 0x0F);
        else if (sh->target == TARGET_GS) {
@@ -720,6 +730,16 @@ int bc_parser::prepare_ir() {
                                        c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
                                }
 
+                               if (flags & CF_EMIT) {
+                                       // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
+                                       c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                                       c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                                       if (sh->target == TARGET_ES) {
+                                               // For ES shaders this is an export
+                                               c->flags |= NF_DONT_KILL;
+                                       }
+                               }
+
                                if (!burst_count--)
                                        break;
 
@@ -736,6 +756,11 @@ int bc_parser::prepare_ir() {
 
                        c->bc.end_of_program = eop;
 
+               } else if (flags & CF_EMIT) {
+                       c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+
+                       c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+                       c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
                }
        }
 
index 7db80082b607d03970e687c8ffcc6590f0cc8d4e..afea8188f1c143f890446d054ad14d1b65ac97d9 100644 (file)
@@ -189,7 +189,10 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
        sh->set_undef(sh->root->live_before);
 
-       SB_RUN_PASS(if_conversion,              1);
+       // if conversion breaks the dependency tracking between CF_EMIT ops when it removes
+       // the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
+       if (sh->target != TARGET_GS)
+               SB_RUN_PASS(if_conversion,              1);
 
        // if_conversion breaks info about uses, but next pass (peephole)
        // doesn't need it, so we can skip def/use update here
index b2130a48fa61a04b06cce6f5d87b348eada943a1..d6051704c158008ca07c2d586a48bb6d71640cc6 100644 (file)
@@ -354,7 +354,9 @@ void dump::dump_op(node &n, const char *name) {
                                        "WRITE_IND_ACK"};
                        sblog << "  " << exp_type[c->bc.type] << " " << c->bc.array_base
                                        << "   ES:" << c->bc.elem_size;
-                       has_dst = false;
+                       if (!(c->bc.op_ptr->flags & CF_EMIT)) {
+                               has_dst = false;
+                       }
                }
        }
 
index 711c2eb9e35aef0a06d4353c2766b87dff548d01..560a4a9b2843ced8d54315d4d12b895e18b69fdb 100644 (file)
@@ -41,7 +41,8 @@ enum special_regs {
        SV_ALU_PRED = 128,
        SV_EXEC_MASK,
        SV_AR_INDEX,
-       SV_VALID_MASK
+       SV_VALID_MASK,
+       SV_GEOMETRY_EMIT
 };
 
 class node;
@@ -506,6 +507,9 @@ public:
        bool is_AR() {
                return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
        }
+       bool is_geometry_emit() {
+               return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
+       }
 
        node* any_def() {
                assert(!(def && adef));
index e53aba540de13de92f59068a261085caf75177df..95b92905f59f1cf0dc85a32390daaf013f18abe9 100644 (file)
@@ -707,7 +707,7 @@ void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
 
                        assert(!o->is_dead());
 
-                       if (o->is_undef())
+                       if (o->is_undef() || o->is_geometry_emit())
                                continue;
 
                        if (allow_swz && o->is_float_0_or_1())
@@ -751,7 +751,7 @@ void ra_split::split_vector_inst(node* n) {
                // src vectors 1 (src[4-7] and 2 (src[8-11])
 
                unsigned nvec = n->src.size() >> 2;
-               assert(nvec << 2 == n->src.size());
+               assert(nvec << 2 <= n->src.size());
 
                for (unsigned nv = 0; nv < nvec; ++nv) {
                        vvec sv, tv, nsrc(4);
index 63e746406878c3412217a0c68991f25c12d1915c..4248a3fd30a8e6389fe4e7ad25df72a4de04ece6 100644 (file)
@@ -1463,7 +1463,7 @@ unsigned post_scheduler::try_add_instruction(node *n) {
                value *d = a->dst.empty() ? NULL : a->dst[0];
 
                if (d && d->is_special_reg()) {
-                       assert(a->bc.op_ptr->flags & AF_MOVA);
+                       assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
                        d = NULL;
                }
 
index 0d39e9c3f0302caa25f59277fe3d62c5ae1b9171..eb242b1c26f3fca3d073f3f6a18d4ed42412a60c 100644 (file)
@@ -55,6 +55,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
                        case SV_ALU_PRED: o << "PR"; break;
                        case SV_EXEC_MASK: o << "EM"; break;
                        case SV_VALID_MASK: o << "VM"; break;
+                       case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
                        default: o << "???specialreg"; break;
                }
                break;