r600/sb: start adding GDS support
author Dave Airlie <airlied@redhat.com>
Thu, 7 Dec 2017 02:14:45 +0000 (02:14 +0000)
committer Dave Airlie <airlied@redhat.com>
Thu, 18 Jan 2018 03:35:37 +0000 (03:35 +0000)
This adds support for GDS ops to sb backend.

This seems to work for atomics and tess factor writes.

Acked-By: Roland Scheidegger <sroland@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
13 files changed:
src/gallium/drivers/r600/r600_isa.h
src/gallium/drivers/r600/sb/sb_bc.h
src/gallium/drivers/r600/sb/sb_bc_builder.cpp
src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
src/gallium/drivers/r600/sb/sb_bc_dump.cpp
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/r600/sb/sb_dump.cpp
src/gallium/drivers/r600/sb/sb_gcm.cpp
src/gallium/drivers/r600/sb/sb_ir.h
src/gallium/drivers/r600/sb/sb_peephole.cpp
src/gallium/drivers/r600/sb/sb_ra_init.cpp
src/gallium/drivers/r600/sb/sb_shader.cpp

index b5a36b4e80ed557eadd0e657a2906e8c89893249..f6e26976c5fcbb57599d1a0f40e65404aec64282 100644 (file)
@@ -115,7 +115,7 @@ enum alu_op_flags
        AF_CC_LE        = (5U << AF_CC_SHIFT),
 };
 
-/* flags for FETCH instructions (TEX/VTX) */
+/* flags for FETCH instructions (TEX/VTX/GDS) */
 enum fetch_op_flags
 {
        FF_GDS          = (1<<0),
index fed041cf506739f5ad0e52eae782494df70bf9a5..fc3fa5082d0a0121c7afb6fb0cca1b1ea9a5c911 100644 (file)
@@ -401,6 +401,7 @@ enum sched_queue_id {
        SQ_ALU,
        SQ_TEX,
        SQ_VTX,
+       SQ_GDS,
 
        SQ_NUM
 };
@@ -580,6 +581,11 @@ struct bc_fetch {
        unsigned mega_fetch:1;
 
        unsigned src2_gpr:7; /* for GDS */
+       unsigned alloc_consume:1;
+       unsigned uav_id:4;
+       unsigned uav_index_mode:2;
+       unsigned bcast_first_req:1;
+
        void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
 };
 
@@ -966,6 +972,7 @@ private:
        int build_fetch_clause(cf_node *n);
        int build_fetch_tex(fetch_node *n);
        int build_fetch_vtx(fetch_node *n);
+       int build_fetch_gds(fetch_node *n);
 };
 
 } // namespace r600_sb
index b0df3d9a54490a51e3f0ac5148d06db28d42ee09..ea91e197c094bbdf695c5a8bbe22acc81d356384 100644 (file)
@@ -129,7 +129,9 @@ int bc_builder::build_fetch_clause(cf_node* n) {
                        I != E; ++I) {
                fetch_node *f = static_cast<fetch_node*>(*I);
 
-               if (f->bc.op_ptr->flags & FF_VTX)
+               if (f->bc.op_ptr->flags & FF_GDS)
+                       build_fetch_gds(f);
+               else if (f->bc.op_ptr->flags & FF_VTX)
                        build_fetch_vtx(f);
                else
                        build_fetch_tex(f);
@@ -558,6 +560,46 @@ int bc_builder::build_fetch_tex(fetch_node* n) {
        return 0;
 }
 
+/*
+ * Encode one GDS fetch instruction from n->bc into the bytecode buffer:
+ * three MEM_GDS_WORD{0,1,2}_EGCM words followed by a zero dword.
+ * Always returns 0.
+ */
+int bc_builder::build_fetch_gds(fetch_node *n) {
+       const bc_fetch &bc = n->bc;
+       const fetch_op_info *fop = bc.op_ptr;
+       /* GDS_OP is taken from bits 8..13 of the value ctx.fetch_opcode() returns */
+       unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
+       unsigned mem_op = 4;
+       /* bitwise test: a logical && would pass for any non-zero flags value */
+       assert(fop->flags & FF_GDS);
+
+       /* TF_WRITE is emitted with MEM_OP 5 and GDS_OP cleared */
+       if (bc.op == FETCH_OP_TF_WRITE) {
+               mem_op = 5;
+               gds_op = 0;
+       }
+
+       /* word 0: instruction/memory op and the first source register + swizzle */
+       bb << MEM_GDS_WORD0_EGCM()
+               .MEM_INST(2)
+               .MEM_OP(mem_op)
+               .SRC_GPR(bc.src_gpr)
+               .SRC_SEL_X(bc.src_sel[0])
+               .SRC_SEL_Y(bc.src_sel[1])
+               .SRC_SEL_Z(bc.src_sel[2]);
+
+       /* word 1: destination register, GDS op, second source and UAV fields */
+       bb << MEM_GDS_WORD1_EGCM()
+               .DST_GPR(bc.dst_gpr)
+               .DST_REL_MODE(bc.dst_rel)
+               .GDS_OP(gds_op)
+               .SRC_GPR(bc.src2_gpr)
+               .UAV_INDEX_MODE(bc.uav_index_mode)
+               .UAV_ID(bc.uav_id)
+               .ALLOC_CONSUME(bc.alloc_consume)
+               .BCAST_FIRST_REQ(bc.bcast_first_req);
+
+       /* word 2: destination swizzle */
+       bb << MEM_GDS_WORD2_EGCM()
+               .DST_SEL_X(bc.dst_sel[0])
+               .DST_SEL_Y(bc.dst_sel[1])
+               .DST_SEL_Z(bc.dst_sel[2])
+               .DST_SEL_W(bc.dst_sel[3]);
+
+       /* trailing zero dword after the three encoded words */
+       bb << 0;
+       return 0;
+}
+
 int bc_builder::build_fetch_vtx(fetch_node* n) {
        const bc_fetch &bc = n->bc;
        const fetch_op_info *fop = bc.op_ptr;
index 8712abe5f7894158b60a5fb16e2d3f9bb008d202..1fa580e66d67e66971866c27efa42a1ceab7992b 100644 (file)
@@ -415,7 +415,10 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
                unsigned gds_op;
                if (mem_op == 4) {
                        gds_op = (dw1 >> 9) & 0x1f;
-                       fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
+                       if ((dw1 >> 9) & 0x20)
+                               fetch_opcode = FETCH_OP_GDS_ADD_RET + gds_op;
+                       else
+                               fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
                } else if (mem_op == 5)
                        fetch_opcode = FETCH_OP_TF_WRITE;
                bc.set_op(fetch_opcode);
@@ -512,6 +515,10 @@ int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) {
        tmp = w1.get_DST_REL_MODE();
        bc.dst_rel_global = (tmp == 2);
        bc.src2_gpr = w1.get_SRC_GPR();
+       bc.alloc_consume = w1.get_ALLOC_CONSUME();
+       bc.uav_id = w1.get_UAV_ID();
+       bc.uav_index_mode = w1.get_UAV_INDEX_MODE();
+       bc.bcast_first_req = w1.get_BCAST_FIRST_REQ();
 
        MEM_GDS_WORD2_EGCM w2(dw2);
        bc.dst_sel[0] = w2.get_DST_SEL_X();
index 788450b3c9ceee84a8a219da722650f7f0df6159..72a1b24467d035fcbc1703af0655f1f107f43fc4 100644 (file)
@@ -452,11 +452,14 @@ void bc_dump::dump(fetch_node& n) {
        sb_ostringstream s;
        static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
        unsigned gds = n.bc.op_ptr->flags & FF_GDS;
+       bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
+               n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
+       bool show_dst = !gds || (gds && gds_has_ret);
 
        s << n.bc.op_ptr->name;
        fill_to(s, 20);
 
-       if (!gds) {
+       if (show_dst) {
                s << "R";
                print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
                s << ".";
@@ -483,7 +486,13 @@ void bc_dump::dump(fetch_node& n) {
                s << ",   RID:" << n.bc.resource_id;
 
        if (gds) {
-
+               s << " UAV:" << n.bc.uav_id;
+               if (n.bc.uav_index_mode)
+                       s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
+               if (n.bc.bcast_first_req)
+                       s << " BFQ";
+               if (n.bc.alloc_consume)
+                       s << " AC";
        } else if (vtx) {
                s << "  " << fetch_type[n.bc.fetch_type];
                if (!ctx.is_cayman() && n.bc.mega_fetch_count)
index 7f1dd0a7a0e9f38ca24a79935ff1dd264dfb7b2b..c20640e47676b490df2b75154b10783f5faf3793 100644 (file)
@@ -557,6 +557,8 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 
        if (flags & FF_VTX) {
                src_count = 1;
+       } else if (flags & FF_GDS) {
+               src_count = 2;
        } else if (flags & FF_USEGRAD) {
                emit_set_grad(f);
        } else if (flags & FF_USE_TEXTURE_OFFSETS) {
@@ -661,6 +663,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
        for (unsigned i = 0; i < 4; ++i)
                f->bc.dst_sel[i] = dst_swz[i];
 
+       if ((flags & FF_GDS) && reg == -1) {
+               f->bc.dst_sel[0] = SEL_MASK;
+               f->bc.dst_gpr = 0;
+               return ;
+       }
        assert(reg >= 0);
 
        if (reg >= 0)
index de3984f5967e2947fe76b6569a18b2355ca7f2c6..8ab4083a3c99008f231e2c76c6467336a342c1ae 100644 (file)
@@ -569,7 +569,10 @@ int bc_parser::decode_fetch_clause(cf_node* cf) {
        int r;
        unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
-       cf->subtype = NST_TEX_CLAUSE;
+       /*
+        * Bitwise test: with a logical && this was true for any non-zero
+        * flags value, so every such clause became NST_GDS_CLAUSE.
+        * NOTE(review): FF_GDS is declared in fetch_op_flags; confirm it is
+        * the intended bit to test against a CF op's flags.
+        */
+       if (cf->bc.op_ptr->flags & FF_GDS)
+               cf->subtype = NST_GDS_CLAUSE;
+       else
+               cf->subtype = NST_TEX_CLAUSE;
 
        while (cnt--) {
                fetch_node *n = sh->create_fetch();
@@ -595,10 +598,14 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
                unsigned flags = n->bc.op_ptr->flags;
 
                unsigned vtx = flags & FF_VTX;
-               unsigned num_src = vtx ? ctx.vtx_src_num : 4;
+               unsigned gds = flags & FF_GDS;
+               unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;
 
                n->dst.resize(4);
 
+               if (gds) {
+                       n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
+               }
                if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
                        sh->uses_gradients = true;
                }
index d6051704c158008ca07c2d586a48bb6d71640cc6..70892e1c8c4fd6703f30a76ed10b9f0cc5e17fd2 100644 (file)
@@ -469,6 +469,7 @@ void dump::dump_op(node* n) {
        case NST_ALU_CLAUSE:
        case NST_TEX_CLAUSE:
        case NST_VTX_CLAUSE:
+       case NST_GDS_CLAUSE:
                dump_op(*n, static_cast<cf_node*>(n)->bc.op_ptr->name);
                break;
        case NST_ALU_PACKED_INST:
index 7b43a32818e5ea17a42041c3f25fc61b19b07967..fbebe3427d4489559eea3f4df8d09b3e41a369b1 100644 (file)
@@ -427,10 +427,22 @@ void gcm::bu_sched_bb(bb_node* bb) {
 
                                if (sq != SQ_CF) {
                                        if (!clause || sampler_indexing) {
-                                               clause = sh.create_clause(sq == SQ_ALU ?
-                                                               NST_ALU_CLAUSE :
-                                                                       sq == SQ_TEX ? NST_TEX_CLAUSE :
-                                                                                       NST_VTX_CLAUSE);
+                                               node_subtype nst;
+                                               switch (sq) {
+                                               case SQ_ALU:
+                                                       nst = NST_ALU_CLAUSE;
+                                                       break;
+                                               case SQ_TEX:
+                                                       nst = NST_TEX_CLAUSE;
+                                                       break;
+                                               case SQ_GDS:
+                                                       nst = NST_GDS_CLAUSE;
+                                                       break;
+                                               default:
+                                                       nst = NST_VTX_CLAUSE;
+                                                       break;
+                                               }
+                                               clause = sh.create_clause(nst);
                                                bb->push_front(clause);
                                        }
                                } else {
index 67c7cd8aa488232ac0bdf376e59b5aacf5570585..2390babfcfe0e0eb03445ef67a30eb4ddf5f1c86 100644 (file)
@@ -663,6 +663,7 @@ enum node_subtype {
        NST_FETCH_INST,
        NST_TEX_CLAUSE,
        NST_VTX_CLAUSE,
+       NST_GDS_CLAUSE,
 
        NST_BB,
 
@@ -787,7 +788,7 @@ public:
        bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
 
        bool is_fetch_clause() {
-               return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
+               return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
        }
 
        bool is_copy() { return subtype == NST_COPY; }
index d4b97557d4e8950845b479bbfec11877c4b133b8..49a6965b1f385380fd864527482b909d9a1bfee1 100644 (file)
@@ -52,7 +52,19 @@ void peephole::run_on(container_node* c) {
                if (n->is_container())
                        run_on(static_cast<container_node*>(n));
                else {
-
+                       if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
+                               fetch_node *f = static_cast<fetch_node*>(n);
+                               bool has_dst = false;
+
+                               for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
+                                       value *v = *I;
+                                       if (v)
+                                               has_dst = true;
+                               }
+                               if (!has_dst)
+                                       if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
+                                               f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
+                       }
                        if (n->is_alu_inst()) {
                                alu_node *a = static_cast<alu_node*>(n);
 
index 68ee98291f8d4c3e72f72b0620017b4a8ee4611e..e5ec9db23b70dd7353df99293169c90a9d3f026d 100644 (file)
@@ -745,6 +745,8 @@ void ra_split::split_vector_inst(node* n) {
        no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
                        n->is_fetch_op(FETCH_OP_SEMFETCH);
 
+       no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS);
+
        if (!n->src.empty() && !call_fs) {
 
                // we may have more than one source vector -
index 8c7b39bb03f0ef54297a5122d545511ccf0e531f..321e24ea256e51a1d484b77fdfda9d5496a46adf 100644 (file)
@@ -91,6 +91,7 @@ cf_node* shader::create_clause(node_subtype nst) {
        case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
        case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
        case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
+       case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
        default: assert(!"invalid clause type"); break;
        }
 
@@ -597,6 +598,8 @@ sched_queue_id shader::get_queue_id(node* n) {
                        fetch_node *f = static_cast<fetch_node*>(n);
                        if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
                                return SQ_VTX;
+                       if (f->bc.op_ptr->flags & FF_GDS)
+                               return SQ_GDS;
                        return SQ_TEX;
                }
                case NST_CF_INST: