From da977ad9074707932b9dc1f7c52b5427ce920c13 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 7 Dec 2017 02:14:45 +0000
Subject: [PATCH] r600/sb: start adding GDS support

This adds support for GDS ops to sb backend.

This seems to work for atomics and tess factor writes.

Acked-By: Roland Scheidegger
Signed-off-by: Dave Airlie
---
Review notes (not part of the commit message):
 * build_fetch_gds(): the assert combined the flags with FF_GDS using
   logical '&&', which only checks that flags is non-zero; it now tests
   the FF_GDS bit with '&'.
 * decode_fetch_clause(): the GDS test used '&&' on the CF op flags, so
   any clause whose op has non-zero flags was tagged NST_GDS_CLAUSE; it
   now compares the CF opcode with CF_OP_GDS, matching create_clause().
 * Indentation and blank context lines were reconstructed from the hunk
   line counts; re-check them against the tree before applying.

 src/gallium/drivers/r600/r600_isa.h           |  2 +-
 src/gallium/drivers/r600/sb/sb_bc.h           |  7 +++
 src/gallium/drivers/r600/sb/sb_bc_builder.cpp | 44 ++++++++++++++++++-
 src/gallium/drivers/r600/sb/sb_bc_decoder.cpp |  9 +++-
 src/gallium/drivers/r600/sb/sb_bc_dump.cpp    | 13 +++++-
 .../drivers/r600/sb/sb_bc_finalize.cpp        |  7 +++
 src/gallium/drivers/r600/sb/sb_bc_parser.cpp  | 11 ++++-
 src/gallium/drivers/r600/sb/sb_dump.cpp       |  1 +
 src/gallium/drivers/r600/sb/sb_gcm.cpp        | 20 +++++++--
 src/gallium/drivers/r600/sb/sb_ir.h           |  3 +-
 src/gallium/drivers/r600/sb/sb_peephole.cpp   | 14 +++++-
 src/gallium/drivers/r600/sb/sb_ra_init.cpp    |  2 +
 src/gallium/drivers/r600/sb/sb_shader.cpp     |  3 ++
 13 files changed, 123 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index b5a36b4e80e..f6e26976c5f 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -115,7 +115,7 @@ enum alu_op_flags
 	AF_CC_LE = (5U << AF_CC_SHIFT),
 };
 
-/* flags for FETCH instructions (TEX/VTX) */
+/* flags for FETCH instructions (TEX/VTX/GDS) */
 enum fetch_op_flags
 {
 	FF_GDS = (1<<0),
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index fed041cf506..fc3fa5082d0 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -401,6 +401,7 @@ enum sched_queue_id {
 	SQ_ALU,
 	SQ_TEX,
 	SQ_VTX,
+	SQ_GDS,
 	SQ_NUM
 };
 
@@ -580,6 +581,11 @@ struct bc_fetch {
 	unsigned mega_fetch:1;
 
 	unsigned src2_gpr:7; /* for GDS */
+	unsigned alloc_consume:1;
+	unsigned uav_id:4;
+	unsigned uav_index_mode:2;
+	unsigned bcast_first_req:1;
+
 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
 };
 
@@ -966,6 +972,7 @@ private:
 	int build_fetch_clause(cf_node *n);
 	int build_fetch_tex(fetch_node *n);
 	int build_fetch_vtx(fetch_node *n);
+	int build_fetch_gds(fetch_node *n);
 };
 
 } // namespace r600_sb
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index b0df3d9a544..ea91e197c09 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -129,7 +129,9 @@ int bc_builder::build_fetch_clause(cf_node* n) {
 			I != E; ++I) {
 		fetch_node *f = static_cast<fetch_node*>(*I);
 
-		if (f->bc.op_ptr->flags & FF_VTX)
+		if (f->bc.op_ptr->flags & FF_GDS)
+			build_fetch_gds(f);
+		else if (f->bc.op_ptr->flags & FF_VTX)
 			build_fetch_vtx(f);
 		else
 			build_fetch_tex(f);
@@ -558,6 +560,46 @@ int bc_builder::build_fetch_tex(fetch_node* n) {
 	return 0;
 }
 
+int bc_builder::build_fetch_gds(fetch_node *n) {
+	const bc_fetch &bc = n->bc;
+	const fetch_op_info *fop = bc.op_ptr;
+	unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
+	unsigned mem_op = 4;
+	assert(fop->flags & FF_GDS);
+
+	if (bc.op == FETCH_OP_TF_WRITE) {
+		mem_op = 5;
+		gds_op = 0;
+	}
+
+	bb << MEM_GDS_WORD0_EGCM()
+		.MEM_INST(2)
+		.MEM_OP(mem_op)
+		.SRC_GPR(bc.src_gpr)
+		.SRC_SEL_X(bc.src_sel[0])
+		.SRC_SEL_Y(bc.src_sel[1])
+		.SRC_SEL_Z(bc.src_sel[2]);
+
+	bb << MEM_GDS_WORD1_EGCM()
+		.DST_GPR(bc.dst_gpr)
+		.DST_REL_MODE(bc.dst_rel)
+		.GDS_OP(gds_op)
+		.SRC_GPR(bc.src2_gpr)
+		.UAV_INDEX_MODE(bc.uav_index_mode)
+		.UAV_ID(bc.uav_id)
+		.ALLOC_CONSUME(bc.alloc_consume)
+		.BCAST_FIRST_REQ(bc.bcast_first_req);
+
+	bb << MEM_GDS_WORD2_EGCM()
+		.DST_SEL_X(bc.dst_sel[0])
+		.DST_SEL_Y(bc.dst_sel[1])
+		.DST_SEL_Z(bc.dst_sel[2])
+		.DST_SEL_W(bc.dst_sel[3]);
+
+	bb << 0;
+	return 0;
+}
+
 int bc_builder::build_fetch_vtx(fetch_node* n) {
 	const bc_fetch &bc = n->bc;
 	const fetch_op_info *fop = bc.op_ptr;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
index 8712abe5f78..1fa580e66d6 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -415,7 +415,10 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
 		unsigned gds_op;
 		if (mem_op == 4) {
 			gds_op = (dw1 >> 9) & 0x1f;
-			fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
+			if ((dw1 >> 9) & 0x20)
+				fetch_opcode = FETCH_OP_GDS_ADD_RET + gds_op;
+			else
+				fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
 		} else if (mem_op == 5)
 			fetch_opcode = FETCH_OP_TF_WRITE;
 		bc.set_op(fetch_opcode);
@@ -512,6 +515,10 @@ int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) {
 	tmp = w1.get_DST_REL_MODE();
 	bc.dst_rel_global = (tmp == 2);
 	bc.src2_gpr = w1.get_SRC_GPR();
+	bc.alloc_consume = w1.get_ALLOC_CONSUME();
+	bc.uav_id = w1.get_UAV_ID();
+	bc.uav_index_mode = w1.get_UAV_INDEX_MODE();
+	bc.bcast_first_req = w1.get_BCAST_FIRST_REQ();
 
 	MEM_GDS_WORD2_EGCM w2(dw2);
 	bc.dst_sel[0] = w2.get_DST_SEL_X();
diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
index 788450b3c9c..72a1b24467d 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
@@ -452,11 +452,14 @@ void bc_dump::dump(fetch_node& n) {
 	sb_ostringstream s;
 	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
 	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
+	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
+		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
+	bool show_dst = !gds || (gds && gds_has_ret);
 
 	s << n.bc.op_ptr->name;
 	fill_to(s, 20);
 
-	if (!gds) {
+	if (show_dst) {
 		s << "R";
 		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
 		s << ".";
@@ -483,7 +486,13 @@ void bc_dump::dump(fetch_node& n) {
 		s << ", RID:" << n.bc.resource_id;
 
 	if (gds) {
-
+		s << " UAV:" << n.bc.uav_id;
+		if (n.bc.uav_index_mode)
+			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
+		if (n.bc.bcast_first_req)
+			s << " BFQ";
+		if (n.bc.alloc_consume)
+			s << " AC";
 	} else if (vtx) {
 		s << " " << fetch_type[n.bc.fetch_type];
 		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index 7f1dd0a7a0e..c20640e4767 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -557,6 +557,8 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 
 	if (flags & FF_VTX) {
 		src_count = 1;
+	} else if (flags & FF_GDS) {
+		src_count = 2;
 	} else if (flags & FF_USEGRAD) {
 		emit_set_grad(f);
 	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
@@ -661,6 +663,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 	for (unsigned i = 0; i < 4; ++i)
 		f->bc.dst_sel[i] = dst_swz[i];
 
+	if ((flags & FF_GDS) && reg == -1) {
+		f->bc.dst_sel[0] = SEL_MASK;
+		f->bc.dst_gpr = 0;
+		return ;
+	}
 	assert(reg >= 0);
 
 	if (reg >= 0)
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index de3984f5967..8ab4083a3c9 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -569,7 +569,10 @@ int bc_parser::decode_fetch_clause(cf_node* cf) {
 	int r;
 	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
-	cf->subtype = NST_TEX_CLAUSE;
+	if (cf->bc.op == CF_OP_GDS)
+		cf->subtype = NST_GDS_CLAUSE;
+	else
+		cf->subtype = NST_TEX_CLAUSE;
 
 	while (cnt--) {
 		fetch_node *n = sh->create_fetch();
@@ -595,10 +598,14 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
 		unsigned flags = n->bc.op_ptr->flags;
 
 		unsigned vtx = flags & FF_VTX;
-		unsigned num_src = vtx ? ctx.vtx_src_num : 4;
+		unsigned gds = flags & FF_GDS;
+		unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4;
 
 		n->dst.resize(4);
 
+		if (gds) {
+			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL;
+		}
 		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
 			sh->uses_gradients = true;
 		}
diff --git a/src/gallium/drivers/r600/sb/sb_dump.cpp b/src/gallium/drivers/r600/sb/sb_dump.cpp
index d6051704c15..70892e1c8c4 100644
--- a/src/gallium/drivers/r600/sb/sb_dump.cpp
+++ b/src/gallium/drivers/r600/sb/sb_dump.cpp
@@ -469,6 +469,7 @@ void dump::dump_op(node* n) {
 	case NST_ALU_CLAUSE:
 	case NST_TEX_CLAUSE:
 	case NST_VTX_CLAUSE:
+	case NST_GDS_CLAUSE:
 		dump_op(*n, static_cast<cf_node*>(n)->bc.op_ptr->name);
 		break;
 	case NST_ALU_PACKED_INST:
diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp
index 7b43a32818e..fbebe3427d4 100644
--- a/src/gallium/drivers/r600/sb/sb_gcm.cpp
+++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp
@@ -427,10 +427,22 @@ void gcm::bu_sched_bb(bb_node* bb) {
 
 				if (sq != SQ_CF) {
 					if (!clause || sampler_indexing) {
-						clause = sh.create_clause(sq == SQ_ALU ?
-								NST_ALU_CLAUSE :
-									sq == SQ_TEX ? NST_TEX_CLAUSE :
-											NST_VTX_CLAUSE);
+						node_subtype nst;
+						switch (sq) {
+						case SQ_ALU:
+							nst = NST_ALU_CLAUSE;
+							break;
+						case SQ_TEX:
+							nst = NST_TEX_CLAUSE;
+							break;
+						case SQ_GDS:
+							nst = NST_GDS_CLAUSE;
+							break;
+						default:
+							nst = NST_VTX_CLAUSE;
+							break;
+						}
+						clause = sh.create_clause(nst);
 						bb->push_front(clause);
 					}
 				} else {
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index 67c7cd8aa48..2390babfcfe 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -663,6 +663,7 @@ enum node_subtype {
 	NST_FETCH_INST,
 	NST_TEX_CLAUSE,
 	NST_VTX_CLAUSE,
+	NST_GDS_CLAUSE,
 
 	NST_BB,
 
@@ -787,7 +788,7 @@ public:
 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
 
 	bool is_fetch_clause() {
-		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
+		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
 	}
 
 	bool is_copy() { return subtype == NST_COPY; }
diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index d4b97557d4e..49a6965b1f3 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -52,7 +52,19 @@ void peephole::run_on(container_node* c) {
 		if (n->is_container())
 			run_on(static_cast<container_node*>(n));
 		else {
-
+			if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
+				fetch_node *f = static_cast<fetch_node*>(n);
+				bool has_dst = false;
+
+				for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
+					value *v = *I;
+					if (v)
+						has_dst = true;
+				}
+				if (!has_dst)
+					if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
+						f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
+			}
 			if (n->is_alu_inst()) {
 				alu_node *a = static_cast<alu_node*>(n);
 
diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index 68ee98291f8..e5ec9db23b7 100644
--- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -745,6 +745,8 @@ void ra_split::split_vector_inst(node* n) {
 	no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
 			n->is_fetch_op(FETCH_OP_SEMFETCH);
 
+	no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS);
+
 	if (!n->src.empty() && !call_fs) {
 
 		// we may have more than one source vector -
diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
index 8c7b39bb03f..321e24ea256 100644
--- a/src/gallium/drivers/r600/sb/sb_shader.cpp
+++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
@@ -91,6 +91,7 @@ cf_node* shader::create_clause(node_subtype nst) {
 	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
 	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
 	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
+	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
 	default: assert(!"invalid clause type"); break;
 	}
 
@@ -597,6 +598,8 @@ sched_queue_id shader::get_queue_id(node* n) {
 			fetch_node *f = static_cast<fetch_node*>(n);
 			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
 				return SQ_VTX;
+			if (f->bc.op_ptr->flags & FF_GDS)
+				return SQ_GDS;
 			return SQ_TEX;
 		}
 		case NST_CF_INST:
-- 
2.30.2