From aa38bf658f1e0dd166b3ce12025533775f373788 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Wed, 15 Nov 2017 10:01:56 +1000
Subject: [PATCH] r600/shader: add support for load/store/atomic ops on images.

This adds support to the shader assembler for load/store/atomic ops on
images which are handled via the RAT operations.

Tested-By: Gert Wollny
Signed-off-by: Dave Airlie
---
 src/gallium/drivers/r600/r600_shader.c | 319 ++++++++++++++++++++++++-
 1 file changed, 315 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index b351c8904c5..f508774ed0c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -969,6 +969,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 	case TGSI_FILE_SAMPLER:
 	case TGSI_FILE_SAMPLER_VIEW:
 	case TGSI_FILE_ADDRESS:
+	case TGSI_FILE_IMAGE:
 		break;
 
 	case TGSI_FILE_HW_ATOMIC:
@@ -3074,6 +3075,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	case PIPE_SHADER_FRAGMENT:
 		shader->two_side = key.ps.color_two_side;
 		shader->atomic_base = key.ps.first_atomic_counter;
+		shader->rat_base = key.ps.nr_cbufs;
 		break;
 	default:
 		break;
 	}
@@ -7775,14 +7777,321 @@ static int tgsi_load_gds(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
+/* this fixes up 1D arrays properly */
+static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	int r, i;
+	struct r600_bytecode_alu alu;
+	int temp_reg = r600_get_temp(ctx);
+
+	for (i = 0; i < 4; i++) {
+		bool def_val = true, write_zero = false;
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.dst.sel = temp_reg;
+		alu.dst.chan = i;
+
+		switch (inst->Memory.Texture) {
+		case TGSI_TEXTURE_BUFFER:
+		case TGSI_TEXTURE_1D:
+			if (i == 1 || i == 2 || i == 3) {
+				write_zero = true;
+			}
+			break;
+		case TGSI_TEXTURE_1D_ARRAY:
+			if (i == 1 || i == 3)
+				write_zero = true;
+			else if (i == 2) {
+				r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
+				def_val = false;
+			}
+			break;
+		case TGSI_TEXTURE_2D:
+			if (i == 2 || i == 3)
+				write_zero = true;
+			break;
+		default:
+			if (i == 3)
+				write_zero = true;
+			break;
+		}
+
+		if (write_zero) {
+			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+			alu.src[0].value = 0;
+		} else if (def_val) {
+			r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
+		}
+
+		if (i == 3)
+			alu.last = 1;
+		alu.dst.write = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+	*idx_gpr = temp_reg;
+	return 0;
+}
+
+static int tgsi_load_rat(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	/* have to work out the offset into the RAT immediate return buffer */
+	struct r600_bytecode_vtx vtx;
+	struct r600_bytecode_cf *cf;
+	int r;
+	int idx_gpr;
+	unsigned format, num_format, format_comp, endian;
+	const struct util_format_description *desc;
+	unsigned rat_index_mode;
+	unsigned immed_base;
+
+	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
+	r = load_index_src(ctx, 1, &idx_gpr);
+	if (r)
+		return r;
+
+	if (rat_index_mode)
+		egcm_load_index_reg(ctx->bc, 1, false);
+
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+	cf = ctx->bc->cf_last;
+
+	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
+	cf->rat.inst = V_RAT_INST_NOP_RTN;
+	cf->rat.index_mode = rat_index_mode;
+	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
+	cf->output.gpr = ctx->thread_id_gpr;
+	cf->output.index_gpr = idx_gpr;
+	cf->output.comp_mask = 0xf;
+	cf->output.burst_count = 1;
+	cf->vpm = 1;
+	cf->barrier = 1;
+	cf->mark = 1;
+	cf->output.elem_size = 0;
+
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
+	cf = ctx->bc->cf_last;
+	cf->barrier = 1;
+
+	desc = util_format_description(inst->Memory.Format);
+	r600_vertex_data_type(inst->Memory.Format,
+			      &format, &num_format, &format_comp, &endian);
+	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+	vtx.op = FETCH_OP_VFETCH;
+	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
+	vtx.buffer_index_mode = rat_index_mode;
+	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+	vtx.src_gpr = ctx->thread_id_gpr;
+	vtx.src_sel_x = 1;
+	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	vtx.dst_sel_x = desc->swizzle[0];
+	vtx.dst_sel_y = desc->swizzle[1];
+	vtx.dst_sel_z = desc->swizzle[2];
+	vtx.dst_sel_w = desc->swizzle[3];
+	vtx.srf_mode_all = 1;
+	vtx.data_format = format;
+	vtx.num_format_all = num_format;
+	vtx.format_comp_all = format_comp;
+	vtx.endian = endian;
+	vtx.offset = 0;
+	vtx.mega_fetch_count = 3;
+	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
+	if (r)
+		return r;
+	cf = ctx->bc->cf_last;
+	cf->barrier = 1;
+	return 0;
+}
+
 static int tgsi_load(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+		return tgsi_load_rat(ctx);
 	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
 		return tgsi_load_gds(ctx);
 	return 0;
 }
 
+static int tgsi_store_rat(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bytecode_cf *cf;
+	bool src_requires_loading = false;
+	int val_gpr, idx_gpr;
+	int r, i;
+	unsigned rat_index_mode;
+
+	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+	r = load_index_src(ctx, 0, &idx_gpr);
+	if (r)
+		return r;
+
+	if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY)
+		src_requires_loading = true;
+
+	if (src_requires_loading) {
+		struct r600_bytecode_alu alu;
+		for (i = 0; i < 4; i++) {
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+			alu.op = ALU_OP1_MOV;
+			alu.dst.sel = ctx->temp_reg;
+			alu.dst.chan = i;
+
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+			if (i == 3)
+				alu.last = 1;
+			alu.dst.write = 1;
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		val_gpr = ctx->temp_reg;
+	} else
+		val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
+	if (rat_index_mode)
+		egcm_load_index_reg(ctx->bc, 1, false);
+
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+	cf = ctx->bc->cf_last;
+
+	cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
+	cf->rat.inst = V_RAT_INST_STORE_TYPED;
+	cf->rat.index_mode = rat_index_mode;
+	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+	cf->output.gpr = val_gpr;
+	cf->output.index_gpr = idx_gpr;
+	cf->output.comp_mask = 0xf;
+	cf->output.burst_count = 1;
+	cf->vpm = 1;
+	cf->barrier = 1;
+	cf->output.elem_size = 0;
+	return 0;
+}
+
+static int tgsi_store(struct r600_shader_ctx *ctx)
+{
+	return tgsi_store_rat(ctx);
+}
+
+static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	/* have to work out the offset into the RAT immediate return buffer */
+	struct r600_bytecode_alu alu;
+	struct r600_bytecode_vtx vtx;
+	struct r600_bytecode_cf *cf;
+	int r;
+	int idx_gpr;
+	unsigned format, num_format, format_comp, endian;
+	const struct util_format_description *desc;
+	unsigned rat_index_mode;
+	unsigned immed_base;
+
+	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
+
+	assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE);
+	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+
+	r = load_index_src(ctx, 1, &idx_gpr);
+	if (r)
+		return r;
+
+	if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.dst.sel = ctx->thread_id_gpr;
+		alu.dst.chan = 0;
+		alu.dst.write = 1;
+		r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.dst.sel = ctx->thread_id_gpr;
+		alu.dst.chan = 3;
+		alu.dst.write = 1;
+		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	} else {
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+		alu.op = ALU_OP1_MOV;
+		alu.dst.sel = ctx->thread_id_gpr;
+		alu.dst.chan = 0;
+		alu.dst.write = 1;
+		r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
+		alu.last = 1;
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
+		if (r)
+			return r;
+	}
+
+	if (rat_index_mode)
+		egcm_load_index_reg(ctx->bc, 1, false);
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
+	cf = ctx->bc->cf_last;
+
+	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
+	cf->rat.inst = ctx->inst_info->op;
+	cf->rat.index_mode = rat_index_mode;
+	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
+	cf->output.gpr = ctx->thread_id_gpr;
+	cf->output.index_gpr = idx_gpr;
+	cf->output.comp_mask = 0xf;
+	cf->output.burst_count = 1;
+	cf->vpm = 1;
+	cf->barrier = 1;
+	cf->mark = 1;
+	cf->output.elem_size = 0;
+	r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
+	cf = ctx->bc->cf_last;
+	cf->barrier = 1;
+	cf->cf_addr = 1;
+
+	desc = util_format_description(inst->Memory.Format);
+	r600_vertex_data_type(inst->Memory.Format,
+			      &format, &num_format, &format_comp, &endian);
+	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+	vtx.op = FETCH_OP_VFETCH;
+	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
+	vtx.buffer_index_mode = rat_index_mode;
+	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+	vtx.src_gpr = ctx->thread_id_gpr;
+	vtx.src_sel_x = 1;
+	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+	vtx.dst_sel_x = desc->swizzle[0];
+	vtx.dst_sel_y = 7;
+	vtx.dst_sel_z = 7;
+	vtx.dst_sel_w = 7;
+	vtx.use_const_fields = 0;
+	vtx.srf_mode_all = 1;
+	vtx.data_format = format;
+	vtx.num_format_all = num_format;
+	vtx.format_comp_all = format_comp;
+	vtx.endian = endian;
+	vtx.offset = 0;
+	vtx.mega_fetch_count = 0xf;
+	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
+	if (r)
+		return r;
+	cf = ctx->bc->cf_last;
+	cf->vpm = 1;
+	cf->barrier = 1;
+	return 0;
+}
+
 static int get_gds_op(int opcode)
 {
 	switch (opcode) {
@@ -7881,6 +8190,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
 static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
+		return tgsi_atomic_op_rat(ctx);
 	if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
 		return tgsi_atomic_op_gds(ctx);
 	return 0;
 }
@@ -9540,7 +9851,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
 	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
 	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
-	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
 	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[114]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[115]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9590,7 +9901,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
 	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
 	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
-	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
 	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9763,7 +10074,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
 	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
 	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
-	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
 	[113]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[114]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[115]	= { ALU_OP0_NOP, tgsi_unsupported},
@@ -9813,7 +10124,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
 	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
 	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_load},
-	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
+	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_store},
 	[163]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[164]	= { ALU_OP0_NOP, tgsi_unsupported},
 	[165]	= { ALU_OP0_NOP, tgsi_unsupported},
-- 
2.30.2
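
For reference, the coordinate fix-up performed by load_index_src() in the patch can be summarized as follows: the image coordinate is copied into a temporary register, the unused channels are zeroed, and for 1D arrays the layer index is moved from source .y into destination .z so the hardware sees it in the slice slot. The standalone C sketch below tabulates that mapping; it is illustrative only: the img_target enum and the coord_fixup() helper are stand-ins for the TGSI_TEXTURE_* tokens and are not part of r600_shader.c.

#include <stdio.h>

/* Illustrative stand-ins for the TGSI texture targets handled by
 * load_index_src(); not the real TGSI_TEXTURE_* token values. */
enum img_target { IMG_BUFFER, IMG_1D, IMG_1D_ARRAY, IMG_2D, IMG_OTHER };

/* What load_index_src() writes into channel chan (0..3) of the temporary
 * index register: 'c' = copy the source coordinate channel, 'y' = copy the
 * 1D-array layer from source .y, '0' = write the literal zero. */
static char coord_fixup(enum img_target t, int chan)
{
	switch (t) {
	case IMG_BUFFER:
	case IMG_1D:
		return chan == 0 ? 'c' : '0';	/* only .x is meaningful */
	case IMG_1D_ARRAY:
		if (chan == 2)
			return 'y';		/* layer moves from .y to .z */
		return chan == 0 ? 'c' : '0';
	case IMG_2D:
		return chan <= 1 ? 'c' : '0';
	default:				/* 2D_ARRAY, 3D, ... */
		return chan == 3 ? '0' : 'c';	/* only .w is zeroed */
	}
}

int main(void)
{
	const char *names[] = { "BUFFER", "1D", "1D_ARRAY", "2D", "other" };
	int t, c;

	for (t = 0; t < 5; t++) {
		printf("%-8s:", names[t]);
		for (c = 0; c < 4; c++)
			printf(" %c", coord_fixup((enum img_target)t, c));
		printf("\n");
	}
	return 0;
}

Compiled on its own, this prints one row per target; for example the 1D_ARRAY row reads "c 0 y 0", matching the switch in load_index_src().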