From 816bb30245b9e4be78cc24228ada450a425b948d Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 30 Nov 2015 10:07:44 +1000
Subject: [PATCH] r600: add support for LDS instruction encoding.

These are used in tessellation shaders to read/write values
between VS/TCS/TES.

This splits the eg alu assembler out to handle these
instructions.

Signed-off-by: Dave Airlie
---
 src/gallium/drivers/r600/eg_asm.c   | 75 +++++++++++++++++++++++++++++
 src/gallium/drivers/r600/eg_sq.h    | 39 +++++++++++++++
 src/gallium/drivers/r600/r600_asm.c | 28 ++++++++++-
 src/gallium/drivers/r600/r600_asm.h |  4 ++
 4 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index f55564973bf..46683c19020 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -216,3 +216,78 @@ int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gd
 		S_SQ_MEM_GDS_WORD2_DST_SEL_W(gds->dst_sel_w);
 	return 0;
 }
+
+int eg_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
+{
+	if (alu->is_lds_idx_op) {
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
+		assert(!alu->src[0].neg && !alu->src[1].neg && !alu->src[2].neg);
+		bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+			S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+			S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+			S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_4(alu->lds_idx >> 4) |
+			S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+			S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+			S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+			S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_5(alu->lds_idx >> 5) |
+			S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
+			S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
+			S_SQ_ALU_WORD0_LAST(alu->last);
+	} else {
+		bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+			S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+			S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+			S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+			S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+			S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+			S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+			S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+			S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
+			S_SQ_ALU_WORD0_LAST(alu->last);
+	}
+
+	/* don't replace gpr by pv or ps for destination register */
+	if (alu->is_lds_idx_op) {
+		unsigned lds_op = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);
+		bc->bytecode[id++] =
+			S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_1(alu->lds_idx >> 1) |
+
+			S_SQ_ALU_WORD1_OP3_ALU_INST(lds_op & 0xff) |
+			S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_LDS_OP((lds_op >> 8) & 0xff) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_0(alu->lds_idx) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_2(alu->lds_idx >> 2) |
+			S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+			S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_3(alu->lds_idx >> 3);
+
+	} else if (alu->is_op3) {
+		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+			S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+			S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
+			S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
+			S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
+			S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
+			S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
+			S_SQ_ALU_WORD1_OP3_ALU_INST(r600_isa_alu_opcode(bc->isa->hw_class, alu->op)) |
+			S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
+	} else {
+		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
+			S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+			S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
+			S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
+			S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
+			S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
+			S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+			S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
+			S_SQ_ALU_WORD1_OP2_ALU_INST(r600_isa_alu_opcode(bc->isa->hw_class, alu->op)) |
+			S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
+			S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
+			S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
+	}
+	return 0;
+}
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index 3074cfe97d2..c118d3a007f 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -535,6 +535,45 @@
 #define S_SQ_MEM_GDS_WORD2_DST_SEL_Z(x) (((x) & 0x7) << 6)
 #define S_SQ_MEM_GDS_WORD2_DST_SEL_W(x) (((x) & 0x7) << 9)
 
+/* LDS IDX redefines the neg bits on op3 */
+#define S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x) (((x) & 0x1) << 12)
+#define S_SQ_ALU_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x) (((x) & 0x1) << 25)
+
+/* src2 neg */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x) (((x) & 0x1) << 12)
+
+/* this was dst gpr */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_LDS_OP(x) (((x) & 0x3f) << 21)
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x) (((x) & 0x1) << 27)
+
+/* this was dst rel */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x) (((x) & 0x1) << 28)
+/* this was clamp */
+#define S_SQ_ALU_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x) (((x) & 0x1) << 31)
+
+#define V_SQ_LDS_INST_ADD 0x00
+#define V_SQ_LDS_INST_SUB 0x01
+#define V_SQ_LDS_INST_RSUB 0x02
+
+#define V_SQ_LDS_INST_INC 0x03
+#define V_SQ_LDS_INST_DEC 0x04
+#define V_SQ_LDS_INST_MIN_INT 0x05
+#define V_SQ_LDS_INST_MAX_INT 0x06
+#define V_SQ_LDS_INST_MIN_UINT 0x07
+#define V_SQ_LDS_INST_MAX_UINT 0x08
+#define V_SQ_LDS_INST_AND 0x09
+#define V_SQ_LDS_INST_OR 0x0a
+#define V_SQ_LDS_INST_XOR 0x0b
+#define V_SQ_LDS_INST_MSKOR 0x0c
+#define V_SQ_LDS_INST_WRITE 0x0d
+#define V_SQ_LDS_INST_WRITE_REL 0x0e
+#define V_SQ_LDS_INST_WRITE2 0x0f
+
+#define V_SQ_LDS_INST_READ_RET 0x32
+#define V_SQ_LDS_INST_READ_REL_RET 0x33
+#define V_SQ_LDS_INST_READ2_RET 0x34
+#define V_SQ_LDS_INST_READWRITE_RET 0x35
+
 #define V_SQ_CF_COND_ACTIVE 0x00
 #define V_SQ_CF_COND_FALSE 0x01
 #define V_SQ_CF_COND_BOOL 0x02
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 2471f8a4694..ba17909bf7c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1714,10 +1714,12 @@ int r600_bytecode_build(struct r600_bytecode *bc)
 					r = r600_bytecode_alu_build(bc, alu, addr);
 					break;
 				case R700:
-				case EVERGREEN: /* eg alu is same encoding as r700 */
-				case CAYMAN:
 					r = r700_bytecode_alu_build(bc, alu, addr);
 					break;
+				case EVERGREEN:
+				case CAYMAN:
+					r = eg_bytecode_alu_build(bc, alu, addr);
+					break;
 				default:
 					R600_ERR("unknown chip class %d.\n", bc->chip_class);
 					return -EINVAL;
@@ -1904,6 +1906,28 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
 		need_sel = 0;
 		need_chan = 0;
 		switch (sel) {
+		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
+			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
+			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
+			o += fprintf(stderr, "LDS_OQ_A");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
+			o += fprintf(stderr, "LDS_OQ_B");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
+			o += fprintf(stderr, "LDS_OQ_A_POP");
+			need_chan = 1;
+			break;
+		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
+			o += fprintf(stderr, "LDS_OQ_B_POP");
+			need_chan = 1;
+			break;
 		case V_SQ_ALU_SRC_PS:
 			o += fprintf(stderr, "PS");
 			break;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f786bab3d59..0b78290295a 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -52,6 +52,7 @@ struct r600_bytecode_alu {
 	unsigned op;
 	unsigned last;
 	unsigned is_op3;
+	unsigned is_lds_idx_op;
 	unsigned execute_mask;
 	unsigned update_pred;
 	unsigned pred_sel;
@@ -59,6 +60,7 @@ struct r600_bytecode_alu {
 	unsigned bank_swizzle_force;
 	unsigned omod;
 	unsigned index_mode;
+	unsigned lds_idx;
 };
 
 struct r600_bytecode_tex {
@@ -253,6 +255,8 @@ struct r600_bytecode {
 int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_clause);
 int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
+int eg_bytecode_alu_build(struct r600_bytecode *bc,
+			  struct r600_bytecode_alu *alu, unsigned id);
 /* r600_asm.c */
 void r600_bytecode_init(struct r600_bytecode *bc,
 			enum chip_class chip_class,
-- 
2.30.2