r600/shader: add local memory support to shader assembler.
author: Dave Airlie <airlied@redhat.com>
Fri, 3 Nov 2017 01:14:28 +0000 (11:14 +1000)
committer: Dave Airlie <airlied@redhat.com>
Tue, 5 Dec 2017 20:31:34 +0000 (20:31 +0000)
This is needed for compute shaders.

v1.1: make it work for vectors, fix missing LDS ops.

Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600_shader.c

index 7d1e444f4ffaae31577b44396de07dede4167288..db64eb51a520dcc4d7d9d1a19cb0b7e7b215cb40 100644 (file)
@@ -971,6 +971,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
        case TGSI_FILE_ADDRESS:
        case TGSI_FILE_BUFFER:
        case TGSI_FILE_IMAGE:
+       case TGSI_FILE_MEMORY:
                break;
 
        case TGSI_FILE_HW_ATOMIC:
@@ -8115,6 +8116,30 @@ static int tgsi_load_rat(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+/* LOAD from TGSI_FILE_MEMORY: copy ctx->src[1] channel 0 into a fresh temp and
+ * pass it, with Dst[0]'s register and write mask, to do_lds_fetch_values(). */
+static int tgsi_load_lds(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu mov;
+       int addr_reg = r600_get_temp(ctx);
+       int ret;
+
+       memset(&mov, 0, sizeof(mov));
+       mov.op = ALU_OP1_MOV;
+       mov.last = 1;
+       mov.dst.sel = addr_reg;
+       mov.dst.write = 1;
+       r600_bytecode_src(&mov.src[0], &ctx->src[1], 0);
+       ret = r600_bytecode_add_alu(ctx->bc, &mov);
+       if (ret)
+               return ret;
+
+       return do_lds_fetch_values(ctx, addr_reg,
+                                  ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index,
+                                  inst->Dst[0].Register.WriteMask);
+}
+
 static int tgsi_load(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8124,6 +8149,8 @@ static int tgsi_load(struct r600_shader_ctx *ctx)
                return tgsi_load_gds(ctx);
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
                return tgsi_load_buffer(ctx);
+       if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+               return tgsi_load_lds(ctx);
        return 0;
 }
 
@@ -8258,11 +8285,82 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+/*
+ * STORE to TGSI_FILE_MEMORY, emitted as LDS write instructions.
+ *
+ * ctx->src[0] channel 0 is copied into channel 0 of a fresh temp. Every other
+ * channel i enabled in Dst[0]'s write mask then gets an ADD_INT issued through
+ * single_alu_op2() with the operands (temp, i), (temp, 0) and the literal
+ * 4 * i -- presumably temp.i = temp.x + 4 * i; confirm against that helper.
+ * Finally each enabled channel of ctx->src[1] is written through its temp
+ * channel: channels 0/1 or 2/3 go out as one LDS_WRITE_REL (lds_idx = 1) when
+ * both channels of the pair are enabled, anything else as a single LDS_WRITE.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing helper.
+ */
+static int tgsi_store_lds(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int write_mask = inst->Dst[0].Register.WriteMask;
+       int addr_reg = r600_get_temp(ctx);
+       struct r600_bytecode_alu insn;
+       int chan, last_chan, ret;
+
+       /* Base: temp channel 0 = ctx->src[0] channel 0 (dst.chan stays zeroed). */
+       memset(&insn, 0, sizeof(insn));
+       insn.op = ALU_OP1_MOV;
+       insn.last = 1;
+       insn.dst.sel = addr_reg;
+       insn.dst.write = 1;
+       r600_bytecode_src(&insn.src[0], &ctx->src[0], 0);
+       ret = r600_bytecode_add_alu(ctx->bc, &insn);
+       if (ret)
+               return ret;
+
+       /* One ADD_INT per enabled channel above 0; channel 0 keeps the base. */
+       last_chan = tgsi_last_instruction(write_mask);
+       for (chan = 1; chan <= last_chan; chan++) {
+               if (write_mask & (1 << chan)) {
+                       ret = single_alu_op2(ctx, ALU_OP2_ADD_INT, addr_reg, chan,
+                                            addr_reg, 0, V_SQ_ALU_SRC_LITERAL, 4 * chan);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       for (chan = 0; chan <= last_chan; chan++) {
+               /* Pair write: an even channel enabled together with the next one. */
+               bool paired = !(chan & 1) && ((write_mask >> chan) & 0x3) == 0x3;
+
+               if (!(write_mask & (1 << chan)))
+                       continue;
+               memset(&insn, 0, sizeof(insn));
+               insn.op = paired ? LDS_OP3_LDS_WRITE_REL : LDS_OP2_LDS_WRITE;
+               insn.src[0].sel = addr_reg;
+               insn.src[0].chan = chan;
+               r600_bytecode_src(&insn.src[1], &ctx->src[1], chan);
+               if (paired) {
+                       r600_bytecode_src(&insn.src[2], &ctx->src[1], chan + 1);
+                       insn.lds_idx = 1;
+               }
+               insn.last = 1;
+               insn.is_lds_idx_op = true;
+               ret = r600_bytecode_add_alu(ctx->bc, &insn);
+               if (ret)
+                       return ret;
+               if (paired)
+                       chan++; /* the partner channel went out in the same op */
+       }
+       return 0;
+}
+
 static int tgsi_store(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
                return tgsi_store_buffer_rat(ctx);
+       else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
+               return tgsi_store_lds(ctx); /* TGSI_FILE_MEMORY destinations take the LDS store path */
        else
                return tgsi_store_rat(ctx);
 }
@@ -8502,6 +8600,71 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+/* Translate a TGSI atomic opcode into the LDS opcode returned for it;
+ * the result is -1 when the opcode has no entry in the table below. */
+static int get_lds_op(int opcode)
+{
+       static const struct {
+               int tgsi_op;
+               int lds_op;
+       } map[] = {
+               { TGSI_OPCODE_ATOMUADD, LDS_OP2_LDS_ADD_RET },
+               { TGSI_OPCODE_ATOMAND,  LDS_OP2_LDS_AND_RET },
+               { TGSI_OPCODE_ATOMOR,   LDS_OP2_LDS_OR_RET },
+               { TGSI_OPCODE_ATOMXOR,  LDS_OP2_LDS_XOR_RET },
+               { TGSI_OPCODE_ATOMUMIN, LDS_OP2_LDS_MIN_UINT_RET },
+               { TGSI_OPCODE_ATOMUMAX, LDS_OP2_LDS_MAX_UINT_RET },
+               { TGSI_OPCODE_ATOMIMIN, LDS_OP2_LDS_MIN_INT_RET },
+               { TGSI_OPCODE_ATOMIMAX, LDS_OP2_LDS_MAX_INT_RET },
+               { TGSI_OPCODE_ATOMXCHG, LDS_OP2_LDS_XCHG_RET },
+               { TGSI_OPCODE_ATOMCAS,  LDS_OP3_LDS_CMP_XCHG_RET },
+       };
+       unsigned idx;
+
+       for (idx = 0; idx < sizeof(map) / sizeof(map[0]); idx++) {
+               if (map[idx].tgsi_op == opcode)
+                       return map[idx].lds_op;
+       }
+       return -1;
+}
+
+/* Atomic TGSI op on TGSI_FILE_MEMORY: emit the LDS op chosen by get_lds_op()
+ * on ctx->src[1] and ctx->src[2] (plus ctx->src[3] for CMP_XCHG), then MOV
+ * EG_V_SQ_ALU_SRC_LDS_OQ_A_POP into the destination tgsi_dst() sets up for
+ * Dst[0]. Returns 0, -1 for an unmapped opcode, or the add_alu error. */
+static int tgsi_atomic_op_lds(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int lds_op = get_lds_op(inst->Instruction.Opcode);
+       struct r600_bytecode_alu alu;
+       int r;
+
+       if (lds_op < 0) /* get_lds_op() found no LDS equivalent: emit nothing */
+               return -1;
+       memset(&alu, 0, sizeof(alu));
+       alu.op = lds_op;
+       alu.is_lds_idx_op = true;
+       alu.last = 1;
+       r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
+       r600_bytecode_src(&alu.src[1], &ctx->src[2], 0);
+       if (lds_op == LDS_OP3_LDS_CMP_XCHG_RET)
+               r600_bytecode_src(&alu.src[2], &ctx->src[3], 0);
+       else
+               alu.src[2].sel = V_SQ_ALU_SRC_0;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+
+       /* then read from LDS_OQ_A_POP */
+       memset(&alu, 0, sizeof(alu));
+       alu.op = ALU_OP1_MOV;
+       alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+       alu.src[0].chan = 0;
+       tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+       alu.dst.write = 1;
+       alu.last = 1;
+       return r600_bytecode_add_alu(ctx->bc, &alu);
+}
+
 static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8511,6 +8674,8 @@ static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
                return tgsi_atomic_op_gds(ctx);
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
                return tgsi_atomic_op_rat(ctx);
+       if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
+               return tgsi_atomic_op_lds(ctx);
        return 0;
 }