r600g: fix dp2, dp3, dp4 tokens
authorJerome Glisse <jglisse@redhat.com>
Fri, 23 Jul 2010 22:19:13 +0000 (18:19 -0400)
committerJerome Glisse <jglisse@redhat.com>
Fri, 23 Jul 2010 22:19:13 +0000 (18:19 -0400)
We need to make sure dp are all mirror accross the alu unit.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/r600_shader.c

index e983cc90b43c51c68979414ebca5b3a1b619cf65..d788ab88be0138d8a0dcf4478e1818dd6212c95f 100644 (file)
@@ -541,18 +541,6 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
                case TGSI_OPCODE_SUB:
                        alu.src[1].neg = 1;
                        break;
-               case TGSI_OPCODE_DP2:
-                       if (i > 1) {
-                               alu.src[0].sel = alu.src[1].sel = 248;
-                               alu.src[0].chan = alu.src[1].chan = 0;
-                       }
-                       break;
-               case TGSI_OPCODE_DP3:
-                       if (i > 2) {
-                               alu.src[0].sel = alu.src[1].sel = 248;
-                               alu.src[0].chan = alu.src[1].chan = 0;
-                       }
-                       break;
                default:
                        break;
                }
@@ -625,6 +613,33 @@ static int tgsi_trans(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
+{
+       struct r600_bc_alu alu;
+       int i, r;
+
+       for (i = 0; i < 4; i++) {
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
+                       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
+               } else {
+                       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+                       r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (r)
+                               return r;
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = i;
+               }
+               if (i == 3) {
+                       alu.last = 1;
+               }
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int tgsi_op3(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -642,6 +657,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
                }
                alu.dst.sel = ctx->temp_reg;
                alu.dst.chan = i;
+               alu.dst.write = 1;
                alu.is_op3 = 1;
                if (i == 3) {
                        alu.last = 1;
@@ -650,17 +666,42 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
        }
+       return tgsi_helper_copy(ctx, inst);
+}
+
+static int tgsi_dp(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bc_alu alu;
+       int i, j, r;
+
        for (i = 0; i < 4; i++) {
                memset(&alu, 0, sizeof(struct r600_bc_alu));
-               if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
-                       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
-               } else {
-                       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
-                       r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.inst = ctx->inst_info->r600_opcode;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
                        if (r)
                                return r;
-                       alu.src[0].sel = ctx->temp_reg;
-                       alu.src[0].chan = i;
+               }
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               /* handle some special cases */
+               switch (ctx->inst_info->tgsi_opcode) {
+               case TGSI_OPCODE_DP2:
+                       if (i > 1) {
+                               alu.src[0].sel = alu.src[1].sel = 248;
+                               alu.src[0].chan = alu.src[1].chan = 0;
+                       }
+                       break;
+               case TGSI_OPCODE_DP3:
+                       if (i > 2) {
+                               alu.src[0].sel = alu.src[1].sel = 248;
+                               alu.src[0].chan = alu.src[1].chan = 0;
+                       }
+                       break;
+               default:
+                       break;
                }
                if (i == 3) {
                        alu.last = 1;
@@ -669,7 +710,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
        }
-       return 0;
+       return tgsi_helper_copy(ctx, inst);
 }
 
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
@@ -682,8 +723,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_LOG,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_MUL,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
        {TGSI_OPCODE_ADD,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
-       {TGSI_OPCODE_DP3,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2},
-       {TGSI_OPCODE_DP4,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2},
+       {TGSI_OPCODE_DP3,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+       {TGSI_OPCODE_DP4,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
        {TGSI_OPCODE_DST,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_MIN,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_MAX,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
@@ -747,7 +788,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXB,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_NRM,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_DIV,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_DP2,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_op2},
+       {TGSI_OPCODE_DP2,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
        {TGSI_OPCODE_TXL,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_BRK,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_IF,        0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},