freedreno/ir3: add support for store instructions
author Rob Clark <robclark@freedesktop.org>
Sat, 25 Jul 2015 17:48:07 +0000 (13:48 -0400)
committer Rob Clark <robclark@freedesktop.org>
Mon, 27 Jul 2015 17:51:06 +0000 (13:51 -0400)
For store instructions, the "dst" register is a read register, not a
written register (i.e. it is the address to store to).  Let's not
confuse register allocation, scheduling, etc., with these details.
Instead, just leave a dummy instr->regs[0], and take "dst" from
instr->regs[1], with the srcs following it.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_legalize.c

index 6d19a29275b86004e4e504f790f2c7d2bbbb931a..b24825cff854abd747ad25737515821c7314e383 100644 (file)
@@ -499,12 +499,28 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
 static int emit_cat6(struct ir3_instruction *instr, void *ptr,
                struct ir3_info *info)
 {
-       struct ir3_register *dst  = instr->regs[0];
-       struct ir3_register *src1 = instr->regs[1];
-       struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+       struct ir3_register *dst, *src1, *src2;
        instr_cat6_t *cat6 = ptr;
 
-       iassert(instr->regs_count >= 2);
+       /* the "dst" for a store instruction is (from the perspective
+        * of data flow in the shader, ie. register use/def, etc) in
+        * fact a register that is read by the instruction, rather
+        * than written:
+        */
+       if (is_store(instr)) {
+               iassert(instr->regs_count >= 3);
+
+               dst  = instr->regs[1];
+               src1 = instr->regs[2];
+               src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
+       } else {
+               iassert(instr->regs_count >= 2);
+
+               dst  = instr->regs[0];
+               src1 = instr->regs[1];
+               src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+       }
+
 
        /* TODO we need a more comprehensive list about which instructions
         * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
index c3b61a0fe0111d1f1755bbdb661edfeb5c772d96..e68170dec580f82c3bdbdc04462185a6084e2c5e 100644 (file)
@@ -554,6 +554,26 @@ is_store(struct ir3_instruction *instr)
        return false;
 }
 
+static inline bool is_load(struct ir3_instruction *instr)
+{
+       if (is_mem(instr)) {
+               switch (instr->opc) {
+               case OPC_LDG:
+               case OPC_LDL:
+               case OPC_LDP:
+               case OPC_L2G:
+               case OPC_LDLW:
+               case OPC_LDC_4:
+               case OPC_LDLV:
+               /* probably some others too.. */
+                       return true;
+               default:
+                       break;
+               }
+       }
+       return false;
+}
+
 static inline bool is_input(struct ir3_instruction *instr)
 {
        /* in some cases, ldlv is used to fetch varying without
@@ -1043,6 +1063,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
 /* cat6 instructions: */
 INSTR2(6, LDLV)
 INSTR2(6, LDG)
+INSTR3(6, STG)
 
 /* ************************************************************************* */
 /* split this out or find some helper to use.. like main/bitset.h.. */
index f4a4223ae1736f88db66e7b98b4223725576bbb6..e94293f6d6b4847983a250e64816c20e2900af7b 100644 (file)
@@ -182,14 +182,14 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                         */
                        ctx->has_samp = true;
                        regmask_set(&needs_sy, n->regs[0]);
-               } else if (is_mem(n)) {
+               } else if (is_load(n)) {
                        regmask_set(&needs_sy, n->regs[0]);
                }
 
                /* both tex/sfu appear to not always immediately consume
                 * their src register(s):
                 */
-               if (is_tex(n) || is_sfu(n) || is_mem(n)) {
+               if (is_tex(n) || is_sfu(n) || is_load(n)) {
                        foreach_src(reg, n) {
                                if (reg_gpr(reg))
                                        regmask_set(&needs_ss_war, reg);