nv50: record last access to temp and attr regs
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Sat, 23 May 2009 11:32:51 +0000 (13:32 +0200)
committer Ben Skeggs <bskeggs@redhat.com>
Thu, 28 May 2009 06:06:18 +0000 (16:06 +1000)
We now inspect the TGSI instructions in tx_prep to determine where
temps and FP attrs are last accessed.
This will enable us to reclaim some temporaries early and we also
use it to omit pre-loading FP attributes that aren't used.

src/gallium/drivers/nv50/nv50_program.c

index 40d2384c3e201ae52a203aad6993610226cac85a..c73ed08a5594dbecded2e427d010d8a3191e8d3c 100644 (file)
@@ -85,6 +85,8 @@ struct nv50_reg {
 
        int hw;
        int neg;
+
+       int acc; /* instruction where this reg is last read (first insn == 1) */
 };
 
 struct nv50_pc {
@@ -108,6 +110,10 @@ struct nv50_pc {
 
        struct nv50_reg *temp_temp[16];
        unsigned temp_temp_nr;
+
+       /* current instruction and total number of insns */
+       unsigned insn_cur;
+       unsigned insn_nr;
 };
 
 static void
@@ -1323,6 +1329,112 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
        return TRUE;
 }
 
+/* Replace *mask with the set of source components an instruction reads
+ * when that set is fixed by the opcode rather than by the destination
+ * write mask. tx_prep relies on this so that only the interpolants that
+ * are actually needed get loaded.
+ *
+ * insn: the instruction being inspected.
+ * mask: in/out component bitmask; left untouched for every opcode that is
+ *       not listed below.
+ */
+static void
+insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask)
+{
+       const unsigned opcode = insn->Instruction.Opcode;
+       unsigned target;
+
+       if (opcode == TGSI_OPCODE_DP3) {
+               *mask = 0x7;
+       } else if (opcode == TGSI_OPCODE_DP4 ||
+                  opcode == TGSI_OPCODE_DPH) {
+               *mask = 0xF;
+       } else if (opcode == TGSI_OPCODE_LIT) {
+               *mask = 0xB;
+       } else if (opcode == TGSI_OPCODE_RCP ||
+                  opcode == TGSI_OPCODE_RSQ) {
+               *mask = 0x1;
+       } else if (opcode == TGSI_OPCODE_TEX ||
+                  opcode == TGSI_OPCODE_TXP) {
+               /* the texture target lives in the extended token */
+               assert(insn->Instruction.Extended);
+               target = insn->InstructionExtTexture.Texture;
+
+               /* one bit per coordinate for 1D/2D, three bits otherwise */
+               if (target == TGSI_TEXTURE_1D)
+                       *mask = 0x1;
+               else if (target == TGSI_TEXTURE_2D)
+                       *mask = 0x3;
+               else
+                       *mask = 0x7;
+
+               /* TXP additionally reads the fourth component */
+               if (opcode == TGSI_OPCODE_TXP)
+                       *mask |= 0x8;
+       }
+}
+
+/* Record which TEMP and INPUT components one TGSI instruction touches by
+ * storing the current instruction number (pc->insn_nr) in the matching
+ * slot of the usage tables. A slot that is never touched keeps the 0 it
+ * was allocated with.
+ *
+ * pc:      translation context; supplies temp_nr, attr_nr and insn_nr.
+ * tok:     token holding the instruction to inspect.
+ * r_usage: [0] is the table for temporaries (pc->temp_nr * 4 entries),
+ *          [1] the table for inputs (pc->attr_nr * 4 entries). A NULL
+ *          entry is allocated here, zero-filled; the caller frees both.
+ *
+ * If a table is unavailable (allocation failed or came back NULL),
+ * accesses to that register file are simply not recorded instead of
+ * writing through a NULL pointer. The other table is still updated.
+ */
+static void
+prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
+                 unsigned *r_usage[2])
+{
+       const struct tgsi_full_instruction *insn;
+       const struct tgsi_full_src_register *src;
+       const struct tgsi_dst_register *dst;
+
+       unsigned i, c, k, n, mask, *acc_p;
+
+       insn = &tok->FullInstruction;
+       dst = &insn->FullDstRegisters[0].DstRegister;
+       mask = dst->WriteMask;
+
+       if (!r_usage[0])
+               r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
+       if (!r_usage[1])
+               r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
+
+       /* a write to a temporary counts as an access as well */
+       if (dst->File == TGSI_FILE_TEMPORARY && r_usage[0]) {
+               for (c = 0; c < 4; c++) {
+                       if (!(mask & (1 << c)))
+                               continue;
+                       r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
+               }
+       }
+
+       for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
+               src = &insn->FullSrcRegisters[i];
+
+               switch (src->SrcRegister.File) {
+               case TGSI_FILE_TEMPORARY:
+                       acc_p = r_usage[0];
+                       break;
+               case TGSI_FILE_INPUT:
+                       acc_p = r_usage[1];
+                       break;
+               default:
+                       continue;
+               }
+
+               /* no table for this file: nothing we can record */
+               if (!acc_p)
+                       continue;
+
+               /* the result depends only on the instruction, so repeating
+                * this for every source is harmless
+                */
+               insn_adjust_mask(insn, &mask);
+
+               for (c = 0; c < 4; c++) {
+                       if (!(mask & (1 << c)))
+                               continue;
+
+                       /* map the used component through the swizzle; only
+                        * real X/Y/Z/W selections refer to register data
+                        */
+                       k = tgsi_util_get_full_src_register_extswizzle(src, c);
+                       switch (k) {
+                       case TGSI_EXTSWIZZLE_X:
+                       case TGSI_EXTSWIZZLE_Y:
+                       case TGSI_EXTSWIZZLE_Z:
+                       case TGSI_EXTSWIZZLE_W:
+                               n = src->SrcRegister.Index * 4 + k;
+                               acc_p[n] = pc->insn_nr;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+}
+
 static boolean
 nv50_program_tx_prep(struct nv50_pc *pc)
 {
@@ -1330,6 +1442,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
        boolean ret = FALSE;
        unsigned i, c;
 
+       /* track register access for temps and attrs */
+       unsigned *r_usage[2];
+       r_usage[0] = NULL;
+       r_usage[1] = NULL;
+
        tgsi_parse_init(&p, pc->p->pipe.tokens);
        while (!tgsi_parse_end_of_tokens(&p)) {
                const union tgsi_full_token *tok = &p.FullToken;
@@ -1382,6 +1499,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                }
                        break;
                case TGSI_TOKEN_TYPE_INSTRUCTION:
+                       pc->insn_nr++;
+                       prep_inspect_insn(pc, tok, r_usage);
                        break;
                default:
                        break;
@@ -1398,6 +1517,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                                pc->temp[i*4+c].type = P_TEMP;
                                pc->temp[i*4+c].hw = -1;
                                pc->temp[i*4+c].index = i;
+                               pc->temp[i*4+c].acc = r_usage[0][i*4+c];
                        }
                }
        }
@@ -1427,6 +1547,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
                                        pc->attr[i*4+c].type = at->type;
                                        pc->attr[i*4+c].hw = at->hw;
                                        pc->attr[i*4+c].index = at->index;
+                                       pc->attr[i*4+c].acc = r_usage[1][i*4+c];
                                } else {
                                        pc->p->cfg.vp.attr[aid/32] |=
                                                (1 << (aid % 32));
@@ -1504,6 +1625,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
        ret = TRUE;
 out_err:
+       if (r_usage[0])
+               FREE(r_usage[0]);
+       if (r_usage[1])
+               FREE(r_usage[1]);
+
        tgsi_parse_free(&p);
        return ret;
 }
@@ -1558,6 +1684,7 @@ nv50_program_tx(struct nv50_program *p)
 
                switch (tok->Token.Type) {
                case TGSI_TOKEN_TYPE_INSTRUCTION:
+                       ++pc->insn_cur;
                        ret = nv50_program_tx_insn(pc, tok);
                        if (ret == FALSE)
                                goto out_err;