i965/fs: Bake regs_written into the IR instead of recomputing it later.

author Eric Anholt <eric@anholt.net>

Mon, 18 Mar 2013 18:30:57 +0000 (11:30 -0700)

committer Eric Anholt <eric@anholt.net>

Mon, 1 Apr 2013 23:17:26 +0000 (16:17 -0700)
author Eric Anholt <eric@anholt.net>
Mon, 18 Mar 2013 18:30:57 +0000 (11:30 -0700)
committer Eric Anholt <eric@anholt.net>
Mon, 1 Apr 2013 23:17:26 +0000 (16:17 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 703c3c5d8b4fe255303a57818ce6ba0419c8a5fd..84e88b1aa9135eddf002f0d0def52cbfbc14c87e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -60,6 +60,9 @@ fs_inst::init()
     this->src[0] = reg_undef;
     this->src[1] = reg_undef;
     this->src[2] = reg_undef;
+
+   /* This will be the case for almost all instructions. */
+   this->regs_written = 1;
  }
  
  fs_inst::fs_inst()
@@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
        fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
        inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
                                    vec4_result, surf_index, vec4_offset);
+      inst->regs_written = 4;
        instructions.push_tail(inst);
  
        vec4_result.reg_offset += const_offset & 3;
@@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst)
             offset == inst->offset);
  }
  
-int
-fs_inst::regs_written()
-{
-   if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
-      return 4;
-
-   /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
-    * but we don't currently use them...nor do we have an opcode for them.
-    */
-
-   return 1;
-}
-
  bool
  fs_inst::overwrites_reg(const fs_reg &reg)
  {
     return (reg.file == dst.file &&
             reg.reg == dst.reg &&
             reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written());
+           reg.reg_offset < dst.reg_offset + regs_written);
  }
  
  bool
@@ -1376,7 +1367,7 @@ fs_visitor::split_virtual_grfs()
        /* If there's a SEND message that requires contiguous destination
         * registers, no splitting is allowed.
         */
-      if (inst->regs_written() > 1) {
+      if (inst->regs_written > 1) {
          split_grf[inst->dst.reg] = false;
        }
  
@@ -2104,7 +2095,7 @@ fs_visitor::compute_to_mrf()
              /* Things returning more than one register would need us to
               * understand coalescing out more than one MOV at a time.
               */
-            if (scan_inst->regs_written() > 1)
+            if (scan_inst->regs_written > 1)
                 break;
  
             /* SEND instructions can't have MRF as a destination. */
@@ -2321,7 +2312,7 @@ void
  fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
  {
     int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written() * reg_size;
+   int write_len = inst->regs_written * reg_size;
     int first_write_grf = inst->dst.reg;
     bool needs_dep[BRW_MAX_MRF];
     assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2361,7 +2352,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
         * dependency has more latency than a MOV.
         */
        if (scan_inst->dst.file == GRF) {
-         for (int i = 0; i < scan_inst->regs_written(); i++) {
+         for (int i = 0; i < scan_inst->regs_written; i++) {
              int reg = scan_inst->dst.reg + i * reg_size;
  
              if (reg >= first_write_grf &&
@@ -2400,7 +2391,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
  void
  fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
  {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int write_len = inst->regs_written * dispatch_width / 8;
     int first_write_grf = inst->dst.reg;
     bool needs_dep[BRW_MAX_MRF];
     assert(write_len < (int)sizeof(needs_dep) - 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 06106c31bf65d3061b69cb6ec483e5b8f2ac89f7..0940489ed49cfcde0eb252584921d4f70c7decc9 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,7 +174,6 @@ public:
             fs_reg src0, fs_reg src1,fs_reg src2);
  
     bool equals(fs_inst *inst);
-   int regs_written();
     bool overwrites_reg(const fs_reg &reg);
     bool is_tex();
     bool is_math();
@@ -192,6 +191,7 @@ public:
     uint8_t flag_subreg;
  
     int mlen; /**< SEND message length */
+   int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
     int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
     uint32_t texture_offset; /**< Texture offset bitfield */
     int sampler;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp

index 5a50d45ddc981ed847c25a5ad55b61d98507ba81..8a8616d39c79771d5b5d557962251da3d87a7f7c 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
             if (no_existing_temp) {
-               int written = entry->generator->regs_written();
+               int written = entry->generator->regs_written;
  
                 fs_reg orig_dst = entry->generator->dst;
                 fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
@@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             }
  
             /* dest <- temp */
-            int written = inst->regs_written();
-            assert(written == entry->generator->regs_written());
+            int written = inst->regs_written;
+            assert(written == entry->generator->regs_written);
              assert(inst->dst.type == entry->tmp.type);
              fs_reg dst = inst->dst;
              fs_reg tmp = entry->tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp

index 63af1480d5666dc3a04e5f1bf133658333590640..373aa2d5e3f35e9c0dc224e711e8d75ee4abc8ab 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
           * variable, and thus qualify for being in def[].
           */
          if (inst->dst.file == GRF &&
-            inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
+            inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
              !inst->predicate &&
              !inst->force_uncompressed &&
              !inst->force_sechalf) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index b8936dcfe7d10c1e49074a6ccb37f33e2b95c354..4ee7bbc66593b3c97dd8cdd7887384d6c4f6a576 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
        }
  
        if (inst->dst.file == GRF) {
-        spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
+        spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
  
           if (inst->dst.smear >= 0) {
              no_spill[inst->dst.reg] = true;
@@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg)
           inst->dst.reg == spill_reg) {
           int subset_spill_offset = (spill_offset +
                                      REG_SIZE * inst->dst.reg_offset);
-         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
           inst->dst.reg_offset = 0;
  
          /* If our write is going to affect just part of the
@@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg)
           */
          if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
              fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written(); chan++) {
+            for (int chan = 0; chan < inst->regs_written; chan++) {
                 emit_unspill(inst, unspill_reg,
                              subset_spill_offset + REG_SIZE * chan);
                 unspill_reg.reg_offset++;
@@ -644,7 +644,7 @@ fs_visitor::spill_reg(int spill_reg)
          spill_src.negate = false;
          spill_src.smear = -1;
  
-        for (int chan = 0; chan < inst->regs_written(); chan++) {
+        for (int chan = 0; chan < inst->regs_written; chan++) {
             fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
                                                        reg_null_f, spill_src);
             spill_src.reg_offset++;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp

index 997341b153cfcd9a92efcd77843a9e509cc71ac6..901a36210472646cfdc60c31e57b9367f39e8d54 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -556,7 +556,7 @@ instruction_scheduler::calculate_deps()
        /* write-after-write deps. */
        if (inst->dst.file == GRF) {
           if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written * reg_width; r++) {
                 add_dep(last_grf_write[inst->dst.reg + r], n);
                 last_grf_write[inst->dst.reg + r] = n;
              }
@@ -663,7 +663,7 @@ instruction_scheduler::calculate_deps()
         */
        if (inst->dst.file == GRF) {
           if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+            for (int r = 0; r < inst->regs_written * reg_width; r++)
                 last_grf_write[inst->dst.reg + r] = n;
           } else {
              last_grf_write[inst->dst.reg] = n;
@@ -762,7 +762,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
              schedule_node *n = (schedule_node *)node;
  
              chosen = n;
-            if (chosen->inst->regs_written() <= 1)
+            if (chosen->inst->regs_written <= 1)
                 break;
           }
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 5d11e6719a6c187cd2da1ea6833211656e366ad0..d54d13410ae112f4d0645c9101988f700d27d8ae 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -261,7 +261,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
      * src, generate a saturated MOV
      */
     fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify->regs_written() != 1) {
+   if (!modify || modify->regs_written != 1) {
        this->result = fs_reg(this, ir->type);
        fs_inst *inst = emit(MOV(this->result, src));
        inst->saturate = true;
@@ -746,7 +746,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
     /* If last_rhs_inst wrote a different number of components than our LHS,
      * we can't safely rewrite it.
      */
-   if (virtual_grf_sizes[dst.reg] != modify->regs_written())
+   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
        return false;
  
     /* Success!  Rewrite the instruction. */
@@ -948,6 +948,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = true;
+   inst->regs_written = simd16 ? 8 : 4;
  
     if (simd16) {
        for (int i = 0; i < 4; i++) {
@@ -1089,6 +1090,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = header_present;
+   inst->regs_written = 4;
  
     if (mlen > 11) {
        fail("Message length >11 disallowed by hardware\n");
@@ -1244,6 +1246,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
     inst->base_mrf = base_mrf;
     inst->mlen = mlen;
     inst->header_present = header_present;
+   inst->regs_written = 4;
  
     if (mlen > 11) {
        fail("Message length >11 disallowed by hardware\n");
author	Eric Anholt <eric@anholt.net>
	Mon, 18 Mar 2013 18:30:57 +0000 (11:30 -0700)
committer	Eric Anholt <eric@anholt.net>
	Mon, 1 Apr 2013 23:17:26 +0000 (16:17 -0700)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs.h		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_cse.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch | blob | history