struct brw_context *brw = p->brw;
struct brw_instruction *insn;
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
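+   /* A msg_reg_nr of -1 means the message payload already lives in GRFs,
+    * so there is no implied move into the MRF to resolve.
+    */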
+ if (msg_reg_nr != -1)
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
(opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
- src[1].file == GRF));
+ src[1].file == GRF) ||
+ (is_tex() && src[0].file == GRF));
}
bool
imm.u == r.imm.u);
}
+fs_reg
+fs_reg::retype(uint32_t type)
+{
+ fs_reg result = *this;
+ result.type = type;
+ return result;
+}
+
bool
fs_reg::is_zero() const
{
this->force_sechalf);
}
+int
+fs_inst::regs_read(fs_visitor *v, int arg)
+{
+ if (is_tex() && arg == 0 && src[0].file == GRF) {
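+      /* mlen counts hardware registers, but in 16-wide dispatch each slot
+       * of the payload VGRF spans two hardware registers, so convert back,
+       * rounding up for the single-register header.
+       */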
+ if (v->dispatch_width == 16)
+ return (mlen + 1) / 2;
+ else
+ return mlen;
+ }
+ return 1;
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
if (inst->mlen == 0)
return 0;
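+   /* Messages whose payload lives in GRFs (base_mrf == -1) write no MRFs. */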
+ if (inst->base_mrf == -1)
+ return 0;
+
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
break;
}
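+      /* A send-from-GRF that sources the register we want to turn into an
+       * MRF write means the value has to stay in the GRF.
+       */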
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf == -1 &&
+ scan_inst->src[0].file == GRF &&
+ scan_inst->src[0].reg == inst->dst.reg) {
+ interfered = true;
+ break;
+ }
+
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
* value, there is a 33rd bit generated for the sign in the
}
}
- if (scan_inst->mlen > 0) {
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) {
/* Found a SEND instruction, which means that there are
* live values in MRFs from base_mrf to base_mrf +
* scan_inst->mlen - 1. Don't go pushing our MRF write up
last_mrf_move[inst->dst.reg] = NULL;
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
/* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
bool is_zero() const;
bool is_one() const;
bool is_valid_3src() const;
+ fs_reg retype(uint32_t type);
/** Register file: GRF, MRF, IMM. */
enum register_file file;
bool overwrites_reg(const fs_reg ®);
bool is_send_from_grf();
bool is_partial_write();
+ int regs_read(fs_visitor *v, int arg);
fs_reg dst;
fs_reg src[3];
void try_replace_with_sel();
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
- void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset,
+ int count);
void emit_fragment_program_code();
void setup_fp_regs();
if (entry->src.file == IMM)
return false;
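+   /* Sources that read more than one register (such as a texture payload)
+    * aren't candidates for single-register copy propagation.
+    */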
+ if (inst->regs_read(this, arg) > 1)
+ return false;
+
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
dst = vec16(dst);
}
+ if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
+ /* The send-from-GRF for 16-wide texturing with a header has an extra
+ * hardware register allocated to it, which we need to skip over (since
+ * our coordinates in the payload are in the even-numbered registers,
+ * and the header comes right before the first one).
+ */
+ assert(src.file == BRW_GENERAL_REGISTER_FILE);
+ src.nr++;
+ }
+
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->texture_offset) {
+ struct brw_reg header_reg;
+
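+      /* On gen7+ the header is built in place at the start of the GRF
+       * payload; earlier generations still assemble it in the MRF.
+       */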
+ if (brw->gen >= 7) {
+ header_reg = src;
+ } else {
+ assert(inst->base_mrf != -1);
+ header_reg = retype(brw_message_reg(inst->base_mrf),
+ BRW_REGISTER_TYPE_UD);
+ }
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
- brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* Then set the offset bits in DWord 2. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+ brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
+ header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
+ assert(brw->gen < 7);
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
}
if (reg.file != GRF)
continue;
- int regs_read = 1;
- /* We don't know how many components are read in a send-from-grf,
- * so just assume "all of them."
- */
- if (inst->is_send_from_grf())
- regs_read = v->virtual_grf_sizes[reg.reg];
-
- for (int i = 0; i < regs_read; i++) {
+ for (int j = 0; j < inst->regs_read(v, i); j++) {
setup_one_read(block, inst, ip, reg);
reg.reg_offset++;
}
}
void
-fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
+fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,
+ int count)
{
- fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
- unspill_inst->offset = spill_offset;
- unspill_inst->ir = inst->ir;
- unspill_inst->annotation = inst->annotation;
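+   /* Emit one scratch read per register, stepping the destination and the
+    * scratch offset forward together.
+    */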
+ for (int i = 0; i < count; i++) {
+ fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
+ unspill_inst->offset = spill_offset;
+ unspill_inst->ir = inst->ir;
+ unspill_inst->annotation = inst->annotation;
+
+      /* Choose an MRF that won't conflict with an MRF that's live across the
+ * spill. Nothing else will make it up to MRF 14/15.
+ */
+ unspill_inst->base_mrf = 14;
+ unspill_inst->mlen = 1; /* header contains offset */
+ inst->insert_before(unspill_inst);
- /* Choose a MRF that won't conflict with an MRF that's live across the
- * spill. Nothing else will make it up to MRF 14/15.
- */
- unspill_inst->base_mrf = 14;
- unspill_inst->mlen = 1; /* header contains offset */
- inst->insert_before(unspill_inst);
+ dst.reg_offset++;
+ spill_offset += REG_SIZE;
+ }
}
int
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
inst->src[i].reg == spill_reg) {
- inst->src[i].reg = virtual_grf_alloc(1);
- emit_unspill(inst, inst->src[i],
- spill_offset + REG_SIZE * inst->src[i].reg_offset);
+            int regs_read = inst->regs_read(this, i);
+            /* Capture this source's offset into the spilled value before
+             * rebasing it onto the freshly allocated temporary.
+             */
+            int subset_spill_offset = (spill_offset +
+                                       REG_SIZE * inst->src[i].reg_offset);
+
+            inst->src[i].reg = virtual_grf_alloc(regs_read);
+            inst->src[i].reg_offset = 0;
+
+            emit_unspill(inst, inst->src[i], subset_spill_offset, regs_read);
}
}
* since we write back out all of the regs_written().
*/
if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
- fs_reg unspill_reg = inst->dst;
- for (int chan = 0; chan < inst->regs_written; chan++) {
- emit_unspill(inst, unspill_reg,
- subset_spill_offset + REG_SIZE * chan);
- unspill_reg.reg_offset++;
- }
+ emit_unspill(inst, inst->dst, subset_spill_offset,
+ inst->regs_written);
}
fs_reg spill_src = inst->dst;
fs_reg shadow_c, fs_reg lod, fs_reg lod2,
fs_reg sample_index)
{
- int mlen = 0;
- int base_mrf = 2;
int reg_width = dispatch_width / 8;
bool header_present = false;
int offsets[3];
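+   /* Build the message payload in a virtual GRF rather than in MRFs;
+    * "next" tracks the slot where the next parameter will be written.
+    */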
+ fs_reg payload = fs_reg(this, glsl_type::float_type);
+ fs_reg next = payload;
+
if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
- /* * The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
+ /* For general texture offsets (no txf workaround), we need a header to
+ * put them in. Note that for 16-wide we're making space for two actual
+ * hardware registers here, so the emit will have to fix up for this.
*
       * * ir_tg4 needs to place its channel select in the header,
* for interaction with ARB_texture_swizzle
*/
header_present = true;
- mlen++;
- base_mrf--;
+ next.reg_offset++;
}
if (ir->shadow_comparitor) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
- mlen += reg_width;
+ emit(MOV(next, shadow_c));
+ next.reg_offset++;
}
/* Set up the LOD info */
case ir_tg4:
break;
case ir_txb:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txl:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txd: {
if (dispatch_width == 16)
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+ emit(MOV(next, lod));
lod.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
+ emit(MOV(next, lod2));
lod2.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
break;
}
case ir_txs:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod));
+ next.reg_offset++;
break;
case ir_query_levels:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
break;
case ir_txf:
/* It appears that the ld instruction used for txf does its
}
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[0]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod));
+ next.reg_offset++;
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[i]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
case ir_txf_ms:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
+ next.reg_offset++;
/* constant zero MCS; we arrange to never actually have a compressed
* multisample surface here for now. TODO: issue ld_mcs to get this first,
* if we ever support texturing from compressed multisample surfaces
*/
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate));
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
}
/* Set up the coordinate (except for cases where it was done above) */
if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
/* Generate the SEND */
fs_inst *inst = NULL;
switch (ir->op) {
- case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
- case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
- case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
- case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
- case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
- case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
- case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
- case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
+ case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
+ case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
+ case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
+ case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
+ case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
+ case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+ case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
+ case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break;
+ }
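+   /* A base_mrf of -1 marks the payload as coming from the GRF in src[0]. */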
+ inst->base_mrf = -1;
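+   /* The message length is in hardware registers: each payload slot is
+    * reg_width registers, except that in 16-wide the header occupies only
+    * one register of its two-register slot (generate_tex skips the unused
+    * half).
+    */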
+ if (reg_width == 2)
+ inst->mlen = next.reg_offset * reg_width - header_present;
+ else
+ inst->mlen = next.reg_offset * reg_width;
+
inst->header_present = header_present;
inst->regs_written = 4;
- if (mlen > 11) {
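+   /* The payload was allocated as a single slot up front; record its real
+    * size now that we know how many slots were written.
+    */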
+ virtual_grf_sizes[payload.reg] = next.reg_offset;
+ if (inst->mlen > 11) {
fail("Message length >11 disallowed by hardware\n");
}
lod, lod2);
}
- /* The header is set up by generate_tex() when necessary. */
- inst->src[0] = reg_undef;
-
if (ir->offset != NULL && ir->op != ir_txf)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
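+            /* A multi-register read (such as a texture payload) depends on
+             * every hardware register it covers.
+             */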
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
add_dep(last_grf_write[inst->src[i].reg], n);
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
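+   /* Send-from-GRF payloads are tracked through the GRF source
+    * dependencies above; only MRF payloads need ordering here.
+    */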
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->predicate) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
add_dep(n, last_grf_write[inst->src[i].reg]);
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->predicate) {
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}