this->current_annotation = NULL;
}
+void
+fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
+{
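+ /* Number of hardware registers each color component occupies:
+ * one at SIMD8, two at SIMD16.
+ */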
+ int reg_width = c->dispatch_width / 8;
+
+ if (c->dispatch_width == 8 || intel->gen == 6) {
+ /* SIMD8 write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ *
+ * gen6 SIMD16 DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ } else {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ *
+ * By setting the high bit of the MRF register number,
+ * we could indicate that we want COMPR4 mode: instead
+ * of the usual destination + 1 for the second half,
+ * the hardware would write to destination + 4. The
+ * optimizer would need to be taught about that, though.
+ */
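+ /* First half: an uncompressed SIMD8 write of channels 0-7 to
+ * m + index.
+ */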
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ pop_force_uncompressed();
+
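+ /* Second half: channels 8-15 read the second register of the
+ * color's register pair and land at m + index + 4.
+ */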
+ push_force_sechalf();
+ color.sechalf = true;
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ pop_force_sechalf();
+ color.sechalf = false;
+ }
+}
+
void
fs_visitor::emit_fb_writes()
{
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i * reg_width), color);
+ emit_color_write(i, color_mrf, color);
color.reg_offset++;
}
}
* renderbuffer.
*/
color.reg_offset += 3;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color);
+ emit_color_write(3, color_mrf, color);
}
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
- } else {
+ } else /* gen <= 5 */ {
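+ /* Pre-gen6 math is a send to the shared math unit and executes
+ * at most SIMD8 per message, so a 16-wide math op is emitted as
+ * two SIMD8 sends, the second reading the second halves of its
+ * sources.
+ */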
assert(inst->mlen >= 1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
inst->base_mrf + 1, sechalf(src[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
reg->hw_reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
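+ /* A register flagged as holding the second half of a SIMD16
+ * value maps to the upper register of its hardware register
+ * pair.
+ */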
+ if (reg->sechalf)
+ brw_reg = sechalf(brw_reg);
break;
case IMM:
switch (reg->type) {
/* We haven't hooked up support for uniforms in the 16-wide
* version yet.
*/
- return GL_FALSE;
+ return false;
}
/* align to 64 byte boundary. */
assert(force_uncompressed_stack == 0);
assert(force_sechalf_stack == 0);
- if (!failed)
- generate_code();
-
if (failed)
- return GL_FALSE;
+ return false;
+
+ generate_code();
if (c->dispatch_width == 8) {
c->prog_data.total_grf = grf_used;
return false;
}
- if (intel->gen >= 6) {
+ if (intel->gen >= 5) {
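+ /* Now also try to compile a 16-wide version of the program. */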
c->dispatch_width = 16;
fs_visitor v2(c, shader);
v2.run();
int type;
bool negate;
bool abs;
+ bool sechalf; /* second half of a SIMD16 register pair */
struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_color_write(int index, int first_color_mrf, fs_reg color);
void emit_fb_writes();
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
*/
struct brw_wm_unit_key {
- unsigned int total_grf, total_scratch;
+ unsigned int total_grf, total_grf_16, total_scratch;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int dispatch_grf_start_reg;
+ uint32_t prog_offset_16; /* byte offset of the 16-wide program in prog_bo */
unsigned int curbe_offset;
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
+ key->total_grf_16 = brw->wm.prog_data->total_grf_16;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = brw->wm.prog_data->total_scratch;
+ key->prog_offset_16 = brw->wm.prog_data->prog_offset_16;
+
+ if (key->prog_offset_16) {
+ /* These two fields should be the same pre-gen6, which is why we
+ * only have one hardware field to program for both dispatch
+ * widths.
+ */
+ assert(brw->wm.prog_data->first_curbe_grf ==
+ brw->wm.prog_data->first_curbe_grf_16);
+ }
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
memset(&wm, 0, sizeof(wm));
wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
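+ /* wm9 holds the GRF count and kernel start pointer for the
+ * 16-wide program.
+ */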
+ wm.wm9.grf_reg_count_2 = ALIGN(key->total_grf_16, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ wm.wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
+ key->prog_offset_16) >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
- if (key->is_glsl)
+ if (key->is_glsl) {
wm.wm5.enable_8_pix = 1;
- else
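+ /* A GLSL program may now also have a 16-wide variant; enable
+ * it when one was compiled.
+ */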
+ if (key->prog_offset_16)
+ wm.wm5.enable_16_pix = 1;
+ } else
wm.wm5.enable_16_pix = 1;
wm.wm5.max_threads = brw->wm_max_threads - 1;
brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
+ if (key->prog_offset_16) {
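+ /* The kernel pointer shares its dword with grf_reg_count_2, so
+ * both are folded into the relocation delta.
+ */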
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm9),
+ brw->wm.prog_bo, ((wm.wm9.grf_reg_count_2 << 1) +
+ key->prog_offset_16),
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),