i965: Replace 8-wide and 16-wide with SIMD8 and SIMD16.

author Eric Anholt <eric@anholt.net>
Tue, 12 Nov 2013 23:33:27 +0000 (15:33 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 17 Jan 2014 20:58:43 +0000 (12:58 -0800)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c

index 5a07d8397305bd41998c3f512bea64cd7a10a745..dee91123ff39c2ce96d21e5a5d2d3cba67d3b0a7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -112,7 +112,7 @@ brw_set_compression_control(struct brw_compile *p,
     p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
  
     if (p->brw->gen >= 6) {
-      /* Since we don't use the 32-wide support in gen6, we translate
+      /* Since we don't use the SIMD32 support in gen6, we translate
         * the pre-gen6 compression control here.
         */
        switch (compression_control) {
@@ -123,12 +123,12 @@ brw_set_compression_control(struct brw_compile *p,
          p->current->header.compression_control = GEN6_COMPRESSION_1Q;
          break;
        case BRW_COMPRESSION_2NDHALF:
-        /* For 8-wide, this is "use the second set of 8 bits." */
+        /* For SIMD8, this is "use the second set of 8 bits." */
          p->current->header.compression_control = GEN6_COMPRESSION_2Q;
          break;
        case BRW_COMPRESSION_COMPRESSED:
-        /* For 16-wide instruction compression, use the first set of 16 bits
-         * since we don't do 32-wide dispatch.
+        /* For SIMD16 instruction compression, use the first set of 16 bits
+         * since we don't do SIMD32 dispatch.
           */
          p->current->header.compression_control = GEN6_COMPRESSION_1H;
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 3536cbeecc5207bad06efc0a359dd8301f1adb45..37329b9e6b20b30cfc770a588cf17356dd08603e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -856,7 +856,7 @@ import_uniforms_callback(const void *key,
     hash_table_insert(dst_ht, data, key);
  }
  
-/* For 16-wide, we need to follow from the uniform setup of 8-wide dispatch.
+/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
   * This brings in those uniform definitions
   */
  void
@@ -1340,7 +1340,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
     case SHADER_OPCODE_INT_QUOTIENT:
     case SHADER_OPCODE_INT_REMAINDER:
        if (brw->gen >= 7 && dispatch_width == 16)
-        fail("16-wide INTDIV unsupported\n");
+        fail("SIMD16 INTDIV unsupported\n");
        break;
     case SHADER_OPCODE_POW:
        break;
@@ -1764,7 +1764,7 @@ fs_visitor::remove_dead_constants()
  
        c->prog_data.nr_params = new_nr_params;
     } else {
-      /* This should have been generated in the 8-wide pass already. */
+      /* This should have been generated in the SIMD8 pass already. */
        assert(this->params_remap);
     }
  
@@ -1883,7 +1883,7 @@ fs_visitor::setup_pull_constants()
        return;
  
     if (dispatch_width == 16) {
-      fail("Pull constants not supported in 16-wide\n");
+      fail("Pull constants not supported in SIMD16\n");
        return;
     }
  
@@ -2557,7 +2557,7 @@ static void
  clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
                          int first_grf, int grf_len)
  {
-   bool inst_16wide = (dispatch_width > 8 &&
+   bool inst_simd16 = (dispatch_width > 8 &&
                         !inst->force_uncompressed &&
                         !inst->force_sechalf);
  
@@ -2576,7 +2576,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
        if (grf >= first_grf &&
            grf < first_grf + grf_len) {
           deps[grf - first_grf] = false;
-         if (inst_16wide)
+         if (inst_simd16)
              deps[grf - first_grf + 1] = false;
        }
     }
@@ -2634,7 +2634,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
           return;
        }
  
-      bool scan_inst_16wide = (dispatch_width > 8 &&
+      bool scan_inst_simd16 = (dispatch_width > 8 &&
                                 !scan_inst->force_uncompressed &&
                                 !scan_inst->force_sechalf);
  
@@ -2651,7 +2651,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
                  needs_dep[reg - first_write_grf]) {
                 inst->insert_before(DEP_RESOLVE_MOV(reg));
                 needs_dep[reg - first_write_grf] = false;
-               if (scan_inst_16wide)
+               if (scan_inst_simd16)
                    needs_dep[reg - first_write_grf + 1] = false;
              }
           }
@@ -3062,7 +3062,7 @@ fs_visitor::setup_payload_gen6()
        c->source_depth_reg = c->nr_payload_regs;
        c->nr_payload_regs++;
        if (dispatch_width == 16) {
-         /* R28: interpolated depth if not 8-wide. */
+         /* R28: interpolated depth if not SIMD8. */
           c->nr_payload_regs++;
        }
     }
@@ -3071,7 +3071,7 @@ fs_visitor::setup_payload_gen6()
        c->source_w_reg = c->nr_payload_regs;
        c->nr_payload_regs++;
        if (dispatch_width == 16) {
-         /* R30: interpolated W if not 8-wide. */
+         /* R30: interpolated W if not SIMD8. */
           c->nr_payload_regs++;
        }
     }
@@ -3089,7 +3089,7 @@ fs_visitor::setup_payload_gen6()
        c->sample_mask_reg = c->nr_payload_regs;
        c->nr_payload_regs++;
        if (dispatch_width == 16) {
-         /* R33: input coverage mask if not 8-wide. */
+         /* R33: input coverage mask if not SIMD8. */
           c->nr_payload_regs++;
        }
     }
@@ -3333,16 +3333,16 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
     fs_visitor v2(brw, c, prog, fp, 16);
     if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
        if (c->prog_data.nr_pull_params == 0) {
-         /* Try a 16-wide compile */
+         /* Try a SIMD16 compile */
           v2.import_uniforms(&v);
           if (!v2.run()) {
-            perf_debug("16-wide shader failed to compile, falling back to "
-                       "8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
+            perf_debug("SIMD16 shader failed to compile, falling back to "
+                       "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
           } else {
              simd16_instructions = &v2.instructions;
           }
        } else {
-         perf_debug("Skipping 16-wide due to pull parameters.\n");
+         perf_debug("Skipping SIMD16 due to pull parameters.\n");
        }
     }
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 2bbf687c590255a9207b9b33a33be087ba5ab091..e701fc524d2360d1647a17b85e6bffd6f4451625 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -215,8 +215,8 @@ fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
        dst = vec16(dst);
     }
  
-   /* We do this 8 or 16-wide, but since the destination is UW we
-    * don't do compression in the 16-wide case.
+   /* We do this SIMD8 or SIMD16, but since the destination is UW we
+    * don't do compression in the SIMD16 case.
      */
     brw_push_insn_state(p);
     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -521,7 +521,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
     }
  
     if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
-      /* The send-from-GRF for 16-wide texturing with a header has an extra
+      /* The send-from-GRF for SIMD16 texturing with a header has an extra
         * hardware register allocated to it, which we need to skip over (since
         * our coordinates in the payload are in the even-numbered registers,
         * and the header comes right before the first one).
@@ -1302,13 +1302,13 @@ fs_generator::generate_code(exec_list *instructions)
  
     if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
        if (shader) {
-         printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+         printf("Native code for fragment shader %d (SIMD%d dispatch):\n",
                  prog->Name, dispatch_width);
        } else if (fp) {
-         printf("Native code for fragment program %d (%d-wide dispatch):\n",
+         printf("Native code for fragment program %d (SIMD%d dispatch):\n",
                  fp->Base.Id, dispatch_width);
        } else {
-         printf("Native code for blorp program (%d-wide dispatch):\n",
+         printf("Native code for blorp program (SIMD%d dispatch):\n",
                  dispatch_width);
        }
     }
@@ -1831,7 +1831,7 @@ fs_generator::generate_assembly(exec_list *simd8_instructions,
           brw_NOP(p);
        }
  
-      /* Save off the start of this 16-wide program */
+      /* Save off the start of this SIMD16 program */
        c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
  
        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index 8567afd3c1640faedcdd7045967b40a5df08d9d6..f54a2defd4414c9c8fedef6cb7b4aff16080a238 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -78,7 +78,7 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width)
  
     /* The registers used to make up almost all values handled in the compiler
      * are a scalar value occupying a single register (or 2 registers in the
-    * case of 16-wide, which is handled by dividing base_reg_count by 2 and
+    * case of SIMD16, which is handled by dividing base_reg_count by 2 and
      * multiplying allocated register numbers by 2).  Things that were
      * aggregates of scalar values at the GLSL level were split to scalar
      * values by split_virtual_grfs().
@@ -225,7 +225,7 @@ count_to_loop_end(fs_inst *do_inst)
   * nr_payload_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
   *
   * And we have payload_node_count nodes covering these registers in order
- * (note that in 16-wide, a node is two registers).
+ * (note that in SIMD16, a node is two registers).
   */
  void
  fs_visitor::setup_payload_interference(struct ra_graph *g,
@@ -295,7 +295,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
           break;
  
        case FS_OPCODE_LINTERP:
-         /* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
+         /* On gen6+ in SIMD16, there are 4 adjacent registers (so 2 nodes)
            * used by PLN's sourcing of the deltas, while we list only the first
            * two in the arguments (1 node).  Pre-gen6, the deltas are computed
            * in normal VGRFs.
@@ -420,7 +420,7 @@ bool
  fs_visitor::assign_regs(bool allow_spilling)
  {
     /* Most of this allocation was written for a reg_width of 1
-    * (dispatch_width == 8).  In extending to 16-wide, the code was
+    * (dispatch_width == 8).  In extending to SIMD16, the code was
      * left in place and it was converted to have the hardware
      * registers it's allocating be contiguous physical pairs of regs
      * for reg_width == 2.
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 53cd0a1cf57bc216b3145b9c2f5aaaa276332464..1727ef9593a9aee52afffb7ad84445655d4cb2e7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -116,7 +116,7 @@ fs_visitor::visit(ir_variable *ir)
  
        if (dispatch_width == 16) {
          if (!variable_storage(ir)) {
-           fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
+           fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
          }
          return;
        }
@@ -461,7 +461,7 @@ fs_visitor::visit(ir_expression *ir)
           * enough.
           */
          if (brw->gen >= 7 && dispatch_width == 16)
-           fail("16-wide explicit accumulator operands unsupported\n");
+           fail("SIMD16 explicit accumulator operands unsupported\n");
  
          struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -474,7 +474,7 @@ fs_visitor::visit(ir_expression *ir)
        break;
     case ir_binop_imul_high: {
        if (brw->gen >= 7 && dispatch_width == 16)
-         fail("16-wide explicit accumulator operands unsupported\n");
+         fail("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -489,7 +489,7 @@ fs_visitor::visit(ir_expression *ir)
        break;
     case ir_binop_carry: {
        if (brw->gen >= 7 && dispatch_width == 16)
-         fail("16-wide explicit accumulator operands unsupported\n");
+         fail("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -499,7 +499,7 @@ fs_visitor::visit(ir_expression *ir)
     }
     case ir_binop_borrow: {
        if (brw->gen >= 7 && dispatch_width == 16)
-         fail("16-wide explicit accumulator operands unsupported\n");
+         fail("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -1251,7 +1251,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
  
     if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
        /* For general texture offsets (no txf workaround), we need a header to
-       * put them in.  Note that for 16-wide we're making space for two actual
+       * put them in.  Note that for SIMD16 we're making space for two actual
         * hardware registers here, so the emit will have to fix up for this.
         *
         * * ir4_tg4 needs to place its channel select in the header,
@@ -1457,7 +1457,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
        };
  
        if (dispatch_width == 16) {
-        fail("rectangle scale uniform setup not supported on 16-wide\n");
+        fail("rectangle scale uniform setup not supported on SIMD16\n");
          return coordinate;
        }
  
@@ -2142,7 +2142,7 @@ void
  fs_visitor::visit(ir_if *ir)
  {
     if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on 16-wide\n");
+      fail("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     /* Don't point the annotation at the if statement, because then it plus
@@ -2185,7 +2185,7 @@ void
  fs_visitor::visit(ir_loop *ir)
  {
     if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on 16-wide\n");
+      fail("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     this->base_ir = NULL;
@@ -2693,7 +2693,7 @@ fs_visitor::emit_fb_writes()
     bool src0_alpha_to_render_target = false;
  
     if (dispatch_width == 16 && do_dual_src) {
-      fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
+      fail("GL_ARB_blend_func_extended not yet supported in SIMD16.");
        do_dual_src = false;
     }
  
@@ -2747,7 +2747,7 @@ fs_visitor::emit_fb_writes()
     if (c->source_depth_to_render_target) {
        if (brw->gen == 6 && dispatch_width == 16) {
          /* For outputting oDepth on gen6, SIMD8 writes have to be
-         * used.  This would require 8-wide moves of each half to
+         * used.  This would require SIMD8 moves of each half to
           * message regs, kind of like pre-gen5 SIMD16 FB writes.
           * Just bail on doing so for now.
           */
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index baf67fb1ea26252f110148cd52a31c5853a35a43..a61bbab613b95b8176a730eaf890f5302d54863c 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -431,8 +431,8 @@ public:
      * Returns how many cycles it takes the instruction to issue.
      *
      * Instructions in gen hardware are handled one simd4 vector at a time,
-    * with 1 cycle per vector dispatched.  Thus 8-wide pixel shaders take 2
-    * cycles to dispatch and 16-wide (compressed) instructions take 4.
+    * with 1 cycle per vector dispatched.  Thus SIMD8 pixel shaders take 2
+    * cycles to dispatch and SIMD16 (compressed) instructions take 4.
      */
     virtual int issue_time(backend_instruction *inst) = 0;
  
@@ -1157,7 +1157,7 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
     } else {
        /* Before register allocation, we don't care about the latencies of
         * instructions.  All we care about is reducing live intervals of
-       * variables so that we can avoid register spilling, or get 16-wide
+       * variables so that we can avoid register spilling, or get SIMD16
         * shaders which naturally do a better job of hiding instruction
         * latency.
         */
author	Eric Anholt <eric@anholt.net>
	Tue, 12 Nov 2013 23:33:27 +0000 (15:33 -0800)
committer	Eric Anholt <eric@anholt.net>
	Fri, 17 Jan 2014 20:58:43 +0000 (12:58 -0800)
Files changed in this commit:
src/mesa/drivers/dri/i965/brw_eu.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp