i965: Drop pointless check for variable declarations in splitting.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 424763bd6cb453b7f85b86e12d9c4c84b1176a31..c89f4d6a05ddc21081b2b8407de047e4affc2799 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -458,25 +458,34 @@ fs_visitor::visit(ir_expression *ir)
           * of one of the operands (src0 on gen6, src1 on gen7).  The
           * MACH accumulates in the contribution of the upper 16 bits
           * of that operand.
-         *
-         * FINISHME: Emit just the MUL if we know an operand is small
-         * enough.
-         */
-        if (brw->gen >= 7 && dispatch_width == 16)
-           fail("SIMD16 explicit accumulator operands unsupported\n");
-
-        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
-
-        emit(MUL(acc, op[0], op[1]));
-        emit(MACH(reg_null_d, op[0], op[1]));
-        emit(MOV(this->result, fs_reg(acc)));
+          */
+         if (ir->operands[0]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               emit(MUL(this->result, op[0], op[1]));
+            else
+               emit(MUL(this->result, op[1], op[0]));
+         } else if (ir->operands[1]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               emit(MUL(this->result, op[1], op[0]));
+            else
+               emit(MUL(this->result, op[0], op[1]));
+         } else {
+            if (brw->gen >= 7)
+               no16("SIMD16 explicit accumulator operands unsupported\n");
+
+            struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(reg_null_d, op[0], op[1]));
+            emit(MOV(this->result, fs_reg(acc)));
+         }
        } else {
          emit(MUL(this->result, op[0], op[1]));
        }
        break;
     case ir_binop_imul_high: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -490,8 +499,8 @@ fs_visitor::visit(ir_expression *ir)
        emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
        break;
     case ir_binop_carry: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -500,8 +509,8 @@ fs_visitor::visit(ir_expression *ir)
        break;
     }
     case ir_binop_borrow: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -1290,8 +1299,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
        break;
     case ir_txd: {
-      if (dispatch_width == 16)
-        fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
+      no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
  
        /* Load dPdx and the coordinate together:
         * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
@@ -1364,8 +1372,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        break;
     case ir_tg4:
        if (has_nonconstant_offset) {
-         if (ir->shadow_comparitor && dispatch_width == 16)
-            fail("Gen7 does not support gather4_po_c in SIMD16 mode.");
+         if (ir->shadow_comparitor)
+            no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
  
           /* More crazy intermixing */
           ir->offset->accept(this);
@@ -1464,8 +1472,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
          0
        };
  
+      no16("rectangle scale uniform setup not supported on SIMD16\n");
        if (dispatch_width == 16) {
-        fail("rectangle scale uniform setup not supported on SIMD16\n");
          return coordinate;
        }
  
@@ -2183,8 +2191,8 @@ fs_visitor::try_replace_with_sel()
  void
  fs_visitor::visit(ir_if *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     /* Don't point the annotation at the if statement, because then it plus
@@ -2226,8 +2234,8 @@ fs_visitor::visit(ir_if *ir)
  void
  fs_visitor::visit(ir_loop *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     this->base_ir = NULL;
@@ -2725,9 +2733,10 @@ fs_visitor::emit_fb_writes()
     bool do_dual_src = this->dual_src_output.file != BAD_FILE;
     bool src0_alpha_to_render_target = false;
  
-   if (dispatch_width == 16 && do_dual_src) {
-      fail("GL_ARB_blend_func_extended not yet supported in SIMD16.");
-      do_dual_src = false;
+   if (do_dual_src) {
+      no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
+      if (dispatch_width == 16)
+         do_dual_src = false;
     }
  
     /* From the Sandy Bridge PRM, volume 4, page 198:
@@ -2778,13 +2787,13 @@ fs_visitor::emit_fb_writes()
        nr += reg_width;
  
     if (c->source_depth_to_render_target) {
-      if (brw->gen == 6 && dispatch_width == 16) {
+      if (brw->gen == 6) {
          /* For outputting oDepth on gen6, SIMD8 writes have to be
           * used.  This would require SIMD8 moves of each half to
           * message regs, kind of like pre-gen5 SIMD16 FB writes.
           * Just bail on doing so for now.
           */
-        fail("Missing support for simd16 depth writes on gen6\n");
+        no16("Missing support for simd16 depth writes on gen6\n");
        }
  
        if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
@@ -2942,18 +2951,16 @@ fs_visitor::fs_visitor(struct brw_context *brw,
                         struct gl_shader_program *shader_prog,
                         struct gl_fragment_program *fp,
                         unsigned dispatch_width)
-   : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base),
+   : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base,
+                     MESA_SHADER_FRAGMENT),
       dispatch_width(dispatch_width)
  {
     this->c = c;
     this->fp = fp;
     this->mem_ctx = ralloc_context(NULL);
-   if (shader_prog)
-      shader = (struct brw_shader *)
-         shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   else
-      shader = NULL;
     this->failed = false;
+   this->simd16_unsupported = false;
+   this->no16_msg = NULL;
     this->variable_ht = hash_table_ctor(0,
                                         hash_table_pointer_hash,
                                         hash_table_pointer_compare);
@@ -2975,14 +2982,15 @@ fs_visitor::fs_visitor(struct brw_context *brw,
     this->regs_live_at_ip = NULL;
  
     this->uniforms = 0;
-   this->params_remap = NULL;
-   this->nr_params_remap = 0;
+   this->pull_constant_loc = NULL;
+   this->push_constant_loc = NULL;
  
     this->force_uncompressed_stack = 0;
  
     this->spilled_any_registers = false;
  
-   this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
+   if (dispatch_width == 8)
+      this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
  }
  
  fs_visitor::~fs_visitor()