i965: Drop pointless check for variable declarations in splitting.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index b0fed9eecdac43b8630dc89fcda0068ab02d48f5..c89f4d6a05ddc21081b2b8407de047e4affc2799 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -212,7 +212,8 @@ fs_visitor::visit(ir_dereference_array *ir)
  }
  
  void
-fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
+fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
+                     const fs_reg &a)
  {
     if (brw->gen < 6 ||
         !x.is_valid_3src() ||
@@ -225,8 +226,9 @@ fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
  
        emit(MUL(y_times_a, y, a));
  
-      a.negate = !a.negate;
-      emit(ADD(one_minus_a, a, fs_reg(1.0f)));
+      fs_reg negative_a = a;
+      negative_a.negate = !a.negate;
+      emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
        emit(MUL(x_times_one_minus_a, x, one_minus_a));
  
        emit(ADD(dst, x_times_one_minus_a, y_times_a));
@@ -239,8 +241,8 @@ fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
  }
  
  void
-fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
-                        fs_reg src0, fs_reg src1)
+fs_visitor::emit_minmax(uint32_t conditionalmod, const fs_reg &dst,
+                        const fs_reg &src0, const fs_reg &src1)
  {
     fs_inst *inst;
  
@@ -347,8 +349,8 @@ fs_visitor::visit(ir_expression *ir)
        ir->operands[operand]->accept(this);
        if (this->result.file == BAD_FILE) {
          fail("Failed to get tree for expression operand:\n");
-        ir->operands[operand]->print();
-         printf("\n");
+        ir->operands[operand]->fprint(stderr);
+         fprintf(stderr, "\n");
        }
        assert(this->result.is_valid_3src());
        op[operand] = this->result;
@@ -456,25 +458,34 @@ fs_visitor::visit(ir_expression *ir)
           * of one of the operands (src0 on gen6, src1 on gen7).  The
           * MACH accumulates in the contribution of the upper 16 bits
           * of that operand.
-         *
-         * FINISHME: Emit just the MUL if we know an operand is small
-         * enough.
-         */
-        if (brw->gen >= 7 && dispatch_width == 16)
-           fail("SIMD16 explicit accumulator operands unsupported\n");
-
-        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
-
-        emit(MUL(acc, op[0], op[1]));
-        emit(MACH(reg_null_d, op[0], op[1]));
-        emit(MOV(this->result, fs_reg(acc)));
+          */
+         if (ir->operands[0]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               emit(MUL(this->result, op[0], op[1]));
+            else
+               emit(MUL(this->result, op[1], op[0]));
+         } else if (ir->operands[1]->is_uint16_constant()) {
+            if (brw->gen < 7)
+               emit(MUL(this->result, op[1], op[0]));
+            else
+               emit(MUL(this->result, op[0], op[1]));
+         } else {
+            if (brw->gen >= 7)
+               no16("SIMD16 explicit accumulator operands unsupported\n");
+
+            struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(reg_null_d, op[0], op[1]));
+            emit(MOV(this->result, fs_reg(acc)));
+         }
        } else {
          emit(MUL(this->result, op[0], op[1]));
        }
        break;
     case ir_binop_imul_high: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
  
@@ -488,8 +499,8 @@ fs_visitor::visit(ir_expression *ir)
        emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
        break;
     case ir_binop_carry: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -498,8 +509,8 @@ fs_visitor::visit(ir_expression *ir)
        break;
     }
     case ir_binop_borrow: {
-      if (brw->gen >= 7 && dispatch_width == 16)
-         fail("SIMD16 explicit accumulator operands unsupported\n");
+      if (brw->gen >= 7)
+         no16("SIMD16 explicit accumulator operands unsupported\n");
  
        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
  
@@ -741,8 +752,8 @@ fs_visitor::visit(ir_expression *ir)
           packed_consts.type = result.type;
  
           fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
-         emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      packed_consts, surf_index, const_offset_reg));
+         emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                                   packed_consts, surf_index, const_offset_reg));
  
           for (int i = 0; i < ir->type->vector_elements; i++) {
              packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i);
@@ -1288,8 +1299,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        next.reg_offset++;
        break;
     case ir_txd: {
-      if (dispatch_width == 16)
-        fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
+      no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
  
        /* Load dPdx and the coordinate together:
         * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
@@ -1362,8 +1372,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        break;
     case ir_tg4:
        if (has_nonconstant_offset) {
-         if (ir->shadow_comparitor && dispatch_width == 16)
-            fail("Gen7 does not support gather4_po_c in SIMD16 mode.");
+         if (ir->shadow_comparitor)
+            no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
  
           /* More crazy intermixing */
           ir->offset->accept(this);
@@ -1462,8 +1472,8 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
          0
        };
  
+      no16("rectangle scale uniform setup not supported on SIMD16\n");
        if (dispatch_width == 16) {
-        fail("rectangle scale uniform setup not supported on SIMD16\n");
          return coordinate;
        }
  
@@ -2181,8 +2191,8 @@ fs_visitor::try_replace_with_sel()
  void
  fs_visitor::visit(ir_if *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     /* Don't point the annotation at the if statement, because then it plus
@@ -2224,8 +2234,8 @@ fs_visitor::visit(ir_if *ir)
  void
  fs_visitor::visit(ir_loop *ir)
  {
-   if (brw->gen < 6 && dispatch_width == 16) {
-      fail("Can't support (non-uniform) control flow on SIMD16\n");
+   if (brw->gen < 6) {
+      no16("Can't support (non-uniform) control flow on SIMD16\n");
     }
  
     this->base_ir = NULL;
@@ -2397,9 +2407,10 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
     }
  
     /* Emit the instruction. */
-   fs_inst inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst, atomic_op, surf_index);
-   inst.base_mrf = 0;
-   inst.mlen = mlen;
+   fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+                                        atomic_op, surf_index);
+   inst->base_mrf = 0;
+   inst->mlen = mlen;
     emit(inst);
  }
  
@@ -2430,21 +2441,13 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
     mlen += operand_len;
  
     /* Emit the instruction. */
-   fs_inst inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index);
-   inst.base_mrf = 0;
-   inst.mlen = mlen;
+   fs_inst *inst = new(mem_ctx)
+      fs_inst(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, surf_index);
+   inst->base_mrf = 0;
+   inst->mlen = mlen;
     emit(inst);
  }
  
-fs_inst *
-fs_visitor::emit(fs_inst inst)
-{
-   fs_inst *list_inst = new(mem_ctx) fs_inst;
-   *list_inst = inst;
-   emit(list_inst);
-   return list_inst;
-}
-
  fs_inst *
  fs_visitor::emit(fs_inst *inst)
  {
@@ -2730,9 +2733,10 @@ fs_visitor::emit_fb_writes()
     bool do_dual_src = this->dual_src_output.file != BAD_FILE;
     bool src0_alpha_to_render_target = false;
  
-   if (dispatch_width == 16 && do_dual_src) {
-      fail("GL_ARB_blend_func_extended not yet supported in SIMD16.");
-      do_dual_src = false;
+   if (do_dual_src) {
+      no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
+      if (dispatch_width == 16)
+         do_dual_src = false;
     }
  
     /* From the Sandy Bridge PRM, volume 4, page 198:
@@ -2783,13 +2787,13 @@ fs_visitor::emit_fb_writes()
        nr += reg_width;
  
     if (c->source_depth_to_render_target) {
-      if (brw->gen == 6 && dispatch_width == 16) {
+      if (brw->gen == 6) {
          /* For outputting oDepth on gen6, SIMD8 writes have to be
           * used.  This would require SIMD8 moves of each half to
           * message regs, kind of like pre-gen5 SIMD16 FB writes.
           * Just bail on doing so for now.
           */
-        fail("Missing support for simd16 depth writes on gen6\n");
+        no16("Missing support for simd16 depth writes on gen6\n");
        }
  
        if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
@@ -2947,23 +2951,16 @@ fs_visitor::fs_visitor(struct brw_context *brw,
                         struct gl_shader_program *shader_prog,
                         struct gl_fragment_program *fp,
                         unsigned dispatch_width)
-   : dispatch_width(dispatch_width)
+   : backend_visitor(brw, shader_prog, &fp->Base, &c->prog_data.base,
+                     MESA_SHADER_FRAGMENT),
+     dispatch_width(dispatch_width)
  {
     this->c = c;
-   this->brw = brw;
     this->fp = fp;
-   this->prog = &fp->Base;
-   this->shader_prog = shader_prog;
-   this->prog = &fp->Base;
-   this->stage_prog_data = &c->prog_data.base;
-   this->ctx = &brw->ctx;
     this->mem_ctx = ralloc_context(NULL);
-   if (shader_prog)
-      shader = (struct brw_shader *)
-         shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   else
-      shader = NULL;
     this->failed = false;
+   this->simd16_unsupported = false;
+   this->no16_msg = NULL;
     this->variable_ht = hash_table_ctor(0,
                                         hash_table_pointer_hash,
                                         hash_table_pointer_compare);
@@ -2985,14 +2982,15 @@ fs_visitor::fs_visitor(struct brw_context *brw,
     this->regs_live_at_ip = NULL;
  
     this->uniforms = 0;
-   this->params_remap = NULL;
-   this->nr_params_remap = 0;
+   this->pull_constant_loc = NULL;
+   this->push_constant_loc = NULL;
  
     this->force_uncompressed_stack = 0;
  
     this->spilled_any_registers = false;
  
-   memset(&this->param_size, 0, sizeof(this->param_size));
+   if (dispatch_width == 8)
+      this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
  }
  
  fs_visitor::~fs_visitor()