Merge remote-tracking branch 'public/master' into vulkan

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_nir.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 65a0ffc4d8d4a8735bc1f52a9877e0c335cbaab3..ab564bbcb9eee596f31da32dbacc5ed44da10e99 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -21,7 +21,7 @@
   * IN THE SOFTWARE.
   */
  
-#include "glsl/ir.h"
+#include "compiler/glsl/ir.h"
  #include "main/shaderimage.h"
  #include "brw_fs.h"
  #include "brw_fs_surface_builder.h"
@@ -130,7 +130,11 @@ fs_visitor::nir_setup_outputs()
           break;
        }
        case MESA_SHADER_FRAGMENT:
-         if (var->data.index > 0) {
+         if (key->force_dual_color_blend &&
+             var->data.location == FRAG_RESULT_DATA1) {
+            this->dual_src_output = reg;
+            this->do_dual_src = true;
+         } else if (var->data.index > 0) {
              assert(var->data.location == FRAG_RESULT_DATA0);
              assert(var->data.index == 1);
              this->dual_src_output = reg;
@@ -487,6 +491,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
     }
  }
  
+/**
+ * Emits the nir_op_extract_* feeding instr's src[0] straight into result as
+ * one EXTRACT_BYTE/WORD; returns false, emitting nothing, if there is none.
+ */
+bool
+fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
+                                      const fs_reg &result)
+{
+   if (!instr->src[0].src.is_ssa ||
+       !instr->src[0].src.ssa->parent_instr)
+      return false;
+   /* Only an ALU parent can be one of the extract opcodes checked below. */
+   if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *src0 =
+      nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+   /* Any producer other than an 8/16-bit extract is left to the caller. */
+   if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 &&
+       src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
+      return false;
+   /* The element index (the extract's src[1]) is asserted to be constant. */
+   nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
+   assert(element != NULL);
+   /* 16-bit extracts select word 0-1; 8-bit extracts select byte 0-3. */
+   enum opcode extract_op;
+   if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
+      assert(element->u32[0] <= 1);
+      extract_op = SHADER_OPCODE_EXTRACT_WORD;
+   } else {
+      assert(element->u32[0] <= 3);
+      extract_op = SHADER_OPCODE_EXTRACT_BYTE;
+   }
+   /* Read the extract's own src[0], typed per the extract op's input type. */
+   fs_reg op0 = get_nir_src(src0->src[0].src);
+   op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]);
+   op0 = offset(op0, bld, src0->src[0].swizzle[0]);
+   /* instr's saturate flag is handed to set_saturate with the new extract. */
+   set_saturate(instr->dest.saturate,
+                bld.emit(extract_op, result, op0, brw_imm_ud(element->u32[0])));
+   return true;
+}
+
  bool
  fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
                                           const fs_reg &result)
@@ -502,11 +549,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
        return false;
  
     nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-   if (!value1 || fabsf(value1->f[0]) != 1.0f)
+   if (!value1 || fabsf(value1->f32[0]) != 1.0f)
        return false;
  
     nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
-   if (!value2 || fabsf(value2->f[0]) != 1.0f)
+   if (!value2 || fabsf(value2->f32[0]) != 1.0f)
        return false;
  
     fs_reg tmp = vgrf(glsl_type::int_type);
@@ -526,7 +573,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
         * surely be TRIANGLES
         */
  
-      if (value1->f[0] == -1.0f) {
+      if (value1->f32[0] == -1.0f) {
           g0.negate = true;
        }
  
@@ -554,7 +601,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
         * surely be TRIANGLES
         */
  
-      if (value1->f[0] == -1.0f) {
+      if (value1->f32[0] == -1.0f) {
           g1_6.negate = true;
        }
  
@@ -658,6 +705,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
     switch (instr->op) {
     case nir_op_i2f:
     case nir_op_u2f:
+      if (optimize_extract_to_float(instr, result))
+         return;
+
        inst = bld.MOV(result, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
@@ -814,6 +864,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
         * When we XOR the sources, the top bit is 0 if they are the same and 1
         * if they are different.  We can then use a conditional modifier to
         * turn that into a predicate.  This leads us to an XOR.l instruction.
+       *
+       * Technically, according to the PRM, you're not allowed to use .l on a
+       * XOR instruction.  However, empirical experiments and Curro's reading
+       * of the simulator source both indicate that it's safe.
         */
        fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
        inst = bld.XOR(tmp, op[0], op[1]);
@@ -997,28 +1051,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
     case nir_op_fmin:
     case nir_op_imin:
     case nir_op_umin:
-      if (devinfo->gen >= 6) {
-         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
-         inst->conditional_mod = BRW_CONDITIONAL_L;
-      } else {
-         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
-         inst = bld.SEL(result, op[0], op[1]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
+      inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L);
        inst->saturate = instr->dest.saturate;
        break;
  
     case nir_op_fmax:
     case nir_op_imax:
     case nir_op_umax:
-      if (devinfo->gen >= 6) {
-         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
-         inst->conditional_mod = BRW_CONDITIONAL_GE;
-      } else {
-         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
-         inst = bld.SEL(result, op[0], op[1]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
+      inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE);
        inst->saturate = instr->dest.saturate;
        break;
  
@@ -1126,6 +1166,22 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        inst->predicate = BRW_PREDICATE_NORMAL;
        break;
  
+   case nir_op_extract_u8:
+   case nir_op_extract_i8: {
+      nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
+      bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
+               result, op[0], brw_imm_ud(byte->u32[0]));
+      break;
+   }
+
+   case nir_op_extract_u16:
+   case nir_op_extract_i16: {
+      nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
+      bld.emit(SHADER_OPCODE_EXTRACT_WORD,
+               result, op[0], brw_imm_ud(word->u32[0]));
+      break;
+   }
+
     default:
        unreachable("unhandled instruction");
     }
@@ -1149,7 +1205,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
     fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
  
     for (unsigned i = 0; i < instr->def.num_components; i++)
-      bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
+      bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
  
     nir_ssa_values[instr->def.index] = reg;
  }
@@ -1284,7 +1340,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
  static brw_reg_type
  get_image_base_type(const glsl_type *type)
  {
-   switch ((glsl_base_type)type->sampler_type) {
+   switch ((glsl_base_type)type->sampled_type) {
     case GLSL_TYPE_UINT:
        return BRW_REGISTER_TYPE_UD;
     case GLSL_TYPE_INT:
@@ -1703,9 +1759,9 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
     const bool is_point_size = (base_offset == 0);
  
     if (offset_const != NULL && vertex_const != NULL &&
-       4 * (base_offset + offset_const->u[0]) < push_reg_count) {
-      int imm_offset = (base_offset + offset_const->u[0]) * 4 +
-                       vertex_const->u[0] * push_reg_count;
+       4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
+      int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
+                       vertex_const->u32[0] * push_reg_count;
        /* This input was pushed into registers. */
        if (is_point_size) {
           /* gl_PointSize comes in .w */
@@ -1727,7 +1783,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
        if (vertex_const) {
           /* The vertex index is constant; just select the proper URB handle. */
           icp_handle =
-            retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+            retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0),
                     BRW_REGISTER_TYPE_UD);
        } else {
           /* The vertex index is non-constant.  We need to use indirect
@@ -1771,7 +1827,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
        if (offset_const) {
           /* Constant indexing - use global offset. */
           inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
-         inst->offset = base_offset + offset_const->u[0];
+         inst->offset = base_offset + offset_const->u32[0];
           inst->base_mrf = -1;
           inst->mlen = 1;
           inst->regs_written = num_components;
@@ -1809,7 +1865,7 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
         * add_const_offset_to_base() will fold other constant offsets
         * into instr->const_index[0].
         */
-      assert(const_value->u[0] == 0);
+      assert(const_value->u32[0] == 0);
        return fs_reg();
     }
  
@@ -2127,7 +2183,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
           nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
  
           if (const_sample) {
-            unsigned msg_data = const_sample->i[0] << 4;
+            unsigned msg_data = const_sample->i32[0] << 4;
  
              emit_pixel_interpolater_send(bld,
                                           FS_OPCODE_INTERPOLATE_AT_SAMPLE,
@@ -2194,8 +2250,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
           nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
  
           if (const_offset) {
-            unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
-            unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+            unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+            unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
  
              emit_pixel_interpolater_send(bld,
                                           FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
@@ -2354,7 +2410,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
        fs_reg offset_reg;
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
        if (const_offset) {
-         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
        } else {
           offset_reg = vgrf(glsl_type::uint_type);
           bld.ADD(offset_reg,
@@ -2398,7 +2454,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
  
           nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
           if (const_offset) {
-            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
+            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
                                      4 * first_component);
           } else {
              offset_reg = vgrf(glsl_type::uint_type);
@@ -2438,8 +2494,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
     case nir_intrinsic_atomic_counter_inc:
     case nir_intrinsic_atomic_counter_dec:
     case nir_intrinsic_atomic_counter_read: {
-      using namespace surface_access;
-
        /* Get the arguments of the atomic intrinsic. */
        const fs_reg offset = get_nir_src(instr->src[0]);
        const unsigned surface = (stage_prog_data->binding_table.abo_start +
@@ -2631,8 +2685,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
        if (const_offset) {
           /* Offsets are in bytes but they should always be multiples of 4 */
-         assert(const_offset->u[0] % 4 == 0);
-         src.reg_offset = const_offset->u[0] / 4;
+         assert(const_offset->u32[0] % 4 == 0);
+         src.reg_offset = const_offset->u32[0] / 4;
  
           for (unsigned j = 0; j < instr->num_components; j++) {
              bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2665,7 +2719,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
  
        if (const_index) {
           const unsigned index = stage_prog_data->binding_table.ubo_start +
-                                const_index->u[0];
+                                const_index->u32[0];
           surf_index = brw_imm_ud(index);
           brw_mark_surface_used(prog_data, index);
        } else {
@@ -2698,12 +2752,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           fs_reg packed_consts = vgrf(glsl_type::float_type);
           packed_consts.type = dest.type;
  
-         struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15);
+         struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u32[0] & ~15);
           bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
                    surf_index, const_offset_reg);
  
           for (unsigned i = 0; i < instr->num_components; i++) {
-            packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i);
+            packed_consts.set_smear(const_offset->u32[0] % 16 / 4 + i);
  
              /* The std140 packing rules don't allow vectors to cross 16-byte
               * boundaries, and a reg is 32 bytes.
@@ -2726,7 +2780,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        fs_reg surf_index;
        if (const_uniform_block) {
           unsigned index = stage_prog_data->binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
           surf_index = brw_imm_ud(index);
           brw_mark_surface_used(prog_data, index);
        } else {
@@ -2745,7 +2799,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        fs_reg offset_reg;
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
        if (const_offset) {
-         offset_reg = brw_imm_ud(const_offset->u[0]);
+         offset_reg = brw_imm_ud(const_offset->u32[0]);
        } else {
           offset_reg = get_nir_src(instr->src[1]);
        }
@@ -2773,7 +2827,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
  
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
        assert(const_offset && "Indirect input loads not allowed");
-      src = offset(src, bld, const_offset->u[0]);
+      src = offset(src, bld, const_offset->u32[0]);
  
        for (unsigned j = 0; j < instr->num_components; j++) {
           bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2790,7 +2844,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           nir_src_as_const_value(instr->src[1]);
        if (const_uniform_block) {
           unsigned index = stage_prog_data->binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
           surf_index = brw_imm_ud(index);
           brw_mark_surface_used(prog_data, index);
        } else {
@@ -2821,7 +2875,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           fs_reg offset_reg;
           nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
           if (const_offset) {
-            offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component);
+            offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component);
           } else {
              offset_reg = vgrf(glsl_type::uint_type);
              bld.ADD(offset_reg,
@@ -2849,7 +2903,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
  
        nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
        assert(const_offset && "Indirect output stores not allowed");
-      new_dest = offset(new_dest, bld, const_offset->u[0]);
+      new_dest = offset(new_dest, bld, const_offset->u32[0]);
  
        for (unsigned j = 0; j < instr->num_components; j++) {
           bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
@@ -2890,7 +2944,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
  
     case nir_intrinsic_get_buffer_size: {
        nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
-      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
        int reg_width = dispatch_width / 8;
  
        /* Set LOD = 0 */
@@ -2941,7 +2995,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
     nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
     if (const_surface) {
        unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
-                            const_surface->u[0];
+                            const_surface->u32[0];
        surface = brw_imm_ud(surf_index);
        brw_mark_surface_used(prog_data, surf_index);
     } else {
@@ -2965,12 +3019,11 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
  
     /* Emit the actual atomic operation operation */
  
-   fs_reg atomic_result =
-      surface_access::emit_untyped_atomic(bld, surface, offset,
-                                          data1, data2,
-                                          1 /* dims */, 1 /* rsize */,
-                                          op,
-                                          BRW_PREDICATE_NONE);
+   fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
+                                              data1, data2,
+                                              1 /* dims */, 1 /* rsize */,
+                                              op,
+                                              BRW_PREDICATE_NONE);
     dest.type = atomic_result.type;
     bld.MOV(dest, atomic_result);
  }
@@ -2992,12 +3045,11 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
  
     /* Emit the actual atomic operation operation */
  
-   fs_reg atomic_result =
-      surface_access::emit_untyped_atomic(bld, surface, offset,
-                                          data1, data2,
-                                          1 /* dims */, 1 /* rsize */,
-                                          op,
-                                          BRW_PREDICATE_NONE);
+   fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
+                                              data1, data2,
+                                              1 /* dims */, 1 /* rsize */,
+                                              op,
+                                              BRW_PREDICATE_NONE);
     dest.type = atomic_result.type;
     bld.MOV(dest, atomic_result);
  }
@@ -3016,11 +3068,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
                          instr->is_array;
  
     int lod_components = 0;
-   int UNUSED offset_components = 0;
  
     fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
  
-   /* Our hardware requires a LOD for buffer textures */
+   /* The hardware requires a LOD for buffer textures */
     if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
        lod = brw_imm_d(0);
  
@@ -3068,13 +3119,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
        case nir_tex_src_ms_index:
           sample_index = retype(src, BRW_REGISTER_TYPE_UD);
           break;
-      case nir_tex_src_offset:
-         tex_offset = retype(src, BRW_REGISTER_TYPE_D);
-         if (instr->is_array)
-            offset_components = instr->coord_components - 1;
-         else
-            offset_components = instr->coord_components;
+
+      case nir_tex_src_offset: {
+         nir_const_value *const_offset =
+            nir_src_as_const_value(instr->src[i].src);
+         if (const_offset) {
+            tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i32, 3));
+         } else {
+            tex_offset = retype(src, BRW_REGISTER_TYPE_D);
+         }
           break;
+      }
+
        case nir_tex_src_projector:
           unreachable("should be lowered");
  
@@ -3118,14 +3174,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
        }
     }
  
-   for (unsigned i = 0; i < 3; i++) {
-      if (instr->const_offset[i] != 0) {
-         assert(offset_components == 0);
-         tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3));
-         break;
-      }
-   }
-
     enum glsl_base_type dest_base_type =
       brw_glsl_base_type_for_nir_type (instr->dest_type);
  
@@ -3162,8 +3210,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
  
     emit_texture(op, dest_type, coordinate, instr->coord_components,
                  shadow_comparitor, lod, lod2, lod_components, sample_index,
-                tex_offset, mcs, gather_component,
-                is_cube_array, texture, texture_reg, sampler, sampler_reg);
+                tex_offset, mcs, gather_component, is_cube_array,
+                texture, texture_reg, sampler, sampler_reg);
  
     fs_reg dest = get_nir_dest(instr->dest);
     dest.type = this->result.type;