i965: Hide the register type hardware encodings
[mesa.git] / src/intel/compiler/brw_fs_nir.cpp
index b392895e8fd5ba035697c6012fa1964b3bb1e98f..d760946e62445f616b734e116191959c9ae329f8 100644
@@ -89,39 +89,11 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
          unreachable("should be lowered by lower_vertex_id().");
 
       case nir_intrinsic_load_vertex_id_zero_base:
-         assert(v->stage == MESA_SHADER_VERTEX);
-         reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
-         break;
-
       case nir_intrinsic_load_base_vertex:
-         assert(v->stage == MESA_SHADER_VERTEX);
-         reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
-         break;
-
       case nir_intrinsic_load_instance_id:
-         assert(v->stage == MESA_SHADER_VERTEX);
-         reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
-         break;
-
       case nir_intrinsic_load_base_instance:
-         assert(v->stage == MESA_SHADER_VERTEX);
-         reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
-         break;
-
       case nir_intrinsic_load_draw_id:
-         assert(v->stage == MESA_SHADER_VERTEX);
-         reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
-         break;
+         unreachable("should be lowered by brw_nir_lower_vs_inputs().");
 
       case nir_intrinsic_load_invocation_id:
          if (v->stage == MESA_SHADER_TESS_CTRL)
@@ -641,19 +613,17 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    }
 
    switch (instr->op) {
-   case nir_op_i2f:
-   case nir_op_u2f:
-   case nir_op_i642d:
-   case nir_op_u642d:
+   case nir_op_i2f32:
+   case nir_op_u2f32:
       if (optimize_extract_to_float(instr, result))
          return;
       inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
-   case nir_op_f2d:
-   case nir_op_i2d:
-   case nir_op_u2d:
+   case nir_op_f2f64:
+   case nir_op_i2f64:
+   case nir_op_u2f64:
       /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
        *
        *    "When source or destination is 64b (...), regioning in Align1
@@ -669,7 +639,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        */
       if (nir_dest_bit_size(instr->dest.dest) == 64 &&
           nir_src_bit_size(instr->src[0].src) == 32 &&
-          (devinfo->is_cherryview || devinfo->is_broxton)) {
+          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
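+         /* gen_device_info_is_9lp() covers the gen9 low-power parts
+          * (Broxton and Geminilake), which share CHV's restriction.
+          */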
          fs_reg tmp = bld.vgrf(result.type, 1);
          tmp = subscript(tmp, op[0].type, 0);
          inst = bld.MOV(tmp, op[0]);
@@ -678,33 +648,17 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
          break;
       }
       /* fallthrough */
+   case nir_op_f2f32:
+   case nir_op_f2i32:
+   case nir_op_f2u32:
    case nir_op_f2i64:
    case nir_op_f2u64:
-   case nir_op_i2i64:
-   case nir_op_i2u64:
-   case nir_op_u2i64:
-   case nir_op_u2u64:
-   case nir_op_b2i64:
-   case nir_op_d2f:
-   case nir_op_d2i:
-   case nir_op_d2u:
-   case nir_op_i642f:
-   case nir_op_u642f:
-   case nir_op_u2i32:
    case nir_op_i2i32:
+   case nir_op_i2i64:
    case nir_op_u2u32:
-   case nir_op_i2u32:
-      if (instr->op == nir_op_b2i64) {
-         bld.MOV(result, negate(op[0]));
-      } else {
-         inst = bld.MOV(result, op[0]);
-         inst->saturate = instr->dest.saturate;
-      }
-      break;
-
-   case nir_op_f2i:
-   case nir_op_f2u:
-      bld.MOV(result, op[0]);
+   case nir_op_u2u64:
+      inst = bld.MOV(result, op[0]);
+      inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fsign: {
@@ -1090,35 +1044,35 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       bld.MOV(result, negate(op[0]));
       break;
 
+   case nir_op_i2b:
    case nir_op_f2b:
-      bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
-      break;
-
-   case nir_op_i642b:
-   case nir_op_d2b: {
-      /* two-argument instructions can't take 64-bit immediates */
-      fs_reg zero;
-      fs_reg tmp;
+      if (nir_src_bit_size(instr->src[0].src) == 64) {
+         /* two-argument instructions can't take 64-bit immediates */
+         fs_reg zero;
+         fs_reg tmp;
+
+         if (instr->op == nir_op_f2b) {
+            zero = vgrf(glsl_type::double_type);
+            tmp = vgrf(glsl_type::double_type);
+         } else {
+            zero = vgrf(glsl_type::int64_t_type);
+            tmp = vgrf(glsl_type::int64_t_type);
+         }
 
-      if (instr->op == nir_op_d2b) {
-         zero = vgrf(glsl_type::double_type);
-         tmp = vgrf(glsl_type::double_type);
+         bld.MOV(zero, setup_imm_df(bld, 0.0));
+         /* A SIMD16 execution needs to be split into two instructions, so
+          * use a vgrf instead of the flag register as the destination so
+          * that instruction splitting works.
+          */
+         bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
+         bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
       } else {
-         zero = vgrf(glsl_type::int64_t_type);
-         tmp = vgrf(glsl_type::int64_t_type);
+         if (instr->op == nir_op_f2b) {
+            bld.CMP(result, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
+         } else {
+            bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
+         }
       }
-
-      bld.MOV(zero, setup_imm_df(bld, 0.0));
-      /* A SIMD16 execution needs to be split in two instructions, so use
-       * a vgrf instead of the flag register as dst so instruction splitting
-       * works
-       */
-      bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
-      bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
-      break;
-   }
-   case nir_op_i2b:
-      bld.CMP(result, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ);
       break;
 
    case nir_op_ftrunc:
@@ -1822,7 +1776,7 @@ fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
    assert(gs_compile->control_data_bits_per_vertex == 2);
 
    /* Must be a valid stream */
-   assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
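+   /* stream_id is unsigned, so only the upper bound needs checking. */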
+   assert(stream_id < MAX_VERTEX_STREAMS);
 
    /* Control data bits are initialized to 0 so we don't have to set any
     * bits when sending vertices to stream 0.
@@ -1871,7 +1825,7 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
     * be recorded by transform feedback, we can simply discard all geometry
     * bound to these streams when transform feedback is disabled.
     */
-   if (stream_id > 0 && !nir->info->has_transform_feedback_varyings)
+   if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
       return;
 
    /* If we're outputting 32 control data bits or less, then we can wait
@@ -1960,27 +1914,15 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
    nir_const_value *offset_const = nir_src_as_const_value(offset_src);
    const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
 
-   /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y],
-    * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].  Only
-    * gl_PointSize is available as a GS input, however, so it must be that.
-    */
-   const bool is_point_size = (base_offset == 0);
-
    /* TODO: figure out push input layout for invocations == 1 */
    if (gs_prog_data->invocations == 1 &&
        offset_const != NULL && vertex_const != NULL &&
        4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
       int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
                        vertex_const->u32[0] * push_reg_count;
-      /* This input was pushed into registers. */
-      if (is_point_size) {
-         /* gl_PointSize comes in .w */
-         bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type));
-      } else {
-         for (unsigned i = 0; i < num_components; i++) {
-            bld.MOV(offset(dst, bld, i),
-                    fs_reg(ATTR, imm_offset + i + first_component, dst.type));
-         }
+      for (unsigned i = 0; i < num_components; i++) {
+         bld.MOV(offset(dst, bld, i),
+                 fs_reg(ATTR, imm_offset + i + first_component, dst.type));
       }
       return;
    }
@@ -2026,12 +1968,12 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
 
          /* Use first_icp_handle as the base offset.  There is one register
           * of URB handles per vertex, so inform the register allocator that
-          * we might read up to nir->info->gs.vertices_in registers.
+          * we might read up to nir->info.gs.vertices_in registers.
           */
          bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
                   retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
                   fs_reg(icp_offset_bytes),
-                  brw_imm_ud(nir->info->gs.vertices_in * REG_SIZE));
+                  brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
       }
    } else {
       assert(gs_prog_data->invocations > 1);
@@ -2057,12 +1999,12 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
 
          /* Use first_icp_handle as the base offset.  There is one DWord
           * of URB handles per vertex, so inform the register allocator that
-          * we might read up to ceil(nir->info->gs.vertices_in / 8) registers.
+          * we might read up to ceil(nir->info.gs.vertices_in / 8) registers.
           */
          bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
                   retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
                   fs_reg(icp_offset_bytes),
-                  brw_imm_ud(DIV_ROUND_UP(nir->info->gs.vertices_in, 8) *
+                  brw_imm_ud(DIV_ROUND_UP(nir->info.gs.vertices_in, 8) *
                              REG_SIZE));
       }
    }
@@ -2150,14 +2092,6 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
          }
       }
    }
-
-   if (is_point_size) {
-      /* Read the whole VUE header (because of alignment) and read .w. */
-      fs_reg tmp = bld.vgrf(dst.type, 4);
-      inst->dst = tmp;
-      inst->size_written = 4 * REG_SIZE;
-      bld.MOV(dst, offset(tmp, bld, 3));
-   }
 }
 
 fs_reg
@@ -2269,7 +2203,7 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
    }
 
    case nir_intrinsic_load_input: {
-      fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
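+      /* nir_intrinsic_base() counts vec4 input slots; the ATTR file is
+       * addressed in scalar components, hence the multiply by 4.
+       */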
+      fs_reg src = fs_reg(ATTR, nir_intrinsic_base(instr) * 4, dest.type);
       unsigned first_component = nir_intrinsic_component(instr);
       unsigned num_components = instr->num_components;
       enum brw_reg_type type = dest.type;
@@ -3814,7 +3748,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
             (instr->num_components - 1) * type_sz(dest.type);
 
          bool supports_64bit_indirects =
-            !devinfo->is_cherryview && !devinfo->is_broxton;
+            !devinfo->is_cherryview && !gen_device_info_is_9lp(devinfo);
 
          if (type_sz(dest.type) != 8 || supports_64bit_indirects) {
             for (unsigned j = 0; j < instr->num_components; j++) {
@@ -3867,7 +3801,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ubo_start +
-                               nir->info->num_ubos - 1);
+                               nir->info.num_ubos - 1);
       }
 
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
@@ -3888,6 +3822,34 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           * and we have to split it if necessary.
           */
          const unsigned type_size = type_sz(dest.type);
+
+         /* See if we've selected this as a push constant candidate */
+         if (const_index) {
+            const unsigned ubo_block = const_index->u32[0];
+            const unsigned offset_256b = const_offset->u32[0] / 32;
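+            /* brw_ubo_range::start and length are in units of 32-byte
+             * registers, so convert the byte offset to match.
+             */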
+
+            fs_reg push_reg;
+            for (int i = 0; i < 4; i++) {
+               const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+               if (range->block == ubo_block &&
+                   offset_256b >= range->start &&
+                   offset_256b < range->start + range->length) {
+
+                  push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type);
+                  push_reg.offset = const_offset->u32[0] - 32 * range->start;
+                  break;
+               }
+            }
+
+            if (push_reg.file != BAD_FILE) {
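+               /* Push constant hit: copy straight from the pushed
+                * uniform registers instead of issuing a pull load.
+                */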
+               for (unsigned i = 0; i < instr->num_components; i++) {
+                  bld.MOV(offset(dest, bld, i),
+                          byte_offset(push_reg, i * type_size));
+               }
+               break;
+            }
+         }
+
          const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
          const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
          const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
@@ -3937,7 +3899,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
           */
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ssbo_start +
-                               nir->info->num_ssbos - 1);
+                               nir->info.num_ssbos - 1);
       }
 
       fs_reg offset_reg;
@@ -3977,7 +3939,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
          brw_mark_surface_used(prog_data,
                                stage_prog_data->binding_table.ssbo_start +
-                               nir->info->num_ssbos - 1);
+                               nir->info.num_ssbos - 1);
       }
 
       /* Value */
@@ -4141,7 +4103,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_channel_num: {
+   case nir_intrinsic_load_subgroup_size:
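+      /* The subgroup size is simply the SIMD dispatch width. */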
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width));
+      break;
+
+   case nir_intrinsic_load_subgroup_invocation: {
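+      /* Build per-channel invocation indices 0..dispatch_width-1. */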
       fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
       dest = retype(dest, BRW_REGISTER_TYPE_UD);
       const fs_builder allbld8 = bld.group(8, 0).exec_all();
@@ -4156,6 +4122,102 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+      unreachable("not reached");
+
+   case nir_intrinsic_vote_any: {
+      const fs_builder ubld = bld.exec_all();
+
+      /* The any/all predicates do not consider channel enables. To prevent
+       * dead channels from affecting the result, we initialize the flag
+       * with the identity value for the logical operation.
+       */
+      ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+      bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
+      bld.MOV(dest, brw_imm_d(-1));
+      set_predicate(dispatch_width == 8 ?
+                    BRW_PREDICATE_ALIGN1_ANY8H :
+                    BRW_PREDICATE_ALIGN1_ANY16H,
+                    bld.SEL(dest, dest, brw_imm_d(0)));
+      break;
+   }
+   case nir_intrinsic_vote_all: {
+      const fs_builder ubld = bld.exec_all();
+
+      /* The any/all predicates do not consider channel enables. To prevent
+       * dead channels from affecting the result, we initialize the flag
+       * with the identity value for the logical operation.
+       */
+      ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+      bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
+      bld.MOV(dest, brw_imm_d(-1));
+      set_predicate(dispatch_width == 8 ?
+                    BRW_PREDICATE_ALIGN1_ALL8H :
+                    BRW_PREDICATE_ALIGN1_ALL16H,
+                    bld.SEL(dest, dest, brw_imm_d(0)));
+      break;
+   }
+   case nir_intrinsic_vote_eq: {
+      fs_reg value = get_nir_src(instr->src[0]);
+      fs_reg uniformized = bld.emit_uniformize(value);
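+      /* Compare every channel against the first live channel's value;
+       * the vote passes only if all enabled channels match it.
+       */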
+      const fs_builder ubld = bld.exec_all();
+
+      /* The any/all predicates do not consider channel enables. To prevent
+       * dead channels from affecting the result, we initialize the flag
+       * with the identity value for the logical operation.
+       */
+      ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+      bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
+      bld.MOV(dest, brw_imm_d(-1));
+      set_predicate(dispatch_width == 8 ?
+                    BRW_PREDICATE_ALIGN1_ALL8H :
+                    BRW_PREDICATE_ALIGN1_ALL16H,
+                    bld.SEL(dest, dest, brw_imm_d(0)));
+      break;
+   }
+
+   case nir_intrinsic_ballot: {
+      const fs_reg value = retype(get_nir_src(instr->src[0]),
+                                  BRW_REGISTER_TYPE_UD);
+      const struct brw_reg flag = retype(brw_flag_reg(0, 0),
+                                         BRW_REGISTER_TYPE_UD);
+
+      bld.exec_all().MOV(flag, brw_imm_ud(0u));
+      bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
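+      /* Each live channel has now set its bit in f0.0 (dead channels were
+       * zeroed by the exec_all MOV above), so the flag register holds the
+       * ballot mask.
+       */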
+
+      if (instr->dest.ssa.bit_size > 32) {
+         dest.type = BRW_REGISTER_TYPE_UQ;
+      } else {
+         dest.type = BRW_REGISTER_TYPE_UD;
+      }
+      bld.MOV(dest, flag);
+      break;
+   }
+
+   case nir_intrinsic_read_invocation: {
+      const fs_reg value = get_nir_src(instr->src[0]);
+      const fs_reg invocation = get_nir_src(instr->src[1]);
+      fs_reg tmp = bld.vgrf(value.type);
+
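+      /* BROADCAST replicates the value from the channel selected by
+       * component 0 of 'invocation' into every channel of tmp.
+       */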
+      bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
+                          component(invocation, 0));
+
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+              fs_reg(component(tmp, 0)));
+      break;
+   }
+
+   case nir_intrinsic_read_first_invocation: {
+      const fs_reg value = get_nir_src(instr->src[0]);
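+      /* emit_uniformize() picks the value from the first enabled channel. */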
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+              bld.emit_uniformize(value));
+      break;
+   }
+
    default:
       unreachable("unknown intrinsic");
    }
@@ -4189,7 +4251,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
        */
       brw_mark_surface_used(prog_data,
                             stage_prog_data->binding_table.ssbo_start +
-                            nir->info->num_ssbos - 1);
+                            nir->info.num_ssbos - 1);
    }
 
    fs_reg offset = get_nir_src(instr->src[1]);
@@ -4398,15 +4460,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
    srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
    srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
 
-   if (instr->op == nir_texop_query_levels ||
-       (instr->op == nir_texop_tex && stage != MESA_SHADER_FRAGMENT)) {
-      /* textureQueryLevels() and texture() are implemented in terms of TXS
-       * and TXL respectively, so we need to pass a valid LOD argument.
-       */
-      assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
-      srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
-   }
-
    enum opcode opcode;
    switch (instr->op) {
    case nir_texop_tex:
@@ -4473,6 +4526,15 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
       unreachable("unknown texture opcode");
    }
 
+   /* TXS and TXL require a LOD but not everything we implement using those
+    * two opcodes provides one.  Provide a default LOD of 0.
+    */
+   if ((opcode == SHADER_OPCODE_TXS_LOGICAL ||
+        opcode == SHADER_OPCODE_TXL_LOGICAL) &&
+       srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE) {
+      srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
+   }
+
    if (instr->op == nir_texop_tg4) {
       if (instr->component == 1 &&
           key_tex->gather_channel_quirk_mask & (1 << texture)) {