const int loc = var->data.driver_location;
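+ /* Compact variables are scalar arrays (e.g. gl_ClipDistance) that are
+ * packed four components per vec4, hence the DIV_ROUND_UP.
+ */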
const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
- : type_size_vec4(var->type);
+ : type_size_vec4(var->type, true);
vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
}
void
fs_visitor::nir_emit_if(nir_if *if_stmt)
{
+ bool invert;
+ fs_reg cond_reg;
+
+ /* If the condition has the form !other_condition, use other_condition as
+ * the source, but invert the predicate on the if instruction.
+ */
+ nir_alu_instr *const cond = nir_src_as_alu_instr(&if_stmt->condition);
+ if (cond != NULL && cond->op == nir_op_inot) {
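+ /* Source modifiers on the inot operand would change the value being
+ * tested, so none are expected here.
+ */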
+ assert(!cond->src[0].negate);
+ assert(!cond->src[0].abs);
+
+ invert = true;
+ cond_reg = get_nir_src(cond->src[0].src);
+ } else {
+ invert = false;
+ cond_reg = get_nir_src(if_stmt->condition);
+ }
+
/* first, put the condition into f0 */
fs_inst *inst = bld.MOV(bld.null_reg_d(),
- retype(get_nir_src(if_stmt->condition),
- BRW_REGISTER_TYPE_D));
+ retype(cond_reg, BRW_REGISTER_TYPE_D));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- bld.IF(BRW_PREDICATE_NORMAL);
+ bld.IF(BRW_PREDICATE_NORMAL)->predicate_inverse = invert;
nir_emit_cf_list(&if_stmt->then_list);
}
}
+bool
+fs_visitor::try_emit_b2fi_of_inot(const fs_builder &bld,
+ fs_reg result,
+ nir_alu_instr *instr)
+{
+ if (devinfo->gen < 6 || devinfo->gen >= 12)
+ return false;
+
+ nir_alu_instr *const inot_instr = nir_src_as_alu_instr(&instr->src[0].src);
+
+ if (inot_instr == NULL || inot_instr->op != nir_op_inot)
+ return false;
+
+ /* HF is also possible as a destination on BDW+. For nir_op_b2i, the set
+ * of valid size-changing combinations is a bit more complex.
+ *
+ * The source restriction is just because I was lazy about generating the
+ * constant below.
+ */
+ if (nir_dest_bit_size(instr->dest.dest) != 32 ||
+ nir_src_bit_size(inot_instr->src[0].src) != 32)
+ return false;
+
+ /* b2[fi](inot(a)) maps a=0 => 1, a=-1 => 0. Since a can only be 0 or -1,
+ * this is float(1 + a).
+ */
+ fs_reg op;
+
+ prepare_alu_destination_and_sources(bld, inot_instr, &op, false);
+
+ /* Ignore the saturate modifier, if there is one. The result of the
+ * arithmetic can only be 0 or 1, so the clamping will do nothing anyway.
+ */
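+ /* Both ADD sources are integer; the conversion to the destination type
+ * (float for b2f) happens when the result is written.
+ */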
+ bld.ADD(result, op, brw_imm_d(1));
+
+ return true;
+}
+
void
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
{
*/
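+ /* brw_nir_lower_conversions is expected to have split any conversion the
+ * hardware cannot do in a single instruction (e.g. 64-bit to 8-bit) into
+ * two steps, which is what the asserts below rely on.
+ */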
case nir_op_f2f16:
+ case nir_op_i2f16:
+ case nir_op_u2f16:
+ assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
+ inst = bld.MOV(result, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_f2f64:
+ case nir_op_f2i64:
+ case nir_op_f2u64:
+ assert(type_sz(op[0].type) > 2); /* brw_nir_lower_conversions */
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
case nir_op_b2f16:
case nir_op_b2f32:
case nir_op_b2f64:
+ if (try_emit_b2fi_of_inot(bld, result, instr))
+ break;
op[0].type = BRW_REGISTER_TYPE_D;
op[0].negate = !op[0].negate;
/* fallthrough */
- case nir_op_f2f64:
- case nir_op_f2i64:
- case nir_op_f2u64:
case nir_op_i2f64:
case nir_op_i2i64:
case nir_op_u2f64:
case nir_op_u2u64:
+ assert(type_sz(op[0].type) > 1); /* brw_nir_lower_conversions */
+ /* fallthrough */
case nir_op_f2f32:
case nir_op_f2i32:
case nir_op_f2u32:
case nir_op_u2u32:
case nir_op_i2i16:
case nir_op_u2u16:
- case nir_op_i2f16:
- case nir_op_u2f16:
case nir_op_i2i8:
case nir_op_u2u8:
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_imul_2x32_64:
+ case nir_op_umul_2x32_64:
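+ /* Both sources are 32 bits; the MUL produces the full 64-bit product. */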
+ bld.MUL(result, op[0], op[1]);
+ break;
+
case nir_op_imul:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
bld.MUL(result, op[0], op[1]);
* Use two instructions and a word or DWord intermediate integer type.
*/
if (nir_dest_bit_size(instr->dest.dest) == 64) {
- const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i8);
+ const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8);
if (instr->op == nir_op_extract_i8) {
/* If we need to sign extend, extract to a word first */
fs_reg w_temp = bld.vgrf(BRW_REGISTER_TYPE_W);
bld.MOV(w_temp, subscript(op[0], type, byte));
bld.MOV(result, w_temp);
+ } else if (byte & 1) {
+ /* The desired byte is the high byte of its containing word; extract
+ * the word and shift the byte down.
+ */
+ bld.SHR(result,
+ subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2),
+ brw_imm_uw(8));
} else {
/* Otherwise use an AND with 0xff and a word type */
- bld.AND(result, subscript(op[0], type, byte / 2), brw_imm_uw(0xff));
+ bld.AND(result,
+ subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2),
+ brw_imm_uw(0xff));
}
} else {
const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8);
switch (instr->def.bit_size) {
case 8:
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), setup_imm_b(bld, instr->value.i8[i]));
+ bld.MOV(offset(reg, bld, i), setup_imm_b(bld, instr->value[i].i8));
break;
case 16:
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value.i16[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value[i].i16));
break;
case 32:
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value[i].i32));
break;
case 64:
/* We don't get 64-bit integer types until gen8 */
for (unsigned i = 0; i < instr->def.num_components; i++) {
bld.MOV(retype(offset(reg, bld, i), BRW_REGISTER_TYPE_DF),
- setup_imm_df(bld, instr->value.f64[i]));
+ setup_imm_df(bld, instr->value[i].f64));
}
} else {
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), brw_imm_q(instr->value.i64[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_q(instr->value[i].i64));
}
break;
/* Zero the message header */
bld.exec_all().MOV(m0, brw_imm_ud(0u));
- /* Copy "Barrier ID" from r0.2, bits 16:13 */
- chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(INTEL_MASK(16, 13)));
+ if (devinfo->gen < 11) {
+ /* Copy "Barrier ID" from r0.2, bits 16:13 */
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(INTEL_MASK(16, 13)));
- /* Shift it up to bits 27:24. */
- chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ /* Shift it up to bits 27:24. */
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ } else {
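+ /* Copy "Barrier ID" from r0.2, bits 30:24 */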
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(INTEL_MASK(30, 24)));
+ }
/* Set the Barrier Count and the enable bit */
- chanbld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
+ if (devinfo->gen < 11) {
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
+ } else {
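+ /* The barrier count field starts at bit 8 on gen11+ rather than bit 9. */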
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+ }
bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
break;
if (const_offset) {
assert(nir_src_bit_size(instr->src[0]) == 32);
- unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
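+ /* The offsets are encoded as signed 4-bit values in units of 1/16
+ * pixel, hence the scale by 16, the clamp to 7, and the 4-bit mask.
+ */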
+ unsigned off_x = MIN2((int)(const_offset[0].f32 * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset[1].f32 * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
switch (type_sz(type)) {
case 2:
assert(type != BRW_REGISTER_TYPE_HF);
- return retype(brw_imm_uw(value.u16[0]), type);
+ return retype(brw_imm_uw(value.u16), type);
case 4:
- return retype(brw_imm_ud(value.u32[0]), type);
+ return retype(brw_imm_ud(value.u32), type);
case 8:
if (type == BRW_REGISTER_TYPE_DF)
- return setup_imm_df(bld, value.f64[0]);
+ return setup_imm_df(bld, value.f64);
else
- return retype(brw_imm_u64(value.u64[0]), type);
+ return retype(brw_imm_u64(value.u64), type);
default:
unreachable("Invalid type size");
}
break;
case nir_intrinsic_get_buffer_size: {
+ assert(nir_src_num_components(instr->src[0]) == 1);
unsigned ssbo_index = nir_src_is_const(instr->src[0]) ?
nir_src_as_uint(instr->src[0]) : 0;
break;
case nir_tex_src_offset: {
- nir_const_value *const_offset =
- nir_src_as_const_value(instr->src[i].src);
- assert(nir_src_bit_size(instr->src[i].src) == 32);
- unsigned offset_bits = 0;
- if (const_offset &&
- brw_texture_offset(const_offset->i32,
- nir_tex_instr_src_size(instr, i),
- &offset_bits)) {
+ uint32_t offset_bits = 0;
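+ /* Try to fold a constant offset into the sampler message header bits. */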
+ if (brw_texture_offset(instr, i, &offset_bits)) {
header_bits |= offset_bits;
} else {
srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
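+ /* Fragment shaders always have implicit derivatives available, and
+ * compute shaders do when a derivative group is set (e.g.
+ * NV_compute_shader_derivatives), so implicit LOD is usable there.
+ */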
+ bool shader_supports_implicit_lod = stage == MESA_SHADER_FRAGMENT ||
+ (stage == MESA_SHADER_COMPUTE &&
+ nir->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
+
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
- opcode = (stage == MESA_SHADER_FRAGMENT ? SHADER_OPCODE_TEX_LOGICAL :
- SHADER_OPCODE_TXL_LOGICAL);
+ opcode = shader_supports_implicit_lod ?
+ SHADER_OPCODE_TEX_LOGICAL : SHADER_OPCODE_TXL_LOGICAL;
break;
case nir_texop_txb:
opcode = FS_OPCODE_TXB_LOGICAL;