intel/compiler: Don't left-shift by >= the number of bits of the type

[mesa.git] / src / intel / compiler / brw_vec4_visitor.cpp
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp

index 88e80aaa3af28a8e1c9bd9d5600bc9fb57e125b6..3b60ddf9573ae8493e7c7788dd0f3a980547f1f9 100644 (file)
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -24,6 +24,7 @@
  #include "brw_vec4.h"
  #include "brw_cfg.h"
  #include "brw_eu.h"
+#include "util/u_math.h"
  
  namespace brw {
  
@@ -46,6 +47,7 @@ vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
     this->predicate_inverse = false;
     this->target = 0;
     this->shadow_compare = false;
+   this->eot = false;
     this->ir = NULL;
     this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
     this->header_size = 0;
@@ -574,7 +576,7 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
   * false) elements needed to pack a type.
   */
  static int
-type_size_xvec4(const struct glsl_type *type, bool as_vec4)
+type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless)
  {
     unsigned int i;
     int size;
@@ -583,8 +585,13 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
     case GLSL_TYPE_UINT:
     case GLSL_TYPE_INT:
     case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_FLOAT16:
     case GLSL_TYPE_BOOL:
     case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_UINT16:
+   case GLSL_TYPE_INT16:
+   case GLSL_TYPE_UINT8:
+   case GLSL_TYPE_INT8:
     case GLSL_TYPE_UINT64:
     case GLSL_TYPE_INT64:
        if (type->is_matrix()) {
@@ -602,11 +609,14 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
        }
     case GLSL_TYPE_ARRAY:
        assert(type->length > 0);
-      return type_size_xvec4(type->fields.array, as_vec4) * type->length;
+      return type_size_xvec4(type->fields.array, as_vec4, bindless) *
+             type->length;
     case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_INTERFACE:
        size = 0;
        for (i = 0; i < type->length; i++) {
-        size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
+        size += type_size_xvec4(type->fields.structure[i].type, as_vec4,
+                                 bindless);
        }
        return size;
     case GLSL_TYPE_SUBROUTINE:
@@ -616,14 +626,13 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
        /* Samplers take up no register space, since they're baked in at
         * link time.
         */
-      return 0;
+      return bindless ? 1 : 0;
     case GLSL_TYPE_ATOMIC_UINT:
        return 0;
     case GLSL_TYPE_IMAGE:
-      return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
+      return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
     case GLSL_TYPE_VOID:
     case GLSL_TYPE_ERROR:
-   case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
@@ -642,9 +651,9 @@ type_size_xvec4(const struct glsl_type *type, bool as_vec4)
   * store a particular type.
   */
  extern "C" int
-type_size_vec4(const struct glsl_type *type)
+type_size_vec4(const struct glsl_type *type, bool bindless)
  {
-   return type_size_xvec4(type, true);
+   return type_size_xvec4(type, true, bindless);
  }
  
  /**
@@ -667,9 +676,9 @@ type_size_vec4(const struct glsl_type *type)
   * type fits in one or two vec4 slots.
   */
  extern "C" int
-type_size_dvec4(const struct glsl_type *type)
+type_size_dvec4(const struct glsl_type *type, bool bindless)
  {
-   return type_size_xvec4(type, false);
+   return type_size_xvec4(type, false, bindless);
  }
  
  src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
@@ -677,9 +686,9 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
     init();
  
     this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type));
+   this->nr = v->alloc.allocate(type_size_vec4(type, false));
  
-   if (type->is_array() || type->is_record()) {
+   if (type->is_array() || type->is_struct()) {
        this->swizzle = BRW_SWIZZLE_NOOP;
     } else {
        this->swizzle = brw_swizzle_for_size(type->vector_elements);
@@ -695,7 +704,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
     init();
  
     this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type) * size);
+   this->nr = v->alloc.allocate(type_size_vec4(type, false) * size);
  
     this->swizzle = BRW_SWIZZLE_NOOP;
  
@@ -707,9 +716,9 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
     init();
  
     this->file = VGRF;
-   this->nr = v->alloc.allocate(type_size_vec4(type));
+   this->nr = v->alloc.allocate(type_size_vec4(type, false));
  
-   if (type->is_array() || type->is_record()) {
+   if (type->is_array() || type->is_struct()) {
        this->writemask = WRITEMASK_XYZW;
     } else {
        this->writemask = (1 << type->vector_elements) - 1;
@@ -727,34 +736,6 @@ vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
     return inst;
  }
  
-vec4_instruction *
-vec4_visitor::emit_lrp(const dst_reg &dst,
-                       const src_reg &x, const src_reg &y, const src_reg &a)
-{
-   if (devinfo->gen >= 6) {
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-     return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
-                     fix_3src_operand(x)));
-   } else {
-      /* Earlier generations don't support three source operations, so we
-       * need to emit x*(1-a) + y*a.
-       */
-      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
-      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
-      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
-      y_times_a.writemask           = dst.writemask;
-      one_minus_a.writemask         = dst.writemask;
-      x_times_one_minus_a.writemask = dst.writemask;
-
-      emit(MUL(y_times_a, y, a));
-      emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f)));
-      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
-      return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
-   }
-}
-
  /**
   * Emits the instructions needed to perform a pull constant load. before_block
   * and before_inst can be NULL in which case the instruction will be appended
@@ -856,7 +837,7 @@ vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
                                      dst_reg(this, glsl_type::uvec4_type));
     inst->base_mrf = 2;
     inst->src[1] = surface;
-   inst->src[2] = surface;
+   inst->src[2] = brw_imm_ud(0); /* sampler */
  
     int param_base;
  
@@ -915,18 +896,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
                             src_reg surface_reg,
                             src_reg sampler_reg)
  {
-   /* The sampler can only meaningfully compute LOD for fragment shader
-    * messages. For all other stages, we change the opcode to TXL and hardcode
-    * the LOD to 0.
-    *
-    * textureQueryLevels() is implemented in terms of TXS so we need to pass a
-    * valid LOD argument.
-    */
-   if (op == ir_tex || op == ir_query_levels) {
-      assert(lod.file == BAD_FILE);
-      lod = brw_imm_f(0.0f);
-   }
-
     enum opcode opcode;
     switch (op) {
     case ir_tex: opcode = SHADER_OPCODE_TXL; break;
@@ -1206,12 +1175,14 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
        if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) {
           current_annotation = "Clipping flags";
           dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
-         dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
  
           emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
           emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
           emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
+      }
  
+      if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) {
+         dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
           emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
           emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
           emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
@@ -1323,7 +1294,7 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
         * determine which edges should be drawn as wireframe.
         */
        current_annotation = "edge flag";
-      int edge_attr = _mesa_bitcount_64(nir->info.inputs_read &
+      int edge_attr = util_bitcount64(nir->info.inputs_read &
                                          BITFIELD64_MASK(VERT_ATTRIB_EDGEFLAG));
        emit(MOV(reg, src_reg(dst_reg(ATTR, edge_attr,
                                      glsl_type::float_type, WRITEMASK_XYZW))));
@@ -1340,8 +1311,8 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
     }
  }
  
-static int
-align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen)
+static unsigned
+align_interleaved_urb_mlen(const struct gen_device_info *devinfo, unsigned mlen)
  {
     if (devinfo->gen >= 6) {
        /* URB data written (does not include the message header reg) must
@@ -1751,8 +1722,6 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
        src = byte_offset(src, 16);
     }
  
-   brw_mark_surface_used(&prog_data->base, index);
-
     if (is_64bit) {
        temp = retype(temp, BRW_REGISTER_TYPE_DF);
        shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
@@ -1777,11 +1746,16 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
     /* The vulkan driver doesn't support pull constants other than UBOs so
      * everything has to be pushed regardless.
      */
-   if (stage_prog_data->pull_param == NULL) {
+   if (!compiler->supports_pull_constants) {
        split_uniform_registers();
        return;
     }
  
+   /* Allocate the pull_params array */
+   assert(stage_prog_data->nr_pull_params == 0);
+   stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
+                                              this->uniforms * 4);
+
     int pull_constant_loc[this->uniforms];
     memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
  
@@ -1887,10 +1861,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
     this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
  
     this->uniforms = 0;
-}
  
-vec4_visitor::~vec4_visitor()
-{
+   this->nir_locals = NULL;
+   this->nir_ssa_values = NULL;
  }