i965/blorp: Remove some i965-isms from genX_blorp_exec.h

[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 0c5bfb8579cb1e79cf0be2987b40e346bb60162a..76b2a05700f6b06d65070f159515186a70c53bb8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -183,6 +183,7 @@ ALU3(MAD)
  ALU2_ACC(ADDC)
  ALU2_ACC(SUBB)
  ALU2(MAC)
+ALU1(DIM)
  
  /** Gen4 predicated IF. */
  vec4_instruction *
@@ -566,18 +567,12 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
     emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
  }
  
-/**
- * Returns the minimum number of vec4 elements needed to pack a type.
- *
- * For simple types, it will return 1 (a single vec4); for matrices, the
- * number of columns; for array and struct, the sum of the vec4_size of
- * each of its elements; and for sampler and atomic, zero.
- *
- * This method is useful to calculate how much register space is needed to
- * store a particular type.
+/*
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
   */
-extern "C" int
-type_size_vec4(const struct glsl_type *type)
+static int
+type_size_xvec4(const struct glsl_type *type, bool as_vec4)
  {
     unsigned int i;
     int size;
@@ -587,23 +582,27 @@ type_size_vec4(const struct glsl_type *type)
     case GLSL_TYPE_INT:
     case GLSL_TYPE_FLOAT:
     case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
        if (type->is_matrix()) {
-        return type->matrix_columns;
+         const glsl_type *col_type = type->column_type();
+         unsigned col_slots =
+            (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
+         return type->matrix_columns * col_slots;
        } else {
-        /* Regardless of size of vector, it gets a vec4. This is bad
-         * packing for things like floats, but otherwise arrays become a
-         * mess.  Hopefully a later pass over the code can pack scalars
-         * down if appropriate.
-         */
-        return 1;
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
        }
     case GLSL_TYPE_ARRAY:
        assert(type->length > 0);
-      return type_size_vec4(type->fields.array) * type->length;
+      return type_size_xvec4(type->fields.array, as_vec4) * type->length;
     case GLSL_TYPE_STRUCT:
        size = 0;
        for (i = 0; i < type->length; i++) {
-        size += type_size_vec4(type->fields.structure[i].type);
+        size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
        }
        return size;
     case GLSL_TYPE_SUBROUTINE:
@@ -619,7 +618,6 @@ type_size_vec4(const struct glsl_type *type)
     case GLSL_TYPE_IMAGE:
        return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
     case GLSL_TYPE_VOID:
-   case GLSL_TYPE_DOUBLE:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_FUNCTION:
@@ -629,6 +627,47 @@ type_size_vec4(const struct glsl_type *type)
     return 0;
  }
  
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+extern "C" int
+type_size_vec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, true);
+}
+
+/**
+ * Returns the minimum number of dvec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single dvec4); for matrices, the
+ * number of columns; for array and struct, the sum of the dvec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ *
+ * Measuring double-precision vertex inputs as dvec4 is required because
+ * ARB_vertex_attrib_64bit states that these use the same number of locations
+ * as the single-precision version. That is, two consecutive dvec4s would be
+ * located in location "x" and location "x+1", not "x+2".
+ *
+ * In order to map vec4/dvec4 vertex inputs in the proper ATTRs,
+ * remap_vs_attrs() will take into account both the location and whether the
+ * type fits in one or two vec4 slots.
+ */
+extern "C" int
+type_size_dvec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, false);
+}
+
  src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
  {
     init();
@@ -679,18 +718,8 @@ vec4_instruction *
  vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                            src_reg src0, src_reg src1)
  {
-   vec4_instruction *inst;
-
-   if (devinfo->gen >= 6) {
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->conditional_mod = conditionalmod;
-   } else {
-      emit(CMP(dst, src0, src1, conditionalmod));
-
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-
+   vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+   inst->conditional_mod = conditionalmod;
     return inst;
  }
  
@@ -768,7 +797,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
        pull->mlen = 2;
        pull->header_size = 1;
     } else if (devinfo->gen >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
  
        grf_offset.type = offset_reg.type;
  
@@ -816,13 +845,14 @@ vec4_visitor::emit_uniformize(const src_reg &src)
  
  src_reg
  vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
-                             src_reg coordinate, src_reg sampler)
+                             src_reg coordinate, src_reg surface)
  {
     vec4_instruction *inst =
        new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
                                      dst_reg(this, glsl_type::uvec4_type));
     inst->base_mrf = 2;
-   inst->src[1] = sampler;
+   inst->src[1] = surface;
+   inst->src[2] = surface;
  
     int param_base;
  
@@ -877,7 +907,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
                             uint32_t constant_offset,
                             src_reg offset_value,
                             src_reg mcs,
-                           bool is_cube_array,
                             uint32_t surface,
                             src_reg surface_reg,
                             uint32_t sampler,
@@ -1065,10 +1094,10 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
     /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
      * spec requires layers.
      */
-   if (op == ir_txs && is_cube_array) {
-      emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                writemask(inst->dst, WRITEMASK_Z),
-                src_reg(inst->dst), brw_imm_d(6));
+   if (op == ir_txs && devinfo->gen < 7) {
+      /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+      emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+                  src_reg(inst->dst), brw_imm_d(1));
     }
  
     if (devinfo->gen == 6 && op == ir_tg4) {
@@ -1123,61 +1152,6 @@ vec4_visitor::gs_end_primitive()
     unreachable("not reached");
  }
  
-void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                                  dst_reg dst, src_reg surf_offset,
-                                  src_reg src0, src_reg src1)
-{
-   unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
-   src_reg src_payload(this, glsl_type::uint_type, mlen);
-   dst_reg payload(src_payload);
-   payload.writemask = WRITEMASK_X;
-
-   /* Set the atomic operation offset. */
-   emit(MOV(offset(payload, 0), surf_offset));
-   unsigned i = 1;
-
-   /* Set the atomic operation arguments. */
-   if (src0.file != BAD_FILE) {
-      emit(MOV(offset(payload, i), src0));
-      i++;
-   }
-
-   if (src1.file != BAD_FILE) {
-      emit(MOV(offset(payload, i), src1));
-      i++;
-   }
-
-   /* Emit the instruction.  Note that this maps to the normal SIMD8
-    * untyped atomic message on Ivy Bridge, but that's OK because
-    * unused channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
-                                 src_payload,
-                                 brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
-   inst->mlen = mlen;
-}
-
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
-                                        src_reg surf_offset)
-{
-   dst_reg offset(this, glsl_type::uint_type);
-   offset.writemask = WRITEMASK_X;
-
-   /* Set the surface read offset. */
-   emit(MOV(offset, surf_offset));
-
-   /* Emit the instruction.  Note that this maps to the normal SIMD8
-    * untyped surface read message, but that's OK because unused
-    * channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
-                                 src_reg(offset),
-                                 brw_imm_ud(surf_index), brw_imm_d(1));
-   inst->mlen = 1;
-}
-
  void
  vec4_visitor::emit_ndc_computation()
  {
@@ -1298,12 +1272,34 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
     assert(varying < VARYING_SLOT_MAX);
     assert(output_reg[varying].type == reg.type);
     current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
        return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   } else
        return NULL;
  }
  
+void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+      return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_generic_reg[varying][component]);
+      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+      reg.writemask =
+         brw_writemask_for_component_packing(num_comps, component);
+      emit(MOV(reg, src));
+   }
+}
+
  void
  vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
  {
@@ -1343,7 +1339,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
        /* No need to write to this slot */
        break;
     default:
-      emit_generic_urb_slot(reg, varying);
+      if (varying >= VARYING_SLOT_VAR0) {
+         for (int i = 0; i < 4; i++) {
+            emit_generic_urb_slot(reg, varying, i);
+         }
+      } else {
+         emit_generic_urb_slot(reg, varying);
+      }
        break;
     }
  }
@@ -1645,21 +1647,21 @@ vec4_visitor::move_grf_array_access_to_scratch()
  void
  vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                       dst_reg temp, src_reg orig_src,
-                                     int base_offset, src_reg indirect)
+                                      int base_offset, src_reg indirect)
  {
     int reg_offset = base_offset + orig_src.reg_offset;
     const unsigned index = prog_data->base.binding_table.pull_constants_start;
  
     src_reg offset;
     if (indirect.file != BAD_FILE) {
-      offset = src_reg(this, glsl_type::int_type);
+      offset = src_reg(this, glsl_type::uint_type);
  
        emit_before(block, inst, ADD(dst_reg(offset), indirect,
-                                   brw_imm_d(reg_offset * 16)));
+                                   brw_imm_ud(reg_offset * 16)));
     } else if (devinfo->gen >= 8) {
        /* Store the offset in a GRF so we can send-from-GRF. */
-      offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+      offset = src_reg(this, glsl_type::uint_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
     } else {
        offset = brw_imm_d(reg_offset * 16);
     }
@@ -1687,6 +1689,14 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
  void
  vec4_visitor::move_uniform_array_access_to_pull_constants()
  {
+   /* The Vulkan driver doesn't support pull constants other than UBOs so
+    * everything has to be pushed regardless.
+    */
+   if (stage_prog_data->pull_param == NULL) {
+      split_uniform_registers();
+      return;
+   }
+
     int pull_constant_loc[this->uniforms];
     memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
  
@@ -1783,6 +1793,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
     this->current_annotation = NULL;
     memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
  
+   memset(this->output_generic_num_components, 0,
+          sizeof(this->output_generic_num_components));
+
     this->virtual_grf_start = NULL;
     this->virtual_grf_end = NULL;
     this->live_intervals = NULL;