i965: Make uniform offsets be in terms of bytes
author: Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 11 Nov 2015 05:12:47 +0000 (21:12 -0800)
committer: Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 8 Dec 2015 05:51:23 +0000 (21:51 -0800)
This commit makes uniform offsets be in terms of bytes starting with
nir_lower_io.  They get converted to be in terms of vec4s or floats when we
cram them in the UNIFORM register file but reladdr remains in terms of
bytes all the way down to the point where we lower it to a pull constant
load.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index de5c17a78ede445c93e26200da8d6cf4a0cb9959..5e8acec2759958bf14c2f9fe4f16c9421b980100 100644 (file)
@@ -2052,11 +2052,9 @@ fs_visitor::demote_pull_constants()
 
          /* Generate a pull load into dst. */
          if (inst->src[i].reladdr) {
-            fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D);
-            ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4));
             VARYING_PULL_CONSTANT_LOAD(ibld, dst,
                                        brw_imm_ud(index),
-                                       indirect,
+                                       *inst->src[i].reladdr,
                                        pull_index * 4);
             inst->src[i].reladdr = NULL;
             inst->src[i].stride = 1;
index c34f85662b20570a8b193531a9011eec41bc74a0..a00fd0e186e9b2f3895fb3b28dc6b03a5fc98906 100644 (file)
@@ -173,7 +173,7 @@ fs_visitor::nir_setup_uniforms()
    if (dispatch_width != 8)
       return;
 
-   uniforms = nir->num_uniforms;
+   uniforms = nir->num_uniforms / 4;
 
    nir_foreach_variable(var, &nir->uniforms) {
       /* UBO's and atomics don't take up space in the uniform file */
@@ -181,7 +181,7 @@ fs_visitor::nir_setup_uniforms()
          continue;
 
       if (type_size_scalar(var->type) > 0)
-         param_size[var->data.driver_location] = type_size_scalar(var->type);
+         param_size[var->data.driver_location / 4] = type_size_scalar(var->type);
    }
 }
 
@@ -1134,7 +1134,7 @@ fs_visitor::get_nir_dest(nir_dest dest)
 fs_reg
 fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
 {
-   fs_reg image(UNIFORM, deref->var->data.driver_location,
+   fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
                 BRW_REGISTER_TYPE_UD);
 
    for (const nir_deref *tail = &deref->deref; tail->child;
@@ -1165,7 +1165,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
             bld.MOV(tmp, get_nir_src(deref_array->indirect));
          }
 
-         bld.MUL(tmp, tmp, brw_imm_ud(element_size));
+         bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
          if (image.reladdr)
             bld.ADD(*image.reladdr, *image.reladdr, tmp);
          else
@@ -2300,8 +2300,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       has_indirect = true;
       /* fallthrough */
    case nir_intrinsic_load_uniform: {
-      fs_reg uniform_reg(UNIFORM, instr->const_index[0]);
-      uniform_reg.reg_offset = instr->const_index[1];
+      /* Offsets are in bytes but they should always be multiples of 4 */
+      assert(instr->const_index[0] % 4 == 0);
+      assert(instr->const_index[1] % 4 == 0);
+
+      fs_reg uniform_reg(UNIFORM, instr->const_index[0] / 4);
+      uniform_reg.reg_offset = instr->const_index[1] / 4;
 
       for (unsigned j = 0; j < instr->num_components; j++) {
          fs_reg src = offset(retype(uniform_reg, dest.type), bld, j);
index 5182bcaabde975fbf831f71da66cbe5edd2f7698..d62470379ee11cde5d9619a1c4d869c431d992a1 100644 (file)
@@ -166,6 +166,32 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    }
 }
 
+static int
+type_size_scalar_bytes(const struct glsl_type *type)
+{
+   return type_size_scalar(type) * 4;
+}
+
+static int
+type_size_vec4_bytes(const struct glsl_type *type)
+{
+   return type_size_vec4(type) * 16;
+}
+
+static void
+brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
+{
+   if (is_scalar) {
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                               type_size_scalar_bytes);
+      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes);
+   } else {
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                               type_size_vec4_bytes);
+      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes);
+   }
+}
+
 #include "util/debug.h"
 
 static bool
@@ -295,9 +321,7 @@ brw_lower_nir(nir_shader *nir,
 
    OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
    OPT_V(brw_nir_lower_outputs, is_scalar);
-   nir_assign_var_locations(&nir->uniforms,
-                            &nir->num_uniforms,
-                            is_scalar ? type_size_scalar : type_size_vec4);
+   OPT_V(brw_nir_lower_uniforms, is_scalar);
    OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 
    if (shader_prog) {
index 155a9c670736f404200b9d6e4dd992f3a273e14f..0849ca40046961abb5bc48420a427a6dbf4db5e3 100644 (file)
@@ -34,8 +34,7 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
    const nir_state_slot *const slots = var->state_slots;
    assert(var->state_slots != NULL);
 
-   unsigned uniform_index = is_scalar ? var->data.driver_location :
-                                        var->data.driver_location * 4;
+   unsigned uniform_index = var->data.driver_location / 4;
    for (unsigned int i = 0; i < var->num_state_slots; i++) {
       /* This state reference has already been setup by ir_to_mesa, but we'll
        * get the same index back here.
@@ -81,8 +80,7 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
     * order we'd walk the type, so walk the list of storage and find anything
     * with our name, or the prefix of a component that starts with our name.
     */
-   unsigned uniform_index = is_scalar ? var->data.driver_location :
-                                        var->data.driver_location * 4;
+   unsigned uniform_index = var->data.driver_location / 4;
    for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 
index d4eda4a3277052820967915a93c8b5bc64f7e10e..50570cd7703535f18b7ca150c4d3374a72e359b5 100644 (file)
@@ -117,7 +117,7 @@ vec4_visitor::nir_setup_system_values()
 void
 vec4_visitor::nir_setup_uniforms()
 {
-   uniforms = nir->num_uniforms;
+   uniforms = nir->num_uniforms / 16;
 
    nir_foreach_variable(var, &nir->uniforms) {
       /* UBO's and atomics don't take up space in the uniform file */
@@ -125,7 +125,7 @@ vec4_visitor::nir_setup_uniforms()
          continue;
 
       if (type_size_vec4(var->type) > 0)
-         uniform_size[var->data.driver_location] = type_size_vec4(var->type);
+         uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type);
    }
 }
 
@@ -677,10 +677,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       has_indirect = true;
       /* fallthrough */
    case nir_intrinsic_load_uniform: {
+      /* Offsets are in bytes but they should always be multiples of 16 */
+      assert(instr->const_index[0] % 16 == 0);
+      assert(instr->const_index[1] % 16 == 0);
+
       dest = get_nir_dest(instr->dest);
 
-      src = src_reg(dst_reg(UNIFORM, instr->const_index[0]));
-      src.reg_offset = instr->const_index[1];
+      src = src_reg(dst_reg(UNIFORM, instr->const_index[0] / 16));
+      src.reg_offset = instr->const_index[1] / 16;
 
       if (has_indirect) {
          src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
index 02cd99a68283ea1e40c63d5999d28aca3a66537c..443d0eb5387a1fb3dfe32fee57c6ab4254cfe21f 100644 (file)
@@ -1472,8 +1472,7 @@ vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
       src_reg index = src_reg(this, glsl_type::int_type);
 
       emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   brw_imm_d(reg_offset)));
-      emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16)));
+                                   brw_imm_d(reg_offset * 16)));
 
       return index;
    } else if (devinfo->gen >= 8) {