i965/fs: Fix stride field for uniforms.
author: Francisco Jerez <currojerez@riseup.net>
Mon, 13 Jul 2015 12:29:39 +0000 (15:29 +0300)
committer: Francisco Jerez <currojerez@riseup.net>
Tue, 21 Jul 2015 14:54:00 +0000 (17:54 +0300)
This fixes essentially the same problem as for immediates.  Registers
of the UNIFORM file are typically accessed according to the formula:

 read_uniform(r, channel_index, array_index) =
    read_element(r, channel_index * 0 + array_index * 1)

Which matches the general direct addressing formula for stride=0:

 read_direct(r, channel_index, array_index) =
    read_element(r, channel_index * stride +
                    array_index * max{1, stride * width})

In either case if reladdr is present the access will be according to
the composition of two register regions, the first one determining the
per-channel array_index used for the second, like:

 read_indirect(r, channel_index, array_index) =
    read_direct(r, channel_index,
                read(r.reladdr, channel_index, array_index))

where:
 read(r, channel_index, array_index) = if r.reladdr == NULL
    then read_direct(r, channel_index, array_index)
    else read_indirect(r, channel_index, array_index)

In conclusion we can handle uniforms consistently with the other
register files if we set stride to zero.  After lowering to a GRF
using VARYING_PULL_CONSTANT_LOAD in demote_pull_constants(), the
stride of the source is set to one again because the result of
VARYING_PULL_CONSTANT_LOAD is generally non-uniform.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
src/mesa/drivers/dri/i965/brw_fs.cpp

index f7fdb174efbec7bc6e41041166fc07d9a54cdd43..9af153773610a04c6f5dd8bbceedca98f8df3688 100644 (file)
@@ -823,6 +823,7 @@ fs_reg::fs_reg(enum register_file file, int reg)
    this->file = file;
    this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
+   this->stride = (file == UNIFORM ? 0 : 1);
 }
 
 /** Fixed HW reg constructor. */
@@ -832,6 +833,7 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
    this->file = file;
    this->reg = reg;
    this->type = type;
+   this->stride = (file == UNIFORM ? 0 : 1);
 }
 
 /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
@@ -1272,6 +1274,7 @@ fs_visitor::assign_curb_setup()
                                                  constant_nr / 8,
                                                  constant_nr % 8);
 
+            assert(inst->src[i].stride == 0);
            inst->src[i].file = HW_REG;
            inst->src[i].fixed_hw_reg = byte_offset(
                retype(brw_reg, inst->src[i].type),
@@ -1822,6 +1825,8 @@ fs_visitor::demote_pull_constants()
          fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
          fs_reg dst = vgrf(glsl_type::float_type);
 
+         assert(inst->src[i].stride == 0);
+
          /* Generate a pull load into dst. */
          if (inst->src[i].reladdr) {
             VARYING_PULL_CONSTANT_LOAD(ibld, dst,
@@ -1829,6 +1834,7 @@ fs_visitor::demote_pull_constants()
                                        *inst->src[i].reladdr,
                                        pull_index);
             inst->src[i].reladdr = NULL;
+            inst->src[i].stride = 1;
          } else {
             fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
             ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,