i965/fs: Fetch one cacheline of pull constants at a time.

author Francisco Jerez <currojerez@riseup.net>
Fri, 9 Dec 2016 03:18:00 +0000 (19:18 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Thu, 15 Dec 2016 00:50:27 +0000 (16:50 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 977fd8c35f592ea32591355bce06d7fd410daeff..671b44bd50d08335f2d17a4b743d8fa463c5e085 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2111,25 +2111,22 @@ fs_visitor::lower_constant_loads()
           if (pull_index == -1)
             continue;
  
-         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
-         fs_reg dst;
-
-         if (type_sz(inst->src[i].type) <= 4)
-            dst = vgrf(glsl_type::float_type);
-         else
-            dst = vgrf(glsl_type::double_type);
-
           assert(inst->src[i].stride == 0);
  
-         const fs_builder ubld = ibld.exec_all().group(4, 0);
-         struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+         const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+         const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
+         const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+         const unsigned base = pull_index * 4;
+
           ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                   dst, brw_imm_ud(index), offset);
+                   dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1)));
  
           /* Rewrite the instruction to use the temporary VGRF. */
           inst->src[i].file = VGRF;
           inst->src[i].nr = dst.nr;
-         inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
+         inst->src[i].offset = (base & (block_sz - 1)) +
+                               inst->src[i].offset % 4;
  
           brw_mark_surface_used(prog_data, index);
        }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 7df74232457fcfaec5296555ffac18d89ed88109..9f2729a9b6ef335ffa3efd1b4cd52d7da1eca6ed 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4059,21 +4059,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
            * and we have to split it if necessary.
            */
           const unsigned type_size = type_sz(dest.type);
-         const fs_builder ubld = bld.exec_all().group(4, 0);
-         const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+         const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+         const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
+         const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
  
           for (unsigned c = 0; c < instr->num_components;) {
              const unsigned base = const_offset->u32[0] + c * type_size;
-
-            /* Number of usable components in the next 16B-aligned load */
+            /* Number of usable components in the next block-aligned load. */
              const unsigned count = MIN2(instr->num_components - c,
-                                        (16 - base % 16) / type_size);
+                                        (block_sz - base % block_sz) / type_size);
  
              ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      packed_consts, surf_index, brw_imm_ud(base & ~15));
+                      packed_consts, surf_index,
+                      brw_imm_ud(base & ~(block_sz - 1)));
  
              const fs_reg consts =
-               retype(byte_offset(packed_consts, base & 15), dest.type);
+               retype(byte_offset(packed_consts, base & (block_sz - 1)),
+                      dest.type);
  
              for (unsigned d = 0; d < count; d++)
                 bld.MOV(offset(dest, bld, c + d), component(consts, d));
author	Francisco Jerez <currojerez@riseup.net>
	Fri, 9 Dec 2016 03:18:00 +0000 (19:18 -0800)
committer	Francisco Jerez <currojerez@riseup.net>
	Thu, 15 Dec 2016 00:50:27 +0000 (16:50 -0800)
src/mesa/drivers/dri/i965/brw_fs.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch | blob | history