i965: support constant gather offsets larger than 4 bits
author Ilia Mirkin <imirkin@alum.mit.edu>
Mon, 28 Nov 2016 02:05:34 +0000 (21:05 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 29 Nov 2016 15:44:01 +0000 (07:44 -0800)
Offsets that don't fit into 4 bits need to force gather_po to be
selected. Adjust the logic so that this happens.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_shader.h
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index baa973c31a0858d6513036f29e603fcd603a8e62..855266f78ac1f168e25e24b28c460fa91284bb21 100644 (file)
@@ -4485,8 +4485,12 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
       case nir_tex_src_offset: {
          nir_const_value *const_offset =
             nir_src_as_const_value(instr->src[i].src);
-         if (const_offset) {
-            header_bits |= brw_texture_offset(const_offset->i32, 3);
+         unsigned offset_bits = 0;
+         if (const_offset &&
+             brw_texture_offset(const_offset->i32,
+                                nir_tex_instr_src_size(instr, i),
+                                &offset_bits)) {
+            header_bits |= offset_bits;
          } else {
             srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
                retype(src, BRW_REGISTER_TYPE_D);
index bee4d8875c570c0a9b6a33affe3324a81c943585..25f745d23a6578b8694efe778af8cb658483662c 100644 (file)
@@ -127,10 +127,15 @@ brw_math_function(enum opcode op)
    }
 }
 
-uint32_t
-brw_texture_offset(int *offsets, unsigned num_components)
+bool
+brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits)
 {
-   if (!offsets) return 0;  /* nonconstant offset; caller will handle it. */
+   if (!offsets) return false;  /* nonconstant offset; caller will handle it. */
+
+   /* offset out of bounds; caller will handle it. */
+   for (unsigned i = 0; i < num_components; i++)
+      if (offsets[i] > 7 || offsets[i] < -8)
+         return false;
 
    /* Combine all three offsets into a single unsigned dword:
     *
@@ -138,12 +143,12 @@ brw_texture_offset(int *offsets, unsigned num_components)
     *    bits  7:4 - V Offset (Y component)
     *    bits  3:0 - R Offset (Z component)
     */
-   unsigned offset_bits = 0;
+   *offset_bits = 0;
    for (unsigned i = 0; i < num_components; i++) {
       const unsigned shift = 4 * (2 - i);
-      offset_bits |= (offsets[i] << shift) & (0xF << shift);
+      *offset_bits |= (offsets[i] << shift) & (0xF << shift);
    }
-   return offset_bits;
+   return true;
 }
 
 const char *
index 12113b97e104e7f4627d2f598c3e98599030bb3c..e8b34d59ab60ceeb160bbe8793c295e2fe8f0ccb 100644 (file)
@@ -215,7 +215,9 @@ public:
    virtual void invalidate_live_intervals() = 0;
 };
 
-uint32_t brw_texture_offset(int *offsets, unsigned num_components);
+bool brw_texture_offset(int *offsets,
+                        unsigned num_components,
+                        uint32_t *offset_bits);
 
 void brw_setup_image_uniform_values(gl_shader_stage stage,
                                     struct brw_stage_prog_data *stage_prog_data,
index 0d54907cadf021a22567f881ee63be4b8c91ec4f..dde07d0d141d539be44cb3d098a1597424e3938e 100644 (file)
@@ -1880,9 +1880,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
       case nir_tex_src_offset: {
          nir_const_value *const_offset =
             nir_src_as_const_value(instr->src[i].src);
-         if (const_offset) {
-            constant_offset = brw_texture_offset(const_offset->i32, 3);
-         } else {
+         if (!const_offset ||
+             !brw_texture_offset(const_offset->i32,
+                                 nir_tex_instr_src_size(instr, i),
+                                 &constant_offset)) {
             offset_value =
                get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
          }