i965: Apply VS attribute workarounds in NIR.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 14 Jan 2016 04:33:15 +0000 (20:33 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Wed, 10 Feb 2016 01:01:45 +0000 (17:01 -0800)
This patch re-implements the pre-Haswell VS attribute workarounds.
Instead of emitting shader code in the vec4 backend, we now simply
call a NIR pass to emit the necessary code.

This simplifies the vec4 backend.  Beyond deleting code, it removes
the primary use of ATTR as a destination.  It also eliminates the
requirement that the vec4 VS backend express the ATTR file in terms
of VERT_ATTRIB_* locations, giving us a bit more flexibility.

This approach is a little different: rather than munging the attributes
at the top, we emit code to fix them up when they're accessed.  However,
we run the optimizer afterwards, so CSE should eliminate the redundant
math.  It may even be able to fuse it with other calculations based on
the input value.

shader-db does not handle non-default NOS settings, so I have no
statistics about this patch.

Note that the scalar backend does not implement VS attribute
workarounds, as they are unnecessary on hardware which allows SIMD8 VS.

v2: Do one multiply for FIXED rescaling and select components from
    either the original or scaled copy, rather than multiplying each
    component separately (suggested by Matt Turner).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_nir.h
src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp

index caabb0decfb2706a89a615b20da74c9862e225e0..300c13909d404f12910f0851716908983346408f 100644 (file)
@@ -43,6 +43,7 @@ i965_compiler_FILES = \
        brw_nir.h \
        brw_nir.c \
        brw_nir_analyze_boolean_resolves.c \
+       brw_nir_attribute_workarounds.c \
        brw_nir_opt_peephole_ffma.c \
        brw_nir_uniforms.cpp \
        brw_packed_float.c \
index 46b5116357917cffa2a607789ecc0ad917f7f4ab..41059b3227e53ab74d0ee8d28fb0cd83493f57b7 100644 (file)
@@ -205,7 +205,9 @@ remap_patch_urb_offsets(nir_block *block, void *closure)
 static void
 brw_nir_lower_inputs(nir_shader *nir,
                      const struct brw_device_info *devinfo,
-                     bool is_scalar)
+                     bool is_scalar,
+                     bool use_legacy_snorm_formula,
+                     const uint8_t *vs_attrib_wa_flags)
 {
    switch (nir->stage) {
    case MESA_SHADER_VERTEX:
@@ -225,6 +227,9 @@ brw_nir_lower_inputs(nir_shader *nir,
 
       add_const_offset_to_base(nir, nir_var_shader_in);
 
+      brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
+                                          vs_attrib_wa_flags);
+
       if (is_scalar) {
          /* Finally, translate VERT_ATTRIB_* values into the actual registers.
           *
@@ -501,12 +506,15 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
 nir_shader *
 brw_nir_lower_io(nir_shader *nir,
                  const struct brw_device_info *devinfo,
-                 bool is_scalar)
+                 bool is_scalar,
+                 bool use_legacy_snorm_formula,
+                 const uint8_t *vs_attrib_wa_flags)
 {
    bool progress; /* Written by OPT and OPT_V */
    (void)progress;
 
-   OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
+   OPT_V(brw_nir_lower_inputs, devinfo, is_scalar,
+         use_legacy_snorm_formula, vs_attrib_wa_flags);
    OPT_V(brw_nir_lower_outputs, devinfo, is_scalar);
    OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 
@@ -617,9 +625,10 @@ brw_create_nir(struct brw_context *brw,
       OPT_V(nir_lower_atomics, shader_prog);
    }
 
-   if (nir->stage != MESA_SHADER_TESS_CTRL &&
+   if (nir->stage != MESA_SHADER_VERTEX &&
+       nir->stage != MESA_SHADER_TESS_CTRL &&
        nir->stage != MESA_SHADER_TESS_EVAL) {
-      nir = brw_nir_lower_io(nir, devinfo, is_scalar);
+      nir = brw_nir_lower_io(nir, devinfo, is_scalar, false, NULL);
    }
 
    return nir;
index 079d8b25174e37d4ca45a90c50c339f152466d41..9a90e36964b7e495660b4be151c2f2f78251bf04 100644 (file)
@@ -84,11 +84,16 @@ nir_shader *brw_create_nir(struct brw_context *brw,
 nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar);
 nir_shader *brw_nir_lower_io(nir_shader *nir,
                             const struct brw_device_info *devinfo,
-                            bool is_scalar);
+                            bool is_scalar,
+                            bool use_legacy_snorm_formula,
+                            const uint8_t *vs_attrib_wa_flags);
 nir_shader *brw_postprocess_nir(nir_shader *nir,
                                 const struct brw_device_info *devinfo,
                                 bool is_scalar);
 
+bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
+                                         bool use_legacy_snorm_formula,
+                                         const uint8_t *attrib_wa_flags);
 
 nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
                                       const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
new file mode 100644 (file)
index 0000000..9c65e54
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+#include "brw_vs.h"
+
+/**
+ * Prior to Haswell, the hardware can't natively support GL_FIXED or
+ * 2_10_10_10_REV vertex formats.  This pass inserts extra shader code
+ * to produce the correct values.
+ */
+
+struct attr_wa_state { /* State shared between the pass driver and apply_attr_wa_block(). */
+   nir_builder builder; /* re-initialized for every function impl that is walked */
+   bool impl_progress; /* set once a load_input in the current impl has been rewritten */
+   bool use_legacy_snorm_formula; /* when false, signed normalization takes the ES 3.0 path */
+   const uint8_t *wa_flags; /* BRW_ATTRIB_WA_* bits, indexed by a load_input's const_index[0] */
+};
+
+static bool
+apply_attr_wa_block(nir_block *block, void *void_state)
+{ /* Per-block callback: patch every load_input whose attribute has workaround flags. */
+   struct attr_wa_state *state = void_state; /* closure handed to nir_foreach_block() */
+   nir_builder *b = &state->builder;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (intrin->intrinsic != nir_intrinsic_load_input)
+         continue; /* only input loads are candidates */
+
+      uint8_t wa_flags = state->wa_flags[intrin->const_index[0]];
+      if (wa_flags == 0)
+         continue; /* this input needs no workaround */
+
+      b->cursor = nir_after_instr(instr); /* fix-up code is emitted right after the load */
+
+      nir_ssa_def *val = &intrin->dest.ssa; /* running value; each step below may replace it */
+
+      /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes come in
+       * as floating point conversions of the integer values; the masked flag
+       * bits give the number of leading components to multiply by 1/65536. */
+      if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
+         nir_ssa_def *scaled =
+            nir_fmul(b, val, nir_imm_float(b, 1.0f / 65536.0f));
+         nir_ssa_def *comps[4];
+         for (int i = 0; i < val->num_components; i++) {
+            bool rescale = i < (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK);
+            comps[i] = nir_channel(b, rescale ? scaled : val, i);
+         }
+         val = nir_vec(b, comps, val->num_components);
+      }
+
+      /* Do sign recovery for 2101010 formats if required. */
+      if (wa_flags & BRW_ATTRIB_WA_SIGN) {
+         /* sign recovery shift: <22, 22, 22, 30>, i.e. 32 minus <10, 10, 10, 2> */
+         nir_ssa_def *shift = nir_imm_ivec4(b, 22, 22, 22, 30);
+         val = nir_ishr(b, nir_ishl(b, val, shift), shift);
+      }
+
+      /* Apply BGRA swizzle if required (exchanges components 0 and 2). */
+      if (wa_flags & BRW_ATTRIB_WA_BGRA) {
+         val = nir_swizzle(b, val, (unsigned[4]){2,1,0,3}, 4, true);
+      }
+
+      if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
+         /* ES 3.0 has different rules for converting signed normalized
+          * fixed-point numbers than desktop GL.
+          */
+         if ((wa_flags & BRW_ATTRIB_WA_SIGN) &&
+             !state->use_legacy_snorm_formula) {
+            /* According to equation 2.2 of the ES 3.0 specification,
+             * signed normalization conversion is done by:
+             *
+             * f = c / (2^(b-1)-1), with b = <10, 10, 10, 2>; the fmax
+             * below then limits the result to no less than -1.0. */
+            nir_ssa_def *es3_normalize_factor =
+               nir_imm_vec4(b, 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 9) - 1),
+                               1.0f / ((1 << 9) - 1), 1.0f / ((1 << 1) - 1));
+            val = nir_fmax(b,
+                           nir_fmul(b, nir_i2f(b, val), es3_normalize_factor),
+                           nir_imm_float(b, -1.0f));
+         } else {
+            /* The following equations are from the OpenGL 3.2 specification:
+             *
+             * 2.1 unsigned normalization
+             * f = c/(2^n-1)
+             *
+             * 2.2 signed normalization
+             * f = (2c+1)/(2^n-1)
+             *
+             * Both of these share a common divisor, which we handle by
+             * multiplying by 1 / (2^n - 1) for n = <10, 10, 10, 2>.
+             */
+            nir_ssa_def *normalize_factor =
+               nir_imm_vec4(b, 1.0f / ((1 << 10) - 1), 1.0f / ((1 << 10) - 1),
+                               1.0f / ((1 << 10) - 1), 1.0f / ((1 << 2)  - 1));
+
+            if (wa_flags & BRW_ATTRIB_WA_SIGN) {
+               /* For signed normalization, the numerator is 2c+1. */
+               nir_ssa_def *two = nir_imm_float(b, 2.0f);
+               nir_ssa_def *one = nir_imm_float(b, 1.0f);
+               val = nir_fadd(b, nir_fmul(b, nir_i2f(b, val), two), one);
+            } else {
+               /* For unsigned normalization, the numerator is just c. */
+               val = nir_u2f(b, val);
+            }
+            val = nir_fmul(b, val, normalize_factor);
+         }
+      }
+
+      if (wa_flags & BRW_ATTRIB_WA_SCALE) { /* integer-to-float conversion only, no divisor */
+         val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f(b, val)
+                                               : nir_u2f(b, val);
+      }
+
+      nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, nir_src_for_ssa(val),
+                                     val->parent_instr); /* NOTE(review): presumably leaves the fix-up chain's own reads of the load untouched - confirm */
+      state->impl_progress = true; /* checked by the driver after the block walk */
+   }
+
+   return true;
+}
+
+bool
+brw_nir_apply_attribute_workarounds(nir_shader *shader, /* shader whose input loads are patched in place */
+                                    bool use_legacy_snorm_formula, /* false selects the ES 3.0 signed-normalize formula */
+                                    const uint8_t *attrib_wa_flags) /* BRW_ATTRIB_WA_* bits per input, read by the block callback */
+{ /* Returns true if at least one load_input in any function impl was rewritten. */
+   bool progress = false;
+   struct attr_wa_state state = {
+      .use_legacy_snorm_formula = use_legacy_snorm_formula,
+      .wa_flags = attrib_wa_flags,
+   };
+
+   nir_foreach_function(shader, func) {
+      if (!func->impl)
+         continue; /* no implementation to walk */
+
+      nir_builder_init(&state.builder, func->impl);
+      state.impl_progress = false; /* progress is tracked per impl */
+
+      nir_foreach_block(func->impl, apply_attr_wa_block, &state);
+
+      if (state.impl_progress) { /* the callback rewrote something in this impl */
+         nir_metadata_preserve(func->impl, nir_metadata_block_index |
+                                           nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   return progress;
+}
index 6a6efa9aea281fc27d41370abf08b6efd3f4430e..8518622c0b6aa57af44211d1a7b9e5145b9ebb62 100644 (file)
@@ -1229,7 +1229,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
    nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
    nir->info.inputs_read = key->inputs_read;
    nir->info.patch_inputs_read = key->patch_inputs_read;
-   nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
+   nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL);
    nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
 
    brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
index e8bc2ec241f1b84d4b09e03041d9a72fc792deef..109080af9a6ef83ad8ae3bd5b0631975bf2c5197 100644 (file)
@@ -1992,6 +1992,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
    shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
                                       is_scalar);
+   shader = brw_nir_lower_io(shader, compiler->devinfo, is_scalar,
+                             use_legacy_snorm_formula,
+                             key->gl_attrib_wa_flags);
    shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
 
    const unsigned *assembly = NULL;
index 0d56356a0167b4cfcdb26b7dccd1623e0efbe45e..d8bb00f5d03d2d3c4e20ca875d4630933e12037d 100644 (file)
@@ -516,7 +516,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
    nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
    nir->info.outputs_written = key->outputs_written;
    nir->info.patch_outputs_written = key->patch_outputs_written;
-   nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
+   nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL);
    nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
 
    prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
index 1d6914902b391f21143cb3ff86ba90fc83a9b9f1..f3cfc8892d3fb4deaef6bb17129d0fad095620b5 100644 (file)
@@ -30,115 +30,6 @@ namespace brw {
 void
 vec4_vs_visitor::emit_prolog()
 {
-   dst_reg sign_recovery_shift;
-   dst_reg normalize_factor;
-   dst_reg es3_normalize_factor;
-
-   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
-      if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
-         uint8_t wa_flags = key->gl_attrib_wa_flags[i];
-         dst_reg reg(ATTR, i);
-         dst_reg reg_d = reg;
-         reg_d.type = BRW_REGISTER_TYPE_D;
-         dst_reg reg_ud = reg;
-         reg_ud.type = BRW_REGISTER_TYPE_UD;
-
-         /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
-          * come in as floating point conversions of the integer values.
-          */
-         if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
-            dst_reg dst = reg;
-            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
-            dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
-            emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f)));
-         }
-
-         /* Do sign recovery for 2101010 formats if required. */
-         if (wa_flags & BRW_ATTRIB_WA_SIGN) {
-            if (sign_recovery_shift.file == BAD_FILE) {
-               /* shift constant: <22,22,22,30> */
-               sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
-               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u)));
-               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u)));
-            }
-
-            emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
-            emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
-         }
-
-         /* Apply BGRA swizzle if required. */
-         if (wa_flags & BRW_ATTRIB_WA_BGRA) {
-            src_reg temp = src_reg(reg);
-            temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
-            emit(MOV(reg, temp));
-         }
-
-         if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
-            /* ES 3.0 has different rules for converting signed normalized
-             * fixed-point numbers than desktop GL.
-             */
-            if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
-               /* According to equation 2.2 of the ES 3.0 specification,
-                * signed normalization conversion is done by:
-                *
-                * f = c / (2^(b-1)-1)
-                */
-               if (es3_normalize_factor.file == BAD_FILE) {
-                  /* mul constant: 1 / (2^(b-1) - 1) */
-                  es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
-                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
-                           brw_imm_f(1.0f / ((1<<9) - 1))));
-                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
-                           brw_imm_f(1.0f / ((1<<1) - 1))));
-               }
-
-               dst_reg dst = reg;
-               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
-               emit(MOV(dst, src_reg(reg_d)));
-               emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
-               emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f));
-            } else {
-               /* The following equations are from the OpenGL 3.2 specification:
-                *
-                * 2.1 unsigned normalization
-                * f = c/(2^n-1)
-                *
-                * 2.2 signed normalization
-                * f = (2c+1)/(2^n-1)
-                *
-                * Both of these share a common divisor, which is represented by
-                * "normalize_factor" in the code below.
-                */
-               if (normalize_factor.file == BAD_FILE) {
-                  /* 1 / (2^b - 1) for b=<10,10,10,2> */
-                  normalize_factor = dst_reg(this, glsl_type::vec4_type);
-                  emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
-                           brw_imm_f(1.0f / ((1<<10) - 1))));
-                  emit(MOV(writemask(normalize_factor, WRITEMASK_W),
-                           brw_imm_f(1.0f / ((1<<2) - 1))));
-               }
-
-               dst_reg dst = reg;
-               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
-               emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
-
-               /* For signed normalization, we want the numerator to be 2c+1. */
-               if (wa_flags & BRW_ATTRIB_WA_SIGN) {
-                  emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f)));
-                  emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
-               }
-
-               emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
-            }
-         }
-
-         if (wa_flags & BRW_ATTRIB_WA_SCALE) {
-            dst_reg dst = reg;
-            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
-            emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
-         }
-      }
-   }
 }