i965: Lower the GLSL ES 3.00 pack/unpack operations (v2)
authorChad Versace <chad.versace@linux.intel.com>
Sat, 1 Dec 2012 00:49:21 +0000 (16:49 -0800)
committerChad Versace <chad.versace@linux.intel.com>
Fri, 25 Jan 2013 05:24:10 +0000 (21:24 -0800)
On gen < 7, we fully lower all operations to arithmetic and bitwise
operations.

On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and
partially lower the Half2x16 operations.

v2:
  - Comment that scalarization is needed only for SOA code [for idr].
  - Replace switch-statement with if-statement [for idr].
  - Remove misplaced hunk from previous patch [found by idr].

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Tuner <mattst88@gmail.com>
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
src/mesa/drivers/dri/i965/brw_shader.cpp

index 7539d542f2ce403e6e2ed3541ba022dfee7ca9cb..1c02c87a11b19e1f9915b3c9c7dcb6463497b318 100644 (file)
@@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    return true;
 }
 
+static void
+brw_lower_packing_builtins(struct brw_context *brw,
+                           gl_shader_type shader_type,
+                           exec_list *ir)
+{
+   int ops = LOWER_PACK_SNORM_2x16
+           | LOWER_UNPACK_SNORM_2x16
+           | LOWER_PACK_UNORM_2x16
+           | LOWER_UNPACK_UNORM_2x16;
+
+   if (brw->intel.gen >= 7) {
+      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
+       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
+       * lowering is needed. For SOA code, the Half2x16 ops must be
+       * scalarized.
+       */
+      if (shader_type == MESA_SHADER_FRAGMENT) {
+         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+             |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+      }
+   } else {
+      ops |= LOWER_PACK_HALF_2x16
+          |  LOWER_UNPACK_HALF_2x16;
+   }
+
+   lower_packing_builtins(ir, ops);
+}
+
 GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
@@ -113,6 +141,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       shader->ir = new(shader) exec_list;
       clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
+      /* lower_packing_builtins() inserts arithmetic instructions, so it
+       * must precede lower_instructions().
+       */
+      brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
       do_mat_op_to_vec(shader->ir);
       lower_instructions(shader->ir,
                         MOD_TO_FRACT |