From ee0ed52d69b3c3c10e344acae7ca901b4e9a03fa Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Fri, 30 Nov 2012 16:49:21 -0800 Subject: [PATCH] i965: Lower the GLSL ES 3.00 pack/unpack operations (v2) On gen < 7, we fully lower all operations to arithmetic and bitwise operations. On gen >= 7, we fully lower the Snorm2x16 and Unorm2x16 operations, and partially lower the Half2x16 operations. v2: - Comment that scalarization is needed only for SOA code [for idr]. - Replace switch-statement with if-statement [for idr]. - Remove misplaced hunk from previous patch [found by idr]. Reviewed-by: Ian Romanick Reviewed-by: Matt Turner Signed-off-by: Chad Versace --- src/mesa/drivers/dri/i965/brw_shader.cpp | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 7539d542f2c..1c02c87a11b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -75,6 +75,34 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) return true; } +static void +brw_lower_packing_builtins(struct brw_context *brw, + gl_shader_type shader_type, + exec_list *ir) +{ + int ops = LOWER_PACK_SNORM_2x16 + | LOWER_UNPACK_SNORM_2x16 + | LOWER_PACK_UNORM_2x16 + | LOWER_UNPACK_UNORM_2x16; + + if (brw->intel.gen >= 7) { + /* Gen7 introduced the f32to16 and f16to32 instructions, which can be + * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no + * lowering is needed. For SOA code, the Half2x16 ops must be + * scalarized. 
+ */ + if (shader_type == MESA_SHADER_FRAGMENT) { + ops |= LOWER_PACK_HALF_2x16_TO_SPLIT + | LOWER_UNPACK_HALF_2x16_TO_SPLIT; + } + } else { + ops |= LOWER_PACK_HALF_2x16 + | LOWER_UNPACK_HALF_2x16; + } + + lower_packing_builtins(ir, ops); +} + GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { @@ -113,6 +141,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) shader->ir = new(shader) exec_list; clone_ir_list(mem_ctx, shader->ir, shader->base.ir); + /* lower_packing_builtins() inserts arithmetic instructions, so it + * must precede lower_instructions(). + */ + brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir); do_mat_op_to_vec(shader->ir); lower_instructions(shader->ir, MOD_TO_FRACT | -- 2.30.2