From 89fe5190a256ee0939061c4c264e9156256d16e8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 14 Jun 2017 16:20:41 -0700 Subject: [PATCH] intel/compiler: Lower flrp32 on Gen11+ The LRP instruction is no more. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_compiler.c | 35 ++++++++++++++++--------- src/intel/compiler/brw_fs_builder.h | 2 +- src/intel/compiler/brw_fs_generator.cpp | 2 +- src/intel/compiler/brw_vec4_builder.h | 2 +- src/intel/compiler/brw_vec4_visitor.cpp | 2 +- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index bb9df5e7013..34be3b705fe 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -46,20 +46,28 @@ .use_interpolated_input_intrinsics = true, \ .vertex_id_zero_based = true +#define COMMON_SCALAR_OPTIONS \ + .lower_pack_half_2x16 = true, \ + .lower_pack_snorm_2x16 = true, \ + .lower_pack_snorm_4x8 = true, \ + .lower_pack_unorm_2x16 = true, \ + .lower_pack_unorm_4x8 = true, \ + .lower_unpack_half_2x16 = true, \ + .lower_unpack_snorm_2x16 = true, \ + .lower_unpack_snorm_4x8 = true, \ + .lower_unpack_unorm_2x16 = true, \ + .lower_unpack_unorm_4x8 = true, \ + .max_unroll_iterations = 32 + static const struct nir_shader_compiler_options scalar_nir_options = { COMMON_OPTIONS, - .lower_pack_half_2x16 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_snorm_4x8 = true, - .lower_pack_unorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_unpack_half_2x16 = true, - .lower_unpack_snorm_2x16 = true, - .lower_unpack_snorm_4x8 = true, - .lower_unpack_unorm_2x16 = true, - .lower_unpack_unorm_4x8 = true, - .vs_inputs_dual_locations = true, - .max_unroll_iterations = 32, + COMMON_SCALAR_OPTIONS, +}; + +static const struct nir_shader_compiler_options scalar_nir_options_gen11 = { + COMMON_OPTIONS, + COMMON_SCALAR_OPTIONS, + .lower_flrp32 = true, }; static const struct nir_shader_compiler_options vector_nir_options = { @@ -149,7 +157,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; if (is_scalar) { - compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options; + compiler->glsl_compiler_options[i].NirOptions = + devinfo->gen < 11 ? &scalar_nir_options : &scalar_nir_options_gen11; } else { compiler->glsl_compiler_options[i].NirOptions = devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6; diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 87394bc17b3..874272b7afd 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -540,7 +540,7 @@ namespace brw { LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) const { - if (shader->devinfo->gen >= 6) { + if (shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10) { /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so * we need to reorder the operands. */ diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 0dc0a695e4e..b59c09f46ec 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1826,7 +1826,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_LRP: - assert(devinfo->gen >= 6); + assert(devinfo->gen >= 6 && devinfo->gen <= 10); if (devinfo->gen < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_LRP(p, dst, src[0], src[1], src[2]); diff --git a/src/intel/compiler/brw_vec4_builder.h b/src/intel/compiler/brw_vec4_builder.h index 4c3efe8457b..5c880c19f52 100644 --- a/src/intel/compiler/brw_vec4_builder.h +++ b/src/intel/compiler/brw_vec4_builder.h @@ -501,7 +501,7 @@ namespace brw { LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) const { - if (shader->devinfo->gen >= 6) { + if (shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10) { /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so * we need to reorder the operands. */ diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index 53f6a5ed546..e683a8c51db 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -735,7 +735,7 @@ vec4_instruction * vec4_visitor::emit_lrp(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) { - if (devinfo->gen >= 6) { + if (devinfo->gen >= 6 && devinfo->gen <= 10) { /* Note that the instruction's argument order is reversed from GLSL * and the IR. */ -- 2.30.2