From 3da9f708d4f1375d674fae4d6c6eb06e4c8d9613 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 20 Feb 2015 20:25:04 +0200 Subject: [PATCH] i965: Perform basic optimizations on the FIND_LIVE_CHANNEL opcode. v2: Save some CPU cycles by doing 'return progress' rather than 'depth++' in the discard jump special case. Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 48 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 41 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 1 + 6 files changed, 93 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 182c79fc83c..1ca7ca6caed 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2979,6 +2979,53 @@ fs_visitor::compute_to_mrf() return progress; } +/** + * Replace FIND_LIVE_CHANNEL instructions occurring outside any control flow + * with a MOV from fs_reg(0); returns true if any instruction was changed. + * We could probably do better with some form of divergence analysis. + */ +bool +fs_visitor::eliminate_find_live_channel() +{ + bool progress = false; + unsigned depth = 0; /* current IF/DO nesting depth */ + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + switch (inst->opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_DO: + depth++; /* entering a nested region */ + break; + + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_WHILE: + depth--; /* leaving a nested region */ + break; + + case FS_OPCODE_DISCARD_JUMP: + /* A discard jump can potentially make control flow non-uniform until + * the end of the program, so stop here and return progress so far. 
+ */ + return progress; + + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + if (depth == 0) { /* outside any IF/DO region */ + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = fs_reg(0); /* presumably channel 0 is live -- TODO confirm */ + inst->sources = 1; + inst->force_writemask_all = true; + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} + /** * Once we've generated code, try to convert normal FS_OPCODE_FB_WRITE * instructions to FS_OPCODE_REP_FB_WRITE. @@ -3920,6 +3967,7 @@ fs_visitor::optimize() OPT(opt_zero_samples); OPT(register_coalesce); OPT(compute_to_mrf); + OPT(eliminate_find_live_channel); OPT(compact_virtual_grfs); } while (progress); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0783754cba7..5f4ff15ba4d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -239,6 +239,7 @@ public: bool opt_register_renaming(); bool register_coalesce(); bool compute_to_mrf(); + bool eliminate_find_live_channel(); bool dead_code_eliminate(); bool remove_duplicate_mrf_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 8958e621802..43370cb5c2e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -89,6 +89,7 @@ is_expression(const fs_inst *const inst) case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: + case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: return true; case SHADER_OPCODE_RCP: diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 607129bc63f..a8d0e4acfc9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1131,6 +1131,46 @@ vec4_visitor::opt_register_coalesce() return progress; } +/** + * Replace FIND_LIVE_CHANNEL instructions occurring outside any control flow + * with a MOV from src_reg(0); returns true if any instruction was changed. + * We could probably do better with some form of divergence analysis. 
+ */ +bool +vec4_visitor::eliminate_find_live_channel() +{ + bool progress = false; /* set once any instruction is rewritten */ + unsigned depth = 0; /* current IF/DO nesting depth */ + + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + switch (inst->opcode) { + case BRW_OPCODE_IF: + case BRW_OPCODE_DO: + depth++; /* entering a nested region */ + break; + + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_WHILE: + depth--; /* leaving a nested region */ + break; + + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + if (depth == 0) { /* outside any IF/DO region */ + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = src_reg(0); /* presumably channel 0 is live -- TODO confirm */ + inst->force_writemask_all = true; + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} + /** * Splits virtual GRFs requesting more than one contiguous physical register. * @@ -1759,6 +1799,7 @@ vec4_visitor::run() OPT(opt_cse); OPT(opt_algebraic); OPT(opt_register_coalesce); + OPT(eliminate_find_live_channel); } while (progress); pass_num = 0; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 1f1f100788e..3f56c5cf03f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -209,6 +209,7 @@ public: bool opt_cse(); bool opt_algebraic(); bool opt_register_coalesce(); + bool eliminate_find_live_channel(); bool is_dep_ctrl_unsafe(const vec4_instruction *inst); void opt_set_dependency_control(); void opt_schedule_instructions(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 66b531c2909..9147c3cbb79 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -72,6 +72,7 @@ is_expression(const vec4_instruction *const inst) case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: case VEC4_OPCODE_UNPACK_UNIFORM: + case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: return true; case SHADER_OPCODE_RCP: -- 2.30.2