From b16b3c8703f198ca0f025b730d582600df79c19c Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sun, 20 Oct 2013 11:38:17 -0700
Subject: [PATCH] i965/fs: Perform CSE on CMP(N) instructions.

Optimizes

cmp.ge.f0(8) null g45<8,8,1>F 0F
(+f0) sel(8) g50<1>F g40<8,8,1>F g10<8,8,1>F
cmp.ge.f0(8) null g45<8,8,1>F 0F
(+f0) sel(8) g51<1>F g41<8,8,1>F g11<8,8,1>F
cmp.ge.f0(8) null g45<8,8,1>F 0F
(+f0) sel(8) g52<1>F g42<8,8,1>F g12<8,8,1>F
cmp.ge.f0(8) null g45<8,8,1>F 0F
(+f0) sel(8) g53<1>F g43<8,8,1>F g13<8,8,1>F

into

cmp.ge.f0(8) null g45<8,8,1>F 0F
(+f0) sel(8) g50<1>F g40<8,8,1>F g10<8,8,1>F
(+f0) sel(8) g51<1>F g41<8,8,1>F g11<8,8,1>F
(+f0) sel(8) g52<1>F g42<8,8,1>F g12<8,8,1>F
(+f0) sel(8) g53<1>F g43<8,8,1>F g13<8,8,1>F

total instructions in shared programs: 1644938 -> 1638181 (-0.41%)
instructions in affected programs: 574955 -> 568198 (-1.18%)

Two more 16-wide programs (in L4D2). Some large (-9%) decreases in
instruction count in some of Valve's Source Engine games. No
regressions.

Reviewed-by: Eric Anholt
Reviewed-by: Paul Berry
---
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 39 ++++++++++++++++++------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3f59339f716..47938744cf6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -54,6 +54,8 @@ is_expression(const fs_inst *const inst)
    case BRW_OPCODE_SHR:
    case BRW_OPCODE_SHL:
    case BRW_OPCODE_ASR:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
    case BRW_OPCODE_FRC:
@@ -102,6 +104,18 @@ operands_match(enum opcode op, fs_reg *xs, fs_reg *ys)
    }
 }
 
+static bool
+instructions_match(fs_inst *a, fs_inst *b)
+{
+   return a->opcode == b->opcode &&
+          a->saturate == b->saturate &&
+          a->predicate == b->predicate &&
+          a->predicate_inverse == b->predicate_inverse &&
+          a->conditional_mod == b->conditional_mod &&
+          a->dst.type == b->dst.type &&
+          operands_match(a->opcode, a->src, b->src);
+}
+
 bool
 fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 {
@@ -115,11 +129,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
         inst = (fs_inst *) inst->next) {
 
       /* Skip some cases. */
-      if (is_expression(inst) &&
-          !inst->predicate &&
-          !inst->is_partial_write() &&
-          !inst->conditional_mod &&
-          inst->dst.file != HW_REG)
+      if (is_expression(inst) && !inst->is_partial_write())
       {
          bool found = false;
 
@@ -128,11 +138,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
             entry = (aeb_entry *) entry_node;
 
             /* Match current instruction's expression against those in AEB. */
-            if (inst->opcode == entry->generator->opcode &&
-                inst->saturate == entry->generator->saturate &&
-                inst->dst.type == entry->generator->dst.type &&
-                operands_match(inst->opcode, entry->generator->src, inst->src)) {
-
+            if (instructions_match(inst, entry->generator)) {
                found = true;
                progress = true;
                break;
@@ -208,6 +214,19 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
       foreach_list_safe(entry_node, aeb) {
          aeb_entry *entry = (aeb_entry *)entry_node;
 
+         /* Kill all AEB entries that write a different value to or read from
+          * the flag register if we just wrote it.
+          */
+         if (inst->writes_flag()) {
+            if (entry->generator->reads_flag() ||
+                (entry->generator->writes_flag() &&
+                 !instructions_match(inst, entry->generator))) {
+               entry->remove();
+               ralloc_free(entry);
+               continue;
+            }
+         }
+
          for (int i = 0; i < 3; i++) {
             fs_reg *src_reg = &entry->generator->src[i];
 
-- 
2.30.2