From 42ce13195b94d0d51ca8e7fa5eed07fde8f37988 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 30 Aug 2011 13:25:15 -0700
Subject: [PATCH] i965/vs: Add constant propagation to a few opcodes.

This differs from the FS in that we track constants in each
destination channel, and we have to look at all the swizzled source
channels.  Also, the instruction stream walk is done in an O(n) manner
instead of O(n^2).

Across shader-db, this reduces 8.0% of the instructions from 60.0% of
the vertex shaders, leaving us now behind the old backend by 11.1%
overall.
---
 src/mesa/drivers/dri/i965/Makefile.sources    |   1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  16 ++
 src/mesa/drivers/dri/i965/brw_vec4.h          |   3 +
 .../dri/i965/brw_vec4_copy_propagation.cpp    | 260 ++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |   1 +
 5 files changed, 281 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 6917286ff2f..da7a952e61f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_CXX_SOURCES := \
 	brw_shader.cpp \
 	brw_vec4.cpp \
 	brw_vec4_emit.cpp \
+	brw_vec4_copy_propagation.cpp \
 	brw_vec4_reg_allocate.cpp \
 	brw_vec4_visitor.cpp
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e3562d29238..656589dee3a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -31,6 +31,22 @@ extern "C" {
 
 namespace brw {
 
+bool
+src_reg::equals(src_reg *r)
+{
+   return (file == r->file &&
+           reg == r->reg &&
+           reg_offset == r->reg_offset &&
+           type == r->type &&
+           negate == r->negate &&
+           abs == r->abs &&
+           swizzle == r->swizzle &&
+           !reladdr && !r->reladdr &&
+           memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
+                  sizeof(fixed_hw_reg)) == 0 &&
+           imm.u == r->imm.u);
+}
+
 void
 vec4_visitor::calculate_live_intervals()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index f148ca62cd5..545e8f15a8d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -162,6 +162,8 @@ public:
       this->imm.i = i;
    }
 
+   bool equals(src_reg *r);
+
    src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
    explicit src_reg(dst_reg reg);
@@ -396,6 +398,7 @@ public:
    void calculate_live_intervals();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
+   bool opt_copy_propagation();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
new file mode 100644
index 00000000000..1e24e2e1ccc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file brw_vec4_copy_propagation.cpp
+ *
+ * Implements tracking of values copied between registers, and
+ * optimizations based on that: copy propagation and constant
+ * propagation.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+}
+
+namespace brw {
+
+static bool
+is_direct_copy(vec4_instruction *inst)
+{
+   return (inst->opcode == BRW_OPCODE_MOV &&
+           !inst->predicate &&
+           inst->dst.file == GRF &&
+           !inst->saturate &&
+           !inst->dst.reladdr &&
+           !inst->src[0].reladdr &&
+           inst->dst.type == inst->src[0].type);
+}
+
+static bool
+is_dominated_by_previous_instruction(vec4_instruction *inst)
+{
+   return (inst->opcode != BRW_OPCODE_DO &&
+           inst->opcode != BRW_OPCODE_WHILE &&
+           inst->opcode != BRW_OPCODE_ELSE &&
+           inst->opcode != BRW_OPCODE_ENDIF);
+}
+
+static bool
+try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
+{
+   /* For constant propagation, we only handle the same constant
+    * across all 4 channels.  Some day, we should handle the 8-bit
+    * float vector format, which would let us constant propagate
+    * vectors better.
+    */
+   src_reg value = *values[0];
+   for (int i = 1; i < 4; i++) {
+      if (!value.equals(values[i]))
+         return false;
+   }
+
+   if (value.file != IMM)
+      return false;
+
+   if (inst->src[arg].abs) {
+      if (value.type == BRW_REGISTER_TYPE_F) {
+         value.imm.f = fabs(value.imm.f);
+      } else if (value.type == BRW_REGISTER_TYPE_D) {
+         if (value.imm.i < 0)
+            value.imm.i = -value.imm.i;
+      }
+   }
+
+   if (inst->src[arg].negate) {
+      if (value.type == BRW_REGISTER_TYPE_F)
+         value.imm.f = -value.imm.f;
+      else
+         value.imm.u = -value.imm.u;
+   }
+
+   switch (inst->opcode) {
+   case BRW_OPCODE_MOV:
+      inst->src[arg] = value;
+      return true;
+
+   case BRW_OPCODE_MUL:
+   case BRW_OPCODE_ADD:
+      if (arg == 1) {
+         inst->src[arg] = value;
+         return true;
+      } else if (arg == 0 && inst->src[1].file != IMM) {
+         /* Fit this constant in by commuting the operands */
+         inst->src[0] = inst->src[1];
+         inst->src[1] = value;
+         return true;
+      }
+      break;
+
+   case BRW_OPCODE_CMP:
+      if (arg == 1) {
+         inst->src[arg] = value;
+         return true;
+      } else if (arg == 0 && inst->src[1].file != IMM) {
+         uint32_t new_cmod;
+
+         new_cmod = brw_swap_cmod(inst->conditional_mod);
+         if (new_cmod != ~0u) {
+            /* Fit this constant in by swapping the operands and
+             * flipping the test.
+             */
+            inst->src[0] = inst->src[1];
+            inst->src[1] = value;
+            inst->conditional_mod = new_cmod;
+            return true;
+         }
+      }
+      break;
+
+   case BRW_OPCODE_SEL:
+      if (arg == 1) {
+         inst->src[arg] = value;
+         return true;
+      } else if (arg == 0 && inst->src[1].file != IMM) {
+         inst->src[0] = inst->src[1];
+         inst->src[1] = value;
+
+         /* If this was predicated, flipping operands means
+          * we also need to flip the predicate.
+          */
+         if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+            inst->predicate_inverse = !inst->predicate_inverse;
+         }
+         return true;
+      }
+      break;
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
+bool
+vec4_visitor::opt_copy_propagation()
+{
+   bool progress = false;
+   src_reg *cur_value[virtual_grf_reg_count][4];
+
+   memset(&cur_value, 0, sizeof(cur_value));
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* This pass only works on basic blocks.  If there's flow
+       * control, throw out all our information and start from
+       * scratch.
+       *
+       * This should really be fixed by using a structure like in
+       * src/glsl/opt_copy_propagation.cpp to track available copies.
+       */
+      if (!is_dominated_by_previous_instruction(inst)) {
+         memset(cur_value, 0, sizeof(cur_value));
+         continue;
+      }
+
+      /* For each source arg, see if each component comes from a copy
+       * from the same type file (IMM, GRF, UNIFORM), and try
+       * optimizing out access to the copy result
+       */
+      for (int i = 2; i >= 0; i--) {
+         /* Copied values end up in GRFs, and we don't track reladdr
+          * accesses.  Check this before indexing virtual_grf_reg_map[].
+          */
+         if (inst->src[i].file != GRF ||
+             inst->src[i].reladdr)
+            continue;
+
+         int reg = (virtual_grf_reg_map[inst->src[i].reg] +
+                    inst->src[i].reg_offset);
+
+         /* Find the regs that each swizzle component came from.
+          */
+         src_reg *values[4];
+         int c;
+         for (c = 0; c < 4; c++) {
+            values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+
+            /* If there's no available copy for this channel, bail.
+             * We could be more aggressive here -- some channels might
+             * not get used based on the destination writemask.
+             */
+            if (!values[c])
+               break;
+
+            /* We'll only be able to copy propagate if the sources are
+             * all from the same file -- there's no ability to swizzle
+             * 0 or 1 constants in with source registers like in i915.
+             */
+            if (c > 0 && values[c - 1]->file != values[c]->file)
+               break;
+         }
+
+         if (c != 4)
+            continue;
+
+         if (try_constant_propagation(inst, i, values))
+            progress = true;
+      }
+
+      /* Track available source registers. */
+      if (is_direct_copy(inst)) {
+         int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               cur_value[reg][i] = &inst->src[0];
+            }
+         }
+         continue;
+      }
+
+      /* For any updated channels, clear tracking of them as a source
+       * or destination.
+       *
+       * FINISHME: Sources aren't handled, which will need to be done
+       * for copy propagation.
+       */
+      if (inst->dst.file == GRF) {
+         if (inst->dst.reladdr)
+            memset(cur_value, 0, sizeof(cur_value));
+         else {
+            int reg = virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+
+            for (int i = 0; i < 4; i++) {
+               if (inst->dst.writemask & (1 << i))
+                  cur_value[reg][i] = NULL;
+            }
+         }
+      }
+   }
+
+   if (progress)
+      live_intervals_valid = false;
+
+   return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 78ffbd7b6ab..c40c41f7a0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -614,6 +614,7 @@ vec4_visitor::run()
    do {
       progress = false;
       progress = dead_code_eliminate() || progress;
+      progress = opt_copy_propagation() || progress;
    } while (progress);
 
 
-- 
2.30.2