src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "brw_cfg.h"
  29 #include "brw_vec4_live_variables.h"
  30
  31 using namespace brw;
  32
  33 /** @file brw_vec4_live_variables.cpp
  34  *
  35  * Support for computing at the basic block level which variables
  36  * (virtual GRFs in our case) are live at entry and exit.
  37  *
  38  * See Muchnik's Advanced Compiler Design and Implementation, section
  39  * 14.1 (p444).
  40  */
  41
  42 /**
  43  * Sets up the use[] and def[] arrays.
  44  *
  45  * The basic-block-level live variable analysis needs to know which
  46  * variables get used before they're completely defined, and which
  47  * variables are completely defined before they're used.
  48  *
  49  * We independently track each channel of a vec4.  This is because we need to
  50  * be able to recognize a sequence like:
  51  *
  52  * ...
  53  * DP4 tmp.x a b;
  54  * DP4 tmp.y c d;
  55  * MUL result.xy tmp.xy e.xy
  56  * ...
  57  *
  58  * as having tmp live only across that sequence (assuming it's used nowhere
  59  * else), because it's a common pattern.  A more conservative approach that
  60  * doesn't get tmp marked a deffed in this block will tend to result in
  61  * spilling.
  62  */
  63 void
  64 vec4_live_variables::setup_def_use()
  65 {
  66    int ip = 0;
  67
  68    for (int b = 0; b < cfg->num_blocks; b++) {
  69       bblock_t *block = cfg->blocks[b];
  70
  71       assert(ip == block->start_ip);
  72       if (b > 0)
  73          assert(cfg->blocks[b - 1]->end_ip == ip - 1);
  74
  75       for (vec4_instruction *inst = (vec4_instruction *)block->start;
  76            inst != block->end->next;
  77            inst = (vec4_instruction *)inst->next) {
  78
  79          /* Set use[] for this instruction */
  80          for (unsigned int i = 0; i < 3; i++) {
  81             if (inst->src[i].file == GRF) {
  82                int reg = inst->src[i].reg;
  83
  84                for (int j = 0; j < 4; j++) {
  85                   int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
  86                   if (!bd[b].def[reg * 4 + c])
  87                      bd[b].use[reg * 4 + c] = true;
  88                }
  89             }
  90          }
  91
  92          /* Check for unconditional writes to whole registers. These
  93           * are the things that screen off preceding definitions of a
  94           * variable, and thus qualify for being in def[].
  95           */
  96          if (inst->dst.file == GRF &&
  97              v->virtual_grf_sizes[inst->dst.reg] == 1 &&
  98              !inst->predicate) {
  99             for (int c = 0; c < 4; c++) {
 100                if (inst->dst.writemask & (1 << c)) {
 101                   int reg = inst->dst.reg;
 102                   if (!bd[b].use[reg * 4 + c])
 103                      bd[b].def[reg * 4 + c] = true;
 104                }
 105             }
 106          }
 107
 108          ip++;
 109       }
 110    }
 111 }
 112
 113 /**
 114  * The algorithm incrementally sets bits in liveout and livein,
 115  * propagating it through control flow.  It will eventually terminate
 116  * because it only ever adds bits, and stops when no bits are added in
 117  * a pass.
 118  */
 119 void
 120 vec4_live_variables::compute_live_variables()
 121 {
 122    bool cont = true;
 123
 124    while (cont) {
 125       cont = false;
 126
 127       for (int b = 0; b < cfg->num_blocks; b++) {
 128          /* Update livein */
 129          for (int i = 0; i < num_vars; i++) {
 130             if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
 131                if (!bd[b].livein[i]) {
 132                   bd[b].livein[i] = true;
 133                   cont = true;
 134                }
 135             }
 136          }
 137
 138          /* Update liveout */
 139          foreach_list(block_node, &cfg->blocks[b]->children) {
 140             bblock_link *link = (bblock_link *)block_node;
 141             bblock_t *block = link->block;
 142
 143             for (int i = 0; i < num_vars; i++) {
 144                if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
 145                   bd[b].liveout[i] = true;
 146                   cont = true;
 147                }
 148             }
 149          }
 150       }
 151    }
 152 }
 153
 154 vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
 155    : v(v), cfg(cfg)
 156 {
 157    mem_ctx = ralloc_context(cfg->mem_ctx);
 158
 159    num_vars = v->virtual_grf_count * 4;
 160    bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 161
 162    for (int i = 0; i < cfg->num_blocks; i++) {
 163       bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
 164       bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
 165       bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
 166       bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
 167    }
 168
 169    setup_def_use();
 170    compute_live_variables();
 171 }
 172
 173 vec4_live_variables::~vec4_live_variables()
 174 {
 175    ralloc_free(mem_ctx);
 176 }
 177
 178 #define MAX_INSTRUCTION (1 << 30)
 179
 180 /**
 181  * Computes a conservative start/end of the live intervals for each virtual GRF.
 182  *
 183  * We could expose per-channel live intervals to the consumer based on the
 184  * information we computed in vec4_live_variables, except that our only
 185  * current user is virtual_grf_interferes().  So we instead union the
 186  * per-channel ranges into a per-vgrf range for virtual_grf_start[] and
 187  * virtual_grf_end[].
 188  *
 189  * We could potentially have virtual_grf_interferes() do the test per-channel,
 190  * which would let some interesting register allocation occur (particularly on
 191  * code-generated GLSL sequences from the Cg compiler which does register
 192  * allocation at the GLSL level and thus reuses components of the variable
 193  * with distinct lifetimes).  But right now the complexity of doing so doesn't
 194  * seem worth it, since having virtual_grf_interferes() be cheap is important
 195  * for register allocation performance.
 196  */
 197 void
 198 vec4_visitor::calculate_live_intervals()
 199 {
 200    if (this->live_intervals_valid)
 201       return;
 202
 203    int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count);
 204    int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count);
 205    ralloc_free(this->virtual_grf_start);
 206    ralloc_free(this->virtual_grf_end);
 207    this->virtual_grf_start = start;
 208    this->virtual_grf_end = end;
 209
 210    for (int i = 0; i < this->virtual_grf_count; i++) {
 211       start[i] = MAX_INSTRUCTION;
 212       end[i] = -1;
 213    }
 214
 215    /* Start by setting up the intervals with no knowledge of control
 216     * flow.
 217     */
 218    int ip = 0;
 219    foreach_list(node, &this->instructions) {
 220       vec4_instruction *inst = (vec4_instruction *)node;
 221
 222       for (unsigned int i = 0; i < 3; i++) {
 223          if (inst->src[i].file == GRF) {
 224             int reg = inst->src[i].reg;
 225
 226             start[reg] = MIN2(start[reg], ip);
 227             end[reg] = ip;
 228          }
 229       }
 230
 231       if (inst->dst.file == GRF) {
 232          int reg = inst->dst.reg;
 233
 234          start[reg] = MIN2(start[reg], ip);
 235          end[reg] = ip;
 236       }
 237
 238       ip++;
 239    }
 240
 241    /* Now, extend those intervals using our analysis of control flow.
 242     *
 243     * The control flow-aware analysis was done at a channel level, while at
 244     * this point we're distilling it down to vgrfs.
 245     */
 246    cfg_t cfg(this);
 247    vec4_live_variables livevars(this, &cfg);
 248
 249    for (int b = 0; b < cfg.num_blocks; b++) {
 250       for (int i = 0; i < livevars.num_vars; i++) {
 251          if (livevars.bd[b].livein[i]) {
 252             start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
 253             end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
 254          }
 255
 256          if (livevars.bd[b].liveout[i]) {
 257             start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);
 258             end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);
 259          }
 260       }
 261    }
 262
 263    this->live_intervals_valid = true;
 264 }
 265
 266 bool
 267 vec4_visitor::virtual_grf_interferes(int a, int b)
 268 {
 269    return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
 270             virtual_grf_end[b] <= virtual_grf_start[a]);
 271 }