src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "brw_cfg.h"
  29 #include "brw_vec4_live_variables.h"
  30
  31 using namespace brw;
  32
/** @file brw_vec4_live_variables.cpp
 *
 * Support for computing at the basic block level which variables
 * (virtual GRFs in our case) are live at entry and exit.
 *
 * See Muchnick's Advanced Compiler Design and Implementation, section
 * 14.1 (p444).
 */
  41
  42 /**
  43  * Sets up the use[] and def[] arrays.
  44  *
  45  * The basic-block-level live variable analysis needs to know which
  46  * variables get used before they're completely defined, and which
  47  * variables are completely defined before they're used.
  48  *
  49  * We independently track each channel of a vec4.  This is because we need to
  50  * be able to recognize a sequence like:
  51  *
  52  * ...
  53  * DP4 tmp.x a b;
  54  * DP4 tmp.y c d;
  55  * MUL result.xy tmp.xy e.xy
  56  * ...
  57  *
  58  * as having tmp live only across that sequence (assuming it's used nowhere
  59  * else), because it's a common pattern.  A more conservative approach that
  60  * doesn't get tmp marked a deffed in this block will tend to result in
  61  * spilling.
  62  */
  63 void
  64 vec4_live_variables::setup_def_use()
  65 {
  66    int ip = 0;
  67
  68    for (int b = 0; b < cfg->num_blocks; b++) {
  69       bblock_t *block = cfg->blocks[b];
  70
  71       assert(ip == block->start_ip);
  72       if (b > 0)
  73          assert(cfg->blocks[b - 1]->end_ip == ip - 1);
  74
  75       for (vec4_instruction *inst = (vec4_instruction *)block->start;
  76            inst != block->end->next;
  77            inst = (vec4_instruction *)inst->next) {
  78
  79          /* Set use[] for this instruction */
  80          for (unsigned int i = 0; i < 3; i++) {
  81             if (inst->src[i].file == GRF) {
  82                int reg = inst->src[i].reg;
  83
  84                for (int j = 0; j < 4; j++) {
  85                   int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
  86                   if (!bd[b].def[reg * 4 + c])
  87                      bd[b].use[reg * 4 + c] = true;
  88                }
  89             }
  90          }
  91
  92          /* Check for unconditional writes to whole registers. These
  93           * are the things that screen off preceding definitions of a
  94           * variable, and thus qualify for being in def[].
  95           */
  96          if (inst->dst.file == GRF &&
  97              v->virtual_grf_sizes[inst->dst.reg] == 1 &&
  98              !inst->predicate) {
  99             for (int c = 0; c < 4; c++) {
 100                if (inst->dst.writemask & (1 << c)) {
 101                   int reg = inst->dst.reg;
 102                   if (!bd[b].use[reg * 4 + c])
 103                      bd[b].def[reg * 4 + c] = true;
 104                }
 105             }
 106          }
 107
 108          ip++;
 109       }
 110    }
 111 }
 112
 113 /**
 114  * The algorithm incrementally sets bits in liveout and livein,
 115  * propagating it through control flow.  It will eventually terminate
 116  * because it only ever adds bits, and stops when no bits are added in
 117  * a pass.
 118  */
 119 void
 120 vec4_live_variables::compute_live_variables()
 121 {
 122    bool cont = true;
 123
 124    while (cont) {
 125       cont = false;
 126
 127       for (int b = 0; b < cfg->num_blocks; b++) {
 128          /* Update livein */
 129          for (int i = 0; i < num_vars; i++) {
 130             if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
 131                if (!bd[b].livein[i]) {
 132                   bd[b].livein[i] = true;
 133                   cont = true;
 134                }
 135             }
 136          }
 137
 138          /* Update liveout */
 139          foreach_list(block_node, &cfg->blocks[b]->children) {
 140             bblock_link *link = (bblock_link *)block_node;
 141             bblock_t *block = link->block;
 142
 143             for (int i = 0; i < num_vars; i++) {
 144                if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
 145                   bd[b].liveout[i] = true;
 146                   cont = true;
 147                }
 148             }
 149          }
 150       }
 151    }
 152 }
 153
 154 vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
 155    : v(v), cfg(cfg)
 156 {
 157    mem_ctx = ralloc_context(cfg->mem_ctx);
 158
 159    num_vars = v->virtual_grf_count * 4;
 160    bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 161
 162    for (int i = 0; i < cfg->num_blocks; i++) {
 163       bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
 164       bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
 165       bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
 166       bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
 167    }
 168
 169    setup_def_use();
 170    compute_live_variables();
 171 }
 172
 173 vec4_live_variables::~vec4_live_variables()
 174 {
 175    ralloc_free(mem_ctx);
 176 }
 177
 178 #define MAX_INSTRUCTION (1 << 30)
 179
 180 /**
 181  * Computes a conservative start/end of the live intervals for each virtual GRF.
 182  *
 183  * We could expose per-channel live intervals to the consumer based on the
 184  * information we computed in vec4_live_variables, except that our only
 185  * current user is virtual_grf_interferes().  So we instead union the
 186  * per-channel ranges into a per-vgrf range for virtual_grf_def[] and
 187  * virtual_grf_use[].
 188  *
 189  * We could potentially have virtual_grf_interferes() do the test per-channel,
 190  * which would let some interesting register allocation occur (particularly on
 191  * code-generated GLSL sequences from the Cg compiler which does register
 192  * allocation at the GLSL level and thus reuses components of the variable
 193  * with distinct lifetimes).  But right now the complexity of doing so doesn't
 194  * seem worth it, since having virtual_grf_interferes() be cheap is important
 195  * for register allocation performance.
 196  */
 197 void
 198 vec4_visitor::calculate_live_intervals()
 199 {
 200    if (this->live_intervals_valid)
 201       return;
 202
 203    int *def = ralloc_array(mem_ctx, int, this->virtual_grf_count);
 204    int *use = ralloc_array(mem_ctx, int, this->virtual_grf_count);
 205    ralloc_free(this->virtual_grf_def);
 206    ralloc_free(this->virtual_grf_use);
 207    this->virtual_grf_def = def;
 208    this->virtual_grf_use = use;
 209
 210    for (int i = 0; i < this->virtual_grf_count; i++) {
 211       def[i] = MAX_INSTRUCTION;
 212       use[i] = -1;
 213    }
 214
 215    /* Start by setting up the intervals with no knowledge of control
 216     * flow.
 217     */
 218    int ip = 0;
 219    foreach_list(node, &this->instructions) {
 220       vec4_instruction *inst = (vec4_instruction *)node;
 221
 222       for (unsigned int i = 0; i < 3; i++) {
 223          if (inst->src[i].file == GRF) {
 224             int reg = inst->src[i].reg;
 225
 226             use[reg] = ip;
 227          }
 228       }
 229
 230       if (inst->dst.file == GRF) {
 231          int reg = inst->dst.reg;
 232
 233          def[reg] = MIN2(def[reg], ip);
 234       }
 235
 236       ip++;
 237    }
 238
 239    /* Now, extend those intervals using our analysis of control flow.
 240     *
 241     * The control flow-aware analysis was done at a channel level, while at
 242     * this point we're distilling it down to vgrfs.
 243     */
 244    cfg_t cfg(this);
 245    vec4_live_variables livevars(this, &cfg);
 246
 247    for (int b = 0; b < cfg.num_blocks; b++) {
 248       for (int i = 0; i < livevars.num_vars; i++) {
 249          if (livevars.bd[b].livein[i]) {
 250             def[i / 4] = MIN2(def[i / 4], cfg.blocks[b]->start_ip);
 251             use[i / 4] = MAX2(use[i / 4], cfg.blocks[b]->start_ip);
 252          }
 253
 254          if (livevars.bd[b].liveout[i]) {
 255             def[i / 4] = MIN2(def[i / 4], cfg.blocks[b]->end_ip);
 256             use[i / 4] = MAX2(use[i / 4], cfg.blocks[b]->end_ip);
 257          }
 258       }
 259    }
 260
 261    this->live_intervals_valid = true;
 262
 263    /* Note in the non-control-flow code above, that we only take def[] as the
 264     * first store, and use[] as the last use.  We use this in dead code
 265     * elimination, to determine when a store never gets used.  However, we
 266     * also use these arrays to answer the virtual_grf_interferes() question
 267     * (live interval analysis), which is used for register coalescing and
 268     * register allocation.
 269     *
 270     * So, there's a conflict over what the array should mean: if use[]
 271     * considers a def after the last use, then the dead code elimination pass
 272     * never does anything (and it's an important pass!).  But if we don't
 273     * include dead code, then virtual_grf_interferes() lies and we'll do
 274     * horrible things like coalesce the register that is dead-code-written
 275     * into another register that was live across the dead write (causing the
 276     * use of the second register to take the dead write's source value instead
 277     * of the coalesced MOV's source value).
 278     *
 279     * To resolve the conflict, immediately after calculating live intervals,
 280     * detect dead code, nuke it, and if we changed anything, calculate again
 281     * before returning to the caller.  Now we happen to produce def[] and
 282     * use[] arrays that will work for virtual_grf_interferes().
 283     */
 284    if (dead_code_eliminate())
 285       calculate_live_intervals();
 286 }
 287
 288 bool
 289 vec4_visitor::virtual_grf_interferes(int a, int b)
 290 {
 291    int a_def = this->virtual_grf_def[a], a_use = this->virtual_grf_use[a];
 292    int b_def = this->virtual_grf_def[b], b_use = this->virtual_grf_use[b];
 293
 294    /* If there's dead code (def but not use), it would break our test
 295     * unless we consider it used.
 296     */
 297    if ((a_use == -1 && a_def != MAX_INSTRUCTION) ||
 298        (b_use == -1 && b_def != MAX_INSTRUCTION)) {
 299       return true;
 300    }
 301
 302    int start = MAX2(a_def, b_def);
 303    int end = MIN2(a_use, b_use);
 304
 305    return start < end;
 306 }