From 34c58acb59bc0b827e28ef9e89044621ab0b3ee1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Oct 2012 10:04:22 -0700 Subject: [PATCH] i965/vs: Add support for splitting virtual GRFs. This should improve our ability to register allocate without spilling. Unfortunately, due to the live variable analysis being ignorant of loops, we still have register allocation failures on some programs. v2: Add more context to the comment explaining the function. Reviewed-by: Kenneth Graunke (v1) --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 + 3 files changed, 62 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2941729c4a0..e0b643242a3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -865,4 +865,64 @@ vec4_visitor::opt_compute_to_mrf() return progress; } +/** + * Splits virtual GRFs requesting more than one contiguous physical register. + * + * We initially create large virtual GRFs for temporary structures, arrays, + * and matrices, so that the dereference visitor functions can add reg_offsets + * to work their way down to the actual member being accessed. + * + * Unlike in the FS visitor, though, we have no SEND messages that return more + * than 1 register. We also don't do any array access in register space, + * which would have required contiguous physical registers. Thus, all those + * large virtual GRFs can be split up into independent single-register virtual + * GRFs, making allocation and optimization easier. + */ +void +vec4_visitor::split_virtual_grfs() +{ + int num_vars = this->virtual_grf_count; + int new_virtual_grf[num_vars]; + + memset(new_virtual_grf, 0, sizeof(new_virtual_grf)); + + /* Allocate new space for split regs. Note that the virtual + * numbers will be contiguous. 
+ */ + for (int i = 0; i < num_vars; i++) { + if (this->virtual_grf_sizes[i] == 1) + continue; + + new_virtual_grf[i] = virtual_grf_alloc(1); + for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { + int reg = virtual_grf_alloc(1); + assert(reg == new_virtual_grf[i] + j - 1); + (void) reg; + } + this->virtual_grf_sizes[i] = 1; + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && + new_virtual_grf[inst->dst.reg] && + inst->dst.reg_offset != 0) { + inst->dst.reg = (new_virtual_grf[inst->dst.reg] + + inst->dst.reg_offset - 1); + inst->dst.reg_offset = 0; + } + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF && + new_virtual_grf[inst->src[i].reg] && + inst->src[i].reg_offset != 0) { + inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + + inst->src[i].reg_offset - 1); + inst->src[i].reg_offset = 0; + } + } + } + this->live_intervals_valid = false; +} + } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index dce3c89575d..4fdede3ab26 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -323,6 +323,7 @@ public: void split_uniform_registers(); void pack_uniform_registers(); void calculate_live_intervals(); + void split_virtual_grfs(); bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); bool opt_copy_propagation(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 5a941d90c30..479b0a607aa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -794,6 +794,7 @@ vec4_visitor::run() move_uniform_array_access_to_pull_constants(); pack_uniform_registers(); move_push_constants_to_pull_constants(); + split_virtual_grfs(); bool progress; do { -- 2.30.2