From 4a7d0c550e28ae3d434da81c9029272d22fa315e Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 10 Dec 2013 16:04:27 -0800 Subject: [PATCH] i965/fs: Support coalescing registers of size > 1. total instructions in shared programs: 1550048 -> 1549880 (-0.01%) instructions in affected programs: 1896 -> 1728 (-8.86%) Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 82 ++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ed7f57467b9..914f0619481 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2276,6 +2276,12 @@ fs_visitor::register_coalesce() calculate_live_intervals(); + int src_size = 0; + int channels_remaining = 0; + int reg_from = -1, reg_to = -1; + int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE]; + fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE]; + foreach_list_safe(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; @@ -2287,11 +2293,14 @@ fs_visitor::register_coalesce() inst->src[0].abs || inst->src[0].smear != -1 || inst->dst.file != GRF || - inst->dst.type != inst->src[0].type || - virtual_grf_sizes[inst->src[0].reg] != 1) { + inst->dst.type != inst->src[0].type) { continue; } + if (virtual_grf_sizes[inst->src[0].reg] > + virtual_grf_sizes[inst->dst.reg]) + continue; + int var_from = live_intervals->var_from_reg(&inst->src[0]); int var_to = live_intervals->var_from_reg(&inst->dst); @@ -2299,31 +2308,58 @@ fs_visitor::register_coalesce() !inst->dst.equals(inst->src[0])) continue; - int reg_from = inst->src[0].reg; - assert(inst->src[0].reg_offset == 0); - int reg_to = inst->dst.reg; - int reg_to_offset = inst->dst.reg_offset; + if (reg_from != inst->src[0].reg) { + reg_from = inst->src[0].reg; - foreach_list(node, &this->instructions) { - fs_inst *scan_inst = (fs_inst *)node; + src_size = virtual_grf_sizes[inst->src[0].reg]; + assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE); - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == reg_from) { - scan_inst->dst.reg = reg_to; - scan_inst->dst.reg_offset = reg_to_offset; - } - for (int i = 0; i < 3; i++) { - if (scan_inst->src[i].file == GRF && - scan_inst->src[i].reg == reg_from) { - scan_inst->src[i].reg = reg_to; - scan_inst->src[i].reg_offset = reg_to_offset; - } - } + channels_remaining = src_size; + memset(mov, 0, sizeof(mov)); + + reg_to = inst->dst.reg; + } + + if (reg_to != inst->dst.reg) + continue; + + const int offset = inst->src[0].reg_offset; + reg_to_offset[offset] = inst->dst.reg_offset; + mov[offset] = inst; + channels_remaining--; + + if (channels_remaining) + continue; + + for (int i = 0; i < src_size; i++) { + if (mov[i]) + mov[i]->remove(); } - inst->remove(); - progress = true; - continue; + foreach_list(node, &this->instructions) { + fs_inst *scan_inst = (fs_inst *)node; + + for (int i = 0; i < src_size; i++) { + if (mov[i]) { + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == reg_from && + scan_inst->dst.reg_offset == i) { + scan_inst->dst.reg = reg_to; + scan_inst->dst.reg_offset = reg_to_offset[i]; + } + for (int j = 0; j < 3; j++) { + if (scan_inst->src[j].file == GRF && + scan_inst->src[j].reg == reg_from && + scan_inst->src[j].reg_offset == i) { + scan_inst->src[j].reg = reg_to; + scan_inst->src[j].reg_offset = reg_to_offset[i]; + } + } + + progress = true; + } + } + } } if (progress) -- 2.30.2