From 773556e0f537eba82d9d68d618e229140f413620 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 24 Jun 2011 15:40:51 -0700 Subject: [PATCH] i965/gen5: Fix grf_used calculation for 16-wide. If we happened to allocate a texture result (or other vector) to the highest hardware register slot, and we were in 16-wide, we would under-count the registers used and potentially wrap around to g0 if that allocation crossed a 16-register block boundary. Bad rendering and hangs ensued. Tested-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f88b1316775..b4689d2c293 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -101,7 +101,6 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next + 1]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; @@ -263,6 +262,7 @@ fs_visitor::assign_regs() * regs in the register classes back down to real hardware reg * numbers. */ + this->grf_used = first_assigned_grf; hw_reg_mapping[0] = 0; /* unused */ for (int i = 1; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); @@ -278,8 +278,9 @@ fs_visitor::assign_regs() assert(hw_reg >= 0); hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; - last_grf = MAX2(last_grf, - hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); + this->grf_used = MAX2(this->grf_used, + hw_reg_mapping[i] + this->virtual_grf_sizes[i] * + reg_width); } foreach_iter(exec_list_iterator, iter, this->instructions) { @@ -290,8 +291,6 @@ fs_visitor::assign_regs() assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - this->grf_used = last_grf + reg_width; - ralloc_free(g); ralloc_free(regs); -- 2.30.2