From b1f0bffd399f377a19b0541e1d834afad8b9dad0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 May 2011 09:56:18 -0700 Subject: [PATCH] i965/fs: Factor out the register allocator setup to a separate function. Besides separating out a logical step of the giant register allocator function, this now communicates a bunch of the allocator information through entries in brw_context, which will make this code partially reusable for caching the expensive allocator setup. --- src/mesa/drivers/dri/i965/brw_context.h | 23 +++ .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 148 ++++++++++-------- 2 files changed, 105 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 22baf978ad4..cc11d06874d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -748,6 +748,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 42ab66df6d8..8e44a010576 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial() } -bool -fs_visitor::assign_regs() +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) { - /* Most of this allocation was written for a reg_width of 1 - * (dispatch_width == 8). In extending to 16-wide, the code was - * left in place and it was converted to have the hardware - * registers it's allocating be contiguous physical pairs of regs - * for reg_width == 2. - */ - int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next]; - int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); - int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; - int class_sizes[base_reg_count]; - int class_count = 0; - int aligned_pairs_class = -1; - - calculate_live_intervals(); - - /* Set up the register classes. - * - * The base registers store a scalar value. For texture samples, - * we get virtual GRFs composed of 4 contiguous hw register. For - * structures and arrays, we store them as contiguous larger things - * than that, though we should be able to do better most of the - * time. - */ - class_sizes[class_count++] = 1; - if (brw->has_pln && intel->gen < 6) { - /* Always set up the (unaligned) pairs for gen5, so we can find - * them for making the aligned pair class. - */ - class_sizes[class_count++] = 2; - } - for (int r = 0; r < this->virtual_grf_next; r++) { - int i; - - for (i = 0; i < class_count; i++) { - if (class_sizes[i] == this->virtual_grf_sizes[r]) - break; - } - if (i == class_count) { - if (this->virtual_grf_sizes[r] >= base_reg_count) { - fail("Object too large to register allocate.\n"); - } - - class_sizes[class_count++] = this->virtual_grf_sizes[r]; - } - } + struct intel_context *intel = &brw->intel; /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; @@ -143,9 +102,14 @@ fs_visitor::assign_regs() ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - uint8_t ra_reg_to_grf[ra_reg_count]; - int classes[class_count + 1]; + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; /* Now, add the registers to their classes, and add the conflicts * between them and the base GRF registers (and also each other). @@ -155,7 +119,7 @@ fs_visitor::assign_regs() int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); - classes[i] = ra_alloc_reg_class(regs); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); /* Save this off for the aligned pair class at the end. */ if (class_sizes[i] == 2) { @@ -164,14 +128,14 @@ fs_visitor::assign_regs() } for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(regs, classes[i], reg); + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); - ra_reg_to_grf[reg] = j; + brw->wm.ra_reg_to_grf[reg] = j; for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); } reg++; @@ -183,30 +147,83 @@ fs_visitor::assign_regs() * in on gen5 so that we can do PLN. */ if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - aligned_pairs_class = ra_alloc_reg_class(regs); + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); for (int i = 0; i < pairs_reg_count; i++) { - if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { - ra_class_add_reg(regs, aligned_pairs_class, + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, pairs_base_reg + i); } } class_count++; } - ra_set_finalize(regs); + ra_set_finalize(brw->wm.regs); +} + +bool +fs_visitor::assign_regs() +{ + /* Most of this allocation was written for a reg_width of 1 + * (dispatch_width == 8). In extending to 16-wide, the code was + * left in place and it was converted to have the hardware + * registers it's allocating be contiguous physical pairs of regs + * for reg_width == 2. + */ + int reg_width = c->dispatch_width / 8; + int hw_reg_mapping[this->virtual_grf_next]; + int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); + int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; + int class_sizes[base_reg_count]; + int class_count = 0; + + calculate_live_intervals(); + + /* Set up the register classes. + * + * The base registers store a scalar value. For texture samples, + * we get virtual GRFs composed of 4 contiguous hw register. For + * structures and arrays, we store them as contiguous larger things + * than that, though we should be able to do better most of the + * time. + */ + class_sizes[class_count++] = 1; + if (brw->has_pln && intel->gen < 6) { + /* Always set up the (unaligned) pairs for gen5, so we can find + * them for making the aligned pair class. + */ + class_sizes[class_count++] = 2; + } + for (int r = 0; r < this->virtual_grf_next; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + if (this->virtual_grf_sizes[r] >= base_reg_count) { + fail("Object too large to register allocate.\n"); + } + + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pairs_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, aligned_pairs_class); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } @@ -237,7 +254,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -250,7 +266,8 @@ fs_visitor::assign_regs() for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); @@ -265,7 +282,6 @@ fs_visitor::assign_regs() } ralloc_free(g); - ralloc_free(regs); return true; } -- 2.30.2