}
-bool
-fs_visitor::assign_regs()
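+/* Build the register set and classes for the given class sizes,
+ * storing the results on the context rather than on the compile's
+ * stack.
+ */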
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+ int *class_sizes,
+ int class_count,
+ int reg_width,
+ int base_reg_count)
{
- /* Most of this allocation was written for a reg_width of 1
- * (dispatch_width == 8). In extending to 16-wide, the code was
- * left in place and it was converted to have the hardware
- * registers it's allocating be contiguous physical pairs of regs
- * for reg_width == 2.
- */
- int reg_width = c->dispatch_width / 8;
- int hw_reg_mapping[this->virtual_grf_next];
- int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
- int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
- int class_sizes[base_reg_count];
- int class_count = 0;
- int aligned_pairs_class = -1;
-
- calculate_live_intervals();
-
- /* Set up the register classes.
- *
- * The base registers store a scalar value. For texture samples,
- * we get virtual GRFs composed of 4 contiguous hw register. For
- * structures and arrays, we store them as contiguous larger things
- * than that, though we should be able to do better most of the
- * time.
- */
- class_sizes[class_count++] = 1;
- if (brw->has_pln && intel->gen < 6) {
- /* Always set up the (unaligned) pairs for gen5, so we can find
- * them for making the aligned pair class.
- */
- class_sizes[class_count++] = 2;
- }
- for (int r = 0; r < this->virtual_grf_next; r++) {
- int i;
-
- for (i = 0; i < class_count; i++) {
- if (class_sizes[i] == this->virtual_grf_sizes[r])
- break;
- }
- if (i == class_count) {
- if (this->virtual_grf_sizes[r] >= base_reg_count) {
- fail("Object too large to register allocate.\n");
- }
-
- class_sizes[class_count++] = this->virtual_grf_sizes[r];
- }
- }
+ struct intel_context *intel = &brw->intel;
/* Compute the total number of registers across all classes. */
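/* A class whose registers each span N GRFs has
* base_reg_count - (N - 1) possible starting GRFs, so it contributes
* that many allocator registers.
*/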
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
- struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
- uint8_t ra_reg_to_grf[ra_reg_count];
- int classes[class_count + 1];
+ ralloc_free(brw->wm.ra_reg_to_grf);
+ brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+ ralloc_free(brw->wm.regs);
+ brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+ ralloc_free(brw->wm.classes);
+ brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
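+ /* -1 marks "no aligned pairs class"; the gen5 PLN setup below may
+ * replace it with a real class.
+ */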
+ brw->wm.aligned_pairs_class = -1;
/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
*/
int reg = 0;
int pairs_base_reg = 0;
int pairs_reg_count = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
- classes[i] = ra_alloc_reg_class(regs);
+ brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
/* Save this off for the aligned pair class at the end. */
if (class_sizes[i] == 2) {
pairs_base_reg = reg;
pairs_reg_count = class_reg_count;
}
for (int j = 0; j < class_reg_count; j++) {
- ra_class_add_reg(regs, classes[i], reg);
+ ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
- ra_reg_to_grf[reg] = j;
+ brw->wm.ra_reg_to_grf[reg] = j;
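/* Conflict with every size-1 register this one spans;
* ra_add_transitive_reg_conflict() also picks up each base register's
* existing conflicts, so overlapping registers of other classes
* conflict as well.
*/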
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
- ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
}
reg++;
}
}

/* Add a special class for aligned pairs, which we'll put delta_x/y
* in on gen5 so that we can do PLN.
*/
if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
- aligned_pairs_class = ra_alloc_reg_class(regs);
+ brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
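+ /* Only pairs starting at an even GRF offset go in this class, so
+ * that PLN's delta_x/y operand lands in an aligned register pair.
+ */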
for (int i = 0; i < pairs_reg_count; i++) {
- if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
- ra_class_add_reg(regs, aligned_pairs_class,
+ if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+ ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
pairs_base_reg + i);
}
}
class_count++;
}
- ra_set_finalize(regs);
+ ra_set_finalize(brw->wm.regs);
+}
+
+bool
+fs_visitor::assign_regs()
+{
+ /* Most of this allocation was written for a reg_width of 1
+ * (dispatch_width == 8). In extending to 16-wide, the code was
+ * left in place and it was converted to have the hardware
+ * registers it's allocating be contiguous physical pairs of regs
+ * for reg_width == 2.
+ */
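+ /* For example, with reg_width == 2 an allocator register at GRF
+ * offset j occupies physical GRFs first_assigned_grf + 2 * j and
+ * first_assigned_grf + 2 * j + 1.
+ */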
+ int reg_width = c->dispatch_width / 8;
+ int hw_reg_mapping[this->virtual_grf_next];
+ int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+ int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
+ int class_sizes[base_reg_count];
+ int class_count = 0;
+
+ calculate_live_intervals();
+
+ /* Set up the register classes.
+ *
+ * The base registers store a scalar value. For texture samples,
+ * we get virtual GRFs composed of 4 contiguous hw registers. For
+ * structures and arrays, we store them as contiguous larger things
+ * than that, though we should be able to do better most of the
+ * time.
+ */
+ class_sizes[class_count++] = 1;
+ if (brw->has_pln && intel->gen < 6) {
+ /* Always set up the (unaligned) pairs for gen5, so we can find
+ * them for making the aligned pair class.
+ */
+ class_sizes[class_count++] = 2;
+ }
+ for (int r = 0; r < this->virtual_grf_next; r++) {
+ int i;
+
+ for (i = 0; i < class_count; i++) {
+ if (class_sizes[i] == this->virtual_grf_sizes[r])
+ break;
+ }
+ if (i == class_count) {
+ if (this->virtual_grf_sizes[r] >= base_reg_count) {
+ fail("Object too large to register allocate.\n");
+ }
+
+ class_sizes[class_count++] = this->virtual_grf_sizes[r];
+ }
+ }
+
+ brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+ reg_width, base_reg_count);
- struct ra_graph *g = ra_alloc_interference_graph(regs,
+ struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
this->virtual_grf_next);
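/* Give each node the class whose size matches its virtual GRF; on
* gen5, delta_x instead gets the aligned pairs class so PLN works.
*/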
for (int i = 0; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
- if (aligned_pairs_class >= 0 &&
+ if (brw->wm.aligned_pairs_class >= 0 &&
this->delta_x.reg == i) {
- ra_set_node_class(g, i, aligned_pairs_class);
+ ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
} else {
- ra_set_node_class(g, i, classes[c]);
+ ra_set_node_class(g, i, brw->wm.classes[c]);
}
break;
}
ralloc_free(g);
- ralloc_free(regs);
return false;
}
for (int i = 0; i < this->virtual_grf_next; i++) {
int reg = ra_get_node_reg(g, i);
- hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
+ hw_reg_mapping[i] = (first_assigned_grf +
+ brw->wm.ra_reg_to_grf[reg] * reg_width);
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
}
ralloc_free(g);
- ralloc_free(regs);
return true;
}