+
+ for (int i = 0; i < class_count; i++) {
+ /* These are a little counter-intuitive because the pair registers
+ * are required to be aligned while the register they are
+ * potentially interfering with is not. In the case where the
+ * size is even, the worst-case is that the register is
+ * odd-aligned. In the odd-size case, it doesn't matter.
+ */
+ q_values[class_count][i] = class_sizes[i] / 2 + 1;
+ q_values[i][class_count] = class_sizes[i] + 1;
+ }
+ q_values[class_count][class_count] = 1;
+ }
+
+ ra_set_finalize(regs, q_values);
+
+ ralloc_free(q_values);
+
+ screen->wm_reg_sets[index].regs = regs;
+ for (unsigned i = 0; i < ARRAY_SIZE(screen->wm_reg_sets[index].classes); i++)
+ screen->wm_reg_sets[index].classes[i] = -1;
+ for (int i = 0; i < class_count; i++)
+ screen->wm_reg_sets[index].classes[class_sizes[i] - 1] = classes[i];
+ screen->wm_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
+ screen->wm_reg_sets[index].aligned_pairs_class = aligned_pairs_class;
+}
+
+/**
+ * Builds the fragment shader register sets for the given screen by calling
+ * brw_alloc_reg_set() once with 1 and once with 2, in that order.
+ *
+ * NOTE(review): the second argument is presumably the dispatch register width
+ * (1 for SIMD8, 2 for SIMD16) -- confirm against brw_alloc_reg_set().
+ */
+void
+brw_fs_alloc_reg_sets(struct intel_screen *screen)
+{
+   for (int set_arg = 1; set_arg <= 2; set_arg++)
+      brw_alloc_reg_set(screen, set_arg);
+}
+
+/**
+ * Returns the end_ip of the block whose final instruction is the WHILE that
+ * closes the loop opened at \p block.
+ *
+ * \param block  Block to start from; the caller passes the block in which it
+ *               found the DO instruction.
+ *
+ * Nested loops are handled by tracking the nesting level: each following
+ * block that starts with a DO raises it, each block that ends with a WHILE
+ * lowers it, and the search stops when the level drops back to zero.
+ */
+static int
+count_to_loop_end(const bblock_t *block)
+{
+   /* The starting block may itself already end with a WHILE. */
+   if (block->end()->opcode == BRW_OPCODE_WHILE)
+      return block->end_ip;
+
+   /* The caller's DO accounts for the initial nesting level of 1.  Step past
+    * the starting block so that DO is not counted a second time.
+    */
+   int nesting = 1;
+   block = block->next();
+
+   while (nesting > 0) {
+      if (block->start()->opcode == BRW_OPCODE_DO)
+         nesting++;
+
+      /* Only a WHILE lowers the level; the one that brings it to zero is the
+       * end of the loop we were asked about.
+       */
+      if (block->end()->opcode == BRW_OPCODE_WHILE && --nesting == 0)
+         return block->end_ip;
+
+      block = block->next();
+   }
+
+   unreachable("not reached");
+}
+
+/**
+ * Sets up interference between thread payload registers and the virtual GRFs
+ * to be allocated for program temporaries.
+ *
+ * We want to be able to reallocate the payload for our virtual GRFs, notably
+ * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
+ * our 128 registers.
+ *
+ * The layout of the payload registers is:
+ *
+ * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
+ * payload.num_regs..payload.num_regs+curb_read_length-1: uniform data
+ * payload.num_regs+curb_read_length..first_non_payload_grf-1: setup coefficients.
+ *
+ * And we have payload_node_count nodes covering these registers in order
+ * (note that in SIMD16, a node is two registers).
+ */
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+ int payload_node_count,
+ int first_payload_node)
+{
+ int loop_depth = 0;
+ int loop_end_ip = 0;
+
+ int payload_last_use_ip[payload_node_count];
+ memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip));
+ int ip = 0;
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ switch (inst->opcode) {
+ case BRW_OPCODE_DO:
+ loop_depth++;
+
+ /* Since payload regs are deffed only at the start of the shader
+ * execution, any uses of the payload within a loop mean the live
+ * interval extends to the end of the outermost loop. Find the ip of
+ * the end now.
+ */
+ if (loop_depth == 1)
+ loop_end_ip = count_to_loop_end(block);
+ break;
+ case BRW_OPCODE_WHILE:
+ loop_depth--;
+ break;
+ default:
+ break;
+ }
+
+ int use_ip;
+ if (loop_depth > 0)
+ use_ip = loop_end_ip;
+ else
+ use_ip = ip;
+
+ /* Note that UNIFORM args have been turned into FIXED_HW_REG by
+ * assign_curbe_setup(), and interpolation uses fixed hardware regs from
+ * the start (see interp_reg()).
+ */
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ int node_nr = inst->src[i].fixed_hw_reg.nr;
+ if (node_nr >= payload_node_count)
+ continue;
+
+ payload_last_use_ip[node_nr] = use_ip;
+ }
+ }
+
+ /* Special case instructions which have extra implied registers used. */
+ switch (inst->opcode) {
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case FS_OPCODE_FB_WRITE:
+ /* We could omit this for the !inst->header_present case, except that
+ * the simulator apparently incorrectly reads from g0/g1 instead of
+ * sideband. It also really freaks out driver developers to see g0
+ * used in unusual places, so just always reserve it.
+ */
+ payload_last_use_ip[0] = use_ip;
+ payload_last_use_ip[1] = use_ip;
+ break;
+
+ case FS_OPCODE_LINTERP:
+ /* On gen6+ in SIMD16, there are 4 adjacent registers used by
+ * PLN's sourcing of the deltas, while we list only the first one
+ * in the arguments. Pre-gen6, the deltas are computed in normal
+ * VGRFs.
+ */
+ if (brw->gen >= 6) {
+ int delta_x_arg = 0;
+ if (inst->src[delta_x_arg].file == HW_REG &&
+ inst->src[delta_x_arg].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE) {
+ for (int i = 1; i < 4; ++i) {
+ int node = inst->src[delta_x_arg].fixed_hw_reg.nr + i;
+ assert(node < payload_node_count);
+ payload_last_use_ip[node] = use_ip;
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ ip++;