+ install_registers_instr(ctx, l, ins);
+}
+
+/* If register allocation fails, find the best spill node */
+
+static signed
+mir_choose_spill_node(
+ compiler_context *ctx,
+ struct lcra_state *l)
+{
+ /* We can't spill a previously spilled value or an unspill */
+
+ mir_foreach_instr_global(ctx, ins) {
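+ /* A negative spill cost marks the node as off-limits to
+ * LCRA's spill node selection */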
+ if (ins->no_spill & (1 << l->spill_class)) {
+ lcra_set_node_spill_cost(l, ins->dest, -1);
+
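+ /* For special classes, the sources of a no-spill
+ * instruction are the unspills themselves, so protect
+ * those too */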
+ if (l->spill_class != REG_CLASS_WORK) {
+ mir_foreach_src(ins, s)
+ lcra_set_node_spill_cost(l, ins->src[s], -1);
+ }
+ }
+ }
+
+ return lcra_get_best_spill_node(l);
+}
+
+/* Once we've chosen a spill node, spill it */
+
+static void
+mir_spill_register(
+ compiler_context *ctx,
+ unsigned spill_node,
+ unsigned spill_class,
+ unsigned *spill_count)
+{
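+ /* Assumption: blend shaders have no TLS allotment of their
+ * own, so work registers have nowhere to spill to */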
+ if (spill_class == REG_CLASS_WORK && ctx->is_blend)
+ unreachable("Blend shader spilling is currently unimplemented");
+
+ unsigned spill_index = ctx->temp_count;
+
+ /* We have a spill node, so check the class. Work registers
+ * legitimately spill to TLS, but special registers just spill to work
+ * registers */
+
+ bool is_special = spill_class != REG_CLASS_WORK;
+ bool is_special_w = spill_class == REG_CLASS_TEXW;
+
+ /* Allocate a TLS slot if we're spilling to memory; special
+ * spills stay in registers and don't consume one */
+ unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
+
+ /* For TLS, replace all stores to the spilled node. For
+ * special reads, just keep as-is; the class will be demoted
+ * implicitly. For special writes, spill to a work register */
+
+ if (!is_special || is_special_w) {
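+ /* For TEXW, the "spill slot" is a fresh work-register node
+ * copied into by the move below, rather than a TLS slot */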
+ if (is_special_w)
+ spill_slot = spill_index++;
+
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
+ mir_foreach_instr_in_block_safe(block, ins) {
+ if (ins->dest != spill_node) continue;
+
+ midgard_instruction st;
+
+ if (is_special_w) {
+ st = v_mov(spill_node, spill_slot);
+ st.no_spill |= (1 << spill_class);
+ } else {
+ ins->dest = spill_index++;
+ ins->no_spill |= (1 << spill_class);
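+ /* The third argument selects a store: write the
+ * renamed destination out to the scratch slot */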
+ st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask);
+ }
+
+ /* Hint: don't rewrite this node */
+ st.hint = true;
+
+ mir_insert_instruction_after_scheduled(ctx, block, ins, st);
+
+ if (!is_special)
+ ctx->spills++;
+ }
+ }
+ }
+
+ /* For special reads, figure out how many bytes we need */
+ unsigned read_bytemask = 0;
+
+ mir_foreach_instr_global_safe(ctx, ins) {
+ read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
+ }
+
+ /* Insert a load from TLS before each use of the node,
+ * rewriting to a fresh spilled index to break up the
+ * live range. Or, for special registers, insert a move.
+ * Ironically the latter *increases* register pressure,
+ * but the two uses of the spilling mechanism are somewhat
+ * orthogonal: special spilling uses work registers to back
+ * special registers, whereas TLS spilling uses memory to
+ * back work registers */
+
+ mir_foreach_block(ctx, _block) {
+ midgard_block *block = (midgard_block *) _block;
+ mir_foreach_instr_in_block(block, ins) {
+ /* We can't rewrite the moves used to spill in the
+ * first place. These moves are hinted. */
+ if (ins->hint) continue;
+
+ /* If we don't use the spilled value, nothing to do */
+ if (!mir_has_arg(ins, spill_node)) continue;
+
+ unsigned index = 0;
+
+ if (!is_special_w) {
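+ /* Give each use its own index so the live range is
+ * split at every fill */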
+ index = ++spill_index;
+
+ midgard_instruction *before = ins;
+ midgard_instruction st;
+
+ if (is_special) {
+ /* Move */
+ st = v_mov(spill_node, index);
+ st.no_spill |= (1 << spill_class);
+ } else {
+ /* TLS load */
+ st = v_load_store_scratch(index, spill_slot, false, 0xF);
+ }
+
+ /* Mask the load based on the component count
+ * actually needed to prevent RA loops */
+
+ st.mask = mir_from_bytemask(mir_round_bytemask_up(
+ read_bytemask, 32), 32);
+
+ mir_insert_instruction_before_scheduled(ctx, block, before, st);
+ } else {
+ /* Special writes already have their move spilled in */
+ index = spill_slot;
+ }
+
+ /* Rewrite the use to point at the new index */
+ mir_rewrite_index_src_single(ins, spill_node, index);
+
+ if (!is_special)
+ ctx->fills++;
+ }
+ }
+
+ /* Reset hints */
+
+ mir_foreach_instr_global(ctx, ins) {
+ ins->hint = false;
+ }
+}
+
+/* Run register allocation in a loop, spilling until we succeed */
+
+void
+mir_ra(compiler_context *ctx)
+{
+ struct lcra_state *l = NULL;
+ bool spilled = false;
+ int iter_count = 1000; /* max iterations */
+
+ /* Number of 128-bit slots in memory we've spilled into */
+ unsigned spill_count = 0;
+
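+ /* Promote eligible values to pipeline registers before
+ * allocating; this is assumed to relieve pressure on the
+ * work-register class */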
+ mir_create_pipeline_registers(ctx);
+
+ do {
+ if (spilled) {
+ signed spill_node = mir_choose_spill_node(ctx, l);
+
+ if (spill_node == -1) {
+ fprintf(stderr, "ERROR: Failed to choose spill node\n");
+ return;
+ }
+
+ mir_spill_register(ctx, spill_node, l->spill_class, &spill_count);
+ }
+
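+ /* Compact the node indices and drop stale liveness
+ * before the next allocation attempt */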
+ mir_squeeze_index(ctx);
+ mir_invalidate_liveness(ctx);
+
+ if (l) {
+ lcra_free(l);
+ l = NULL;
+ }
+
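+ /* Try to allocate; on failure, spilled is set and we
+ * loop around to spill another node */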
+ l = allocate_registers(ctx, &spilled);
+ } while(spilled && ((iter_count--) > 0));
+
+ if (spilled) {
+ fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n");
+ assert(0);
+ }
+
+ /* Report spilling information. spill_count is in 128-bit slots (vec4 x
+ * fp32), but tls_size is in bytes, so multiply by 16 */
+
+ ctx->tls_size = spill_count * 16;
+
+ install_registers(ctx, l);
+
+ lcra_free(l);
+}