- int virtual_count = work_count * WORK_STRIDE;
-
- /* First, initialize the RA */
- struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true);
-
- int work_vec4 = ra_alloc_reg_class(regs);
- int work_vec3 = ra_alloc_reg_class(regs);
- int work_vec2 = ra_alloc_reg_class(regs);
- int work_vec1 = ra_alloc_reg_class(regs);
-
- classes[0] = work_vec1;
- classes[1] = work_vec2;
- classes[2] = work_vec3;
- classes[3] = work_vec4;
-
- /* Add the full set of work registers */
- for (unsigned i = 0; i < work_count; ++i) {
- int base = WORK_STRIDE * i;
-
- /* Build a full set of subdivisions */
- ra_class_add_reg(regs, work_vec4, base);
- ra_class_add_reg(regs, work_vec3, base + 1);
- ra_class_add_reg(regs, work_vec3, base + 2);
- ra_class_add_reg(regs, work_vec2, base + 3);
- ra_class_add_reg(regs, work_vec2, base + 4);
- ra_class_add_reg(regs, work_vec2, base + 5);
- ra_class_add_reg(regs, work_vec1, base + 6);
- ra_class_add_reg(regs, work_vec1, base + 7);
- ra_class_add_reg(regs, work_vec1, base + 8);
- ra_class_add_reg(regs, work_vec1, base + 9);
-
- for (unsigned a = 0; a < 10; ++a) {
- unsigned mask1 = reg_type_to_mask[a];
-
- for (unsigned b = 0; b < 10; ++b) {
- unsigned mask2 = reg_type_to_mask[b];
-
- if (mask1 & mask2)
- ra_add_reg_conflict(regs,
- base + a, base + b);
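+ /* Only track nodes in the virtual range; fixed registers (at or above
+ * SSA_FIXED_MINIMUM) are ignored */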
+ if (node < SSA_FIXED_MINIMUM)
+ BITSET_SET(bitfield, node);
+}
+
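+/* Lower nodes with conflicting classes of use (ALU, load/store, texture,
+ * branch writeout): the node itself serves as the work copy, and each
+ * colliding class of use is redirected to a fresh copy via a move */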
+void
+mir_lower_special_reads(compiler_context *ctx)
+{
+ size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD);
+
+ /* Bitfields tracking each class of use a node can have. aluw marks nodes
+ * written by either ALU or load/store */
+
+ unsigned *alur = calloc(sz, 1);
+ unsigned *aluw = calloc(sz, 1);
+ unsigned *brar = calloc(sz, 1);
+ unsigned *ldst = calloc(sz, 1);
+ unsigned *texr = calloc(sz, 1);
+ unsigned *texw = calloc(sz, 1);
+
+ /* Pass #1 is analysis, a linear scan to fill out the bitfields */
+
+ mir_foreach_instr_global(ctx, ins) {
+ switch (ins->type) {
+ case TAG_ALU_4:
+ mark_node_class(aluw, ins->dest);
+ mark_node_class(alur, ins->src[0]);
+ mark_node_class(alur, ins->src[1]);
+ mark_node_class(alur, ins->src[2]);
+
+ if (ins->compact_branch && ins->writeout)
+ mark_node_class(brar, ins->src[0]);
+
+ break;
+
+ case TAG_LOAD_STORE_4:
+ mark_node_class(aluw, ins->dest);
+ mark_node_class(ldst, ins->src[0]);
+ mark_node_class(ldst, ins->src[1]);
+ mark_node_class(ldst, ins->src[2]);
+ break;
+
+ case TAG_TEXTURE_4:
+ mark_node_class(texr, ins->src[0]);
+ mark_node_class(texr, ins->src[1]);
+ mark_node_class(texr, ins->src[2]);
+ mark_node_class(texw, ins->dest);
+ break;
+ }
+ }
+
+ /* Pass #2 is lowering now that we've analyzed all the classes.
+ * Conceptually, if an index is only marked for a single type of use,
+ * there is nothing to lower. If it is marked for different uses, we
+ * split up based on the number of types of uses. To do so, we divide
+ * into N distinct classes of use (where N>1 by definition), emit N-1
+ * moves from the index to copies of the index, and finally rewrite N-1
+ * of the types of uses to use the corresponding move */
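+ /* For example, an index read by both ALU and load/store keeps its ALU
+ * reads as-is, while each load/store read is redirected to a fresh copy
+ * fed by a move inserted just before the reading instruction */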
+
+ unsigned spill_idx = ctx->temp_count;
+
+ for (unsigned i = 0; i < ctx->temp_count; ++i) {
+ bool is_alur = BITSET_TEST(alur, i);
+ bool is_aluw = BITSET_TEST(aluw, i);
+ bool is_brar = BITSET_TEST(brar, i);
+ bool is_ldst = BITSET_TEST(ldst, i);
+ bool is_texr = BITSET_TEST(texr, i);
+ bool is_texw = BITSET_TEST(texw, i);
+
+ /* Check how many distinct classes of use there are. ALU ops
+ * (alur) can read the results of the texture pipeline (texw)
+ * but not ldst or texr. Load/store ops (ldst) cannot read
+ * anything but load/store inputs. Texture pipeline cannot read
+ * anything but texture inputs. TODO: Simplify. */
+
+ bool collision =
+ (is_alur && (is_ldst || is_texr)) ||
+ (is_ldst && (is_alur || is_texr || is_texw)) ||
+ (is_texr && (is_alur || is_ldst || is_texw)) ||
+ (is_texw && (is_aluw || is_ldst || is_texr)) ||
+ (is_brar && is_texw);
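+ /* e.g. texw plus alur alone is not a collision, since ALU ops may read
+ * texture results directly, but texw plus aluw must be split */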
+
+ if (!collision)
+ continue;
+
+ /* Use the index as-is as the work copy. Emit copies for
+ * special uses */
+
+ unsigned classes[] = { TAG_LOAD_STORE_4, TAG_TEXTURE_4, TAG_TEXTURE_4, TAG_ALU_4 };
+ bool collisions[] = { is_ldst, is_texr, is_texw && is_aluw, is_brar };
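+ /* classes[j] is the instruction tag whose uses are rewritten when
+ * collisions[j] holds; entry 2 is the lone write hazard (texture write
+ * colliding with an ALU write) */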
+
+ for (unsigned j = 0; j < ARRAY_SIZE(collisions); ++j) {
+ if (!collisions[j]) continue;
+
+ /* When the hazard is from reading, we move and rewrite
+ * sources (typical case). When it's from writing, we
+ * flip the move and rewrite destinations (obscure,
+ * only from control flow -- impossible in SSA) */
+
+ bool hazard_write = (j == 2);
+
+ unsigned idx = spill_idx++;
+
+ midgard_instruction m = hazard_write ?
+ v_mov(idx, i) : v_mov(i, idx);
+
+ /* Insert move before each read/write, depending on the
+ * hazard we're trying to account for */
+
+ mir_foreach_instr_global_safe(ctx, pre_use) {
+ if (pre_use->type != classes[j])
+ continue;
+
+ if (hazard_write) {
+ if (pre_use->dest != i)
+ continue;
+ } else {
+ if (!mir_has_arg(pre_use, i))
+ continue;
+ }
+
+ if (hazard_write) {
+ midgard_instruction *use = mir_next_op(pre_use);
+ assert(use);
+ mir_insert_instruction_before(ctx, use, m);
+ mir_rewrite_index_dst_single(pre_use, i, idx);
+ } else {
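+ /* Emit a fresh copy per read, narrowed to the components this use
+ * actually reads */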
+ idx = spill_idx++;
+ m = v_mov(i, idx);
+ m.mask = mir_from_bytemask(mir_bytemask_of_read_components(pre_use, i), midgard_reg_mode_32);
+ mir_insert_instruction_before(ctx, pre_use, m);
+ mir_rewrite_index_src_single(pre_use, i, idx);
+ }