f7feec222bd9a4551c158fe6d74ff38889cf10a9
[mesa.git] / src / panfrost / bifrost / bi_ra.c
1 /*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28 #include "panfrost/util/lcra.h"
29 #include "util/u_memory.h"
30
31 static void
32 bi_compute_interference(bi_context *ctx, struct lcra_state *l)
33 {
34 bi_compute_liveness(ctx);
35
36 bi_foreach_block(ctx, _blk) {
37 bi_block *blk = (bi_block *) _blk;
38 uint16_t *live = mem_dup(_blk->live_out, l->node_count * sizeof(uint16_t));
39
40 bi_foreach_instr_in_block_rev(blk, ins) {
41 /* Mark all registers live after the instruction as
42 * interfering with the destination */
43
44 if (ins->dest && (ins->dest < l->node_count)) {
45 for (unsigned i = 1; i < l->node_count; ++i) {
46 if (live[i])
47 lcra_add_node_interference(l, ins->dest, ins->writemask, i, live[i]);
48 }
49 }
50
51 /* Update live_in */
52 bi_liveness_ins_update(live, ins, l->node_count);
53 }
54
55 free(live);
56 }
57 }
58
/* Register classes handed to LCRA. The enumerator values index the per-class
 * arrays of struct lcra_state (class_start / class_size). This is a tagged
 * enum rather than `enum { ... } bi_reg_class;`, which would define an unused
 * global variable with external linkage instead of naming the type. */

enum bi_reg_class {
        BI_REG_CLASS_WORK = 0,
};
62
63 static struct lcra_state *
64 bi_allocate_registers(bi_context *ctx, bool *success)
65 {
66 unsigned node_count = bi_max_temp(ctx);
67
68 struct lcra_state *l =
69 lcra_alloc_equations(node_count, 1, 8, 16, 1);
70
71 l->class_start[BI_REG_CLASS_WORK] = 0;
72 l->class_size[BI_REG_CLASS_WORK] = 64 * 4; /* R0 - R63, all 32-bit */
73
74 bi_foreach_instr_global(ctx, ins) {
75 unsigned dest = ins->dest;
76
77 if (!dest || (dest >= node_count))
78 continue;
79
80 l->class[dest] = BI_REG_CLASS_WORK;
81 lcra_set_alignment(l, dest, 2); /* 2^2 = 4 */
82 lcra_restrict_range(l, dest, 4);
83 }
84
85 bi_compute_interference(ctx, l);
86
87 *success = lcra_solve(l);
88
89 return l;
90 }
91
92 static unsigned
93 bi_reg_from_index(struct lcra_state *l, unsigned index, unsigned offset)
94 {
95 /* Did we run RA for this index at all */
96 if (index >= l->node_count)
97 return index;
98
99 /* LCRA didn't bother solving this index (how lazy!) */
100 signed solution = l->solutions[index];
101 if (solution < 0)
102 return index;
103
104 solution += offset;
105
106 assert((solution & 0x3) == 0);
107 unsigned reg = solution / 4;
108 return BIR_INDEX_REGISTER | reg;
109 }
110
111 static void
112 bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
113 {
114 if (ins->src[src] >= l->node_count)
115 return;
116
117 bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0;
118 unsigned offset = 0;
119
120 if (vector) {
121 /* TODO: Do we do anything here? */
122 } else {
123 /* Use the swizzle as component select */
124 nir_alu_type T = ins->src_types[src];
125 unsigned size = nir_alu_type_get_type_size(T);
126 assert(size <= 32); /* TODO: 64-bit */
127 unsigned comps_per_reg = 32 / size;
128 unsigned components = bi_get_component_count(ins, src);
129
130 for (unsigned i = 0; i < components; ++i) {
131 unsigned off = ins->swizzle[src][i] / comps_per_reg;
132 off *= 4; /* 32-bit registers */
133
134 /* We can't cross register boundaries in a swizzle */
135 if (i == 0)
136 offset = off;
137 else
138 assert(off == offset);
139
140 ins->swizzle[src][i] %= comps_per_reg;
141 }
142 }
143
144 ins->src[src] = bi_reg_from_index(l, ins->src[src], offset);
145 }
146
147 static void
148 bi_adjust_dest_ra(bi_instruction *ins, struct lcra_state *l)
149 {
150 if (ins->dest >= l->node_count)
151 return;
152
153 bool vector = (bi_class_props[ins->type] & BI_VECTOR);
154 unsigned offset = 0;
155
156 if (!vector) {
157 /* Look at the writemask to get an offset, specifically the
158 * trailing zeros */
159
160 unsigned tz = __builtin_ctz(ins->writemask);
161
162 /* Recall writemask is one bit per byte, so tz is in eytes */
163 unsigned regs = tz / 4;
164 offset = regs * 4;
165
166 /* Adjust writemask to compensate */
167 ins->writemask >>= offset;
168 }
169
170 ins->dest = bi_reg_from_index(l, ins->dest, offset);
171
172 }
173
174 static void
175 bi_install_registers(bi_context *ctx, struct lcra_state *l)
176 {
177 bi_foreach_instr_global(ctx, ins) {
178 bi_adjust_dest_ra(ins, l);
179
180 bi_foreach_src(ins, s)
181 bi_adjust_src_ra(ins, l, s);
182 }
183 }
184
185 void
186 bi_register_allocate(bi_context *ctx)
187 {
188 struct lcra_state *l = NULL;
189 bool success = false;
190
191 do {
192 if (l) {
193 lcra_free(l);
194 l = NULL;
195 }
196
197 bi_invalidate_liveness(ctx);
198 l = bi_allocate_registers(ctx, &success);
199
200 /* TODO: Spilling */
201 assert(success);
202 } while(!success);
203
204 bi_install_registers(ctx, l);
205
206 lcra_free(l);
207 }