ir3: Plumb through bindless support
[mesa.git] / src / freedreno / ir3 / ir3_ra_regset.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/u_math.h"
28 #include "util/register_allocate.h"
29 #include "util/ralloc.h"
30 #include "util/bitset.h"
31
32 #include "ir3.h"
33 #include "ir3_compiler.h"
34 #include "ir3_ra.h"
35
36 static void
37 build_q_values(unsigned int **q_values, unsigned off,
38 const unsigned *sizes, unsigned count)
39 {
40 for (unsigned i = 0; i < count; i++) {
41 q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
42
43 /* From register_allocate.c:
44 *
45 * q(B,C) (indexed by C, B is this register class) in
46 * Runeson/Nyström paper. This is "how many registers of B could
47 * the worst choice register from C conflict with".
48 *
49 * If we just let the register allocation algorithm compute these
50 * values, is extremely expensive. However, since all of our
51 * registers are laid out, we can very easily compute them
52 * ourselves. View the register from C as fixed starting at GRF n
53 * somewhere in the middle, and the register from B as sliding back
54 * and forth. Then the first register to conflict from B is the
55 * one starting at n - class_size[B] + 1 and the last register to
56 * conflict will start at n + class_size[B] - 1. Therefore, the
57 * number of conflicts from B is class_size[B] + class_size[C] - 1.
58 *
59 * +-+-+-+-+-+-+ +-+-+-+-+-+-+
60 * B | | | | | |n| --> | | | | | | |
61 * +-+-+-+-+-+-+ +-+-+-+-+-+-+
62 * +-+-+-+-+-+
63 * C |n| | | | |
64 * +-+-+-+-+-+
65 *
66 * (Idea copied from brw_fs_reg_allocate.cpp)
67 */
68 for (unsigned j = 0; j < count; j++)
69 q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
70 }
71 }
72
73 static void
74 setup_conflicts(struct ir3_ra_reg_set *set)
75 {
76 unsigned reg;
77
78 reg = 0;
79 for (unsigned i = 0; i < class_count; i++) {
80 for (unsigned j = 0; j < CLASS_REGS(i); j++) {
81 for (unsigned br = j; br < j + class_sizes[i]; br++) {
82 ra_add_transitive_reg_conflict(set->regs, br, reg);
83 }
84
85 reg++;
86 }
87 }
88
89 for (unsigned i = 0; i < half_class_count; i++) {
90 for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
91 for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
92 ra_add_transitive_reg_conflict(set->regs,
93 br + set->first_half_reg, reg);
94 }
95
96 reg++;
97 }
98 }
99
100 for (unsigned i = 0; i < high_class_count; i++) {
101 for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
102 for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
103 ra_add_transitive_reg_conflict(set->regs,
104 br + set->first_high_reg, reg);
105 }
106
107 reg++;
108 }
109 }
110 }
111
112 /* One-time setup of RA register-set, which describes all the possible
113 * "virtual" registers and their interferences. Ie. double register
114 * occupies (and conflicts with) two single registers, and so forth.
115 * Since registers do not need to be aligned to their class size, they
116 * can conflict with other registers in the same class too. Ie:
117 *
118 * Single (base) | Double
119 * --------------+---------------
120 * R0 | D0
121 * R1 | D0 D1
122 * R2 | D1 D2
123 * R3 | D2
124 * .. and so on..
125 *
126 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
127 * really just four scalar registers. Don't let that confuse you.)
128 */
struct ir3_ra_reg_set *
ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
{
	struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
	unsigned ra_reg_count, reg, base;

	/* calculate # of regs across all classes: */
	ra_reg_count = 0;
	for (unsigned i = 0; i < class_count; i++)
		ra_reg_count += CLASS_REGS(i);
	for (unsigned i = 0; i < half_class_count; i++)
		ra_reg_count += HALF_CLASS_REGS(i);
	for (unsigned i = 0; i < high_class_count; i++)
		ra_reg_count += HIGH_CLASS_REGS(i);

	/* allocate the reg-set.. */
	set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
	/* ra_reg_to_gpr: flat ra-reg index -> gpr number within its class
	 * group; gpr_to_ra_reg: [class][gpr] -> flat ra-reg index.  The
	 * flat index space covers full, then half, then high regs, in
	 * that order.
	 */
	set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
	set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);

	/* .. and classes
	 *
	 * NOTE: 'reg' is a running counter threaded through all three
	 * group loops below; the resulting numbering must match what
	 * setup_conflicts() assumes.
	 */
	reg = 0;
	for (unsigned i = 0; i < class_count; i++) {
		set->classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));

		for (unsigned j = 0; j < CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[i][j] = reg;

			reg++;
		}
	}

	/* record where the half-precision group starts in the flat
	 * ra-reg space (used by setup_conflicts()):
	 */
	set->first_half_reg = reg;
	base = HALF_OFFSET;

	for (unsigned i = 0; i < half_class_count; i++) {
		set->half_classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[base + i] =
				ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));

		for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->half_classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[base + i][j] = reg;

			reg++;
		}
	}

	/* likewise for the high-register group: */
	set->first_high_reg = reg;
	base = HIGH_OFFSET;

	for (unsigned i = 0; i < high_class_count; i++) {
		set->high_classes[i] = ra_alloc_reg_class(set->regs);

		set->gpr_to_ra_reg[base + i] =
				ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));

		for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
			ra_class_add_reg(set->regs, set->high_classes[i], reg);

			set->ra_reg_to_gpr[reg] = j;
			set->gpr_to_ra_reg[base + i][j] = reg;

			reg++;
		}
	}

	/* starting a6xx, half precision regs conflict w/ full precision regs: */
	if (compiler->gpu_id >= 600) {
		/* each full-precision scalar reg aliases a pair of half regs
		 * (hr(2n) and hr(2n+1)), so mark that three-way conflict:
		 */
		for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
			unsigned freg  = set->gpr_to_ra_reg[0][i];
			unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
			unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];

			ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
		}

		setup_conflicts(set);

		// TODO also need to update q_values, but for now:
		/* passing NULL makes register_allocate.c compute q_values
		 * itself (slower one-time setup, but correct):
		 */
		ra_set_finalize(set->regs, NULL);
	} else {
		setup_conflicts(set);

		/* allocate and populate q_values: */
		unsigned int **q_values = ralloc_array(set, unsigned *, total_class_count);

		build_q_values(q_values, 0, class_sizes, class_count);
		build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
		build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);

		ra_set_finalize(set->regs, q_values);

		/* ra_set_finalize() copies what it needs, so the scratch
		 * table can be freed immediately:
		 */
		ralloc_free(q_values);
	}

	return set;
}
235
236 int
237 ra_size_to_class(unsigned sz, bool half, bool high)
238 {
239 if (high) {
240 for (unsigned i = 0; i < high_class_count; i++)
241 if (high_class_sizes[i] >= sz)
242 return i + HIGH_OFFSET;
243 } else if (half) {
244 for (unsigned i = 0; i < half_class_count; i++)
245 if (half_class_sizes[i] >= sz)
246 return i + HALF_OFFSET;
247 } else {
248 for (unsigned i = 0; i < class_count; i++)
249 if (class_sizes[i] >= sz)
250 return i;
251 }
252 debug_assert(0);
253 return -1;
254 }
255
256 int
257 ra_class_to_size(unsigned class, bool *half, bool *high)
258 {
259 *half = *high = false;
260
261 if (class >= HIGH_OFFSET) {
262 *high = true;
263 return high_class_sizes[class - HIGH_OFFSET];
264 } else if (class >= HALF_OFFSET) {
265 *half = true;
266 return half_class_sizes[class - HALF_OFFSET];
267 } else {
268 return class_sizes[class];
269 }
270 }