ir3: use empirical size for params as used by the shader
[mesa.git] / src / freedreno / ir3 / ir3_ra_regset.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/u_math.h"
28 #include "util/register_allocate.h"
29 #include "util/ralloc.h"
30 #include "util/bitset.h"
31
32 #include "ir3.h"
33 #include "ir3_compiler.h"
34 #include "ir3_ra.h"
35
36 static void
37 setup_conflicts(struct ir3_ra_reg_set *set)
38 {
39 unsigned reg;
40
41 reg = 0;
42 for (unsigned i = 0; i < class_count; i++) {
43 for (unsigned j = 0; j < CLASS_REGS(i); j++) {
44 for (unsigned br = j; br < j + class_sizes[i]; br++) {
45 ra_add_transitive_reg_conflict(set->regs, br, reg);
46 }
47
48 reg++;
49 }
50 }
51
52 for (unsigned i = 0; i < half_class_count; i++) {
53 for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
54 for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
55 ra_add_transitive_reg_conflict(set->regs,
56 br + set->first_half_reg, reg);
57 }
58
59 reg++;
60 }
61 }
62
63 for (unsigned i = 0; i < high_class_count; i++) {
64 for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
65 for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
66 ra_add_transitive_reg_conflict(set->regs,
67 br + set->first_high_reg, reg);
68 }
69
70 reg++;
71 }
72 }
73
74 /*
75 * Setup conflicts with registers over 0x3f for the special vreg
76 * that exists to use as interference for tex-prefetch:
77 */
78
79 for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
80 ra_add_transitive_reg_conflict(set->regs, i,
81 set->prefetch_exclude_reg);
82 }
83
84 for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
85 ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
86 set->prefetch_exclude_reg);
87 }
88 }
89
90 /* One-time setup of RA register-set, which describes all the possible
91 * "virtual" registers and their interferences. Ie. double register
92 * occupies (and conflicts with) two single registers, and so forth.
93 * Since registers do not need to be aligned to their class size, they
94 * can conflict with other registers in the same class too. Ie:
95 *
96 * Single (base) | Double
97 * --------------+---------------
98 * R0 | D0
99 * R1 | D0 D1
100 * R2 | D1 D2
101 * R3 | D2
102 * .. and so on..
103 *
104 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
105 * really just four scalar registers. Don't let that confuse you.)
106 */
107 struct ir3_ra_reg_set *
108 ir3_ra_alloc_reg_set(struct ir3_compiler *compiler, bool mergedregs)
109 {
110 struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
111 unsigned ra_reg_count, reg, base;
112
113 /* calculate # of regs across all classes: */
114 ra_reg_count = 0;
115 for (unsigned i = 0; i < class_count; i++)
116 ra_reg_count += CLASS_REGS(i);
117 for (unsigned i = 0; i < half_class_count; i++)
118 ra_reg_count += HALF_CLASS_REGS(i);
119 for (unsigned i = 0; i < high_class_count; i++)
120 ra_reg_count += HIGH_CLASS_REGS(i);
121
122 ra_reg_count += 1; /* for tex-prefetch excludes */
123
124 /* allocate the reg-set.. */
125 set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
126 set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
127 set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
128
129 /* .. and classes */
130 reg = 0;
131 for (unsigned i = 0; i < class_count; i++) {
132 set->classes[i] = ra_alloc_reg_class(set->regs);
133
134 set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
135
136 for (unsigned j = 0; j < CLASS_REGS(i); j++) {
137 ra_class_add_reg(set->regs, set->classes[i], reg);
138
139 set->ra_reg_to_gpr[reg] = j;
140 set->gpr_to_ra_reg[i][j] = reg;
141
142 reg++;
143 }
144 }
145
146 set->first_half_reg = reg;
147 base = HALF_OFFSET;
148
149 for (unsigned i = 0; i < half_class_count; i++) {
150 set->half_classes[i] = ra_alloc_reg_class(set->regs);
151
152 set->gpr_to_ra_reg[base + i] =
153 ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
154
155 for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
156 ra_class_add_reg(set->regs, set->half_classes[i], reg);
157
158 set->ra_reg_to_gpr[reg] = j;
159 set->gpr_to_ra_reg[base + i][j] = reg;
160
161 reg++;
162 }
163 }
164
165 set->first_high_reg = reg;
166 base = HIGH_OFFSET;
167
168 for (unsigned i = 0; i < high_class_count; i++) {
169 set->high_classes[i] = ra_alloc_reg_class(set->regs);
170
171 set->gpr_to_ra_reg[base + i] =
172 ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
173
174 for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
175 ra_class_add_reg(set->regs, set->high_classes[i], reg);
176
177 set->ra_reg_to_gpr[reg] = j;
178 set->gpr_to_ra_reg[base + i][j] = reg;
179
180 reg++;
181 }
182 }
183
184 /*
185 * Setup an additional class, with one vreg, to simply conflict
186 * with registers that are too high to encode tex-prefetch. This
187 * vreg is only used to setup additional conflicts so that RA
188 * knows to allocate prefetch dst regs below the limit:
189 */
190 set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
191 ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
192 set->prefetch_exclude_reg = reg++;
193
194 /*
195 * And finally setup conflicts. Starting a6xx, half precision regs
196 * conflict w/ full precision regs (when using MERGEDREGS):
197 */
198 if (mergedregs) {
199 for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
200 unsigned freg = set->gpr_to_ra_reg[0][i];
201 unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
202 unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];
203
204 ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
205 }
206 }
207
208 setup_conflicts(set);
209
210 ra_set_finalize(set->regs, NULL);
211
212 return set;
213 }
214
215 int
216 ra_size_to_class(unsigned sz, bool half, bool high)
217 {
218 if (high) {
219 for (unsigned i = 0; i < high_class_count; i++)
220 if (high_class_sizes[i] >= sz)
221 return i + HIGH_OFFSET;
222 } else if (half) {
223 for (unsigned i = 0; i < half_class_count; i++)
224 if (half_class_sizes[i] >= sz)
225 return i + HALF_OFFSET;
226 } else {
227 for (unsigned i = 0; i < class_count; i++)
228 if (class_sizes[i] >= sz)
229 return i;
230 }
231 debug_assert(0);
232 return -1;
233 }
234
235 int
236 ra_class_to_size(unsigned class, bool *half, bool *high)
237 {
238 *half = *high = false;
239
240 if (class >= HIGH_OFFSET) {
241 *high = true;
242 return high_class_sizes[class - HIGH_OFFSET];
243 } else if (class >= HALF_OFFSET) {
244 *half = true;
245 return half_class_sizes[class - HALF_OFFSET];
246 } else {
247 return class_sizes[class];
248 }
249 }