freedreno/ir3: remove RA "q-values" optimization
[mesa.git] / src / freedreno / ir3 / ir3_ra_regset.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/u_math.h"
28 #include "util/register_allocate.h"
29 #include "util/ralloc.h"
30 #include "util/bitset.h"
31
32 #include "ir3.h"
33 #include "ir3_compiler.h"
34 #include "ir3_ra.h"
35
36 static void
37 setup_conflicts(struct ir3_ra_reg_set *set)
38 {
39 unsigned reg;
40
41 reg = 0;
42 for (unsigned i = 0; i < class_count; i++) {
43 for (unsigned j = 0; j < CLASS_REGS(i); j++) {
44 for (unsigned br = j; br < j + class_sizes[i]; br++) {
45 ra_add_transitive_reg_conflict(set->regs, br, reg);
46 }
47
48 reg++;
49 }
50 }
51
52 for (unsigned i = 0; i < half_class_count; i++) {
53 for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
54 for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
55 ra_add_transitive_reg_conflict(set->regs,
56 br + set->first_half_reg, reg);
57 }
58
59 reg++;
60 }
61 }
62
63 for (unsigned i = 0; i < high_class_count; i++) {
64 for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
65 for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
66 ra_add_transitive_reg_conflict(set->regs,
67 br + set->first_high_reg, reg);
68 }
69
70 reg++;
71 }
72 }
73 }
74
75 /* One-time setup of RA register-set, which describes all the possible
76 * "virtual" registers and their interferences. Ie. double register
77 * occupies (and conflicts with) two single registers, and so forth.
78 * Since registers do not need to be aligned to their class size, they
79 * can conflict with other registers in the same class too. Ie:
80 *
81 * Single (base) | Double
82 * --------------+---------------
83 * R0 | D0
84 * R1 | D0 D1
85 * R2 | D1 D2
86 * R3 | D2
87 * .. and so on..
88 *
89 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
90 * really just four scalar registers. Don't let that confuse you.)
91 */
92 struct ir3_ra_reg_set *
93 ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
94 {
95 struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
96 unsigned ra_reg_count, reg, base;
97
98 /* calculate # of regs across all classes: */
99 ra_reg_count = 0;
100 for (unsigned i = 0; i < class_count; i++)
101 ra_reg_count += CLASS_REGS(i);
102 for (unsigned i = 0; i < half_class_count; i++)
103 ra_reg_count += HALF_CLASS_REGS(i);
104 for (unsigned i = 0; i < high_class_count; i++)
105 ra_reg_count += HIGH_CLASS_REGS(i);
106
107 /* allocate the reg-set.. */
108 set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
109 set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
110 set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
111
112 /* .. and classes */
113 reg = 0;
114 for (unsigned i = 0; i < class_count; i++) {
115 set->classes[i] = ra_alloc_reg_class(set->regs);
116
117 set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
118
119 for (unsigned j = 0; j < CLASS_REGS(i); j++) {
120 ra_class_add_reg(set->regs, set->classes[i], reg);
121
122 set->ra_reg_to_gpr[reg] = j;
123 set->gpr_to_ra_reg[i][j] = reg;
124
125 reg++;
126 }
127 }
128
129 set->first_half_reg = reg;
130 base = HALF_OFFSET;
131
132 for (unsigned i = 0; i < half_class_count; i++) {
133 set->half_classes[i] = ra_alloc_reg_class(set->regs);
134
135 set->gpr_to_ra_reg[base + i] =
136 ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
137
138 for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
139 ra_class_add_reg(set->regs, set->half_classes[i], reg);
140
141 set->ra_reg_to_gpr[reg] = j;
142 set->gpr_to_ra_reg[base + i][j] = reg;
143
144 reg++;
145 }
146 }
147
148 set->first_high_reg = reg;
149 base = HIGH_OFFSET;
150
151 for (unsigned i = 0; i < high_class_count; i++) {
152 set->high_classes[i] = ra_alloc_reg_class(set->regs);
153
154 set->gpr_to_ra_reg[base + i] =
155 ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
156
157 for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
158 ra_class_add_reg(set->regs, set->high_classes[i], reg);
159
160 set->ra_reg_to_gpr[reg] = j;
161 set->gpr_to_ra_reg[base + i][j] = reg;
162
163 reg++;
164 }
165 }
166
167 /* starting a6xx, half precision regs conflict w/ full precision regs: */
168 if (compiler->gpu_id >= 600) {
169 for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
170 unsigned freg = set->gpr_to_ra_reg[0][i];
171 unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
172 unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];
173
174 ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
175 }
176 }
177
178 setup_conflicts(set);
179
180 ra_set_finalize(set->regs, NULL);
181
182 return set;
183 }
184
185 int
186 ra_size_to_class(unsigned sz, bool half, bool high)
187 {
188 if (high) {
189 for (unsigned i = 0; i < high_class_count; i++)
190 if (high_class_sizes[i] >= sz)
191 return i + HIGH_OFFSET;
192 } else if (half) {
193 for (unsigned i = 0; i < half_class_count; i++)
194 if (half_class_sizes[i] >= sz)
195 return i + HALF_OFFSET;
196 } else {
197 for (unsigned i = 0; i < class_count; i++)
198 if (class_sizes[i] >= sz)
199 return i;
200 }
201 debug_assert(0);
202 return -1;
203 }
204
205 int
206 ra_class_to_size(unsigned class, bool *half, bool *high)
207 {
208 *half = *high = false;
209
210 if (class >= HIGH_OFFSET) {
211 *high = true;
212 return high_class_sizes[class - HIGH_OFFSET];
213 } else if (class >= HALF_OFFSET) {
214 *half = true;
215 return half_class_sizes[class - HALF_OFFSET];
216 } else {
217 return class_sizes[class];
218 }
219 }