freedreno/ir3: debug cleanup
[mesa.git] / src / gallium / drivers / freedreno / a2xx / ir2_ra.c
1 /*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "ir2_private.h"
28
29 /* if an instruction has side effects, we should never kill it */
30 static bool has_side_effects(struct ir2_instr *instr)
31 {
32 if (instr->type == IR2_CF)
33 return true;
34 else if (instr->type == IR2_FETCH)
35 return false;
36
37 switch (instr->alu.scalar_opc) {
38 case PRED_SETEs ... KILLONEs:
39 return true;
40 default:
41 break;
42 }
43
44 switch (instr->alu.vector_opc) {
45 case PRED_SETE_PUSHv ... KILLNEv:
46 return true;
47 default:
48 break;
49 }
50
51 return instr->alu.export >= 0;
52 }
53
54 /* mark an instruction as required, and all its sources recursively */
55 static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
56 {
57 struct ir2_reg *reg;
58
59 /* don't repeat work already done */
60 if (instr->need_emit)
61 return;
62
63 instr->need_emit = true;
64
65 ir2_foreach_src(src, instr) {
66 switch (src->type) {
67 case IR2_SRC_SSA:
68 set_need_emit(ctx, &ctx->instr[src->num]);
69 break;
70 case IR2_SRC_REG:
71 /* slow .. */
72 reg = get_reg_src(ctx, src);
73 ir2_foreach_instr(instr, ctx) {
74 if (!instr->is_ssa && instr->reg == reg)
75 set_need_emit(ctx, instr);
76 }
77 default:
78 break;
79 }
80 }
81 }
82
83 /* get current bit mask of allocated components for a register */
84 static unsigned reg_mask(struct ir2_context *ctx, unsigned idx)
85 {
86 return ctx->reg_state[idx/8] >> idx%8*4 & 0xf;
87 }
88
89 static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
90 {
91 idx = idx * 4 + c;
92 ctx->reg_state[idx/32] |= 1 << idx%32;
93 }
94
95 static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
96 {
97 idx = idx * 4 + c;
98 ctx->reg_state[idx/32] &= ~(1 << idx%32);
99 }
100
101 void ra_count_refs(struct ir2_context *ctx)
102 {
103 struct ir2_reg *reg;
104
105 /* mark instructions as needed
106 * need to do this because "substitutions" pass makes many movs not needed
107 */
108 ir2_foreach_instr(instr, ctx) {
109 if (has_side_effects(instr))
110 set_need_emit(ctx, instr);
111 }
112
113 /* compute ref_counts */
114 ir2_foreach_instr(instr, ctx) {
115 /* kill non-needed so they can be skipped */
116 if (!instr->need_emit) {
117 instr->type = IR2_NONE;
118 continue;
119 }
120
121 ir2_foreach_src(src, instr) {
122 if (src->type == IR2_SRC_CONST)
123 continue;
124
125 reg = get_reg_src(ctx, src);
126 for (int i = 0; i < src_ncomp(instr); i++)
127 reg->comp[swiz_get(src->swizzle, i)].ref_count++;
128 }
129 }
130 }
131
132 void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
133 bool export, uint8_t export_writemask)
134 {
135 /* for export, don't allocate anything but set component layout */
136 if (export) {
137 for (int i = 0; i < 4; i++)
138 reg->comp[i].c = i;
139 return;
140 }
141
142 unsigned idx = force_idx;
143
144 /* TODO: allocate into the same register if theres room
145 * note: the blob doesn't do it, so verify that it is indeed better
146 * also, doing it would conflict with scalar mov insertion
147 */
148
149 /* check if already allocated */
150 for (int i = 0; i < reg->ncomp; i++) {
151 if (reg->comp[i].alloc)
152 return;
153 }
154
155 if (force_idx < 0) {
156 for (idx = 0; idx < 64; idx++) {
157 if (reg_mask(ctx, idx) == 0)
158 break;
159 }
160 }
161 assert(idx != 64); /* TODO ran out of register space.. */
162
163 /* update max_reg value */
164 ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx);
165
166 unsigned mask = reg_mask(ctx, idx);
167
168 for (int i = 0; i < reg->ncomp; i++) {
169 /* don't allocate never used values */
170 if (reg->comp[i].ref_count == 0) {
171 reg->comp[i].c = 7;
172 continue;
173 }
174
175 /* TODO */
176 unsigned c = 1 ? i : (ffs(~mask) - 1);
177 mask |= 1 << c;
178 reg->comp[i].c = c;
179 reg_setmask(ctx, idx, c);
180 reg->comp[i].alloc = true;
181 }
182
183 reg->idx = idx;
184 ctx->live_regs[reg->idx] = reg;
185 }
186
187 /* reduce srcs ref_count and free if needed */
188 void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
189 {
190 struct ir2_reg *reg;
191 struct ir2_reg_component *comp;
192
193 ir2_foreach_src(src, instr) {
194 if (src->type == IR2_SRC_CONST)
195 continue;
196
197 reg = get_reg_src(ctx, src);
198 /* XXX use before write case */
199
200 for (int i = 0; i < src_ncomp(instr); i++) {
201 comp = &reg->comp[swiz_get(src->swizzle, i)];
202 if (!--comp->ref_count && reg->block_idx_free < 0) {
203 reg_freemask(ctx, reg->idx, comp->c);
204 comp->alloc = false;
205 }
206 }
207 }
208 }
209
210 /* free any regs left for a block */
211 void ra_block_free(struct ir2_context *ctx, unsigned block)
212 {
213 ir2_foreach_live_reg(reg, ctx) {
214 if (reg->block_idx_free != block)
215 continue;
216
217 for (int i = 0; i < reg->ncomp; i++) {
218 if (!reg->comp[i].alloc) /* XXX should never be true? */
219 continue;
220
221 reg_freemask(ctx, reg->idx, reg->comp[i].c);
222 reg->comp[i].alloc = false;
223 }
224 ctx->live_regs[reg->idx] = NULL;
225 }
226 }