freedreno: a2xx: add ir2 copy propagation
[mesa.git] / src / gallium / drivers / freedreno / a2xx / ir2_cp.c
1 /*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "ir2_private.h"
28
29 static bool is_mov(struct ir2_instr *instr)
30 {
31 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
32 instr->src_count == 1;
33 }
34
35 static void src_combine(struct ir2_src *src, struct ir2_src b)
36 {
37 src->num = b.num;
38 src->type = b.type;
39 src->swizzle = swiz_merge(b.swizzle, src->swizzle);
40 if (!src->abs) /* if we have abs we don't care about previous negate */
41 src->negate ^= b.negate;
42 src->abs |= b.abs;
43 }
44
45 /* cp_src: replace src regs when they refer to a mov instruction
46 * example:
47 * ALU: MAXv R7 = C7, C7
48 * ALU: MULADDv R7 = R7, R10, R0.xxxx
49 * becomes:
50 * ALU: MULADDv R7 = C7, R10, R0.xxxx
51 */
52 void cp_src(struct ir2_context *ctx)
53 {
54 struct ir2_instr *p;
55
56 ir2_foreach_instr(instr, ctx) {
57 ir2_foreach_src(src, instr) {
58 /* loop to replace recursively */
59 do {
60 if (src->type != IR2_SRC_SSA)
61 break;
62
63 p = &ctx->instr[src->num];
64 /* don't work across blocks to avoid possible issues */
65 if (p->block_idx != instr->block_idx)
66 break;
67
68 if (!is_mov(p))
69 break;
70
71 /* cant apply abs to const src, const src only for alu */
72 if (p->src[0].type == IR2_SRC_CONST &&
73 (src->abs || instr->type != IR2_ALU))
74 break;
75
76 src_combine(src, p->src[0]);
77 } while (1);
78 }
79 }
80 }
81
82 /* cp_export: replace mov to export when possible
83 * in the cp_src pass we bypass any mov instructions related
84 * to the src registers, but for exports for need something different
85 * example:
86 * ALU: MAXv R3.x___ = C9.x???, C9.x???
87 * ALU: MAXv R3._y__ = R0.?x??, C8.?x??
88 * ALU: MAXv export0 = R3.yyyx, R3.yyyx
89 * becomes:
90 * ALU: MAXv export0.___w = C9.???x, C9.???x
91 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
92 *
93 */
94 void cp_export(struct ir2_context *ctx)
95 {
96 struct ir2_instr *c[4], *ins[4];
97 struct ir2_src *src;
98 struct ir2_reg *reg;
99 unsigned ncomp;
100
101 ir2_foreach_instr(instr, ctx) {
102 if (!is_export(instr)) /* TODO */
103 continue;
104
105 if (!is_mov(instr))
106 continue;
107
108 src = &instr->src[0];
109
110 if (src->negate || src->abs) /* TODO handle these cases */
111 continue;
112
113 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
114 continue;
115
116 reg = get_reg_src(ctx, src);
117 ncomp = dst_ncomp(instr);
118
119 unsigned reswiz[4] = {};
120 unsigned num_instr = 0;
121
122 /* fill array c with pointers to instrs that write each component */
123 if (src->type == IR2_SRC_SSA) {
124 struct ir2_instr *instr = &ctx->instr[src->num];
125
126 if (instr->type != IR2_ALU)
127 continue;
128
129 for (int i = 0; i < ncomp; i++)
130 c[i] = instr;
131
132 ins[num_instr++] = instr;
133 reswiz[0] = src->swizzle;
134 } else {
135 bool ok = true;
136 unsigned write_mask = 0;
137
138 ir2_foreach_instr(instr, ctx) {
139 if (instr->is_ssa || instr->reg != reg)
140 continue;
141
142 /* set by non-ALU */
143 if (instr->type != IR2_ALU) {
144 ok = false;
145 break;
146 }
147
148 /* component written more than once */
149 if (write_mask & instr->alu.write_mask) {
150 ok = false;
151 break;
152 }
153
154 write_mask |= instr->alu.write_mask;
155
156 /* src pointers for components */
157 for (int i = 0, j = 0; i < 4; i++) {
158 unsigned k = swiz_get(src->swizzle, i);
159 if (instr->alu.write_mask & 1 << k) {
160 c[i] = instr;
161
162 /* reswiz = compressed src->swizzle */
163 unsigned x = 0;
164 for (int i = 0; i < k; i++)
165 x += !!(instr->alu.write_mask & 1 << i);
166
167 assert(src->swizzle || x == j);
168 reswiz[num_instr] |= swiz_set(x, j++);
169 }
170 }
171 ins[num_instr++] = instr;
172 }
173 if (!ok)
174 continue;
175 }
176
177 bool redirect = true;
178
179 /* must all be in same block */
180 for (int i = 0; i < ncomp; i++)
181 redirect &= (c[i]->block_idx == instr->block_idx);
182
183 /* no other instr using the value */
184 ir2_foreach_instr(p, ctx) {
185 if (p == instr)
186 continue;
187 ir2_foreach_src(src, p)
188 redirect &= reg != get_reg_src(ctx, src);
189 }
190
191 if (!redirect)
192 continue;
193
194 /* redirect the instructions writing to the register */
195 for (int i = 0; i < num_instr; i++) {
196 struct ir2_instr *p = ins[i];
197
198 p->alu.export = instr->alu.export;
199 p->alu.write_mask = 0;
200 p->is_ssa = true;
201 p->ssa.ncomp = 0;
202 memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
203
204 switch (instr->alu.vector_opc) {
205 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
206 case DOT2ADDv:
207 case DOT3v:
208 case DOT4v:
209 case CUBEv:
210 continue;
211 default:
212 break;
213 }
214 ir2_foreach_src(s, p)
215 swiz_merge_p(&s->swizzle, reswiz[i]);
216 }
217
218 for (int i = 0; i < ncomp; i++) {
219 c[i]->alu.write_mask |= (1 << i);
220 c[i]->ssa.ncomp++;
221 }
222 instr->type = IR2_NONE;
223 instr->need_emit = false;
224 }
225 }