0e16bdaa4c1ae0cbb8c9f15649b1311e12bf787e
[mesa.git] / src / gallium / drivers / freedreno / a2xx / ir2_cp.c
1 /*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "ir2_private.h"
28
29 static bool is_mov(struct ir2_instr *instr)
30 {
31 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
32 instr->src_count == 1;
33 }
34
35 static void src_combine(struct ir2_src *src, struct ir2_src b)
36 {
37 src->num = b.num;
38 src->type = b.type;
39 src->swizzle = swiz_merge(b.swizzle, src->swizzle);
40 if (!src->abs) /* if we have abs we don't care about previous negate */
41 src->negate ^= b.negate;
42 src->abs |= b.abs;
43 }
44
45 /* cp_src: replace src regs when they refer to a mov instruction
46 * example:
47 * ALU: MAXv R7 = C7, C7
48 * ALU: MULADDv R7 = R7, R10, R0.xxxx
49 * becomes:
50 * ALU: MULADDv R7 = C7, R10, R0.xxxx
51 */
52 void cp_src(struct ir2_context *ctx)
53 {
54 struct ir2_instr *p;
55
56 ir2_foreach_instr(instr, ctx) {
57 ir2_foreach_src(src, instr) {
58 /* loop to replace recursively */
59 do {
60 if (src->type != IR2_SRC_SSA)
61 break;
62
63 p = &ctx->instr[src->num];
64 /* don't work across blocks to avoid possible issues */
65 if (p->block_idx != instr->block_idx)
66 break;
67
68 if (!is_mov(p))
69 break;
70
71 if (p->alu.saturate)
72 break;
73
74 /* cant apply abs to const src, const src only for alu */
75 if (p->src[0].type == IR2_SRC_CONST &&
76 (src->abs || instr->type != IR2_ALU))
77 break;
78
79 src_combine(src, p->src[0]);
80 } while (1);
81 }
82 }
83 }
84
85 /* cp_export: replace mov to export when possible
86 * in the cp_src pass we bypass any mov instructions related
87 * to the src registers, but for exports for need something different
88 * example:
89 * ALU: MAXv R3.x___ = C9.x???, C9.x???
90 * ALU: MAXv R3._y__ = R0.?x??, C8.?x??
91 * ALU: MAXv export0 = R3.yyyx, R3.yyyx
92 * becomes:
93 * ALU: MAXv export0.___w = C9.???x, C9.???x
94 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
95 *
96 */
97 void cp_export(struct ir2_context *ctx)
98 {
99 struct ir2_instr *c[4], *ins[4];
100 struct ir2_src *src;
101 struct ir2_reg *reg;
102 unsigned ncomp;
103
104 ir2_foreach_instr(instr, ctx) {
105 if (!is_export(instr)) /* TODO */
106 continue;
107
108 if (!is_mov(instr))
109 continue;
110
111 src = &instr->src[0];
112
113 if (src->negate || src->abs) /* TODO handle these cases */
114 continue;
115
116 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
117 continue;
118
119 reg = get_reg_src(ctx, src);
120 ncomp = dst_ncomp(instr);
121
122 unsigned reswiz[4] = {};
123 unsigned num_instr = 0;
124
125 /* fill array c with pointers to instrs that write each component */
126 if (src->type == IR2_SRC_SSA) {
127 struct ir2_instr *instr = &ctx->instr[src->num];
128
129 if (instr->type != IR2_ALU)
130 continue;
131
132 for (int i = 0; i < ncomp; i++)
133 c[i] = instr;
134
135 ins[num_instr++] = instr;
136 reswiz[0] = src->swizzle;
137 } else {
138 bool ok = true;
139 unsigned write_mask = 0;
140
141 ir2_foreach_instr(instr, ctx) {
142 if (instr->is_ssa || instr->reg != reg)
143 continue;
144
145 /* set by non-ALU */
146 if (instr->type != IR2_ALU) {
147 ok = false;
148 break;
149 }
150
151 /* component written more than once */
152 if (write_mask & instr->alu.write_mask) {
153 ok = false;
154 break;
155 }
156
157 write_mask |= instr->alu.write_mask;
158
159 /* src pointers for components */
160 for (int i = 0, j = 0; i < 4; i++) {
161 unsigned k = swiz_get(src->swizzle, i);
162 if (instr->alu.write_mask & 1 << k) {
163 c[i] = instr;
164
165 /* reswiz = compressed src->swizzle */
166 unsigned x = 0;
167 for (int i = 0; i < k; i++)
168 x += !!(instr->alu.write_mask & 1 << i);
169
170 assert(src->swizzle || x == j);
171 reswiz[num_instr] |= swiz_set(x, j++);
172 }
173 }
174 ins[num_instr++] = instr;
175 }
176 if (!ok)
177 continue;
178 }
179
180 bool redirect = true;
181
182 /* must all be in same block */
183 for (int i = 0; i < ncomp; i++)
184 redirect &= (c[i]->block_idx == instr->block_idx);
185
186 /* no other instr using the value */
187 ir2_foreach_instr(p, ctx) {
188 if (p == instr)
189 continue;
190 ir2_foreach_src(src, p)
191 redirect &= reg != get_reg_src(ctx, src);
192 }
193
194 if (!redirect)
195 continue;
196
197 /* redirect the instructions writing to the register */
198 for (int i = 0; i < num_instr; i++) {
199 struct ir2_instr *p = ins[i];
200
201 p->alu.export = instr->alu.export;
202 p->alu.write_mask = 0;
203 p->is_ssa = true;
204 p->ssa.ncomp = 0;
205 memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
206 p->alu.saturate |= instr->alu.saturate;
207
208 switch (instr->alu.vector_opc) {
209 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
210 case DOT2ADDv:
211 case DOT3v:
212 case DOT4v:
213 case CUBEv:
214 continue;
215 default:
216 break;
217 }
218 ir2_foreach_src(s, p)
219 swiz_merge_p(&s->swizzle, reswiz[i]);
220 }
221
222 for (int i = 0; i < ncomp; i++) {
223 c[i]->alu.write_mask |= (1 << i);
224 c[i]->ssa.ncomp++;
225 }
226 instr->type = IR2_NONE;
227 instr->need_emit = false;
228 }
229 }