/* pan/bi: Stub out tex_compact logic
 * [mesa.git] src/panfrost/bifrost/bi_lower_combine.c */
1 /*
2 * Copyright (C) 2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "compiler.h"
25
26 /* NIR creates vectors as vecN ops, which we represent by a synthetic
27 * BI_COMBINE instruction, e.g.:
28 *
29 * v = combine x, y, z, w
30 *
31 * These combines need to be lowered by the pass in this file. Fix a given
32 * source at component c.
33 *
34 * First suppose the source is SSA. If it is also scalar, then we may rewrite
35 * the destination of the generating instruction (unique by SSA+scalar) to
36 * write to v.c, and rewrite each of its uses to swizzle out .c instead of .x
37 * (the original by scalar). If it is vector, there are two cases. If the
38 * component c is `x`, we are accessing v.x, and each of the succeeding
39 * components y, z... up to the last component of the vector are accessed
40 * sequentially, then we may perform the same rewrite. If this is not the case,
41 * rewriting would require a swizzle or writemask (TODO), so we fallback on a
42 * move.
43 *
44 * Otherwise, if the source is not SSA, we also fall back on a move. We could
45 * probably do better.
46 */
47
48 static void
49 bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
50 {
51 unsigned bits = nir_alu_type_get_type_size(parent->dest_type);
52 unsigned bytes = bits / 8;
53
54 bi_instruction move = {
55 .type = BI_MOV,
56 .dest = R,
57 .dest_type = parent->dest_type,
58 .writemask = ((1 << bytes) - 1) << (bytes * comp),
59 .src = { parent->src[comp] },
60 .src_types = { parent->dest_type },
61 .swizzle = { { parent->swizzle[comp][0] } }
62 };
63
64 bi_emit_before(ctx, parent, move);
65 }
66
67 /* Gets the instruction generating a given source. Combine lowering is
68 * accidentally O(n^2) right now because this function is O(n) instead of O(1).
69 * If this pass is slow, this cost can be avoided in favour for better
70 * bookkeeping. */
71
72 static bi_instruction *
73 bi_get_parent(bi_context *ctx, unsigned idx, unsigned mask)
74 {
75 bi_foreach_instr_global(ctx, ins) {
76 if (ins->dest == idx)
77 if ((ins->writemask & mask) == mask)
78 return ins;
79 }
80
81 return NULL;
82 }
83
84 /* Rewrites uses of an index. Again, this could be O(n) to the program but is
85 * currently O(nc) to the program and number of combines, so the pass becomes
86 * effectively O(n^2). Better bookkeeping would bring down to linear if that's
87 * an issue. */
88
89 static void
90 bi_rewrite_uses(bi_context *ctx,
91 unsigned old, unsigned oldc,
92 unsigned new, unsigned newc)
93 {
94 bi_foreach_instr_global(ctx, ins) {
95 bi_foreach_src(ins, s) {
96 if (ins->src[s] != old) continue;
97
98 for (unsigned i = 0; i < 16; ++i)
99 ins->swizzle[s][i] += (newc - oldc);
100
101 ins->src[s] = new;
102 }
103 }
104 }
105
/* Shifts the writemask of an instruction by a specified byte count,
 * rotating the sources to compensate. Returns true if successful, and
 * returns false if not (nondestructive in this case). */

static bool
bi_shift_mask(bi_instruction *ins, unsigned shift)
{
        /* No op and handles the funny cases */
        if (!shift)
                return true;

        unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
        unsigned bytes = sz / 8;

        /* If things are misaligned, we bail. Check if shift % bytes is
         * nonzero. Note bytes is a power-of-two. */
        if (shift & (bytes - 1))
                return false;

        /* Ensure there are no funny types: every live source must be the
         * same size as the destination, or the per-source component shift
         * computed below would be wrong for that source */
        bi_foreach_src(ins, s) {
                if (ins->src[s] && nir_alu_type_get_type_size(ins->src_types[s]) != sz)
                        return false;
        }

        /* Shift swizzle so old i'th component is accessed by new (i + j)'th
         * component where j is component shift */
        unsigned component_shift = shift / bytes;

        /* Sanity check to avoid memory corruption */
        if (component_shift >= sizeof(ins->swizzle[0]))
                return false;

        /* Otherwise, shift is divisible by bytes, and all relevant src types
         * are the same size as the dest type. The writemask has one bit per
         * byte, so a shift of `shift` bytes is a shift of `shift` bits. */
        ins->writemask <<= shift;

        /* Rotate each source's swizzle up by component_shift entries. The
         * first component_shift entries are left stale, but the shifted
         * writemask no longer selects them. memmove is required since the
         * ranges overlap. */
        bi_foreach_src(ins, s) {
                if (!ins->src[s]) continue;

                size_t overlap = sizeof(ins->swizzle[s]) - component_shift;
                memmove(ins->swizzle[s] + component_shift, ins->swizzle[s], overlap);
        }

        return true;
}
152
153 /* Checks if we have a nicely aligned vector prefix */
154
155 static bool
156 bi_is_aligned_vec(bi_instruction *combine, unsigned s, bi_instruction *parent,
157 unsigned *count)
158 {
159 /* We only support prefixes */
160 if (s != 0)
161 return false;
162
163 /* Is it a contiguous write? */
164 unsigned writes = util_bitcount(parent->writemask);
165 if (parent->writemask != ((1 << writes) - 1))
166 return false;
167
168 /* Okay - how many components? */
169 unsigned bytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
170 unsigned components = writes / bytes;
171
172 /* Are we contiguous like that? */
173
174 for (unsigned i = 0; i < components; ++i) {
175 if (combine->src[i] != parent->dest)
176 return false;
177
178 if (combine->swizzle[i][0] != i)
179 return false;
180 }
181
182 /* We're good to go */
183 *count = components;
184 return true;
185 }
186
187 /* Tries to lower a given source of a combine to an appropriate rewrite,
188 * returning true if successful, and false with no changes otherwise. */
189
190 static bool
191 bi_lower_combine_src(bi_context *ctx, bi_instruction *ins, unsigned s, unsigned R,
192 unsigned *vec_count)
193 {
194 unsigned src = ins->src[s];
195
196 /* We currently only handle SSA */
197
198 if (!src) return false;
199 if (src & (BIR_SPECIAL | BIR_IS_REG)) return false;
200
201 /* We are SSA. Lookup the generating instruction. */
202 unsigned bytes = nir_alu_type_get_type_size(ins->dest_type) / 8;
203
204 bi_instruction *parent = bi_get_parent(ctx, src,
205 0xF << (ins->swizzle[s][0] * bytes));
206
207 if (!parent) return false;
208
209 /* We have a parent instuction, sanity check the typesize */
210 unsigned pbytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
211 if (pbytes != bytes) return false;
212
213 bool scalar = (parent->writemask == ((1 << bytes) - 1));
214 if (!(scalar || bi_is_aligned_vec(ins, s, parent, vec_count))) return false;
215
216 if (!bi_shift_mask(parent, bytes * s)) return false;
217 bi_rewrite_uses(ctx, parent->dest, 0, R, s);
218 parent->dest = R;
219 return true;
220 }
221
222 void
223 bi_lower_combine(bi_context *ctx, bi_block *block)
224 {
225 bi_foreach_instr_in_block_safe(block, ins) {
226 if (ins->type != BI_COMBINE) continue;
227
228 /* The vector itself can't be shifted */
229 assert(ins->writemask & 0x1);
230
231 unsigned R = bi_make_temp_reg(ctx);
232
233 bi_foreach_src(ins, s) {
234 /* We're done early for vec2/3 */
235 if (!ins->src[s])
236 continue;
237
238 unsigned vec_count = 0;
239
240 if (bi_lower_combine_src(ctx, ins, s, R, &vec_count)) {
241 /* Skip vectored sources */
242 if (vec_count)
243 s += (vec_count - 1);
244 } else {
245 bi_insert_combine_mov(ctx, ins, s, R);
246 }
247 }
248
249
250 bi_rewrite_uses(ctx, ins->dest, 0, R, 0);
251 bi_remove_instruction(ins);
252 }
253 }