etnaviv: drop emit macro
[mesa.git] src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
1 /*
2 * Copyright (c) 2019 Zodiac Inflight Innovations
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "etnaviv_asm.h"
28 #include "etnaviv_context.h"
29 #include "etnaviv_compiler_nir.h"
30
31 #include "compiler/nir/nir.h"
32 #include "compiler/nir/nir_builder.h"
33
34 #define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
35 #define SRC_DISABLE ((hw_src){})
36 #define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
37 #define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})
38
39 typedef struct etna_inst_dst hw_dst;
40 typedef struct etna_inst_src hw_src;
41
42 static inline hw_src
43 src_swizzle(hw_src src, unsigned swizzle)
44 {
45 if (src.rgroup != INST_RGROUP_IMMEDIATE)
46 src.swiz = inst_swiz_compose(src.swiz, swizzle);
47
48 return src;
49 }
50
51 /* constants are represented as 64-bit ints:
52 * the low 32 bits hold the value, the high 32 bits the type (imm, uniform, etc)
53 */
54
55 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
56 #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
57 #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
58 #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
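/* For illustration: CONST(0x3f800000) packs ETNA_IMMEDIATE_CONSTANT into the
 * high 32 bits and the raw bit pattern of 1.0f into the low 32 bits, while
 * UNIFORM(7) marks the value as a reference to uniform component 7. The type
 * half is what const_src() and emit_shader() later inspect to decide how to
 * materialize the value.
 */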
59
60 static int
61 const_add(uint64_t *c, uint64_t value)
62 {
63 for (unsigned i = 0; i < 4; i++) {
64 if (c[i] == value || !c[i]) {
65 c[i] = value;
66 return i;
67 }
68 }
69 return -1;
70 }
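/* Example (values are hypothetical): starting from an empty vec4 slot
 * {0, 0, 0, 0}, const_add(c, A) returns 0, a following const_add(c, B)
 * returns 1, and a repeated const_add(c, A) returns 0 again because equal
 * values are deduplicated; once all four components hold distinct non-zero
 * values the function returns -1 and the caller moves on to the next slot.
 */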
71
72 static hw_src
73 const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
74 {
75 /* use inline immediates if possible */
76 if (c->specs->halti >= 2 && num_components == 1 &&
77 value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
78 uint32_t bits = value[0].u32;
79
80 /* "float" - shifted by 12 */
81 if ((bits & 0xfff) == 0)
82 return etna_immediate_src(0, bits >> 12);
83
84 /* "unsigned" - raw 20 bit value */
85 if (bits < (1 << 20))
86 return etna_immediate_src(2, bits);
87
88 /* "signed" - negative value that fits in 20 bits including the sign bit */
89 if (bits >= 0xfff80000)
90 return etna_immediate_src(1, bits);
91 }
92
93 unsigned i;
94 int swiz = -1;
95 for (i = 0; swiz < 0; i++) {
96 uint64_t *a = &c->consts[i*4];
97 uint64_t save[4];
98 memcpy(save, a, sizeof(save));
99 swiz = 0;
100 for (unsigned j = 0; j < num_components; j++) {
101 int c = const_add(a, value[j].u64);
102 if (c < 0) {
103 memcpy(a, save, sizeof(save));
104 swiz = -1;
105 break;
106 }
107 swiz |= c << j * 2;
108 }
109 }
110
111 assert(i <= ETNA_MAX_IMM / 4);
112 c->const_count = MAX2(c->const_count, i);
113
114 return SRC_CONST(i - 1, swiz);
115 }
116
117 /* Swizzles and write masks can be used to layer virtual non-interfering
118 * registers on top of the real VEC4 registers. For example, the virtual
119 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
120 * physical VEC4 base register do not interfere.
121 */
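/* For example (illustrative): on the same physical base register, the
 * REG_TYPE_VIRT_VEC3_XYZ type (writemask 0x7) and the REG_TYPE_VIRT_SCALAR_W
 * type (writemask 0x8) touch disjoint components, so ra_assign() adds no
 * conflict between them and both can be live at the same time.
 */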
122 enum reg_class {
123 REG_CLASS_VIRT_SCALAR,
124 REG_CLASS_VIRT_VEC2,
125 REG_CLASS_VIRT_VEC3,
126 REG_CLASS_VEC4,
127 /* special vec2 class for fast transcendentals, limited to XY or ZW */
128 REG_CLASS_VIRT_VEC2T,
129 /* special classes for LOAD - contiguous components */
130 REG_CLASS_VIRT_VEC2C,
131 REG_CLASS_VIRT_VEC3C,
132 NUM_REG_CLASSES,
133 };
134
135 enum reg_type {
136 REG_TYPE_VEC4,
137 REG_TYPE_VIRT_VEC3_XYZ,
138 REG_TYPE_VIRT_VEC3_XYW,
139 REG_TYPE_VIRT_VEC3_XZW,
140 REG_TYPE_VIRT_VEC3_YZW,
141 REG_TYPE_VIRT_VEC2_XY,
142 REG_TYPE_VIRT_VEC2_XZ,
143 REG_TYPE_VIRT_VEC2_XW,
144 REG_TYPE_VIRT_VEC2_YZ,
145 REG_TYPE_VIRT_VEC2_YW,
146 REG_TYPE_VIRT_VEC2_ZW,
147 REG_TYPE_VIRT_SCALAR_X,
148 REG_TYPE_VIRT_SCALAR_Y,
149 REG_TYPE_VIRT_SCALAR_Z,
150 REG_TYPE_VIRT_SCALAR_W,
151 REG_TYPE_VIRT_VEC2T_XY,
152 REG_TYPE_VIRT_VEC2T_ZW,
153 REG_TYPE_VIRT_VEC2C_XY,
154 REG_TYPE_VIRT_VEC2C_YZ,
155 REG_TYPE_VIRT_VEC2C_ZW,
156 REG_TYPE_VIRT_VEC3C_XYZ,
157 REG_TYPE_VIRT_VEC3C_YZW,
158 NUM_REG_TYPES,
159 };
160
161 /* writemask when used as dest */
162 static const uint8_t
163 reg_writemask[NUM_REG_TYPES] = {
164 [REG_TYPE_VEC4] = 0xf,
165 [REG_TYPE_VIRT_SCALAR_X] = 0x1,
166 [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
167 [REG_TYPE_VIRT_VEC2_XY] = 0x3,
168 [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
169 [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
170 [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
171 [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
172 [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
173 [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
174 [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
175 [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
176 [REG_TYPE_VIRT_SCALAR_W] = 0x8,
177 [REG_TYPE_VIRT_VEC2_XW] = 0x9,
178 [REG_TYPE_VIRT_VEC2_YW] = 0xa,
179 [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
180 [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
181 [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
182 [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
183 [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
184 [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
185 [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
186 };
187
188 /* how to swizzle when used as a src */
189 static const uint8_t
190 reg_swiz[NUM_REG_TYPES] = {
191 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
192 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
193 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
194 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
195 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
196 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
197 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
198 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
199 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
200 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
201 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
202 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
203 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
204 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
205 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
206 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
207 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
208 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
209 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
210 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
211 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
212 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
213 };
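/* Reading a virtual register as a source applies the swizzle above; e.g. a
 * value allocated to REG_TYPE_VIRT_VEC2_ZW lives in the z/w components of its
 * base register, so sources read it through SWIZZLE(Z, W, Z, W) to present
 * the data in the first two channels of the consuming instruction.
 */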
214
215 /* how to swizzle when used as a dest */
216 static const uint8_t
217 reg_dst_swiz[NUM_REG_TYPES] = {
218 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
219 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
220 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
221 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
222 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
223 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
224 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
225 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
226 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
227 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
228 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
229 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
230 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
231 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
232 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
233 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
234 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
235 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
236 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
237 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
238 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
239 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
240 };
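/* Writing works the other way around; e.g. for REG_TYPE_VIRT_VEC2_ZW the
 * instruction's first two result components have to land in z and w, so the
 * destination swizzle SWIZZLE(X, X, X, Y) combined with writemask 0xc routes
 * component 0 to z and component 1 to w (x and y are masked off).
 */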
241
242 static inline int reg_get_type(int virt_reg)
243 {
244 return virt_reg % NUM_REG_TYPES;
245 }
246
247 static inline int reg_get_base(struct etna_compile *c, int virt_reg)
248 {
249 /* offset by 1 to avoid reserved position register */
250 if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
251 return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
252 return virt_reg / NUM_REG_TYPES;
253 }
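/* A virtual register number is encoded as base * NUM_REG_TYPES + type, so
 * e.g. virt_reg = 2 * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_W means "the w
 * component of temp r2" (r3 in fragment shaders because of the +1 offset
 * above).
 */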
254
255 /* use "r63.z" for the depth reg; reg_get_base will wrap it around to r0.z
256 * (fs registers are offset by 1 to avoid reserving r0)
257 */
258 #define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
259
260 static inline int reg_get_class(int virt_reg)
261 {
262 switch (reg_get_type(virt_reg)) {
263 case REG_TYPE_VEC4:
264 return REG_CLASS_VEC4;
265 case REG_TYPE_VIRT_VEC3_XYZ:
266 case REG_TYPE_VIRT_VEC3_XYW:
267 case REG_TYPE_VIRT_VEC3_XZW:
268 case REG_TYPE_VIRT_VEC3_YZW:
269 return REG_CLASS_VIRT_VEC3;
270 case REG_TYPE_VIRT_VEC2_XY:
271 case REG_TYPE_VIRT_VEC2_XZ:
272 case REG_TYPE_VIRT_VEC2_XW:
273 case REG_TYPE_VIRT_VEC2_YZ:
274 case REG_TYPE_VIRT_VEC2_YW:
275 case REG_TYPE_VIRT_VEC2_ZW:
276 return REG_CLASS_VIRT_VEC2;
277 case REG_TYPE_VIRT_SCALAR_X:
278 case REG_TYPE_VIRT_SCALAR_Y:
279 case REG_TYPE_VIRT_SCALAR_Z:
280 case REG_TYPE_VIRT_SCALAR_W:
281 return REG_CLASS_VIRT_SCALAR;
282 case REG_TYPE_VIRT_VEC2T_XY:
283 case REG_TYPE_VIRT_VEC2T_ZW:
284 return REG_CLASS_VIRT_VEC2T;
285 case REG_TYPE_VIRT_VEC2C_XY:
286 case REG_TYPE_VIRT_VEC2C_YZ:
287 case REG_TYPE_VIRT_VEC2C_ZW:
288 return REG_CLASS_VIRT_VEC2C;
289 case REG_TYPE_VIRT_VEC3C_XYZ:
290 case REG_TYPE_VIRT_VEC3C_YZW:
291 return REG_CLASS_VIRT_VEC3C;
292 }
293
294 assert(false);
295 return 0;
296 }
297
298 /* nir_src to allocated register */
299 static hw_src
300 ra_src(struct etna_compile *c, nir_src *src)
301 {
302 unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
303 return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
304 }
305
306 static hw_src
307 get_src(struct etna_compile *c, nir_src *src)
308 {
309 if (!src->is_ssa)
310 return ra_src(c, src);
311
312 nir_instr *instr = src->ssa->parent_instr;
313
314 if (instr->pass_flags & BYPASS_SRC) {
315 assert(instr->type == nir_instr_type_alu);
316 nir_alu_instr *alu = nir_instr_as_alu(instr);
317 assert(alu->op == nir_op_mov);
318 return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
319 }
320
321 switch (instr->type) {
322 case nir_instr_type_load_const:
323 return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
324 case nir_instr_type_intrinsic: {
325 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
326 switch (intr->intrinsic) {
327 case nir_intrinsic_load_input:
328 case nir_intrinsic_load_instance_id:
329 case nir_intrinsic_load_uniform:
330 case nir_intrinsic_load_ubo:
331 return ra_src(c, src);
332 case nir_intrinsic_load_front_face:
333 return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
334 case nir_intrinsic_load_frag_coord:
335 return SRC_REG(0, INST_SWIZ_IDENTITY);
336 default:
337 compile_error(c, "Unhandled NIR intrinsic type: %s\n",
338 nir_intrinsic_infos[intr->intrinsic].name);
339 break;
340 }
341 } break;
342 case nir_instr_type_alu:
343 case nir_instr_type_tex:
344 return ra_src(c, src);
345 case nir_instr_type_ssa_undef: {
346 /* return zero to deal with broken Blur demo */
347 nir_const_value value = CONST(0);
348 return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
349 }
350 default:
351 compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
352 break;
353 }
354
355 return SRC_DISABLE;
356 }
357
358 static bool
359 vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
360 {
361 for (unsigned i = 0; i < 4; i++) {
362 if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
363 continue;
364
365 if (vec->src[i].swizzle[0] != i)
366 return true;
367 }
368
369 /* don't deal with possible bypassed vec/mov chain */
370 nir_foreach_use(use_src, ssa) {
371 nir_instr *instr = use_src->parent_instr;
372 if (instr->type != nir_instr_type_alu)
373 continue;
374
375 nir_alu_instr *alu = nir_instr_as_alu(instr);
376
377 switch (alu->op) {
378 case nir_op_mov:
379 case nir_op_vec2:
380 case nir_op_vec3:
381 case nir_op_vec4:
382 return true;
383 default:
384 break;
385 }
386 }
387 return false;
388 }
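/* Illustrative example: in vec4(a.x, a.y, b.y, b.x) the third operand reads
 * b.y into position z (swizzle 1 at position 2), so vec_dest_has_swizzle(vec,
 * b) returns true and the producer of b cannot write its result directly into
 * the vec's destination register.
 */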
389
390 /* get allocated dest register for nir_dest
391 * *p_swiz tells how the components need to be placed into the register
392 */
393 static hw_dst
394 ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
395 {
396 unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
397 dest = real_dest(dest, &swiz, &mask);
398
399 unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
400 unsigned t = reg_get_type(r);
401
402 *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);
403
404 return (hw_dst) {
405 .use = 1,
406 .reg = reg_get_base(c, r),
407 .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
408 };
409 }
410
411 /* precomputed by register_allocate */
412 static unsigned int *q_values[] = {
413 (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
414 (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
415 (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
416 (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
417 (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
418 (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
419 (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
420 };
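/* These are the q values expected by ra_set_finalize(): roughly, for each
 * pair of classes, how many registers of one class a single register of the
 * other class can conflict with. They were computed offline for this class
 * setup and must be regenerated if the register classes above change.
 */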
421
422 static void
423 ra_assign(struct etna_compile *c, nir_shader *shader)
424 {
425 struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
426 NUM_REG_TYPES, false);
427
428 /* classes are always created starting from index 0, so the class index is
429 * equal to the reg_class enum, which represents a register with (c+1) components
430 */
431 for (int c = 0; c < NUM_REG_CLASSES; c++)
432 ra_alloc_reg_class(regs);
433 /* add each register of each class */
434 for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
435 ra_class_add_reg(regs, reg_get_class(r), r);
436 /* set conflicts */
437 for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
438 for (int i = 0; i < NUM_REG_TYPES; i++) {
439 for (int j = 0; j < i; j++) {
440 if (reg_writemask[i] & reg_writemask[j]) {
441 ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
442 NUM_REG_TYPES * r + j);
443 }
444 }
445 }
446 }
447 ra_set_finalize(regs, q_values);
448
449 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
450
451 /* liveness and interference */
452
453 nir_index_blocks(impl);
454 nir_index_ssa_defs(impl);
455 nir_foreach_block(block, impl) {
456 nir_foreach_instr(instr, block)
457 instr->pass_flags = 0;
458 }
459
460 /* this gives an approximation/upper limit on how many nodes are needed
461 * (some ssa values do not represent an allocated register)
462 */
463 unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
464 unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
465 memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
466 struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
467
468 unsigned num_nodes = etna_live_defs(impl, defs, live_map);
469 struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
470
471 /* set classes from num_components */
472 for (unsigned i = 0; i < num_nodes; i++) {
473 nir_instr *instr = defs[i].instr;
474 nir_dest *dest = defs[i].dest;
475 unsigned comp = nir_dest_num_components(*dest) - 1;
476
477 if (instr->type == nir_instr_type_alu &&
478 c->specs->has_new_transcendentals) {
479 switch (nir_instr_as_alu(instr)->op) {
480 case nir_op_fdiv:
481 case nir_op_flog2:
482 case nir_op_fsin:
483 case nir_op_fcos:
484 assert(dest->is_ssa);
485 comp = REG_CLASS_VIRT_VEC2T;
486 default:
487 break;
488 }
489 }
490
491 if (instr->type == nir_instr_type_intrinsic) {
492 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
493 /* can't have dst swizzle or sparse writemask on UBO loads */
494 if (intr->intrinsic == nir_intrinsic_load_ubo) {
495 assert(dest == &intr->dest);
496 if (dest->ssa.num_components == 2)
497 comp = REG_CLASS_VIRT_VEC2C;
498 if (dest->ssa.num_components == 3)
499 comp = REG_CLASS_VIRT_VEC3C;
500 }
501 }
502
503 ra_set_node_class(g, i, comp);
504 }
505
506 nir_foreach_block(block, impl) {
507 nir_foreach_instr(instr, block) {
508 if (instr->type != nir_instr_type_intrinsic)
509 continue;
510
511 nir_dest *dest = dest_for_instr(instr);
512 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
513 unsigned reg;
514
515 switch (intr->intrinsic) {
516 case nir_intrinsic_store_deref: {
517 /* don't want outputs to be swizzled
518 * TODO: better would be to set the type to X/XY/XYZ/XYZW
519 * TODO: what if fragcoord.z is read after writing fragdepth?
520 */
521 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
522 unsigned index = live_map[src_index(impl, &intr->src[1])];
523
524 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
525 deref->var->data.location == FRAG_RESULT_DEPTH) {
526 ra_set_node_reg(g, index, REG_FRAG_DEPTH);
527 } else {
528 ra_set_node_class(g, index, REG_CLASS_VEC4);
529 }
530 } continue;
531 case nir_intrinsic_load_input:
532 reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
533 REG_TYPE_VIRT_SCALAR_X,
534 REG_TYPE_VIRT_VEC2_XY,
535 REG_TYPE_VIRT_VEC3_XYZ,
536 REG_TYPE_VEC4,
537 }[nir_dest_num_components(*dest) - 1];
538 break;
539 case nir_intrinsic_load_instance_id:
540 reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
541 break;
542 default:
543 continue;
544 }
545
546 ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
547 }
548 }
549
550 /* add interference for intersecting live ranges */
551 for (unsigned i = 0; i < num_nodes; i++) {
552 assert(defs[i].live_start < defs[i].live_end);
553 for (unsigned j = 0; j < i; j++) {
554 if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
555 continue;
556 ra_add_node_interference(g, i, j);
557 }
558 }
559
560 ralloc_free(defs);
561
562 /* Allocate registers */
563 ASSERTED bool ok = ra_allocate(g);
564 assert(ok);
565
566 c->g = g;
567 c->regs = regs;
568 c->live_map = live_map;
569 c->num_nodes = num_nodes;
570 }
571
572 static unsigned
573 ra_finish(struct etna_compile *c)
574 {
575 /* TODO: better way to get number of registers used? */
576 unsigned j = 0;
577 for (unsigned i = 0; i < c->num_nodes; i++) {
578 j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
579 }
580
581 ralloc_free(c->g);
582 ralloc_free(c->regs);
583 ralloc_free(c->live_map);
584
585 return j;
586 }
587
588 static void
589 emit_alu(struct etna_compile *c, nir_alu_instr * alu)
590 {
591 const nir_op_info *info = &nir_op_infos[alu->op];
592
593 /* skip instructions marked as dead (vecN and other bypassed instrs) */
594 if (alu->instr.pass_flags)
595 return;
596
597 assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
598
599 unsigned dst_swiz;
600 hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);
601
602 /* compose alu write_mask with RA write mask */
603 if (!alu->dest.dest.is_ssa)
604 dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);
605
606 switch (alu->op) {
607 case nir_op_fdot2:
608 case nir_op_fdot3:
609 case nir_op_fdot4:
610 /* not per-component - don't compose dst_swiz */
611 dst_swiz = INST_SWIZ_IDENTITY;
612 break;
613 default:
614 break;
615 }
616
617 hw_src srcs[3];
618
619 for (int i = 0; i < info->num_inputs; i++) {
620 nir_alu_src *asrc = &alu->src[i];
621 hw_src src;
622
623 src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
624 src = src_swizzle(src, dst_swiz);
625
626 if (src.rgroup != INST_RGROUP_IMMEDIATE) {
627 src.neg = asrc->negate || (alu->op == nir_op_fneg);
628 src.abs = asrc->abs || (alu->op == nir_op_fabs);
629 } else {
630 assert(!asrc->negate && alu->op != nir_op_fneg);
631 assert(!asrc->abs && alu->op != nir_op_fabs);
632 }
633
634 srcs[i] = src;
635 }
636
637 etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
638 }
639
640 static void
641 emit_tex(struct etna_compile *c, nir_tex_instr * tex)
642 {
643 unsigned dst_swiz;
644 hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
645 nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;
646
647 for (unsigned i = 0; i < tex->num_srcs; i++) {
648 switch (tex->src[i].src_type) {
649 case nir_tex_src_coord:
650 coord = &tex->src[i].src;
651 break;
652 case nir_tex_src_bias:
653 case nir_tex_src_lod:
654 assert(!lod_bias);
655 lod_bias = &tex->src[i].src;
656 break;
657 case nir_tex_src_comparator:
658 compare = &tex->src[i].src;
659 break;
660 default:
661 compile_error(c, "Unhandled NIR tex src type: %d\n",
662 tex->src[i].src_type);
663 break;
664 }
665 }
666
667 etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
668 lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
669 compare ? get_src(c, compare) : SRC_DISABLE);
670 }
671
672 static void
673 emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
674 {
675 switch (intr->intrinsic) {
676 case nir_intrinsic_store_deref:
677 etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
678 break;
679 case nir_intrinsic_discard_if:
680 etna_emit_discard(c, get_src(c, &intr->src[0]));
681 break;
682 case nir_intrinsic_discard:
683 etna_emit_discard(c, SRC_DISABLE);
684 break;
685 case nir_intrinsic_load_uniform: {
686 unsigned dst_swiz;
687 struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);
688
689 /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
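/* Emits, roughly: MOVAR a0.x, <offset src>; MOV dst, u[base + a0.x].
 * The first instruction loads the address register, the second reads the
 * uniform with relative addressing (INST_AMODE_ADD_A_X).
 */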
690 emit_inst(c, &(struct etna_inst) {
691 .opcode = INST_OPCODE_MOVAR,
692 .dst.write_mask = 0x1,
693 .src[2] = get_src(c, &intr->src[0]),
694 });
695 emit_inst(c, &(struct etna_inst) {
696 .opcode = INST_OPCODE_MOV,
697 .dst = dst,
698 .src[2] = {
699 .use = 1,
700 .rgroup = INST_RGROUP_UNIFORM_0,
701 .reg = nir_intrinsic_base(intr),
702 .swiz = dst_swiz,
703 .amode = INST_AMODE_ADD_A_X,
704 },
705 });
706 } break;
707 case nir_intrinsic_load_ubo: {
708 /* TODO: if offset is of the form (x + C) then add C to the base instead */
709 unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
710 unsigned dst_swiz;
711 emit_inst(c, &(struct etna_inst) {
712 .opcode = INST_OPCODE_LOAD,
713 .type = INST_TYPE_U32,
714 .dst = ra_dest(c, &intr->dest, &dst_swiz),
715 .src[0] = get_src(c, &intr->src[1]),
716 .src[1] = const_src(c, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
717 });
718 } break;
719 case nir_intrinsic_load_front_face:
720 case nir_intrinsic_load_frag_coord:
721 assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
722 break;
723 case nir_intrinsic_load_input:
724 case nir_intrinsic_load_instance_id:
725 break;
726 default:
727 compile_error(c, "Unhandled NIR intrinsic type: %s\n",
728 nir_intrinsic_infos[intr->intrinsic].name);
729 }
730 }
731
732 static void
733 emit_instr(struct etna_compile *c, nir_instr * instr)
734 {
735 switch (instr->type) {
736 case nir_instr_type_alu:
737 emit_alu(c, nir_instr_as_alu(instr));
738 break;
739 case nir_instr_type_tex:
740 emit_tex(c, nir_instr_as_tex(instr));
741 break;
742 case nir_instr_type_intrinsic:
743 emit_intrinsic(c, nir_instr_as_intrinsic(instr));
744 break;
745 case nir_instr_type_jump:
746 assert(nir_instr_is_last(instr));
747 case nir_instr_type_load_const:
748 case nir_instr_type_ssa_undef:
749 case nir_instr_type_deref:
750 break;
751 default:
752 compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
753 break;
754 }
755 }
756
757 static void
758 emit_block(struct etna_compile *c, nir_block * block)
759 {
760 etna_emit_block_start(c, block->index);
761
762 nir_foreach_instr(instr, block)
763 emit_instr(c, instr);
764
765 /* succs->index < block->index is for the loop case */
766 nir_block *succs = block->successors[0];
767 if (nir_block_ends_in_jump(block) || succs->index < block->index)
768 etna_emit_jump(c, succs->index, SRC_DISABLE);
769 }
770
771 static void
772 emit_cf_list(struct etna_compile *c, struct exec_list *list);
773
774 static void
775 emit_if(struct etna_compile *c, nir_if * nif)
776 {
777 etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
778 emit_cf_list(c, &nif->then_list);
779
780 /* jump at end of then_list to skip else_list
781 * not needed if then_list already ends with a jump or else_list is empty
782 */
783 if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
784 !nir_cf_list_is_empty_block(&nif->else_list))
785 etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);
786
787 emit_cf_list(c, &nif->else_list);
788 }
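/* The generated layout is: a conditional jump to the else block, the then
 * body, an optional unconditional jump over the else body, then the else
 * body. Loops need no special handling here because emit_block() emits the
 * back-edge jump when a successor has a lower block index.
 */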
789
790 static void
791 emit_cf_list(struct etna_compile *c, struct exec_list *list)
792 {
793 foreach_list_typed(nir_cf_node, node, node, list) {
794 switch (node->type) {
795 case nir_cf_node_block:
796 emit_block(c, nir_cf_node_as_block(node));
797 break;
798 case nir_cf_node_if:
799 emit_if(c, nir_cf_node_as_if(node));
800 break;
801 case nir_cf_node_loop:
802 emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
803 break;
804 default:
805 compile_error(c, "Unknown NIR node type\n");
806 break;
807 }
808 }
809 }
810
811 /* based on nir_lower_vec_to_movs */
812 static unsigned
813 insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
814 {
815 assert(start_idx < nir_op_infos[vec->op].num_inputs);
816 unsigned write_mask = (1u << start_idx);
817
818 nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
819 nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
820
821 mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
822 mov->src[0].negate = vec->src[start_idx].negate;
823 mov->src[0].abs = vec->src[start_idx].abs;
824
825 unsigned num_components = 1;
826
827 for (unsigned i = start_idx + 1; i < 4; i++) {
828 if (!(vec->dest.write_mask & (1 << i)))
829 continue;
830
831 if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
832 vec->src[i].negate == vec->src[start_idx].negate &&
833 vec->src[i].abs == vec->src[start_idx].abs) {
834 write_mask |= (1 << i);
835 mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
836 num_components++;
837 }
838 }
839
840 mov->dest.write_mask = (1 << num_components) - 1;
841 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);
842
843 /* replace vec srcs with inserted mov */
844 for (unsigned i = 0, j = 0; i < 4; i++) {
845 if (!(write_mask & (1 << i)))
846 continue;
847
848 nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
849 vec->src[i].swizzle[0] = j++;
850 }
851
852 nir_instr_insert_before(&vec->instr, &mov->instr);
853
854 return write_mask;
855 }
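/* Worked example (hypothetical values): for vec4(a.x, a.y, b.x, a.z) with
 * start_idx == 0, the a-based components at positions 0, 1 and 3 are grouped
 * into a single "mov tmp.xyz, a.xyz"-style instruction, those vec sources are
 * rewritten to tmp with swizzles 0, 1, 2, and the returned write_mask is 0xb;
 * the b component at position 2 is left for a later call.
 */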
856
857 /*
858 * for vecN instructions:
859 * -merge constant sources into a single src
860 * -insert movs (nir_lower_vec_to_movs equivalent)
861 * for non-vecN instructions:
862 * -try to merge constants as single constant
863 * -insert movs for multiple constants (pre-HALTI5)
864 */
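/* Hypothetical example of the non-vecN constant merge: for something like
 * ffma(#2.0, x, #0.5) on pre-HALTI5 hardware, the two constant sources are
 * packed into one immediate vector {2.0, 0.5} and both sources are rewritten
 * to point at it with .x and .y swizzles, so the instruction ends up with a
 * single uniform/constant operand.
 */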
865 static void
866 lower_alu(struct etna_compile *c, nir_alu_instr *alu)
867 {
868 const nir_op_info *info = &nir_op_infos[alu->op];
869
870 nir_builder b;
871 nir_builder_init(&b, c->impl);
872 b.cursor = nir_before_instr(&alu->instr);
873
874 switch (alu->op) {
875 case nir_op_vec2:
876 case nir_op_vec3:
877 case nir_op_vec4:
878 break;
879 default:
880 /* pre-GC7000L can only have 1 uniform src per instruction */
881 if (c->specs->halti >= 5)
882 return;
883
884 nir_const_value value[4] = {};
885 uint8_t swizzle[4][4] = {};
886 unsigned swiz_max = 0, num_const = 0;
887
888 for (unsigned i = 0; i < info->num_inputs; i++) {
889 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
890 if (!cv)
891 continue;
892
893 unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
894 for (unsigned j = 0; j < num_components; j++) {
895 int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
896 swizzle[i][j] = idx;
897 swiz_max = MAX2(swiz_max, (unsigned) idx);
898 }
899 num_const++;
900 }
901
902 /* nothing to do */
903 if (num_const <= 1)
904 return;
905
906 /* resolve with single combined const src */
907 if (swiz_max < 4) {
908 nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
909
910 for (unsigned i = 0; i < info->num_inputs; i++) {
911 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
912 if (!cv)
913 continue;
914
915 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
916
917 for (unsigned j = 0; j < 4; j++)
918 alu->src[i].swizzle[j] = swizzle[i][j];
919 }
920 return;
921 }
922
923 /* resolve with movs */
924 num_const = 0;
925 for (unsigned i = 0; i < info->num_inputs; i++) {
926 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
927 if (!cv)
928 continue;
929
930 num_const++;
931 if (num_const == 1)
932 continue;
933
934 nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
935 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
936 }
937 return;
938 }
939
940 nir_const_value value[4];
941 unsigned num_components = 0;
942
943 for (unsigned i = 0; i < info->num_inputs; i++) {
944 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
945 if (cv)
946 value[num_components++] = cv[alu->src[i].swizzle[0]];
947 }
948
949 /* if there is more than one constant source to the vecN, combine them
950 * into a single load_const (removing the vecN completely if all components
951 * are constant)
952 */
953 if (num_components > 1) {
954 nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);
955
956 if (num_components == info->num_inputs) {
957 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
958 nir_instr_remove(&alu->instr);
959 return;
960 }
961
962 for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
963 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
964 if (!cv)
965 continue;
966
967 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
968 alu->src[i].swizzle[0] = j++;
969 }
970 }
971
972 unsigned finished_write_mask = 0;
973 for (unsigned i = 0; i < 4; i++) {
974 if (!(alu->dest.write_mask & (1 << i)))
975 continue;
976
977 nir_ssa_def *ssa = alu->src[i].src.ssa;
978
979 /* check whether this vecN instruction is the only user of the ssa def */
980 bool need_mov = list_length(&ssa->if_uses) != 0;
981 nir_foreach_use(use_src, ssa) {
982 if (use_src->parent_instr != &alu->instr)
983 need_mov = true;
984 }
985
986 nir_instr *instr = ssa->parent_instr;
987 switch (instr->type) {
988 case nir_instr_type_alu:
989 case nir_instr_type_tex:
990 break;
991 case nir_instr_type_intrinsic:
992 if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
993 need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
994 break;
995 }
996 default:
997 need_mov = true;
998 }
999
1000 if (need_mov && !(finished_write_mask & (1 << i)))
1001 finished_write_mask |= insert_vec_mov(alu, i, c->nir);
1002 }
1003 }
1004
1005 static bool
1006 emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
1007 {
1008 nir_shader *shader = c->nir;
1009 c->impl = nir_shader_get_entrypoint(shader);
1010
1011 bool have_indirect_uniform = false;
1012 unsigned indirect_max = 0;
1013
1014 nir_builder b;
1015 nir_builder_init(&b, c->impl);
1016
1017 /* convert non-dynamic uniform loads to constants, etc */
1018 nir_foreach_block(block, c->impl) {
1019 nir_foreach_instr_safe(instr, block) {
1020 switch(instr->type) {
1021 case nir_instr_type_alu:
1022 /* deals with vecN and const srcs */
1023 lower_alu(c, nir_instr_as_alu(instr));
1024 break;
1025 case nir_instr_type_load_const: {
1026 nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
1027 for (unsigned i = 0; i < load_const->def.num_components; i++)
1028 load_const->value[i] = CONST(load_const->value[i].u32);
1029 } break;
1030 case nir_instr_type_intrinsic: {
1031 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1032 /* TODO: load_ubo can also become a constant in some cases
1033 * (at the moment it can end up emitting a LOAD with two
1034 * uniform sources, which could be a problem on HALTI2)
1035 */
1036 if (intr->intrinsic != nir_intrinsic_load_uniform)
1037 break;
1038 nir_const_value *off = nir_src_as_const_value(intr->src[0]);
1039 if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
1040 have_indirect_uniform = true;
1041 indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
1042 break;
1043 }
1044
1045 unsigned base = nir_intrinsic_base(intr);
1046 /* pre-HALTI2 the uniform offset will be a float */
1047 if (c->specs->halti < 2)
1048 base += (unsigned) off[0].f32;
1049 else
1050 base += off[0].u32;
1051 nir_const_value value[4];
1052
1053 for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
1054 if (nir_intrinsic_base(intr) < 0)
1055 value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
1056 else
1057 value[i] = UNIFORM(base * 4 + i);
1058 }
1059
1060 b.cursor = nir_after_instr(instr);
1061 nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);
1062
1063 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
1064 nir_instr_remove(instr);
1065 } break;
1066 default:
1067 break;
1068 }
1069 }
1070 }
1071
1072 /* TODO: only emit required indirect uniform ranges */
1073 if (have_indirect_uniform) {
1074 for (unsigned i = 0; i < indirect_max * 4; i++)
1075 c->consts[i] = UNIFORM(i).u64;
1076 c->const_count = indirect_max;
1077 }
1078
1079 /* add mov for any store output using sysval/const */
1080 nir_foreach_block(block, c->impl) {
1081 nir_foreach_instr_safe(instr, block) {
1082 if (instr->type != nir_instr_type_intrinsic)
1083 continue;
1084
1085 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1086
1087 switch (intr->intrinsic) {
1088 case nir_intrinsic_store_deref: {
1089 nir_src *src = &intr->src[1];
1090 if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
1091 b.cursor = nir_before_instr(instr);
1092 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
1093 }
1094 } break;
1095 default:
1096 break;
1097 }
1098 }
1099 }
1100
1101 /* call directly to avoid validation (load_const doesn't pass validation at this point) */
1102 nir_convert_from_ssa(shader, true);
1103 nir_opt_dce(shader);
1104
1105 ra_assign(c, shader);
1106
1107 emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);
1108
1109 *num_temps = ra_finish(c);
1110 *num_consts = c->const_count;
1111 return true;
1112 }