etnaviv: move liveness related stuff into own file
[mesa.git] src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
1 /*
2 * Copyright (c) 2019 Zodiac Inflight Innovations
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "etnaviv_asm.h"
28 #include "etnaviv_context.h"
29 #include "etnaviv_compiler_nir.h"
30
31 #include "compiler/nir/nir.h"
32 #include "compiler/nir/nir_builder.h"
33 #include "util/register_allocate.h"
34
35 #define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
36 #define SRC_DISABLE ((hw_src){})
37 #define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
38 #define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})
39
40 #define emit(type, args...) etna_emit_##type(state->c, args)
41
42 typedef struct etna_inst_dst hw_dst;
43 typedef struct etna_inst_src hw_src;
44
45 struct state {
46 struct etna_compile *c;
47
48 unsigned const_count;
49
50 nir_shader *shader;
51 nir_function_impl *impl;
52
53 /* ra state */
54 struct ra_graph *g;
55 struct ra_regs *regs;
56 unsigned *live_map;
57 unsigned num_nodes;
58 };
59
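/* Compose an extra swizzle on top of a source's existing swizzle.  Immediate
 * sources are returned unchanged, since their bits encode a value rather than
 * a swizzle.
 */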
60 static inline hw_src
61 src_swizzle(hw_src src, unsigned swizzle)
62 {
63 if (src.rgroup != INST_RGROUP_IMMEDIATE)
64 src.swiz = inst_swiz_compose(src.swiz, swizzle);
65
66 return src;
67 }
68
69 /* constants are represented as 64-bit ints:
70 * the low 32 bits hold the value and the high 32 bits the type (imm, uniform, etc.)
71 */
72
73 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
74 #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
75 #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
76 #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
77
78 static int
79 const_add(uint64_t *c, uint64_t value)
80 {
81 for (unsigned i = 0; i < 4; i++) {
82 if (c[i] == value || !c[i]) {
83 c[i] = value;
84 return i;
85 }
86 }
87 return -1;
88 }
89
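/* Get a hw_src for a (vector) constant.  On HALTI2+ a scalar
 * ETNA_IMMEDIATE_CONSTANT that fits one of the inline-immediate encodings is
 * returned directly; everything else is packed into the shader's constant
 * array (growing state->const_count) and addressed with a swizzle selecting
 * the right components.
 */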
90 static hw_src
91 const_src(struct state *state, nir_const_value *value, unsigned num_components)
92 {
93 /* use inline immediates if possible */
94 if (state->c->specs->halti >= 2 && num_components == 1 &&
95 value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
96 uint32_t bits = value[0].u32;
97
98 /* "float" - low 12 bits are zero, the value is stored shifted right by 12 */
99 if ((bits & 0xfff) == 0)
100 return etna_immediate_src(0, bits >> 12);
101
102 /* "unsigned" - raw 20 bit value */
103 if (bits < (1 << 20))
104 return etna_immediate_src(2, bits);
105
106 /* "signed" - negative value that fits in a sign-extended 20-bit immediate */
107 if (bits >= 0xfff80000)
108 return etna_immediate_src(1, bits);
109 }
110
111 unsigned i;
112 int swiz = -1;
113 for (i = 0; swiz < 0; i++) {
114 uint64_t *a = &state->c->consts[i*4];
115 uint64_t save[4];
116 memcpy(save, a, sizeof(save));
117 swiz = 0;
118 for (unsigned j = 0; j < num_components; j++) {
119 int c = const_add(a, value[j].u64);
120 if (c < 0) {
121 memcpy(a, save, sizeof(save));
122 swiz = -1;
123 break;
124 }
125 swiz |= c << j * 2;
126 }
127 }
128
129 assert(i <= ETNA_MAX_IMM / 4);
130 state->const_count = MAX2(state->const_count, i);
131
132 return SRC_CONST(i - 1, swiz);
133 }
134
135 /* Swizzles and write masks can be used to layer virtual non-interfering
136 * registers on top of the real VEC4 registers. For example, the virtual
137 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
138 * physical VEC4 base register do not interfere.
139 */
140 enum reg_class {
141 REG_CLASS_VIRT_SCALAR,
142 REG_CLASS_VIRT_VEC2,
143 REG_CLASS_VIRT_VEC3,
144 REG_CLASS_VEC4,
145 /* special vec2 class for fast transcendentals, limited to XY or ZW */
146 REG_CLASS_VIRT_VEC2T,
147 /* special classes for LOAD - contiguous components */
148 REG_CLASS_VIRT_VEC2C,
149 REG_CLASS_VIRT_VEC3C,
150 NUM_REG_CLASSES,
151 };
152
153 enum reg_type {
154 REG_TYPE_VEC4,
155 REG_TYPE_VIRT_VEC3_XYZ,
156 REG_TYPE_VIRT_VEC3_XYW,
157 REG_TYPE_VIRT_VEC3_XZW,
158 REG_TYPE_VIRT_VEC3_YZW,
159 REG_TYPE_VIRT_VEC2_XY,
160 REG_TYPE_VIRT_VEC2_XZ,
161 REG_TYPE_VIRT_VEC2_XW,
162 REG_TYPE_VIRT_VEC2_YZ,
163 REG_TYPE_VIRT_VEC2_YW,
164 REG_TYPE_VIRT_VEC2_ZW,
165 REG_TYPE_VIRT_SCALAR_X,
166 REG_TYPE_VIRT_SCALAR_Y,
167 REG_TYPE_VIRT_SCALAR_Z,
168 REG_TYPE_VIRT_SCALAR_W,
169 REG_TYPE_VIRT_VEC2T_XY,
170 REG_TYPE_VIRT_VEC2T_ZW,
171 REG_TYPE_VIRT_VEC2C_XY,
172 REG_TYPE_VIRT_VEC2C_YZ,
173 REG_TYPE_VIRT_VEC2C_ZW,
174 REG_TYPE_VIRT_VEC3C_XYZ,
175 REG_TYPE_VIRT_VEC3C_YZW,
176 NUM_REG_TYPES,
177 };
178
179 /* writemask when used as dest */
180 static const uint8_t
181 reg_writemask[NUM_REG_TYPES] = {
182 [REG_TYPE_VEC4] = 0xf,
183 [REG_TYPE_VIRT_SCALAR_X] = 0x1,
184 [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
185 [REG_TYPE_VIRT_VEC2_XY] = 0x3,
186 [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
187 [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
188 [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
189 [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
190 [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
191 [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
192 [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
193 [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
194 [REG_TYPE_VIRT_SCALAR_W] = 0x8,
195 [REG_TYPE_VIRT_VEC2_XW] = 0x9,
196 [REG_TYPE_VIRT_VEC2_YW] = 0xa,
197 [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
198 [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
199 [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
200 [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
201 [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
202 [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
203 [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
204 };
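/* two register types that share a base temp conflict exactly when their
 * writemasks overlap; ra_assign() below uses this table to build those
 * conflicts
 */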
205
206 /* how to swizzle when used as a src */
207 static const uint8_t
208 reg_swiz[NUM_REG_TYPES] = {
209 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
210 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
211 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
212 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
213 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
214 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
215 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
216 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
217 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
218 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
219 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
220 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
221 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
222 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
223 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
224 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
225 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
226 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
227 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
228 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
229 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
230 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
231 };
232
233 /* how to swizzle when used as a dest */
234 static const uint8_t
235 reg_dst_swiz[NUM_REG_TYPES] = {
236 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
237 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
238 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
239 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
240 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
241 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
242 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
243 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
244 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
245 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
246 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
247 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
248 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
249 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
250 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
251 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
252 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
253 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
254 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
255 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
256 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
257 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
258 };
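/* e.g. for REG_TYPE_VIRT_VEC2_ZW: as a source it is read with
 * SWIZZLE(Z, W, Z, W), and as a dest the swizzle SWIZZLE(X, X, X, Y) is
 * composed onto the instruction's sources so the logical x/y result is
 * computed in the z/w slots selected by write_mask 0xc
 */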
259
260 static inline int reg_get_type(int virt_reg)
261 {
262 return virt_reg % NUM_REG_TYPES;
263 }
264
265 static inline int reg_get_base(struct state *state, int virt_reg)
266 {
267 /* offset by 1 to avoid reserved position register */
268 if (state->shader->info.stage == MESA_SHADER_FRAGMENT)
269 return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
270 return virt_reg / NUM_REG_TYPES;
271 }
272
273 /* use "r63.z" for the depth reg; reg_get_base() wraps it around to r0.z
274 * (fs registers are offset by 1 to avoid reserving r0)
275 */
276 #define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
277
278 static inline int reg_get_class(int virt_reg)
279 {
280 switch (reg_get_type(virt_reg)) {
281 case REG_TYPE_VEC4:
282 return REG_CLASS_VEC4;
283 case REG_TYPE_VIRT_VEC3_XYZ:
284 case REG_TYPE_VIRT_VEC3_XYW:
285 case REG_TYPE_VIRT_VEC3_XZW:
286 case REG_TYPE_VIRT_VEC3_YZW:
287 return REG_CLASS_VIRT_VEC3;
288 case REG_TYPE_VIRT_VEC2_XY:
289 case REG_TYPE_VIRT_VEC2_XZ:
290 case REG_TYPE_VIRT_VEC2_XW:
291 case REG_TYPE_VIRT_VEC2_YZ:
292 case REG_TYPE_VIRT_VEC2_YW:
293 case REG_TYPE_VIRT_VEC2_ZW:
294 return REG_CLASS_VIRT_VEC2;
295 case REG_TYPE_VIRT_SCALAR_X:
296 case REG_TYPE_VIRT_SCALAR_Y:
297 case REG_TYPE_VIRT_SCALAR_Z:
298 case REG_TYPE_VIRT_SCALAR_W:
299 return REG_CLASS_VIRT_SCALAR;
300 case REG_TYPE_VIRT_VEC2T_XY:
301 case REG_TYPE_VIRT_VEC2T_ZW:
302 return REG_CLASS_VIRT_VEC2T;
303 case REG_TYPE_VIRT_VEC2C_XY:
304 case REG_TYPE_VIRT_VEC2C_YZ:
305 case REG_TYPE_VIRT_VEC2C_ZW:
306 return REG_CLASS_VIRT_VEC2C;
307 case REG_TYPE_VIRT_VEC3C_XYZ:
308 case REG_TYPE_VIRT_VEC3C_YZW:
309 return REG_CLASS_VIRT_VEC3C;
310 }
311
312 assert(false);
313 return 0;
314 }
315
316 /* nir_src to allocated register */
317 static hw_src
318 ra_src(struct state *state, nir_src *src)
319 {
320 unsigned reg = ra_get_node_reg(state->g, state->live_map[src_index(state->impl, src)]);
321 return SRC_REG(reg_get_base(state, reg), reg_swiz[reg_get_type(reg)]);
322 }
323
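/* Resolve a nir_src to a hw_src: registers come from the RA results,
 * constants and undefs become constant sources, and a few intrinsics map to
 * fixed hw registers (front_face -> internal rgroup, frag_coord -> t0).
 */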
324 static hw_src
325 get_src(struct state *state, nir_src *src)
326 {
327 if (!src->is_ssa)
328 return ra_src(state, src);
329
330 nir_instr *instr = src->ssa->parent_instr;
331
332 if (instr->pass_flags & BYPASS_SRC) {
333 assert(instr->type == nir_instr_type_alu);
334 nir_alu_instr *alu = nir_instr_as_alu(instr);
335 assert(alu->op == nir_op_mov);
336 return src_swizzle(get_src(state, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
337 }
338
339 switch (instr->type) {
340 case nir_instr_type_load_const:
341 return const_src(state, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
342 case nir_instr_type_intrinsic: {
343 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
344 switch (intr->intrinsic) {
345 case nir_intrinsic_load_input:
346 case nir_intrinsic_load_instance_id:
347 case nir_intrinsic_load_uniform:
348 case nir_intrinsic_load_ubo:
349 return ra_src(state, src);
350 case nir_intrinsic_load_front_face:
351 return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
352 case nir_intrinsic_load_frag_coord:
353 return SRC_REG(0, INST_SWIZ_IDENTITY);
354 default:
355 compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
356 nir_intrinsic_infos[intr->intrinsic].name);
357 break;
358 }
359 } break;
360 case nir_instr_type_alu:
361 case nir_instr_type_tex:
362 return ra_src(state, src);
363 case nir_instr_type_ssa_undef: {
364 /* return zero to deal with broken Blur demo */
365 nir_const_value value = CONST(0);
366 return src_swizzle(const_src(state, &value, 1), SWIZZLE(X,X,X,X));
367 }
368 default:
369 compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
370 break;
371 }
372
373 return SRC_DISABLE;
374 }
375
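/* Returns true if the components of ssa land in different positions in the
 * vecN dest than they occupy in ssa itself, or if ssa is also consumed by
 * another mov/vecN (a possibly bypassed chain we don't want to reason about).
 */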
376 static bool
377 vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
378 {
379 for (unsigned i = 0; i < 4; i++) {
380 if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
381 continue;
382
383 if (vec->src[i].swizzle[0] != i)
384 return true;
385 }
386
387 /* don't deal with possible bypassed vec/mov chain */
388 nir_foreach_use(use_src, ssa) {
389 nir_instr *instr = use_src->parent_instr;
390 if (instr->type != nir_instr_type_alu)
391 continue;
392
393 nir_alu_instr *alu = nir_instr_as_alu(instr);
394
395 switch (alu->op) {
396 case nir_op_mov:
397 case nir_op_vec2:
398 case nir_op_vec3:
399 case nir_op_vec4:
400 return true;
401 default:
402 break;
403 }
404 }
405 return false;
406 }
407
408 /* get the allocated dest register for a nir_dest
409 * *p_swiz tells how the components need to be placed into the register
410 */
411 static hw_dst
412 ra_dest(struct state *state, nir_dest *dest, unsigned *p_swiz)
413 {
414 unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
415 dest = real_dest(dest, &swiz, &mask);
416
417 unsigned r = ra_get_node_reg(state->g, state->live_map[dest_index(state->impl, dest)]);
418 unsigned t = reg_get_type(r);
419
420 *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);
421
422 return (hw_dst) {
423 .use = 1,
424 .reg = reg_get_base(state, r),
425 .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
426 };
427 }
428
429 /* q values for the register classes above, precomputed by register_allocate */
430 static unsigned int *q_values[] = {
431 (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
432 (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
433 (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
434 (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
435 (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
436 (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
437 (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
438 };
439
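/* Build the register set (classes plus per-base-register conflicts), run
 * etna_live_defs() for liveness, pin nodes that must land in fixed hw
 * registers (shader inputs, instance id, gl_FragDepth), add interference
 * edges for overlapping live ranges and let ra_allocate() assign registers.
 */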
440 static void
441 ra_assign(struct state *state, nir_shader *shader)
442 {
443 struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
444 NUM_REG_TYPES, false);
445
446 /* classes are always created starting from index 0, so the class index equals
447 * the reg_class enum value, which represents a register with (c+1) components
448 */
449 for (int c = 0; c < NUM_REG_CLASSES; c++)
450 ra_alloc_reg_class(regs);
451 /* add each register of each class */
452 for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
453 ra_class_add_reg(regs, reg_get_class(r), r);
454 /* set conflicts */
455 for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
456 for (int i = 0; i < NUM_REG_TYPES; i++) {
457 for (int j = 0; j < i; j++) {
458 if (reg_writemask[i] & reg_writemask[j]) {
459 ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
460 NUM_REG_TYPES * r + j);
461 }
462 }
463 }
464 }
465 ra_set_finalize(regs, q_values);
466
467 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
468
469 /* liveness and interference */
470
471 nir_index_blocks(impl);
472 nir_index_ssa_defs(impl);
473 nir_foreach_block(block, impl) {
474 nir_foreach_instr(instr, block)
475 instr->pass_flags = 0;
476 }
477
478 /* this gives an approximation/upper limit on how many nodes are needed
479 * (some ssa values do not represent an allocated register)
480 */
481 unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
482 unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
483 memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
484 struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
485
486 unsigned num_nodes = etna_live_defs(impl, defs, live_map);
487 struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
488
489 /* set classes from num_components */
490 for (unsigned i = 0; i < num_nodes; i++) {
491 nir_instr *instr = defs[i].instr;
492 nir_dest *dest = defs[i].dest;
493 unsigned c = nir_dest_num_components(*dest) - 1;
494
495 if (instr->type == nir_instr_type_alu &&
496 state->c->specs->has_new_transcendentals) {
497 switch (nir_instr_as_alu(instr)->op) {
498 case nir_op_fdiv:
499 case nir_op_flog2:
500 case nir_op_fsin:
501 case nir_op_fcos:
502 assert(dest->is_ssa);
503 c = REG_CLASS_VIRT_VEC2T;
504 default:
505 break;
506 }
507 }
508
509 if (instr->type == nir_instr_type_intrinsic) {
510 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
511 /* can't have dst swizzle or sparse writemask on UBO loads */
512 if (intr->intrinsic == nir_intrinsic_load_ubo) {
513 assert(dest == &intr->dest);
514 if (dest->ssa.num_components == 2)
515 c = REG_CLASS_VIRT_VEC2C;
516 if (dest->ssa.num_components == 3)
517 c = REG_CLASS_VIRT_VEC3C;
518 }
519 }
520
521 ra_set_node_class(g, i, c);
522 }
523
524 nir_foreach_block(block, impl) {
525 nir_foreach_instr(instr, block) {
526 if (instr->type != nir_instr_type_intrinsic)
527 continue;
528
529 nir_dest *dest = dest_for_instr(instr);
530 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
531 unsigned reg;
532
533 switch (intr->intrinsic) {
534 case nir_intrinsic_store_deref: {
535 /* don't want outputs to be swizzled
536 * TODO: better would be to set the type to X/XY/XYZ/XYZW
537 * TODO: what if fragcoord.z is read after writing fragdepth?
538 */
539 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
540 unsigned index = live_map[src_index(impl, &intr->src[1])];
541
542 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
543 deref->var->data.location == FRAG_RESULT_DEPTH) {
544 ra_set_node_reg(g, index, REG_FRAG_DEPTH);
545 } else {
546 ra_set_node_class(g, index, REG_CLASS_VEC4);
547 }
548 } continue;
549 case nir_intrinsic_load_input:
550 reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
551 REG_TYPE_VIRT_SCALAR_X,
552 REG_TYPE_VIRT_VEC2_XY,
553 REG_TYPE_VIRT_VEC3_XYZ,
554 REG_TYPE_VEC4,
555 }[nir_dest_num_components(*dest) - 1];
556 break;
557 case nir_intrinsic_load_instance_id:
558 reg = state->c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
559 break;
560 default:
561 continue;
562 }
563
564 ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
565 }
566 }
567
568 /* add interference for intersecting live ranges */
569 for (unsigned i = 0; i < num_nodes; i++) {
570 assert(defs[i].live_start < defs[i].live_end);
571 for (unsigned j = 0; j < i; j++) {
572 if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
573 continue;
574 ra_add_node_interference(g, i, j);
575 }
576 }
577
578 ralloc_free(defs);
579
580 /* Allocate registers */
581 ASSERTED bool ok = ra_allocate(g);
582 assert(ok);
583
584 state->g = g;
585 state->regs = regs;
586 state->live_map = live_map;
587 state->num_nodes = num_nodes;
588 }
589
590 static unsigned
591 ra_finish(struct state *state)
592 {
593 /* TODO: better way to get number of registers used? */
594 unsigned j = 0;
595 for (unsigned i = 0; i < state->num_nodes; i++) {
596 j = MAX2(j, reg_get_base(state, ra_get_node_reg(state->g, i)) + 1);
597 }
598
599 ralloc_free(state->g);
600 ralloc_free(state->regs);
601 ralloc_free(state->live_map);
602
603 return j;
604 }
605
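/* Emit a single (non-vecN) ALU instruction: fetch the allocated dest and
 * compose its swizzle onto every source so the computation happens in the
 * components the virtual register occupies (dot products are not
 * per-component and skip that), folding negate/abs/saturate modifiers in.
 */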
606 static void
607 emit_alu(struct state *state, nir_alu_instr * alu)
608 {
609 const nir_op_info *info = &nir_op_infos[alu->op];
610
611 /* skip instructions marked as dead (vecN and other bypassed instructions) */
612 if (alu->instr.pass_flags)
613 return;
614
615 assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
616
617 unsigned dst_swiz;
618 hw_dst dst = ra_dest(state, &alu->dest.dest, &dst_swiz);
619
620 /* compose alu write_mask with RA write mask */
621 if (!alu->dest.dest.is_ssa)
622 dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);
623
624 switch (alu->op) {
625 case nir_op_fdot2:
626 case nir_op_fdot3:
627 case nir_op_fdot4:
628 /* not per-component - don't compose dst_swiz */
629 dst_swiz = INST_SWIZ_IDENTITY;
630 break;
631 default:
632 break;
633 }
634
635 hw_src srcs[3];
636
637 for (int i = 0; i < info->num_inputs; i++) {
638 nir_alu_src *asrc = &alu->src[i];
639 hw_src src;
640
641 src = src_swizzle(get_src(state, &asrc->src), ALU_SWIZ(asrc));
642 src = src_swizzle(src, dst_swiz);
643
644 if (src.rgroup != INST_RGROUP_IMMEDIATE) {
645 src.neg = asrc->negate || (alu->op == nir_op_fneg);
646 src.abs = asrc->abs || (alu->op == nir_op_fabs);
647 } else {
648 assert(!asrc->negate && alu->op != nir_op_fneg);
649 assert(!asrc->abs && alu->op != nir_op_fabs);
650 }
651
652 srcs[i] = src;
653 }
654
655 emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
656 }
657
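/* Emit a texture instruction; the coordinate, lod/bias and comparator
 * operands are picked out of the nir_tex_instr sources.
 */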
658 static void
659 emit_tex(struct state *state, nir_tex_instr * tex)
660 {
661 unsigned dst_swiz;
662 hw_dst dst = ra_dest(state, &tex->dest, &dst_swiz);
663 nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;
664
665 for (unsigned i = 0; i < tex->num_srcs; i++) {
666 switch (tex->src[i].src_type) {
667 case nir_tex_src_coord:
668 coord = &tex->src[i].src;
669 break;
670 case nir_tex_src_bias:
671 case nir_tex_src_lod:
672 assert(!lod_bias);
673 lod_bias = &tex->src[i].src;
674 break;
675 case nir_tex_src_comparator:
676 compare = &tex->src[i].src;
677 break;
678 default:
679 compile_error(state->c, "Unhandled NIR tex src type: %d\n",
680 tex->src[i].src_type);
681 break;
682 }
683 }
684
685 emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(state, coord),
686 lod_bias ? get_src(state, lod_bias) : SRC_DISABLE,
687 compare ? get_src(state, compare) : SRC_DISABLE);
688 }
689
690 static void
691 emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
692 {
693 switch (intr->intrinsic) {
694 case nir_intrinsic_store_deref:
695 emit(output, nir_src_as_deref(intr->src[0])->var, get_src(state, &intr->src[1]));
696 break;
697 case nir_intrinsic_discard_if:
698 emit(discard, get_src(state, &intr->src[0]));
699 break;
700 case nir_intrinsic_discard:
701 emit(discard, SRC_DISABLE);
702 break;
703 case nir_intrinsic_load_uniform: {
704 unsigned dst_swiz;
705 struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
706
707 /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
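/* indirect uniform load: MOVAR latches the offset into address register
 * a0.x, then the MOV reads uniform[base + a0.x] via INST_AMODE_ADD_A_X
 */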
708 emit_inst(state->c, &(struct etna_inst) {
709 .opcode = INST_OPCODE_MOVAR,
710 .dst.write_mask = 0x1,
711 .src[2] = get_src(state, &intr->src[0]),
712 });
713 emit_inst(state->c, &(struct etna_inst) {
714 .opcode = INST_OPCODE_MOV,
715 .dst = dst,
716 .src[2] = {
717 .use = 1,
718 .rgroup = INST_RGROUP_UNIFORM_0,
719 .reg = nir_intrinsic_base(intr),
720 .swiz = dst_swiz,
721 .amode = INST_AMODE_ADD_A_X,
722 },
723 });
724 } break;
725 case nir_intrinsic_load_ubo: {
726 /* TODO: if offset is of the form (x + C) then add C to the base instead */
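/* the LOAD offset comes from the nir offset source; the base address is a
 * uniform flagged ETNA_IMMEDIATE_UBO0_ADDR + block index, which the driver
 * resolves to the UBO's address
 */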
727 unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
728 unsigned dst_swiz;
729 emit_inst(state->c, &(struct etna_inst) {
730 .opcode = INST_OPCODE_LOAD,
731 .type = INST_TYPE_U32,
732 .dst = ra_dest(state, &intr->dest, &dst_swiz),
733 .src[0] = get_src(state, &intr->src[1]),
734 .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
735 });
736 } break;
737 case nir_intrinsic_load_front_face:
738 case nir_intrinsic_load_frag_coord:
739 assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
740 break;
741 case nir_intrinsic_load_input:
742 case nir_intrinsic_load_instance_id:
743 break;
744 default:
745 compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
746 nir_intrinsic_infos[intr->intrinsic].name);
747 }
748 }
749
750 static void
751 emit_instr(struct state *state, nir_instr * instr)
752 {
753 switch (instr->type) {
754 case nir_instr_type_alu:
755 emit_alu(state, nir_instr_as_alu(instr));
756 break;
757 case nir_instr_type_tex:
758 emit_tex(state, nir_instr_as_tex(instr));
759 break;
760 case nir_instr_type_intrinsic:
761 emit_intrinsic(state, nir_instr_as_intrinsic(instr));
762 break;
763 case nir_instr_type_jump:
764 assert(nir_instr_is_last(instr));
765 case nir_instr_type_load_const:
766 case nir_instr_type_ssa_undef:
767 case nir_instr_type_deref:
768 break;
769 default:
770 compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
771 break;
772 }
773 }
774
775 static void
776 emit_block(struct state *state, nir_block * block)
777 {
778 emit(block_start, block->index);
779
780 nir_foreach_instr(instr, block)
781 emit_instr(state, instr);
782
783 /* succs->index < block->index is for the loop case */
784 nir_block *succs = block->successors[0];
785 if (nir_block_ends_in_jump(block) || succs->index < block->index)
786 emit(jump, succs->index, SRC_DISABLE);
787 }
788
789 static void
790 emit_cf_list(struct state *state, struct exec_list *list);
791
792 static void
793 emit_if(struct state *state, nir_if * nif)
794 {
795 emit(jump, nir_if_first_else_block(nif)->index, get_src(state, &nif->condition));
796 emit_cf_list(state, &nif->then_list);
797
798 /* jump at end of then_list to skip else_list
799 * not needed if then_list already ends with a jump or else_list is empty
800 */
801 if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
802 !nir_cf_list_is_empty_block(&nif->else_list))
803 emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);
804
805 emit_cf_list(state, &nif->else_list);
806 }
807
808 static void
809 emit_cf_list(struct state *state, struct exec_list *list)
810 {
811 foreach_list_typed(nir_cf_node, node, node, list) {
812 switch (node->type) {
813 case nir_cf_node_block:
814 emit_block(state, nir_cf_node_as_block(node));
815 break;
816 case nir_cf_node_if:
817 emit_if(state, nir_cf_node_as_if(node));
818 break;
819 case nir_cf_node_loop:
820 emit_cf_list(state, &nir_cf_node_as_loop(node)->body);
821 break;
822 default:
823 compile_error(state->c, "Unknown NIR node type\n");
824 break;
825 }
826 }
827 }
828
829 /* based on nir_lower_vec_to_movs */
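/* Insert a mov that gathers, starting at start_idx, every vec component that
 * reads the same source with identical modifiers, then rewrite those vec srcs
 * to use the mov's result.  Returns the write mask of the components covered.
 */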
830 static unsigned
831 insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
832 {
833 assert(start_idx < nir_op_infos[vec->op].num_inputs);
834 unsigned write_mask = (1u << start_idx);
835
836 nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
837 nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
838
839 mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
840 mov->src[0].negate = vec->src[start_idx].negate;
841 mov->src[0].abs = vec->src[start_idx].abs;
842
843 unsigned num_components = 1;
844
845 for (unsigned i = start_idx + 1; i < 4; i++) {
846 if (!(vec->dest.write_mask & (1 << i)))
847 continue;
848
849 if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
850 vec->src[i].negate == vec->src[start_idx].negate &&
851 vec->src[i].abs == vec->src[start_idx].abs) {
852 write_mask |= (1 << i);
853 mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
854 num_components++;
855 }
856 }
857
858 mov->dest.write_mask = (1 << num_components) - 1;
859 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);
860
861 /* replace vec srcs with inserted mov */
862 for (unsigned i = 0, j = 0; i < 4; i++) {
863 if (!(write_mask & (1 << i)))
864 continue;
865
866 nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
867 vec->src[i].swizzle[0] = j++;
868 }
869
870 nir_instr_insert_before(&vec->instr, &mov->instr);
871
872 return write_mask;
873 }
874
875 /*
876 * for vecN instructions:
877 * -merge constant sources into a single src
878 * -insert movs (nir_lower_vec_to_movs equivalent)
879 * for non-vecN instructions:
880 * -try to merge constants into a single constant
881 * -insert movs for multiple constants (pre-HALTI5)
882 */
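/* e.g. an ALU op with two different constant sources ends up with both values
 * packed into a single load_const vector, each src rewritten to select its
 * component via a swizzle, since pre-HALTI5 hardware only allows one
 * uniform/constant source per instruction
 */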
883 static void
884 lower_alu(struct state *state, nir_alu_instr *alu)
885 {
886 const nir_op_info *info = &nir_op_infos[alu->op];
887
888 nir_builder b;
889 nir_builder_init(&b, state->impl);
890 b.cursor = nir_before_instr(&alu->instr);
891
892 switch (alu->op) {
893 case nir_op_vec2:
894 case nir_op_vec3:
895 case nir_op_vec4:
896 break;
897 default:
898 /* pre-GC7000L can only have 1 uniform src per instruction */
899 if (state->c->specs->halti >= 5)
900 return;
901
902 nir_const_value value[4] = {};
903 uint8_t swizzle[4][4] = {};
904 unsigned swiz_max = 0, num_const = 0;
905
906 for (unsigned i = 0; i < info->num_inputs; i++) {
907 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
908 if (!cv)
909 continue;
910
911 unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
912 for (unsigned j = 0; j < num_components; j++) {
913 int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
914 swizzle[i][j] = idx;
915 swiz_max = MAX2(swiz_max, (unsigned) idx);
916 }
917 num_const++;
918 }
919
920 /* nothing to do */
921 if (num_const <= 1)
922 return;
923
924 /* resolve with single combined const src */
925 if (swiz_max < 4) {
926 nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
927
928 for (unsigned i = 0; i < info->num_inputs; i++) {
929 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
930 if (!cv)
931 continue;
932
933 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
934
935 for (unsigned j = 0; j < 4; j++)
936 alu->src[i].swizzle[j] = swizzle[i][j];
937 }
938 return;
939 }
940
941 /* resolve with movs */
942 num_const = 0;
943 for (unsigned i = 0; i < info->num_inputs; i++) {
944 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
945 if (!cv)
946 continue;
947
948 num_const++;
949 if (num_const == 1)
950 continue;
951
952 nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
953 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
954 }
955 return;
956 }
957
958 nir_const_value value[4];
959 unsigned num_components = 0;
960
961 for (unsigned i = 0; i < info->num_inputs; i++) {
962 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
963 if (cv)
964 value[num_components++] = cv[alu->src[i].swizzle[0]];
965 }
966
967 /* if there is more than one constant source to the vecN, combine them
968 * into a single load_const (removing the vecN completely if all components
969 * are constant)
970 */
971 if (num_components > 1) {
972 nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);
973
974 if (num_components == info->num_inputs) {
975 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
976 nir_instr_remove(&alu->instr);
977 return;
978 }
979
980 for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
981 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
982 if (!cv)
983 continue;
984
985 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
986 alu->src[i].swizzle[0] = j++;
987 }
988 }
989
990 unsigned finished_write_mask = 0;
991 for (unsigned i = 0; i < 4; i++) {
992 if (!(alu->dest.write_mask & (1 << i)))
993 continue;
994
995 nir_ssa_def *ssa = alu->src[i].src.ssa;
996
997 /* check that the vecN instruction is the only user of this source */
998 bool need_mov = list_length(&ssa->if_uses) != 0;
999 nir_foreach_use(use_src, ssa) {
1000 if (use_src->parent_instr != &alu->instr)
1001 need_mov = true;
1002 }
1003
1004 nir_instr *instr = ssa->parent_instr;
1005 switch (instr->type) {
1006 case nir_instr_type_alu:
1007 case nir_instr_type_tex:
1008 break;
1009 case nir_instr_type_intrinsic:
1010 if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
1011 need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
1012 break;
1013 }
1014 default:
1015 need_mov = true;
1016 }
1017
1018 if (need_mov && !(finished_write_mask & (1 << i)))
1019 finished_write_mask |= insert_vec_mov(alu, i, state->shader);
1020 }
1021 }
1022
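/* Entry point of this file: lower the NIR a bit further (constant packing,
 * vecN movs, non-indirect uniform folding), go out of SSA, run DCE, allocate
 * registers and walk the control-flow list emitting hw instructions.  The
 * temp and constant counts are returned through the out parameters.
 */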
1023 static bool
1024 emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
1025 {
1026 nir_shader *shader = c->nir;
1027
1028 struct state state = {
1029 .c = c,
1030 .shader = shader,
1031 .impl = nir_shader_get_entrypoint(shader),
1032 };
1033 bool have_indirect_uniform = false;
1034 unsigned indirect_max = 0;
1035
1036 nir_builder b;
1037 nir_builder_init(&b, state.impl);
1038
1039 /* convert non-dynamic uniform loads to constants, etc */
1040 nir_foreach_block(block, state.impl) {
1041 nir_foreach_instr_safe(instr, block) {
1042 switch(instr->type) {
1043 case nir_instr_type_alu:
1044 /* deals with vecN and const srcs */
1045 lower_alu(&state, nir_instr_as_alu(instr));
1046 break;
1047 case nir_instr_type_load_const: {
1048 nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
1049 for (unsigned i = 0; i < load_const->def.num_components; i++)
1050 load_const->value[i] = CONST(load_const->value[i].u32);
1051 } break;
1052 case nir_instr_type_intrinsic: {
1053 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1054 /* TODO: load_ubo can also become a constant in some cases
1055 * (at the moment it can end up emitting a LOAD with two
1056 * uniform sources, which could be a problem on HALTI2)
1057 */
1058 if (intr->intrinsic != nir_intrinsic_load_uniform)
1059 break;
1060 nir_const_value *off = nir_src_as_const_value(intr->src[0]);
1061 if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
1062 have_indirect_uniform = true;
1063 indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
1064 break;
1065 }
1066
1067 unsigned base = nir_intrinsic_base(intr);
1068 /* pre-HALTI2 the uniform offset will be a float */
1069 if (c->specs->halti < 2)
1070 base += (unsigned) off[0].f32;
1071 else
1072 base += off[0].u32;
1073 nir_const_value value[4];
1074
1075 for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
1076 if (nir_intrinsic_base(intr) < 0)
1077 value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
1078 else
1079 value[i] = UNIFORM(base * 4 + i);
1080 }
1081
1082 b.cursor = nir_after_instr(instr);
1083 nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);
1084
1085 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
1086 nir_instr_remove(instr);
1087 } break;
1088 default:
1089 break;
1090 }
1091 }
1092 }
1093
1094 /* TODO: only emit required indirect uniform ranges */
1095 if (have_indirect_uniform) {
1096 for (unsigned i = 0; i < indirect_max * 4; i++)
1097 c->consts[i] = UNIFORM(i).u64;
1098 state.const_count = indirect_max;
1099 }
1100
1101 /* add a mov for any output store whose source is a sysval or constant */
1102 nir_foreach_block(block, state.impl) {
1103 nir_foreach_instr_safe(instr, block) {
1104 if (instr->type != nir_instr_type_intrinsic)
1105 continue;
1106
1107 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1108
1109 switch (intr->intrinsic) {
1110 case nir_intrinsic_store_deref: {
1111 nir_src *src = &intr->src[1];
1112 if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
1113 b.cursor = nir_before_instr(instr);
1114 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
1115 }
1116 } break;
1117 default:
1118 break;
1119 }
1120 }
1121 }
1122
1123 /* call directly to avoid validation (load_const instructions don't pass validation at this point) */
1124 nir_convert_from_ssa(shader, true);
1125 nir_opt_dce(shader);
1126
1127 ra_assign(&state, shader);
1128
1129 emit_cf_list(&state, &nir_shader_get_entrypoint(shader)->body);
1130
1131 *num_temps = ra_finish(&state);
1132 *num_consts = state.const_count;
1133 return true;
1134 }