[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_compiler_nir_emit.h
1 /*
2 * Copyright (c) 2019 Zodiac Inflight Innovations
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "etnaviv_asm.h"
28 #include "etnaviv_context.h"
29
30 #include "compiler/nir/nir.h"
31 #include "compiler/nir/nir_builder.h"
32 #include "compiler/nir/nir_worklist.h"
33 #include "util/register_allocate.h"
34
35 #define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
36 #define SRC_DISABLE ((hw_src){})
37 #define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
38 #define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})
39
40 #define emit(type, args...) etna_emit_##type(state->c, args)
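/* e.g. emit(alu, op, dst, srcs, saturate) expands to
 * etna_emit_alu(state->c, op, dst, srcs, saturate)
 */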
41
42 typedef struct etna_inst_dst hw_dst;
43 typedef struct etna_inst_src hw_src;
44
45 enum {
46 BYPASS_DST = 1,
47 BYPASS_SRC = 2,
48 };
49
50 struct state {
51 struct etna_compile *c;
52
53 unsigned const_count;
54
55 nir_shader *shader;
56 nir_function_impl *impl;
57
58 /* ra state */
59 struct ra_graph *g;
60 struct ra_regs *regs;
61 unsigned *live_map;
62 unsigned num_nodes;
63 };
64
65 #define compile_error(ctx, args...) ({ \
66 printf(args); \
67 ctx->error = true; \
68 assert(0); \
69 })
70
71 static inline hw_src
72 src_swizzle(hw_src src, unsigned swizzle)
73 {
74 if (src.rgroup != INST_RGROUP_IMMEDIATE)
75 src.swiz = inst_swiz_compose(src.swiz, swizzle);
76
77 return src;
78 }
79
80 static inline bool is_sysval(nir_instr *instr)
81 {
82 if (instr->type != nir_instr_type_intrinsic)
83 return false;
84
85 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
86 return intr->intrinsic == nir_intrinsic_load_front_face ||
87 intr->intrinsic == nir_intrinsic_load_frag_coord;
88 }
89
90 /* constants are represented as 64-bit ints:
91 * the low 32 bits hold the value and the high 32 bits hold the type (imm, uniform, etc)
92 */
93
94 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
95 #define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
96 #define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
97 #define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
98
99 static int
100 const_add(uint64_t *c, uint64_t value)
101 {
102 for (unsigned i = 0; i < 4; i++) {
103 if (c[i] == value || !c[i]) {
104 c[i] = value;
105 return i;
106 }
107 }
108 return -1;
109 }
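/* A group of four 64-bit values forms one vec4 uniform slot: const_add()
 * stores a value in the first component that is free (zero) or already holds
 * the same value and returns its index, and const_src() below composes those
 * indices into the source swizzle, so equal constants are deduplicated.
 */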
110
111 static hw_src
112 const_src(struct state *state, nir_const_value *value, unsigned num_components)
113 {
114 /* use inline immediates if possible */
115 if (state->c->specs->halti >= 2 && num_components == 1 &&
116 value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
117 uint32_t bits = value[0].u32;
118
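/* e.g. 1.0f (0x3f800000) has its low 12 bits clear, so it fits the "float"
 * encoding below as 0x3f800; 7 fits the raw 20-bit "unsigned" encoding; and
 * 0xffffffff fits the sign-extended "signed" encoding.
 */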
119 /* "float" - shifted by 12 */
120 if ((bits & 0xfff) == 0)
121 return etna_immediate_src(0, bits >> 12);
122
123 /* "unsigned" - raw 20 bit value */
124 if (bits < (1 << 20))
125 return etna_immediate_src(2, bits);
126
127 /* "signed" - sign extended 20-bit (sign included) value */
128 if (bits >= 0xfff80000)
129 return etna_immediate_src(1, bits);
130 }
131
132 unsigned i;
133 int swiz = -1;
134 for (i = 0; swiz < 0; i++) {
135 uint64_t *a = &state->c->consts[i*4];
136 uint64_t save[4];
137 memcpy(save, a, sizeof(save));
138 swiz = 0;
139 for (unsigned j = 0; j < num_components; j++) {
140 int c = const_add(a, value[j].u64);
141 if (c < 0) {
142 memcpy(a, save, sizeof(save));
143 swiz = -1;
144 break;
145 }
146 swiz |= c << j * 2;
147 }
148 }
149
150 assert(i <= ETNA_MAX_IMM / 4);
151 state->const_count = MAX2(state->const_count, i);
152
153 return SRC_CONST(i - 1, swiz);
154 }
155
156 struct ssa_reg {
157 uint8_t idx;
158 uint8_t src_swizzle;
159 uint8_t dst_swizzle;
160 uint8_t write_mask;
161 };
162
163 /* Swizzles and write masks can be used to layer virtual non-interfering
164 * registers on top of the real VEC4 registers. For example, the virtual
165 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
166 * physical VEC4 base register do not interfere.
167 */
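/* For example, a value allocated REG_TYPE_VIRT_SCALAR_W of base register 3 is
 * written with write_mask 0x8 and read back with a WWWW swizzle, while another
 * value can occupy REG_TYPE_VIRT_VEC3_XYZ of the same base register without
 * any conflict, since their write masks do not overlap.
 */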
168 enum {
169 REG_CLASS_VIRT_SCALAR,
170 REG_CLASS_VIRT_VEC2,
171 REG_CLASS_VIRT_VEC3,
172 REG_CLASS_VEC4,
173 /* special vec2 class for fast transcendentals, limited to XY or ZW */
174 REG_CLASS_VIRT_VEC2T,
175 /* special classes for LOAD - contiguous components */
176 REG_CLASS_VIRT_VEC2C,
177 REG_CLASS_VIRT_VEC3C,
178 NUM_REG_CLASSES,
179 } reg_class;
180
181 enum {
182 REG_TYPE_VEC4,
183 REG_TYPE_VIRT_VEC3_XYZ,
184 REG_TYPE_VIRT_VEC3_XYW,
185 REG_TYPE_VIRT_VEC3_XZW,
186 REG_TYPE_VIRT_VEC3_YZW,
187 REG_TYPE_VIRT_VEC2_XY,
188 REG_TYPE_VIRT_VEC2_XZ,
189 REG_TYPE_VIRT_VEC2_XW,
190 REG_TYPE_VIRT_VEC2_YZ,
191 REG_TYPE_VIRT_VEC2_YW,
192 REG_TYPE_VIRT_VEC2_ZW,
193 REG_TYPE_VIRT_SCALAR_X,
194 REG_TYPE_VIRT_SCALAR_Y,
195 REG_TYPE_VIRT_SCALAR_Z,
196 REG_TYPE_VIRT_SCALAR_W,
197 REG_TYPE_VIRT_VEC2T_XY,
198 REG_TYPE_VIRT_VEC2T_ZW,
199 REG_TYPE_VIRT_VEC2C_XY,
200 REG_TYPE_VIRT_VEC2C_YZ,
201 REG_TYPE_VIRT_VEC2C_ZW,
202 REG_TYPE_VIRT_VEC3C_XYZ,
203 REG_TYPE_VIRT_VEC3C_YZW,
204 NUM_REG_TYPES,
205 } reg_type;
206
207 /* writemask when used as dest */
208 static const uint8_t
209 reg_writemask[NUM_REG_TYPES] = {
210 [REG_TYPE_VEC4] = 0xf,
211 [REG_TYPE_VIRT_SCALAR_X] = 0x1,
212 [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
213 [REG_TYPE_VIRT_VEC2_XY] = 0x3,
214 [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
215 [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
216 [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
217 [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
218 [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
219 [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
220 [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
221 [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
222 [REG_TYPE_VIRT_SCALAR_W] = 0x8,
223 [REG_TYPE_VIRT_VEC2_XW] = 0x9,
224 [REG_TYPE_VIRT_VEC2_YW] = 0xa,
225 [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
226 [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
227 [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
228 [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
229 [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
230 [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
231 [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
232 };
233
234 /* how to swizzle when used as a src */
235 static const uint8_t
236 reg_swiz[NUM_REG_TYPES] = {
237 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
238 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
239 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
240 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
241 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
242 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
243 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
244 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
245 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
246 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
247 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
248 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
249 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
250 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
251 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
252 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
253 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
254 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
255 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
256 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
257 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
258 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
259 };
260
261 /* how to swizzle when used as a dest */
262 static const uint8_t
263 reg_dst_swiz[NUM_REG_TYPES] = {
264 [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
265 [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
266 [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
267 [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
268 [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
269 [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
270 [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
271 [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
272 [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
273 [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
274 [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
275 [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
276 [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
277 [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
278 [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
279 [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
280 [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
281 [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
282 [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
283 [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
284 [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
285 [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
286 };
287
288 static inline int reg_get_type(int virt_reg)
289 {
290 return virt_reg % NUM_REG_TYPES;
291 }
292
293 static inline int reg_get_base(struct state *state, int virt_reg)
294 {
295 /* offset by 1 to avoid reserved position register */
296 if (state->shader->info.stage == MESA_SHADER_FRAGMENT)
297 return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
298 return virt_reg / NUM_REG_TYPES;
299 }
300
301 /* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
302 * (fs registers are offset by 1 to avoid reserving r0)
303 */
304 #define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
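/* reg_get_base() maps this to ((ETNA_MAX_TEMPS - 1) + 1) % ETNA_MAX_TEMPS == 0
 * for fragment shaders, i.e. the .z component of hardware register 0.
 */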
305
306 static inline int reg_get_class(int virt_reg)
307 {
308 switch (reg_get_type(virt_reg)) {
309 case REG_TYPE_VEC4:
310 return REG_CLASS_VEC4;
311 case REG_TYPE_VIRT_VEC3_XYZ:
312 case REG_TYPE_VIRT_VEC3_XYW:
313 case REG_TYPE_VIRT_VEC3_XZW:
314 case REG_TYPE_VIRT_VEC3_YZW:
315 return REG_CLASS_VIRT_VEC3;
316 case REG_TYPE_VIRT_VEC2_XY:
317 case REG_TYPE_VIRT_VEC2_XZ:
318 case REG_TYPE_VIRT_VEC2_XW:
319 case REG_TYPE_VIRT_VEC2_YZ:
320 case REG_TYPE_VIRT_VEC2_YW:
321 case REG_TYPE_VIRT_VEC2_ZW:
322 return REG_CLASS_VIRT_VEC2;
323 case REG_TYPE_VIRT_SCALAR_X:
324 case REG_TYPE_VIRT_SCALAR_Y:
325 case REG_TYPE_VIRT_SCALAR_Z:
326 case REG_TYPE_VIRT_SCALAR_W:
327 return REG_CLASS_VIRT_SCALAR;
328 case REG_TYPE_VIRT_VEC2T_XY:
329 case REG_TYPE_VIRT_VEC2T_ZW:
330 return REG_CLASS_VIRT_VEC2T;
331 case REG_TYPE_VIRT_VEC2C_XY:
332 case REG_TYPE_VIRT_VEC2C_YZ:
333 case REG_TYPE_VIRT_VEC2C_ZW:
334 return REG_CLASS_VIRT_VEC2C;
335 case REG_TYPE_VIRT_VEC3C_XYZ:
336 case REG_TYPE_VIRT_VEC3C_YZW:
337 return REG_CLASS_VIRT_VEC3C;
338 }
339
340 assert(false);
341 return 0;
342 }
343
344 /* get unique ssa/reg index for nir_src */
345 static unsigned
346 src_index(nir_function_impl *impl, nir_src *src)
347 {
348 return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc);
349 }
350
351 /* get unique ssa/reg index for nir_dest */
352 static unsigned
353 dest_index(nir_function_impl *impl, nir_dest *dest)
354 {
355 return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc);
356 }
357
358 /* nir_src to allocated register */
359 static hw_src
360 ra_src(struct state *state, nir_src *src)
361 {
362 unsigned reg = ra_get_node_reg(state->g, state->live_map[src_index(state->impl, src)]);
363 return SRC_REG(reg_get_base(state, reg), reg_swiz[reg_get_type(reg)]);
364 }
365
366 static hw_src
367 get_src(struct state *state, nir_src *src)
368 {
369 if (!src->is_ssa)
370 return ra_src(state, src);
371
372 nir_instr *instr = src->ssa->parent_instr;
373
374 if (instr->pass_flags & BYPASS_SRC) {
375 assert(instr->type == nir_instr_type_alu);
376 nir_alu_instr *alu = nir_instr_as_alu(instr);
377 assert(alu->op == nir_op_mov);
378 return src_swizzle(get_src(state, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
379 }
380
381 switch (instr->type) {
382 case nir_instr_type_load_const:
383 return const_src(state, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
384 case nir_instr_type_intrinsic: {
385 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
386 switch (intr->intrinsic) {
387 case nir_intrinsic_load_input:
388 case nir_intrinsic_load_instance_id:
389 case nir_intrinsic_load_uniform:
390 case nir_intrinsic_load_ubo:
391 return ra_src(state, src);
392 case nir_intrinsic_load_front_face:
393 return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
394 case nir_intrinsic_load_frag_coord:
395 return SRC_REG(0, INST_SWIZ_IDENTITY);
396 default:
397 compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
398 nir_intrinsic_infos[intr->intrinsic].name);
399 break;
400 }
401 } break;
402 case nir_instr_type_alu:
403 case nir_instr_type_tex:
404 return ra_src(state, src);
405 case nir_instr_type_ssa_undef: {
406 /* return zero to deal with broken Blur demo */
407 nir_const_value value = CONST(0);
408 return src_swizzle(const_src(state, &value, 1), SWIZZLE(X,X,X,X));
409 }
410 default:
411 compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
412 break;
413 }
414
415 return SRC_DISABLE;
416 }
417
418 static void
419 update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask)
420 {
421 if (!swiz)
422 return;
423
424 bool is_vec = dest != NULL;
425 unsigned swizzle = 0, write_mask = 0;
426 for (unsigned i = 0; i < 4; i++) {
427 /* channel not written */
428 if (!(alu->dest.write_mask & (1 << i)))
429 continue;
430 /* src is different (only check for vecN) */
431 if (is_vec && alu->src[i].src.ssa != &dest->ssa)
432 continue;
433
434 unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i];
435 swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;
436 /* the output channel is only written if its source channel is written through this chain */
437 if (*mask & (1 << src_swiz))
438 write_mask |= 1 << i;
439 }
440 *swiz = swizzle;
441 *mask = write_mask;
442 }
443
444 static bool
445 vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
446 {
447 for (unsigned i = 0; i < 4; i++) {
448 if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
449 continue;
450
451 if (vec->src[i].swizzle[0] != i)
452 return true;
453 }
454
455 /* don't deal with possible bypassed vec/mov chain */
456 nir_foreach_use(use_src, ssa) {
457 nir_instr *instr = use_src->parent_instr;
458 if (instr->type != nir_instr_type_alu)
459 continue;
460
461 nir_alu_instr *alu = nir_instr_as_alu(instr);
462
463 switch (alu->op) {
464 case nir_op_mov:
465 case nir_op_vec2:
466 case nir_op_vec3:
467 case nir_op_vec4:
468 return true;
469 default:
470 break;
471 }
472 }
473 return false;
474 }
475
476 static nir_dest *
477 real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask)
478 {
479 if (!dest || !dest->is_ssa)
480 return dest;
481
482 bool can_bypass_src = !list_length(&dest->ssa.if_uses);
483 nir_instr *p_instr = dest->ssa.parent_instr;
484
485 /* if used by a vecN, the "real" destination becomes the vecN destination
486 * lower_alu guarantees that values used by a vecN are only used by that vecN
487 * we can apply the same logic to movs in some cases too
488 */
489 nir_foreach_use(use_src, &dest->ssa) {
490 nir_instr *instr = use_src->parent_instr;
491
492 /* src bypass check: for now only deal with tex src mov case
493 * note: for alu don't bypass mov for multiple uniform sources
494 */
495 switch (instr->type) {
496 case nir_instr_type_tex:
497 if (p_instr->type == nir_instr_type_alu &&
498 nir_instr_as_alu(p_instr)->op == nir_op_mov) {
499 break;
500 }
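/* fall through: only a mov feeding a tex can be bypassed as src */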
501 default:
502 can_bypass_src = false;
503 break;
504 }
505
506 if (instr->type != nir_instr_type_alu)
507 continue;
508
509 nir_alu_instr *alu = nir_instr_as_alu(instr);
510
511 switch (alu->op) {
512 case nir_op_vec2:
513 case nir_op_vec3:
514 case nir_op_vec4:
515 assert(list_length(&dest->ssa.if_uses) == 0);
516 nir_foreach_use(use_src, &dest->ssa)
517 assert(use_src->parent_instr == instr);
518
519 update_swiz_mask(alu, dest, swiz, mask);
520 break;
521 case nir_op_mov: {
522 switch (dest->ssa.parent_instr->type) {
523 case nir_instr_type_alu:
524 case nir_instr_type_tex:
525 break;
526 default:
527 continue;
528 }
529 if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1)
530 continue;
531
532 update_swiz_mask(alu, NULL, swiz, mask);
533 break;
534 }
535 default:
536 continue;
537 }
538
539 assert(!(instr->pass_flags & BYPASS_SRC));
540 instr->pass_flags |= BYPASS_DST;
541 return real_dest(&alu->dest.dest, swiz, mask);
542 }
543
544 if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {
545 p_instr->pass_flags |= BYPASS_SRC;
546 return NULL;
547 }
548
549 return dest;
550 }
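/* Example of the bypass logic above: for "a = fmul ...; v = vec4 a, b, c, d"
 * the vec4 is flagged BYPASS_DST (so it is never emitted) and real_dest()
 * returns the vec4's destination, i.e. the fmul writes straight into the
 * vec4's register with its swizzle/mask adjusted by update_swiz_mask().
 */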
551
552 /* get allocated dest register for nir_dest
553 * *p_swiz tells how the components need to be placed into register
554 */
555 static hw_dst
556 ra_dest(struct state *state, nir_dest *dest, unsigned *p_swiz)
557 {
558 unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
559 dest = real_dest(dest, &swiz, &mask);
560
561 unsigned r = ra_get_node_reg(state->g, state->live_map[dest_index(state->impl, dest)]);
562 unsigned t = reg_get_type(r);
563
564 *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);
565
566 return (hw_dst) {
567 .use = 1,
568 .reg = reg_get_base(state, r),
569 .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
570 };
571 }
572
573 /* if instruction dest needs a register, return nir_dest for it */
574 static nir_dest *
575 dest_for_instr(nir_instr *instr)
576 {
577 nir_dest *dest = NULL;
578
579 switch (instr->type) {
580 case nir_instr_type_alu:
581 dest = &nir_instr_as_alu(instr)->dest.dest;
582 break;
583 case nir_instr_type_tex:
584 dest = &nir_instr_as_tex(instr)->dest;
585 break;
586 case nir_instr_type_intrinsic: {
587 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
588 if (intr->intrinsic == nir_intrinsic_load_uniform ||
589 intr->intrinsic == nir_intrinsic_load_ubo ||
590 intr->intrinsic == nir_intrinsic_load_input ||
591 intr->intrinsic == nir_intrinsic_load_instance_id)
592 dest = &intr->dest;
593 } break;
594 case nir_instr_type_deref:
595 return NULL;
596 default:
597 break;
598 }
599 return real_dest(dest, NULL, NULL);
600 }
601
602 struct live_def {
603 nir_instr *instr;
604 nir_dest *dest; /* cached dest_for_instr */
605 unsigned live_start, live_end; /* live range */
606 };
607
608 static void
609 range_include(struct live_def *def, unsigned index)
610 {
611 if (def->live_start > index)
612 def->live_start = index;
613 if (def->live_end < index)
614 def->live_end = index;
615 }
616
617 struct live_defs_state {
618 unsigned num_defs;
619 unsigned bitset_words;
620
621 nir_function_impl *impl;
622 nir_block *block; /* current block pointer */
623 unsigned index; /* current live index */
624
625 struct live_def *defs;
626 unsigned *live_map; /* to map ssa/reg index into defs array */
627
628 nir_block_worklist worklist;
629 };
630
631 static bool
632 init_liveness_block(nir_block *block,
633 struct live_defs_state *state)
634 {
635 block->live_in = reralloc(block, block->live_in, BITSET_WORD,
636 state->bitset_words);
637 memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD));
638
639 block->live_out = reralloc(block, block->live_out, BITSET_WORD,
640 state->bitset_words);
641 memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD));
642
643 nir_block_worklist_push_head(&state->worklist, block);
644
645 return true;
646 }
647
648 static bool
649 set_src_live(nir_src *src, void *void_state)
650 {
651 struct live_defs_state *state = void_state;
652
653 if (src->is_ssa) {
654 nir_instr *instr = src->ssa->parent_instr;
655
656 if (is_sysval(instr) || instr->type == nir_instr_type_deref)
657 return true;
658
659 switch (instr->type) {
660 case nir_instr_type_load_const:
661 case nir_instr_type_ssa_undef:
662 return true;
663 case nir_instr_type_alu: {
664 /* alu op bypass */
665 nir_alu_instr *alu = nir_instr_as_alu(instr);
666 if (instr->pass_flags & BYPASS_SRC) {
667 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
668 set_src_live(&alu->src[i].src, state);
669 return true;
670 }
671 } break;
672 default:
673 break;
674 }
675 }
676
677 unsigned i = state->live_map[src_index(state->impl, src)];
678 assert(i != ~0u);
679
680 BITSET_SET(state->block->live_in, i);
681 range_include(&state->defs[i], state->index);
682
683 return true;
684 }
685
686 static bool
687 propagate_across_edge(nir_block *pred, nir_block *succ,
688 struct live_defs_state *state)
689 {
690 BITSET_WORD progress = 0;
691 for (unsigned i = 0; i < state->bitset_words; ++i) {
692 progress |= succ->live_in[i] & ~pred->live_out[i];
693 pred->live_out[i] |= succ->live_in[i];
694 }
695 return progress != 0;
696 }
697
698 static unsigned
699 live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map)
700 {
701 struct live_defs_state state;
702 unsigned block_live_index[impl->num_blocks + 1];
703
704 state.impl = impl;
705 state.defs = defs;
706 state.live_map = live_map;
707
708 state.num_defs = 0;
709 nir_foreach_block(block, impl) {
710 block_live_index[block->index] = state.num_defs;
711 nir_foreach_instr(instr, block) {
712 nir_dest *dest = dest_for_instr(instr);
713 if (!dest)
714 continue;
715
716 unsigned idx = dest_index(impl, dest);
717 /* register is already in defs */
718 if (live_map[idx] != ~0u)
719 continue;
720
721 defs[state.num_defs] = (struct live_def) {instr, dest, state.num_defs, 0};
722
723 /* input live from the start */
724 if (instr->type == nir_instr_type_intrinsic) {
725 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
726 if (intr->intrinsic == nir_intrinsic_load_input ||
727 intr->intrinsic == nir_intrinsic_load_instance_id)
728 defs[state.num_defs].live_start = 0;
729 }
730
731 live_map[idx] = state.num_defs;
732 state.num_defs++;
733 }
734 }
735 block_live_index[impl->num_blocks] = state.num_defs;
736
737 nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
738
739 /* We now know how many unique ssa definitions we have and we can go
740 * ahead and allocate live_in and live_out sets and add all of the
741 * blocks to the worklist.
742 */
743 state.bitset_words = BITSET_WORDS(state.num_defs);
744 nir_foreach_block(block, impl) {
745 init_liveness_block(block, &state);
746 }
747
748 /* We're now ready to work through the worklist and update the liveness
749 * sets of each of the blocks. By the time we get to this point, every
750 * block in the function implementation has been pushed onto the
751 * worklist in reverse order. As long as we keep the worklist
752 * up-to-date as we go, everything will get covered.
753 */
754 while (!nir_block_worklist_is_empty(&state.worklist)) {
755 /* We pop them off in the reverse order we pushed them on. This way
756 * the first walk of the instructions is backwards so we only walk
757 * once in the case of no control flow.
758 */
759 nir_block *block = nir_block_worklist_pop_head(&state.worklist);
760 state.block = block;
761
762 memcpy(block->live_in, block->live_out,
763 state.bitset_words * sizeof(BITSET_WORD));
764
765 state.index = block_live_index[block->index + 1];
766
767 nir_if *following_if = nir_block_get_following_if(block);
768 if (following_if)
769 set_src_live(&following_if->condition, &state);
770
771 nir_foreach_instr_reverse(instr, block) {
772 /* when we come across the next "live" instruction, decrement index */
773 if (state.index && instr == defs[state.index - 1].instr) {
774 state.index--;
775 /* the only source of writes to registers is phis:
776 * we don't expect any partial write_mask alus
777 * so clearing live_in here is OK
778 */
779 BITSET_CLEAR(block->live_in, state.index);
780 }
781
782 /* don't call set_src_live for instructions that won't be emitted (pass_flags set) */
783 if (instr->pass_flags)
784 continue;
785
786 unsigned index = state.index;
787
788 /* output live till the end */
789 if (instr->type == nir_instr_type_intrinsic) {
790 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
791 if (intr->intrinsic == nir_intrinsic_store_deref)
792 state.index = ~0u;
793 }
794
795 nir_foreach_src(instr, set_src_live, &state);
796
797 state.index = index;
798 }
799 assert(state.index == block_live_index[block->index]);
800
801 /* Walk over all of the predecessors of the current block updating
802 * their live in with the live out of this one. If anything has
803 * changed, add the predecessor to the work list so that we ensure
804 * that the new information is used.
805 */
806 set_foreach(block->predecessors, entry) {
807 nir_block *pred = (nir_block *)entry->key;
808 if (propagate_across_edge(pred, block, &state))
809 nir_block_worklist_push_tail(&state.worklist, pred);
810 }
811 }
812
813 nir_block_worklist_fini(&state.worklist);
814
815 /* apply live_in/live_out to ranges */
816
817 nir_foreach_block(block, impl) {
818 int i;
819
820 BITSET_FOREACH_SET(i, block->live_in, state.num_defs)
821 range_include(&state.defs[i], block_live_index[block->index]);
822
823 BITSET_FOREACH_SET(i, block->live_out, state.num_defs)
824 range_include(&state.defs[i], block_live_index[block->index + 1]);
825 }
826
827 return state.num_defs;
828 }
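/* The result: defs[i].live_start/live_end describe one linear live range per
 * node; ra_assign() below adds an interference edge for every pair of nodes
 * whose ranges overlap.
 */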
829
830 /* precomputed by register_allocate */
831 static unsigned int *q_values[] = {
832 (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
833 (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
834 (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
835 (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
836 (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
837 (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
838 (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
839 };
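/* These are the q values for register_allocate's p,q colorability test:
 * roughly, each entry gives how many registers of one class a single register
 * of another class can conflict with, precomputed for the classes above.
 */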
840
841 static void
842 ra_assign(struct state *state, nir_shader *shader)
843 {
844 struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
845 NUM_REG_TYPES, false);
846
847 /* classes are always created starting from index 0, so they are equal to the class enum,
848 * which represents a register with (c+1) components
849 */
850 for (int c = 0; c < NUM_REG_CLASSES; c++)
851 ra_alloc_reg_class(regs);
852 /* add each register of each class */
853 for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
854 ra_class_add_reg(regs, reg_get_class(r), r);
855 /* set conflicts */
856 for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
857 for (int i = 0; i < NUM_REG_TYPES; i++) {
858 for (int j = 0; j < i; j++) {
859 if (reg_writemask[i] & reg_writemask[j]) {
860 ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
861 NUM_REG_TYPES * r + j);
862 }
863 }
864 }
865 }
866 ra_set_finalize(regs, q_values);
867
868 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
869
870 /* liveness and interference */
871
872 nir_index_blocks(impl);
873 nir_index_ssa_defs(impl);
874 nir_foreach_block(block, impl) {
875 nir_foreach_instr(instr, block)
876 instr->pass_flags = 0;
877 }
878
879 /* this gives an approximation/upper limit on how many nodes are needed
880 * (some ssa values do not represent an allocated register)
881 */
882 unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
883 unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
884 memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
885 struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);
886
887 unsigned num_nodes = live_defs(impl, defs, live_map);
888 struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
889
890 /* set classes from num_components */
891 for (unsigned i = 0; i < num_nodes; i++) {
892 nir_instr *instr = defs[i].instr;
893 nir_dest *dest = defs[i].dest;
894 unsigned c = nir_dest_num_components(*dest) - 1;
895
896 if (instr->type == nir_instr_type_alu &&
897 state->c->specs->has_new_transcendentals) {
898 switch (nir_instr_as_alu(instr)->op) {
899 case nir_op_fdiv:
900 case nir_op_flog2:
901 case nir_op_fsin:
902 case nir_op_fcos:
903 assert(dest->is_ssa);
904 c = REG_CLASS_VIRT_VEC2T;
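/* fall through */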
905 default:
906 break;
907 }
908 }
909
910 if (instr->type == nir_instr_type_intrinsic) {
911 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
912 /* can't have dst swizzle or sparse writemask on UBO loads */
913 if (intr->intrinsic == nir_intrinsic_load_ubo) {
914 assert(dest == &intr->dest);
915 if (dest->ssa.num_components == 2)
916 c = REG_CLASS_VIRT_VEC2C;
917 if (dest->ssa.num_components == 3)
918 c = REG_CLASS_VIRT_VEC3C;
919 }
920 }
921
922 ra_set_node_class(g, i, c);
923 }
924
925 nir_foreach_block(block, impl) {
926 nir_foreach_instr(instr, block) {
927 if (instr->type != nir_instr_type_intrinsic)
928 continue;
929
930 nir_dest *dest = dest_for_instr(instr);
931 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
932 unsigned reg;
933
934 switch (intr->intrinsic) {
935 case nir_intrinsic_store_deref: {
936 /* don't want outputs to be swizzled
937 * TODO: better would be to set the type to X/XY/XYZ/XYZW
938 * TODO: what if fragcoord.z is read after writing fragdepth?
939 */
940 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
941 unsigned index = live_map[src_index(impl, &intr->src[1])];
942
943 if (shader->info.stage == MESA_SHADER_FRAGMENT &&
944 deref->var->data.location == FRAG_RESULT_DEPTH) {
945 ra_set_node_reg(g, index, REG_FRAG_DEPTH);
946 } else {
947 ra_set_node_class(g, index, REG_CLASS_VEC4);
948 }
949 } continue;
950 case nir_intrinsic_load_input:
951 reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
952 REG_TYPE_VIRT_SCALAR_X,
953 REG_TYPE_VIRT_VEC2_XY,
954 REG_TYPE_VIRT_VEC3_XYZ,
955 REG_TYPE_VEC4,
956 }[nir_dest_num_components(*dest) - 1];
957 break;
958 case nir_intrinsic_load_instance_id:
959 reg = state->c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
960 break;
961 default:
962 continue;
963 }
964
965 ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
966 }
967 }
968
969 /* add interference for intersecting live ranges */
970 for (unsigned i = 0; i < num_nodes; i++) {
971 assert(defs[i].live_start < defs[i].live_end);
972 for (unsigned j = 0; j < i; j++) {
973 if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
974 continue;
975 ra_add_node_interference(g, i, j);
976 }
977 }
978
979 ralloc_free(defs);
980
981 /* Allocate registers */
982 ASSERTED bool ok = ra_allocate(g);
983 assert(ok);
984
985 state->g = g;
986 state->regs = regs;
987 state->live_map = live_map;
988 state->num_nodes = num_nodes;
989 }
990
991 static unsigned
992 ra_finish(struct state *state)
993 {
994 /* TODO: better way to get number of registers used? */
995 unsigned j = 0;
996 for (unsigned i = 0; i < state->num_nodes; i++) {
997 j = MAX2(j, reg_get_base(state, ra_get_node_reg(state->g, i)) + 1);
998 }
999
1000 ralloc_free(state->g);
1001 ralloc_free(state->regs);
1002 ralloc_free(state->live_map);
1003
1004 return j;
1005 }
1006
1007 static void
1008 emit_alu(struct state *state, nir_alu_instr * alu)
1009 {
1010 const nir_op_info *info = &nir_op_infos[alu->op];
1011
1012 /* marked as dead instruction (vecN and other bypassed instr) */
1013 if (alu->instr.pass_flags)
1014 return;
1015
1016 assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
1017
1018 unsigned dst_swiz;
1019 hw_dst dst = ra_dest(state, &alu->dest.dest, &dst_swiz);
1020
1021 /* compose alu write_mask with RA write mask */
1022 if (!alu->dest.dest.is_ssa)
1023 dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);
1024
1025 switch (alu->op) {
1026 case nir_op_fdot2:
1027 case nir_op_fdot3:
1028 case nir_op_fdot4:
1029 /* not per-component - don't compose dst_swiz */
1030 dst_swiz = INST_SWIZ_IDENTITY;
1031 break;
1032 default:
1033 break;
1034 }
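/* e.g. a dest allocated to REG_TYPE_VIRT_VEC2_ZW has write_mask 0xc and a
 * dst_swiz that routes the instruction's x/y results into z/w; swizzling the
 * sources with dst_swiz below keeps the per-component inputs lined up.
 */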
1035
1036 hw_src srcs[3];
1037
1038 for (int i = 0; i < info->num_inputs; i++) {
1039 nir_alu_src *asrc = &alu->src[i];
1040 hw_src src;
1041
1042 src = src_swizzle(get_src(state, &asrc->src), ALU_SWIZ(asrc));
1043 src = src_swizzle(src, dst_swiz);
1044
1045 if (src.rgroup != INST_RGROUP_IMMEDIATE) {
1046 src.neg = asrc->negate || (alu->op == nir_op_fneg);
1047 src.abs = asrc->abs || (alu->op == nir_op_fabs);
1048 } else {
1049 assert(!asrc->negate && alu->op != nir_op_fneg);
1050 assert(!asrc->abs && alu->op != nir_op_fabs);
1051 }
1052
1053 srcs[i] = src;
1054 }
1055
1056 emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
1057 }
1058
1059 static void
1060 emit_tex(struct state *state, nir_tex_instr * tex)
1061 {
1062 unsigned dst_swiz;
1063 hw_dst dst = ra_dest(state, &tex->dest, &dst_swiz);
1064 nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;
1065
1066 for (unsigned i = 0; i < tex->num_srcs; i++) {
1067 switch (tex->src[i].src_type) {
1068 case nir_tex_src_coord:
1069 coord = &tex->src[i].src;
1070 break;
1071 case nir_tex_src_bias:
1072 case nir_tex_src_lod:
1073 assert(!lod_bias);
1074 lod_bias = &tex->src[i].src;
1075 break;
1076 case nir_tex_src_comparator:
1077 compare = &tex->src[i].src;
1078 break;
1079 default:
1080 compile_error(state->c, "Unhandled NIR tex src type: %d\n",
1081 tex->src[i].src_type);
1082 break;
1083 }
1084 }
1085
1086 emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(state, coord),
1087 lod_bias ? get_src(state, lod_bias) : SRC_DISABLE,
1088 compare ? get_src(state, compare) : SRC_DISABLE);
1089 }
1090
1091 static void
1092 emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
1093 {
1094 switch (intr->intrinsic) {
1095 case nir_intrinsic_store_deref:
1096 emit(output, nir_src_as_deref(intr->src[0])->var, get_src(state, &intr->src[1]));
1097 break;
1098 case nir_intrinsic_discard_if:
1099 emit(discard, get_src(state, &intr->src[0]));
1100 break;
1101 case nir_intrinsic_discard:
1102 emit(discard, SRC_DISABLE);
1103 break;
1104 case nir_intrinsic_load_uniform: {
1105 unsigned dst_swiz;
1106 struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
1107
1108 /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
1109 emit_inst(state->c, &(struct etna_inst) {
1110 .opcode = INST_OPCODE_MOVAR,
1111 .dst.write_mask = 0x1,
1112 .src[2] = get_src(state, &intr->src[0]),
1113 });
1114 emit_inst(state->c, &(struct etna_inst) {
1115 .opcode = INST_OPCODE_MOV,
1116 .dst = dst,
1117 .src[2] = {
1118 .use = 1,
1119 .rgroup = INST_RGROUP_UNIFORM_0,
1120 .reg = nir_intrinsic_base(intr),
1121 .swiz = dst_swiz,
1122 .amode = INST_AMODE_ADD_A_X,
1123 },
1124 });
1125 } break;
1126 case nir_intrinsic_load_ubo: {
1127 /* TODO: if offset is of the form (x + C) then add C to the base instead */
1128 unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
1129 unsigned dst_swiz;
1130 emit_inst(state->c, &(struct etna_inst) {
1131 .opcode = INST_OPCODE_LOAD,
1132 .type = INST_TYPE_U32,
1133 .dst = ra_dest(state, &intr->dest, &dst_swiz),
1134 .src[0] = get_src(state, &intr->src[1]),
1135 .src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
1136 });
1137 } break;
1138 case nir_intrinsic_load_front_face:
1139 case nir_intrinsic_load_frag_coord:
1140 assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
1141 break;
1142 case nir_intrinsic_load_input:
1143 case nir_intrinsic_load_instance_id:
1144 break;
1145 default:
1146 compile_error(state->c, "Unhandled NIR intrinsic type: %s\n",
1147 nir_intrinsic_infos[intr->intrinsic].name);
1148 }
1149 }
1150
1151 static void
1152 emit_instr(struct state *state, nir_instr * instr)
1153 {
1154 switch (instr->type) {
1155 case nir_instr_type_alu:
1156 emit_alu(state, nir_instr_as_alu(instr));
1157 break;
1158 case nir_instr_type_tex:
1159 emit_tex(state, nir_instr_as_tex(instr));
1160 break;
1161 case nir_instr_type_intrinsic:
1162 emit_intrinsic(state, nir_instr_as_intrinsic(instr));
1163 break;
1164 case nir_instr_type_jump:
1165 assert(nir_instr_is_last(instr));
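/* fall through: the jump itself is emitted by emit_block() */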
1166 case nir_instr_type_load_const:
1167 case nir_instr_type_ssa_undef:
1168 case nir_instr_type_deref:
1169 break;
1170 default:
1171 compile_error(state->c, "Unhandled NIR instruction type: %d\n", instr->type);
1172 break;
1173 }
1174 }
1175
1176 static void
1177 emit_block(struct state *state, nir_block * block)
1178 {
1179 emit(block_start, block->index);
1180
1181 nir_foreach_instr(instr, block)
1182 emit_instr(state, instr);
1183
1184 /* succs->index < block->index is for the loop case */
1185 nir_block *succs = block->successors[0];
1186 if (nir_block_ends_in_jump(block) || succs->index < block->index)
1187 emit(jump, succs->index, SRC_DISABLE);
1188 }
1189
1190 static void
1191 emit_cf_list(struct state *state, struct exec_list *list);
1192
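/* emit_if lowers an if to jumps: a conditional jump to the first else block,
 * then the then_list, an optional unconditional jump over the else_list, and
 * finally the else_list; loop back-edges are handled by emit_block() above.
 */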
1193 static void
1194 emit_if(struct state *state, nir_if * nif)
1195 {
1196 emit(jump, nir_if_first_else_block(nif)->index, get_src(state, &nif->condition));
1197 emit_cf_list(state, &nif->then_list);
1198
1199 /* jump at end of then_list to skip else_list
1200 * not needed if then_list already ends with a jump or else_list is empty
1201 */
1202 if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
1203 !nir_cf_list_is_empty_block(&nif->else_list))
1204 emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);
1205
1206 emit_cf_list(state, &nif->else_list);
1207 }
1208
1209 static void
1210 emit_cf_list(struct state *state, struct exec_list *list)
1211 {
1212 foreach_list_typed(nir_cf_node, node, node, list) {
1213 switch (node->type) {
1214 case nir_cf_node_block:
1215 emit_block(state, nir_cf_node_as_block(node));
1216 break;
1217 case nir_cf_node_if:
1218 emit_if(state, nir_cf_node_as_if(node));
1219 break;
1220 case nir_cf_node_loop:
1221 emit_cf_list(state, &nir_cf_node_as_loop(node)->body);
1222 break;
1223 default:
1224 compile_error(state->c, "Unknown NIR node type\n");
1225 break;
1226 }
1227 }
1228 }
1229
1230 /* based on nir_lower_vec_to_movs */
1231 static unsigned
1232 insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
1233 {
1234 assert(start_idx < nir_op_infos[vec->op].num_inputs);
1235 unsigned write_mask = (1u << start_idx);
1236
1237 nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
1238 nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
1239
1240 mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
1241 mov->src[0].negate = vec->src[start_idx].negate;
1242 mov->src[0].abs = vec->src[start_idx].abs;
1243
1244 unsigned num_components = 1;
1245
1246 for (unsigned i = start_idx + 1; i < 4; i++) {
1247 if (!(vec->dest.write_mask & (1 << i)))
1248 continue;
1249
1250 if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
1251 vec->src[i].negate == vec->src[start_idx].negate &&
1252 vec->src[i].abs == vec->src[start_idx].abs) {
1253 write_mask |= (1 << i);
1254 mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
1255 num_components++;
1256 }
1257 }
1258
1259 mov->dest.write_mask = (1 << num_components) - 1;
1260 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);
1261
1262 /* replace vec srcs with inserted mov */
1263 for (unsigned i = 0, j = 0; i < 4; i++) {
1264 if (!(write_mask & (1 << i)))
1265 continue;
1266
1267 nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
1268 vec->src[i].swizzle[0] = j++;
1269 }
1270
1271 nir_instr_insert_before(&vec->instr, &mov->instr);
1272
1273 return write_mask;
1274 }
1275
1276 /*
1277 * for vecN instructions:
1278 * -merge constant sources into a single src
1279 * -insert movs (nir_lower_vec_to_movs equivalent)
1280 * for non-vecN instructions:
1281 * -try to merge constants as single constant
1282 * -insert movs for multiple constants (pre-HALTI5)
1283 */
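/* e.g. "v = vec4 a, 1.0, 2.0, b": the two constants are merged into one
 * load_const used with different swizzle components, and movs are inserted
 * (insert_vec_mov) for the remaining sources that cannot be consumed in place.
 */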
1284 static void
1285 lower_alu(struct state *state, nir_alu_instr *alu)
1286 {
1287 const nir_op_info *info = &nir_op_infos[alu->op];
1288
1289 nir_builder b;
1290 nir_builder_init(&b, state->impl);
1291 b.cursor = nir_before_instr(&alu->instr);
1292
1293 switch (alu->op) {
1294 case nir_op_vec2:
1295 case nir_op_vec3:
1296 case nir_op_vec4:
1297 break;
1298 default:
1299 /* pre-GC7000L can only have 1 uniform src per instruction */
1300 if (state->c->specs->halti >= 5)
1301 return;
1302
1303 nir_const_value value[4] = {};
1304 uint8_t swizzle[4][4] = {};
1305 unsigned swiz_max = 0, num_const = 0;
1306
1307 for (unsigned i = 0; i < info->num_inputs; i++) {
1308 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
1309 if (!cv)
1310 continue;
1311
1312 unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
1313 for (unsigned j = 0; j < num_components; j++) {
1314 int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
1315 swizzle[i][j] = idx;
1316 swiz_max = MAX2(swiz_max, (unsigned) idx);
1317 }
1318 num_const++;
1319 }
1320
1321 /* nothing to do */
1322 if (num_const <= 1)
1323 return;
1324
1325 /* resolve with single combined const src */
1326 if (swiz_max < 4) {
1327 nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
1328
1329 for (unsigned i = 0; i < info->num_inputs; i++) {
1330 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
1331 if (!cv)
1332 continue;
1333
1334 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
1335
1336 for (unsigned j = 0; j < 4; j++)
1337 alu->src[i].swizzle[j] = swizzle[i][j];
1338 }
1339 return;
1340 }
1341
1342 /* resolve with movs */
1343 num_const = 0;
1344 for (unsigned i = 0; i < info->num_inputs; i++) {
1345 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
1346 if (!cv)
1347 continue;
1348
1349 num_const++;
1350 if (num_const == 1)
1351 continue;
1352
1353 nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
1354 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
1355 }
1356 return;
1357 }
1358
1359 nir_const_value value[4];
1360 unsigned num_components = 0;
1361
1362 for (unsigned i = 0; i < info->num_inputs; i++) {
1363 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
1364 if (cv)
1365 value[num_components++] = cv[alu->src[i].swizzle[0]];
1366 }
1367
1368 /* if there is more than one constant source to the vecN, combine them
1369 * into a single load_const (removing the vecN completely if all components
1370 * are constant)
1371 */
1372 if (num_components > 1) {
1373 nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);
1374
1375 if (num_components == info->num_inputs) {
1376 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
1377 nir_instr_remove(&alu->instr);
1378 return;
1379 }
1380
1381 for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
1382 nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
1383 if (!cv)
1384 continue;
1385
1386 nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
1387 alu->src[i].swizzle[0] = j++;
1388 }
1389 }
1390
1391 unsigned finished_write_mask = 0;
1392 for (unsigned i = 0; i < 4; i++) {
1393 if (!(alu->dest.write_mask & (1 << i)))
1394 continue;
1395
1396 nir_ssa_def *ssa = alu->src[i].src.ssa;
1397
1398 /* check that the vecN instruction is the only user of this value */
1399 bool need_mov = list_length(&ssa->if_uses) != 0;
1400 nir_foreach_use(use_src, ssa) {
1401 if (use_src->parent_instr != &alu->instr)
1402 need_mov = true;
1403 }
1404
1405 nir_instr *instr = ssa->parent_instr;
1406 switch (instr->type) {
1407 case nir_instr_type_alu:
1408 case nir_instr_type_tex:
1409 break;
1410 case nir_instr_type_intrinsic:
1411 if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
1412 need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
1413 break;
1414 }
1415 default:
1416 need_mov = true;
1417 }
1418
1419 if (need_mov && !(finished_write_mask & (1 << i)))
1420 finished_write_mask |= insert_vec_mov(alu, i, state->shader);
1421 }
1422 }
1423
1424 static bool
1425 emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
1426 {
1427 nir_shader *shader = c->nir;
1428
1429 struct state state = {
1430 .c = c,
1431 .shader = shader,
1432 .impl = nir_shader_get_entrypoint(shader),
1433 };
1434 bool have_indirect_uniform = false;
1435 unsigned indirect_max = 0;
1436
1437 nir_builder b;
1438 nir_builder_init(&b, state.impl);
1439
1440 /* convert non-dynamic uniform loads to constants, etc */
1441 nir_foreach_block(block, state.impl) {
1442 nir_foreach_instr_safe(instr, block) {
1443 switch(instr->type) {
1444 case nir_instr_type_alu:
1445 /* deals with vecN and const srcs */
1446 lower_alu(&state, nir_instr_as_alu(instr));
1447 break;
1448 case nir_instr_type_load_const: {
1449 nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
1450 for (unsigned i = 0; i < load_const->def.num_components; i++)
1451 load_const->value[i] = CONST(load_const->value[i].u32);
1452 } break;
1453 case nir_instr_type_intrinsic: {
1454 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1455 /* TODO: load_ubo can also become a constant in some cases
1456 * (at the moment it can end up emitting a LOAD with two
1457 * uniform sources, which could be a problem on HALTI2)
1458 */
1459 if (intr->intrinsic != nir_intrinsic_load_uniform)
1460 break;
1461 nir_const_value *off = nir_src_as_const_value(intr->src[0]);
1462 if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
1463 have_indirect_uniform = true;
1464 indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
1465 break;
1466 }
1467
1468 unsigned base = nir_intrinsic_base(intr);
1469 /* pre halti2 uniform offset will be float */
1470 if (c->specs->halti < 2)
1471 base += (unsigned) off[0].f32;
1472 else
1473 base += off[0].u32;
1474 nir_const_value value[4];
1475
1476 for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
1477 if (nir_intrinsic_base(intr) < 0)
1478 value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
1479 else
1480 value[i] = UNIFORM(base * 4 + i);
1481 }
1482
1483 b.cursor = nir_after_instr(instr);
1484 nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);
1485
1486 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
1487 nir_instr_remove(instr);
1488 } break;
1489 default:
1490 break;
1491 }
1492 }
1493 }
1494
1495 /* TODO: only emit required indirect uniform ranges */
1496 if (have_indirect_uniform) {
1497 for (unsigned i = 0; i < indirect_max * 4; i++)
1498 c->consts[i] = UNIFORM(i).u64;
1499 state.const_count = indirect_max;
1500 }
1501
1502 /* add mov for any store output using sysval/const */
1503 nir_foreach_block(block, state.impl) {
1504 nir_foreach_instr_safe(instr, block) {
1505 if (instr->type != nir_instr_type_intrinsic)
1506 continue;
1507
1508 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1509
1510 switch (intr->intrinsic) {
1511 case nir_intrinsic_store_deref: {
1512 nir_src *src = &intr->src[1];
1513 if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
1514 b.cursor = nir_before_instr(instr);
1515 nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
1516 }
1517 } break;
1518 default:
1519 break;
1520 }
1521 }
1522 }
1523
1524 /* call directly to avoid validation (the load_const instructions don't pass validation at this point) */
1525 nir_convert_from_ssa(shader, true);
1526 nir_opt_dce(shader);
1527
1528 ra_assign(&state, shader);
1529
1530 emit_cf_list(&state, &nir_shader_get_entrypoint(shader)->body);
1531
1532 *num_temps = ra_finish(&state);
1533 *num_consts = state.const_count;
1534 return true;
1535 }