etnaviv: merge struct etna_compile and etna_state
mesa.git: src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_compiler_nir.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"

#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
#define SRC_DISABLE ((hw_src){})
#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})

#define emit(type, args...) etna_emit_##type(c, args)

typedef struct etna_inst_dst hw_dst;
typedef struct etna_inst_src hw_src;

static inline hw_src
src_swizzle(hw_src src, unsigned swizzle)
{
   if (src.rgroup != INST_RGROUP_IMMEDIATE)
      src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}

/* constants are represented as 64-bit ints:
 * the upper 32 bits hold the type (imm, uniform, etc) and the lower 32 bits the value
 */

#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)

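/* Find a slot for "value" in a vec4-sized group of constants "c": reuse a
 * matching or empty component and return its index, or -1 if the group is full.
 */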
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}

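/* Get a hardware source operand for a constant vector: use an inline
 * immediate when the hardware supports it (HALTI2+, single component),
 * otherwise place the values in the uniform constant area and return a
 * swizzled uniform source.
 */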
static hw_src
const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &c->consts[i*4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   c->const_count = MAX2(c->const_count, i);

   return SRC_CONST(i - 1, swiz);
}

/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum reg_class {
   REG_CLASS_VIRT_SCALAR,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VEC4,
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};

enum reg_type {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};

/* writemask when used as dest */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};

/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};

/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};

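/* A virtual register index encodes both the physical base register and the
 * register type: virt_reg = base * NUM_REG_TYPES + type.
 */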
static inline int reg_get_type(int virt_reg)
{
   return virt_reg % NUM_REG_TYPES;
}

static inline int reg_get_base(struct etna_compile *c, int virt_reg)
{
   /* offset by 1 to avoid reserved position register */
   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
   return virt_reg / NUM_REG_TYPES;
}

/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base
 * (fs registers are offset by 1 to avoid reserving r0)
 */
#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)

static inline int reg_get_class(int virt_reg)
{
   switch (reg_get_type(virt_reg)) {
   case REG_TYPE_VEC4:
      return REG_CLASS_VEC4;
   case REG_TYPE_VIRT_VEC3_XYZ:
   case REG_TYPE_VIRT_VEC3_XYW:
   case REG_TYPE_VIRT_VEC3_XZW:
   case REG_TYPE_VIRT_VEC3_YZW:
      return REG_CLASS_VIRT_VEC3;
   case REG_TYPE_VIRT_VEC2_XY:
   case REG_TYPE_VIRT_VEC2_XZ:
   case REG_TYPE_VIRT_VEC2_XW:
   case REG_TYPE_VIRT_VEC2_YZ:
   case REG_TYPE_VIRT_VEC2_YW:
   case REG_TYPE_VIRT_VEC2_ZW:
      return REG_CLASS_VIRT_VEC2;
   case REG_TYPE_VIRT_SCALAR_X:
   case REG_TYPE_VIRT_SCALAR_Y:
   case REG_TYPE_VIRT_SCALAR_Z:
   case REG_TYPE_VIRT_SCALAR_W:
      return REG_CLASS_VIRT_SCALAR;
   case REG_TYPE_VIRT_VEC2T_XY:
   case REG_TYPE_VIRT_VEC2T_ZW:
      return REG_CLASS_VIRT_VEC2T;
   case REG_TYPE_VIRT_VEC2C_XY:
   case REG_TYPE_VIRT_VEC2C_YZ:
   case REG_TYPE_VIRT_VEC2C_ZW:
      return REG_CLASS_VIRT_VEC2C;
   case REG_TYPE_VIRT_VEC3C_XYZ:
   case REG_TYPE_VIRT_VEC3C_YZW:
      return REG_CLASS_VIRT_VEC3C;
   }

   assert(false);
   return 0;
}

/* nir_src to allocated register */
static hw_src
ra_src(struct etna_compile *c, nir_src *src)
{
   unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
   return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
}

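/* Resolve a nir_src to a hardware source operand: bypassed movs are followed
 * back to their original source, constants go through const_src, front_face
 * and frag_coord map to special registers, and everything else uses the
 * register assigned by the allocator.
 */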
static hw_src
get_src(struct etna_compile *c, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(c, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(c, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      default:
         compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(c, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}

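/* Check whether the components of "ssa" end up in the vecN destination in an
 * order other than their natural one, or whether the result also feeds a
 * mov/vecN, in which case the swizzle cannot simply be folded away.
 */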
static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         return true;
      default:
         break;
      }
   }
   return false;
}

/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into the register
 */
static hw_dst
ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(c, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}

/* precomputed by register_allocate */
static unsigned int *q_values[] = {
   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
};

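/* Set up the register set (classes, registers and conflicts), compute
 * liveness for the shader and run the interference-graph register allocator.
 * The resulting graph and live map are stored in the compile context for
 * ra_src()/ra_dest() to query.
 */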
static void
ra_assign(struct etna_compile *c, nir_shader *shader)
{
   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
                  NUM_REG_TYPES, false);

   /* classes are always created starting from index 0, so the class index is
    * equal to the reg_class enum value, which represents a register with
    * (c+1) components
    */
   for (int c = 0; c < NUM_REG_CLASSES; c++)
      ra_alloc_reg_class(regs);
   /* add each register of each class */
   for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
      ra_class_add_reg(regs, reg_get_class(r), r);
   /* set conflicts */
   for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
      for (int i = 0; i < NUM_REG_TYPES; i++) {
         for (int j = 0; j < i; j++) {
            if (reg_writemask[i] & reg_writemask[j]) {
               ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i,
                                         NUM_REG_TYPES * r + j);
            }
         }
      }
   }
   ra_set_finalize(regs, q_values);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* liveness and interference */

   nir_index_blocks(impl);
   nir_index_ssa_defs(impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->pass_flags = 0;
   }

   /* this gives an approximation/upper limit on how many nodes are needed
    * (some ssa values do not represent an allocated register)
    */
   unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc;
   unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes);
   memset(live_map, 0xff, sizeof(unsigned) * max_nodes);
   struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes);

   unsigned num_nodes = etna_live_defs(impl, defs, live_map);
   struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);

   /* set classes from num_components */
   for (unsigned i = 0; i < num_nodes; i++) {
      nir_instr *instr = defs[i].instr;
      nir_dest *dest = defs[i].dest;
      unsigned comp = nir_dest_num_components(*dest) - 1;

      if (instr->type == nir_instr_type_alu &&
          c->specs->has_new_transcendentals) {
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fdiv:
         case nir_op_flog2:
         case nir_op_fsin:
         case nir_op_fcos:
            assert(dest->is_ssa);
            comp = REG_CLASS_VIRT_VEC2T;
         default:
            break;
         }
      }

      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         /* can't have dst swizzle or sparse writemask on UBO loads */
         if (intr->intrinsic == nir_intrinsic_load_ubo) {
            assert(dest == &intr->dest);
            if (dest->ssa.num_components == 2)
               comp = REG_CLASS_VIRT_VEC2C;
            if (dest->ssa.num_components == 3)
               comp = REG_CLASS_VIRT_VEC3C;
         }
      }

      ra_set_node_class(g, i, comp);
   }

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_dest *dest = dest_for_instr(instr);
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         unsigned reg;

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            /* don't want outputs to be swizzled
             * TODO: better would be to set the type to X/XY/XYZ/XYZW
             * TODO: what if fragcoord.z is read after writing fragdepth?
             */
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned index = live_map[src_index(impl, &intr->src[1])];

            if (shader->info.stage == MESA_SHADER_FRAGMENT &&
                deref->var->data.location == FRAG_RESULT_DEPTH) {
               ra_set_node_reg(g, index, REG_FRAG_DEPTH);
            } else {
               ra_set_node_class(g, index, REG_CLASS_VEC4);
            }
         } continue;
         case nir_intrinsic_load_input:
            reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) {
               REG_TYPE_VIRT_SCALAR_X,
               REG_TYPE_VIRT_VEC2_XY,
               REG_TYPE_VIRT_VEC3_XYZ,
               REG_TYPE_VEC4,
            }[nir_dest_num_components(*dest) - 1];
            break;
         case nir_intrinsic_load_instance_id:
            reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y;
            break;
         default:
            continue;
         }

         ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg);
      }
   }

   /* add interference for intersecting live ranges */
   for (unsigned i = 0; i < num_nodes; i++) {
      assert(defs[i].live_start < defs[i].live_end);
      for (unsigned j = 0; j < i; j++) {
         if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end)
            continue;
         ra_add_node_interference(g, i, j);
      }
   }

   ralloc_free(defs);

   /* Allocate registers */
   ASSERTED bool ok = ra_allocate(g);
   assert(ok);

   c->g = g;
   c->regs = regs;
   c->live_map = live_map;
   c->num_nodes = num_nodes;
}

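/* Free the register allocation state and return the number of hardware
 * temporaries actually used by the shader.
 */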
static unsigned
ra_finish(struct etna_compile *c)
{
   /* TODO: better way to get number of registers used? */
   unsigned j = 0;
   for (unsigned i = 0; i < c->num_nodes; i++) {
      j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1);
   }

   ralloc_free(c->g);
   ralloc_free(c->regs);
   ralloc_free(c->live_map);

   return j;
}

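/* Emit a hardware ALU instruction for a NIR ALU instruction: the register
 * allocator's destination swizzle and write mask are composed with the
 * instruction's own, and negate/abs source modifiers as well as saturate
 * (or the fneg/fabs/fsat opcodes) are folded into the emitted instruction.
 */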
static void
emit_alu(struct etna_compile *c, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   emit(alu, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}

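/* Gather the coordinate, LOD/bias and comparator sources of a NIR texture
 * instruction and emit the corresponding hardware texture fetch.
 */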
static void
emit_tex(struct etna_compile *c, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
         compare = &tex->src[i].src;
         break;
      default:
         compile_error(c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   emit(tex, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
        lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
        compare ? get_src(c, compare) : SRC_DISABLE);
}

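/* Emit hardware instructions for the NIR intrinsics that survive lowering:
 * output stores, discards, indirect uniform loads (MOVAR + MOV) and UBO loads
 * (LOAD). Inputs and system values are handled at register allocation time
 * and need no code here.
 */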
static void
emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      emit(output, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      emit(discard, get_src(c, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      emit(discard, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(c, &intr->src[0]),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(c, &intr->dest, &dst_swiz),
         .src[0] = get_src(c, &intr->src[1]),
         .src[1] = const_src(c, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
      break;
   default:
      compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}

static void
emit_instr(struct etna_compile *c, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}

static void
emit_block(struct etna_compile *c, nir_block * block)
{
   emit(block_start, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(c, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      emit(jump, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list);

static void
emit_if(struct etna_compile *c, nir_if * nif)
{
   emit(jump, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
   emit_cf_list(c, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      emit(jump, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(c, &nif->else_list);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(c, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(c, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(c, "Unknown NIR node type\n");
         break;
      }
   }
}

/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}

/*
 * for vecN instructions:
 * - merge constant sources into a single src
 * - insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * - try to merge constants as single constant
 * - insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct etna_compile *c, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, c->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      /* pre-GC7000L can only have 1 uniform src per instruction */
      if (c->specs->halti >= 5)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, c->nir);
   }
}

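/* Top-level emit path: lower constants and vecN instructions, convert
 * non-indirect uniform loads to constants, insert movs for constant/sysval
 * output stores, convert out of SSA, assign registers and finally walk the
 * control-flow list emitting hardware instructions.
 */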
static bool
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;
   c->impl = nir_shader_get_entrypoint(shader);

   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, c->impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(c, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
               have_indirect_uniform = true;
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
               if (nir_intrinsic_base(intr) < 0)
                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
               else
                  value[i] = UNIFORM(base * 4 + i);
            }

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      c->const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const instructions don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   ra_assign(c, shader);

   emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = ra_finish(c);
   *num_consts = c->const_count;
   return true;
}