etnaviv: add alternative NIR compiler
src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
/*
 * Copyright (c) 2012-2019 Etnaviv Project
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_compiler.h"
#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_disasm.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include <math.h>
#include "util/u_memory.h"
#include "util/register_allocate.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_worklist.h"

#include "util/u_half.h"

struct etna_compile {
   nir_shader *nir;
#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
   const struct etna_specs *specs;
   struct etna_shader_variant *variant;

   /* register assigned to each output, indexed by driver_location */
   unsigned output_reg[ETNA_NUM_INPUTS];

   /* block # to instr index */
   unsigned *block_ptr;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* There was an error during compilation */
   bool error;
};

#define compile_error(ctx, args...) ({ \
   printf(args); \
   ctx->error = true; \
   assert(0); \
})

/* io related lowering
 * run after nir_lower_int_to_float because this pass adds i2f/f2i ops
 */
static void
etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
{
   bool rb_swap = shader->info.stage == MESA_SHADER_FRAGMENT && v->key.frag_rb_swap;

   unsigned color_location = 0;
   nir_foreach_variable(var, &shader->outputs) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         color_location = var->data.driver_location;
         break;
      }
   }

   nir_foreach_function(function, shader) {
      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

               switch (intr->intrinsic) {
               case nir_intrinsic_load_front_face: {
                  /* front face inverted (run after int_to_float, so invert as float) */
                  b.cursor = nir_after_instr(instr);

                  nir_ssa_def *ssa = nir_seq(&b, &intr->dest.ssa, nir_imm_float(&b, 0.0));
                  nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                                 nir_src_for_ssa(ssa),
                                                 ssa->parent_instr);
               } break;
               case nir_intrinsic_store_output: {
                  if (!rb_swap || nir_intrinsic_base(intr) != color_location)
                     break;
                  b.cursor = nir_before_instr(instr);

                  nir_ssa_def *ssa = nir_mov(&b, intr->src[0].ssa);
                  nir_alu_instr *alu = nir_instr_as_alu(ssa->parent_instr);
                  alu->src[0].swizzle[0] = 2;
                  alu->src[0].swizzle[2] = 0;
                  nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
               } break;
               case nir_intrinsic_load_instance_id: {
                  b.cursor = nir_after_instr(instr);
                  nir_ssa_def *ssa = nir_i2f32(&b, &intr->dest.ssa);
                  nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                                 nir_src_for_ssa(ssa),
                                                 ssa->parent_instr);
               } break;
               case nir_intrinsic_load_uniform: {
                  /* multiply by 16 (a vec4 slot is 16 bytes) and convert to int */
                  b.cursor = nir_before_instr(instr);
                  nir_ssa_def *ssa = nir_f2u32(&b, nir_fmul(&b, intr->src[0].ssa,
                                                            nir_imm_float(&b, 16.0f)));
                  nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
               } break;
               default:
                  break;
               }
            }

            if (instr->type != nir_instr_type_tex)
               continue;

            nir_tex_instr *tex = nir_instr_as_tex(instr);
            nir_src *coord = NULL;
            nir_src *lod_bias = NULL;
            unsigned lod_bias_idx;

            assert(tex->sampler_index == tex->texture_index);

            for (unsigned i = 0; i < tex->num_srcs; i++) {
               switch (tex->src[i].src_type) {
               case nir_tex_src_coord:
                  coord = &tex->src[i].src;
                  break;
               case nir_tex_src_bias:
               case nir_tex_src_lod:
                  assert(!lod_bias);
                  lod_bias = &tex->src[i].src;
                  lod_bias_idx = i;
                  break;
               default:
                  assert(0);
                  break;
               }
            }

            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* use a dummy load_uniform here to represent texcoord scale */
               b.cursor = nir_before_instr(instr);
               nir_intrinsic_instr *load =
                  nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_uniform);
               nir_intrinsic_set_base(load, ~tex->sampler_index);
               load->num_components = 2;
               load->src[0] = nir_src_for_ssa(nir_imm_float(&b, 0.0f));
               nir_ssa_dest_init(&load->instr, &load->dest, 2, 32, NULL);
               nir_intrinsic_set_type(load, nir_type_float);

               nir_builder_instr_insert(&b, &load->instr);

               nir_ssa_def *new_coord = nir_fmul(&b, coord->ssa, &load->dest.ssa);
               nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(new_coord));
            }

            /* pre-HALTI5 hardware needs the lod/bias packed into the coordinate source */

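            /* e.g. a 2D texture fetch with bias: coord (s, t) and bias b
             * end up packed as the single vec4 source (s, t, b, b) built below
             */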
            if (!lod_bias || v->shader->specs->halti >= 5)
               continue;

            assert(coord && lod_bias && tex->coord_components < 4);

            nir_alu_instr *vec = nir_alu_instr_create(shader, nir_op_vec4);
            for (unsigned i = 0; i < tex->coord_components; i++) {
               vec->src[i].src = nir_src_for_ssa(coord->ssa);
               vec->src[i].swizzle[0] = i;
            }
            for (unsigned i = tex->coord_components; i < 4; i++)
               vec->src[i].src = nir_src_for_ssa(lod_bias->ssa);

            vec->dest.write_mask = 0xf;
            nir_ssa_dest_init(&vec->instr, &vec->dest.dest, 4, 32, NULL);

            nir_tex_instr_remove_src(tex, lod_bias_idx);
            nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(&vec->dest.dest.ssa));
            tex->coord_components = 4;

            nir_instr_insert_before(&tex->instr, &vec->instr);
         }
      }
   }
}

static void
etna_lower_alu_to_scalar(nir_shader *shader, const struct etna_specs *specs)
{
   BITSET_DECLARE(scalar_ops, nir_num_opcodes);
   BITSET_ZERO(scalar_ops);

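   /* these ops are scalar on the hardware (etna_emit_alu broadcasts their
    * source to the written component); fdot2 additionally needs scalarizing
    * when the HALTI2 instruction set (and with it DP2) is not available
    */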
   BITSET_SET(scalar_ops, nir_op_frsq);
   BITSET_SET(scalar_ops, nir_op_frcp);
   BITSET_SET(scalar_ops, nir_op_flog2);
   BITSET_SET(scalar_ops, nir_op_fexp2);
   BITSET_SET(scalar_ops, nir_op_fsqrt);
   BITSET_SET(scalar_ops, nir_op_fcos);
   BITSET_SET(scalar_ops, nir_op_fsin);
   BITSET_SET(scalar_ops, nir_op_fdiv);

   if (!specs->has_halti2_instructions)
      BITSET_SET(scalar_ops, nir_op_fdot2);

   nir_lower_alu_to_scalar(shader, scalar_ops);
}

static void
etna_lower_alu_impl(nir_function_impl *impl, struct etna_compile *c)
{
   nir_shader *shader = impl->function->shader;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* in a separate loop so we can apply the multiple-uniform logic to the new fmul */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *alu = nir_instr_as_alu(instr);
         /* multiply sin/cos src by constant: the hardware expects the angle
          * pre-scaled (by 1/pi with the new transcendental unit, 2/pi otherwise)
          * TODO: do this earlier (but it breaks const_prop opt)
          */
         if (alu->op == nir_op_fsin || alu->op == nir_op_fcos) {
            b.cursor = nir_before_instr(instr);

            nir_ssa_def *imm = c->specs->has_new_transcendentals ?
               nir_imm_float(&b, 1.0 / M_PI) :
               nir_imm_float(&b, 2.0 / M_PI);

            nir_instr_rewrite_src(instr, &alu->src[0].src,
                                  nir_src_for_ssa(nir_fmul(&b, alu->src[0].src.ssa, imm)));
         }

         /* change transcendental ops to vec2 and insert vec1 mul for the result
          * TODO: do this earlier (but it breaks with optimizations)
          */
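         /* on cores with the new transcendental unit these ops return their
          * result split over two components whose product is the final value,
          * so the scalar def is widened to vec2 and an x*y fmul is appended
          * (the two-component mode itself is enabled via the amode bit set
          * in etna_emit_alu)
          */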
         if (c->specs->has_new_transcendentals && (
             alu->op == nir_op_fdiv || alu->op == nir_op_flog2 ||
             alu->op == nir_op_fsin || alu->op == nir_op_fcos)) {
            nir_ssa_def *ssa = &alu->dest.dest.ssa;

            assert(ssa->num_components == 1);

            nir_alu_instr *mul = nir_alu_instr_create(shader, nir_op_fmul);
            mul->src[0].src = mul->src[1].src = nir_src_for_ssa(ssa);
            mul->src[1].swizzle[0] = 1;

            mul->dest.write_mask = 1;
            nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);

            ssa->num_components = 2;

            mul->dest.saturate = alu->dest.saturate;
            alu->dest.saturate = 0;

            nir_instr_insert_after(instr, &mul->instr);

            nir_ssa_def_rewrite_uses_after(ssa, nir_src_for_ssa(&mul->dest.dest.ssa), &mul->instr);
         }
      }
   }
}

static void etna_lower_alu(nir_shader *shader, struct etna_compile *c)
{
   nir_foreach_function(function, shader) {
      if (function->impl)
         etna_lower_alu_impl(function->impl, c);
   }
}

static void
emit_inst(struct etna_compile *c, struct etna_inst *inst)
{
   c->code[c->inst_ptr++] = *inst;
}

/* mapping of nir srcs to etna_inst srcs */
enum {
   SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4),
   SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4),
   SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4),
   SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4),
   SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4),
   SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4),
   SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4),
};
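
/* each hardware source slot gets a 2-bit field selecting which nir src feeds
 * it, with 3 meaning "unused" (see the decode loop in etna_emit_alu); e.g.
 * SRC_0_X_1 routes nir src 0 to hw src 0, leaves hw src 1 unused and routes
 * nir src 1 to hw src 2, which is how fadd maps onto the hardware ADD
 */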

/* info to translate a nir op to etna_inst */
struct etna_op_info {
   uint8_t opcode; /* INST_OPCODE_ */
   uint8_t src;    /* SRC_ enum */
   uint8_t cond;   /* INST_CONDITION_ */
   uint8_t type;   /* INST_TYPE_ */
};

static const struct etna_op_info etna_ops[] = {
   [0 ... nir_num_opcodes - 1] = {0xff},
#undef TRUE
#undef FALSE
#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \
   INST_OPCODE_##op, \
   SRC_##src, \
   INST_CONDITION_##cond, \
   INST_TYPE_##type \
}
#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32)
#define OP(nir, op, src) OPC(nir, op, src, TRUE)
   OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0),
   OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2),
   OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X),
   OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT),
   OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0),
   OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0),
   OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0),
   OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0),
   OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT),
   OPC(fcsel, SELECT, 0_1_2, NZ),
   OP(fdiv, DIV, 0_1_X),
   OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0),

   /* integer opcodes */
   OPCT(i2f32, I2F, 0_X_X, TRUE, S32),
   OPCT(f2u32, F2I, 0_X_X, TRUE, U32),
};
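
/* the [0 ... nir_num_opcodes - 1] = {0xff} initializer marks every opcode as
 * unsupported by default (etna_emit_alu asserts on 0xff); e.g. the fmul entry
 * reads: emit MUL (type F32, condition TRUE) with nir srcs 0 and 1 feeding
 * hw src0 and src1
 */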

static void
etna_emit_block_start(struct etna_compile *c, unsigned block)
{
   c->block_ptr[block] = c->inst_ptr;
}

static void
etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
              struct etna_inst_src src[3], bool saturate)
{
   struct etna_op_info ei = etna_ops[op];

   assert(ei.opcode != 0xff);

   struct etna_inst inst = {
      .opcode = ei.opcode,
      .type = ei.type,
      .cond = ei.cond,
      .dst = dst,
      .sat = saturate,
   };

   switch (op) {
   case nir_op_fdiv:
   case nir_op_flog2:
   case nir_op_fsin:
   case nir_op_fcos:
      if (c->specs->has_new_transcendentals)
         inst.tex.amode = 1;
      /* fall through */
   case nir_op_frsq:
   case nir_op_frcp:
   case nir_op_fexp2:
   case nir_op_fsqrt:
   case nir_op_i2f32:
   case nir_op_f2u32:
      /* for these instructions we want src to be in x component
       * note: on HALTI2+ i2f/f2u are not scalar but we only use them this way currently
       */
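      /* e.g. with write_mask 0x4 (z only), ffs() - 1 = 2 and the source
       * swizzle gets composed with a .zzzz broadcast, so the scalar op
       * reads from and writes to the z channel
       */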
      src[0].swiz = inst_swiz_compose(src[0].swiz,
                                      INST_SWIZ_BROADCAST(ffs(inst.dst.write_mask) - 1));
   default:
      break;
   }

   for (unsigned j = 0; j < 3; j++) {
      unsigned i = ((ei.src >> j * 2) & 3);
      if (i < 3)
         inst.src[j] = src[i];
   }

   emit_inst(c, &inst);
}

static void
etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,
              struct etna_inst_dst dst, struct etna_inst_src coord,
              struct etna_inst_src lod_bias)
{
   struct etna_inst inst = {
      .dst = dst,
      .tex.id = texid + (is_fs(c) ? 0 : c->specs->vertex_sampler_offset),
      .tex.swiz = dst_swiz,
      .src[0] = coord,
   };

   if (lod_bias.use)
      inst.src[1] = lod_bias;

   switch (op) {
   case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break;
   case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break;
   case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break;
   default:
      assert(0);
   }

   emit_inst(c, &inst);
}

static void
etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition)
{
   if (!condition.use) {
      emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block });
      return;
   }

   struct etna_inst inst = {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_NOT,
      .type = INST_TYPE_U32,
      .src[0] = condition,
      .imm = block,
   };
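   /* make the condition scalar by broadcasting the x-channel selector of
    * its swizzle (the low 2 bits) to all components
    */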
   inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);
   emit_inst(c, &inst);
}

static void
etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition)
{
   if (!condition.use) {
      emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL });
      return;
   }

   struct etna_inst inst = {
      .opcode = INST_OPCODE_TEXKILL,
      .cond = INST_CONDITION_GZ,
      .src[0] = condition,
   };
   inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);
   emit_inst(c, &inst);
}

static void
etna_emit_output(struct etna_compile *c, unsigned index, struct etna_inst_src src)
{
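   /* nothing to emit: outputs are written in place, so just record which
    * register holds this output for the in/outfile setup further down
    */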
   c->output_reg[index] = src.reg;
}

static void
etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
                   struct etna_inst_src src, struct etna_inst_src base)
{
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LOAD,
      .type = INST_TYPE_U32,
      .dst = dst,
      .src[0] = src,
      .src[1] = base,
   });
}

#define OPT(nir, pass, ...) ({ \
   bool this_progress = false; \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
   this_progress; \
})
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

static void
etna_optimize_loop(nir_shader *s)
{
   bool progress;
   do {
      progress = false;

      OPT_V(s, nir_lower_vars_to_ssa);
      progress |= OPT(s, nir_opt_copy_prop_vars);
      progress |= OPT(s, nir_copy_prop);
      progress |= OPT(s, nir_opt_dce);
      progress |= OPT(s, nir_opt_cse);
      progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
      progress |= OPT(s, nir_opt_intrinsics);
      progress |= OPT(s, nir_opt_algebraic);
      progress |= OPT(s, nir_opt_constant_folding);
      progress |= OPT(s, nir_opt_dead_cf);
      if (OPT(s, nir_opt_trivial_continues)) {
         progress = true;
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(s, nir_copy_prop);
         OPT(s, nir_opt_dce);
      }
      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
      progress |= OPT(s, nir_opt_if, false);
      progress |= OPT(s, nir_opt_remove_phis);
      progress |= OPT(s, nir_opt_undef);
   } while (progress);
}

static int
etna_glsl_type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

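/* each 64-bit entry in consts packs the 32-bit immediate value in the low
 * word and a tag describing its contents (plain constant vs. special, e.g.
 * the texcoord scale introduced in etna_lower_io) in the high word
 */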
static void
copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count)
{
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->imm_count = count * 4;
   uinfo->imm_data = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_data));
   uinfo->imm_contents = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_contents));

   for (unsigned i = 0; i < uinfo->imm_count; i++) {
      uinfo->imm_data[i] = consts[i];
      uinfo->imm_contents[i] = consts[i] >> 32;
   }

   etna_set_shader_uniforms_dirty_flags(sobj);
}

#include "etnaviv_compiler_nir_emit.h"

bool
etna_compile_shader_nir(struct etna_shader_variant *v)
{
   if (unlikely(!v))
      return false;

   struct etna_compile *c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   c->variant = v;
   c->specs = v->shader->specs;
   c->nir = nir_shader_clone(NULL, v->shader->nir);

   nir_shader *s = c->nir;
   const struct etna_specs *specs = c->specs;

   v->stage = s->info.stage;
   v->num_loops = 0; /* TODO */
   v->vs_id_in_reg = -1;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */
   v->ps_depth_out_reg = -1;

   /* setup input linking */
   struct etna_shader_io_file *sf = &v->infile;
   if (s->info.stage == MESA_SHADER_VERTEX) {
      nir_foreach_variable(var, &s->inputs) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = 4; /* TODO */
         sf->num_reg = MAX2(sf->num_reg, idx + 1);
      }
   } else {
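      /* fragment shader inputs are assigned from register 1 upward;
       * register 0 presumably holds the fragment position, hence idx + 1
       */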
      unsigned count = 0;
      nir_foreach_variable(var, &s->inputs) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx + 1;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = 4; /* TODO */
         sf->num_reg = MAX2(sf->num_reg, idx + 1);
         count++;
      }
      assert(sf->num_reg == count);
   }

   NIR_PASS_V(s, nir_lower_io, nir_var_all, etna_glsl_type_size,
              (nir_lower_io_options)0);

   OPT_V(s, nir_lower_regs_to_ssa);
   OPT_V(s, nir_lower_vars_to_ssa);
   OPT_V(s, nir_lower_indirect_derefs, nir_var_all);
   OPT_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
   OPT_V(s, etna_lower_alu_to_scalar, specs);

   etna_optimize_loop(s);

   /* use opt_algebraic between int_to_float and bool_to_float because
    * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
    */
   OPT_V(s, nir_lower_int_to_float);
   OPT_V(s, nir_opt_algebraic);
   OPT_V(s, nir_lower_bool_to_float);

   /* run after int_to_float because this pass inserts an i2f for instance_id */
   OPT_V(s, etna_lower_io, v);

   etna_optimize_loop(s);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
      nir_print_shader(s, stdout);

   while (OPT(s, nir_opt_vectorize));
   OPT_V(s, etna_lower_alu_to_scalar, specs);

   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
   NIR_PASS_V(s, nir_opt_algebraic_late);

   NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
   NIR_PASS_V(s, nir_copy_prop);
   NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods);
   /* need copy prop after uses_to_dest, and before src mods: see
    * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
    */

   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, etna_lower_alu, c);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
      nir_print_shader(s, stdout);

   uint64_t consts[ETNA_MAX_IMM] = {};

   unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks];
   c->block_ptr = block_ptr;
   struct emit_options options = {
      .max_temps = ETNA_MAX_TEMPS,
      .max_consts = ETNA_MAX_IMM / 4,
      .id_reg = sf->num_reg,
      .single_const_src = c->specs->halti < 5,
      .etna_new_transcendentals = c->specs->has_new_transcendentals,
      .user = c,
      .consts = consts,
   };

   unsigned num_consts;
   bool ok = emit_shader(c->nir, &options, &v->num_temps, &num_consts);
   assert(ok);

   /* empty shader, emit NOP */
   if (!c->inst_ptr)
      emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP });

   /* assemble instructions, fixing up labels */
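   /* each etna_inst assembles to four 32-bit words, hence the * 16 bytes
    * in the allocation below; branch targets recorded as block numbers are
    * patched to instruction indices via block_ptr
    */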
   uint32_t *code = MALLOC(c->inst_ptr * 16 + 1024);
   for (unsigned i = 0; i < c->inst_ptr; i++) {
      struct etna_inst *inst = &c->code[i];
      if (inst->opcode == INST_OPCODE_BRANCH)
         inst->imm = block_ptr[inst->imm];

      inst->halti5 = specs->halti >= 5;
      etna_assemble(&code[i * 4], inst);
   }

   v->code_size = c->inst_ptr * 4;
   v->code = code;
   v->needs_icache = c->inst_ptr > specs->max_instructions;

   copy_uniform_state_to_shader(v, consts, num_consts);

   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      v->input_count_unk8 = 31; /* XXX what is this */

      nir_foreach_variable(var, &s->outputs) {
         unsigned reg = c->output_reg[var->data.driver_location];
         switch (var->data.location) {
         case FRAG_RESULT_COLOR:
         case FRAG_RESULT_DATA0: /* DATA0 is used by gallium shaders for color */
            v->ps_color_out_reg = reg;
            break;
         case FRAG_RESULT_DEPTH:
            v->ps_depth_out_reg = reg;
            break;
         default:
            compile_error(c, "Unsupported fs output %s\n",
                          gl_frag_result_name(var->data.location));
         }
      }
      assert(v->ps_depth_out_reg <= 0);
      v->outfile.num_reg = 0;
      ralloc_free(c->nir);
      FREE(c);
      return true;
   }

   v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */

   sf = &v->outfile;
   sf->num_reg = 0;
   nir_foreach_variable(var, &s->outputs) {
      unsigned native = c->output_reg[var->data.driver_location];

      if (var->data.location == VARYING_SLOT_POS) {
         v->vs_pos_out_reg = native;
         continue;
      }

      if (var->data.location == VARYING_SLOT_PSIZ) {
         v->vs_pointsize_out_reg = native;
         continue;
      }

      sf->reg[sf->num_reg].reg = native;
      sf->reg[sf->num_reg].slot = var->data.location;
      sf->reg[sf->num_reg].num_components = 4; /* TODO */
      sf->num_reg++;
   }

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS in the unified shader architecture.
    * More precisely, it is determined from the number of VS outputs, as well
    * as chip-specific vertex output buffer size, vertex cache size, and the
    * number of shader cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known
    * for sure at link time because some outputs may be unused and thus
    * unmapped. Then again, in the general use case with GLSL the vertex and
    * fragment shaders are linked already before submitting to Gallium, thus
    * all outputs are used.
    *
    * note: the TGSI compiler counts all outputs (including position and
    * pointsize); here v->outfile.num_reg only counts varyings, +1 to
    * compensate for the position output
    * TODO: might have a problem that we don't count pointsize when it is used
    */
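
   /* worked example with made-up spec values (vertex_output_buffer_size =
    * 512, vertex_cache_size = 16, shader_core_count = 1) and two varyings:
    * half_out = 2, so b = (20480 / (512 - 64) + 9) / 10 = 5 and
    * a = (5 + 256 / 2) / 2 = 66; both are clamped to 255 below
    */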

   int half_out = v->outfile.num_reg / 2 + 1;
   assert(half_out);

   uint32_t b = ((20480 / (specs->vertex_output_buffer_size -
                           2 * half_out * specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2;
   v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                          VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                          VIVS_VS_LOAD_BALANCING_C(0x3f) |
                          VIVS_VS_LOAD_BALANCING_D(0x0f);

   ralloc_free(c->nir);
   FREE(c);
   return true;
}

void
etna_destroy_shader_nir(struct etna_shader_variant *shader)
{
   assert(shader);

   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader);
}

static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   for (int i = 0; i < sobj->outfile.num_reg; i++)
      if (sobj->outfile.reg[i].slot == in->slot)
         return &sobj->outfile.reg[i];

   return NULL;
}

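/* returns true on a link error, false on success */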
bool
etna_link_shader_nir(struct etna_shader_link_info *info,
                     const struct etna_shader_variant *vs,
                     const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      bool interpolate_always = true;

      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;

      /* point coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (fsio->slot == VARYING_SLOT_PNTC) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;

         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic value not found in vertex shader outputs\n");
            return true;
         }
         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}