2 * Copyright (c) 2012-2019 Etnaviv Project
3 * Copyright (c) 2019 Zodiac Inflight Innovations
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sub license,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Jonathan Marek <jonathan@marek.ca>
26 * Wladimir J. van der Laan <laanwj@gmail.com>
29 #include "etnaviv_compiler.h"
30 #include "etnaviv_asm.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_debug.h"
33 #include "etnaviv_disasm.h"
34 #include "etnaviv_uniforms.h"
35 #include "etnaviv_util.h"
38 #include "util/u_memory.h"
39 #include "util/register_allocate.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "compiler/nir/nir_worklist.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "util/u_half.h"
/* True when the shader being compiled is a fragment shader. */
48 #define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
/* Per-compilation state of the etnaviv NIR backend.
 * NOTE(review): only fragments are visible here — the numeric prefixes show
 * gaps in the extraction (the struct header and several fields/macro
 * continuation lines are missing). Visible tokens are kept verbatim. */
49 const struct etna_specs
*specs
;
50 struct etna_shader_variant
*variant
;
52 /* register assigned to each output, indexed by driver_location */
53 unsigned output_reg
[ETNA_NUM_INPUTS
];
55 /* block # to instr index */
59 int inst_ptr
; /* current instruction pointer */
60 struct etna_inst code
[ETNA_MAX_INSTRUCTIONS
* ETNA_INST_SIZE
];
62 /* There was an error during compilation */
/* Error-reporting macro; its continuation lines are outside this view. */
66 #define compile_error(ctx, args...) ({ \
/* Lower IO-related intrinsics and texture instructions into forms the etnaviv
 * backend can emit:
 *  - load_front_face: HW supplies 0.0/1.0, so compare against 0 to get a bool
 *    (inverted to ieq when the key requests front_ccw);
 *  - store_output: swizzle red/blue on the color output when frag_rb_swap set;
 *  - load_uniform: scale the offset by 16 and treat it as an int;
 *  - RECT samplers: multiply texcoords by a dummy load_uniform that stands in
 *    for the texcoord scale;
 *  - pre-HALTI5: pack coord + lod/bias into a single vec4 source.
 * NOTE(review): the extraction has gaps (missing breaks/braces/returns per
 * the numeric prefixes); visible tokens kept verbatim. */
72 /* io related lowering
73 * run after lower_int_to_float because it adds i2f/f2i ops
76 etna_lower_io(nir_shader
*shader
, struct etna_shader_variant
*v
)
78 bool rb_swap
= shader
->info
.stage
== MESA_SHADER_FRAGMENT
&& v
->key
.frag_rb_swap
;
80 unsigned color_location
= 0;
/* Find the driver_location of the color output, for the rb_swap case. */
81 nir_foreach_variable(var
, &shader
->outputs
) {
82 switch (var
->data
.location
) {
83 case FRAG_RESULT_COLOR
:
84 case FRAG_RESULT_DATA0
:
85 color_location
= var
->data
.driver_location
;
90 nir_foreach_function(function
, shader
) {
92 nir_builder_init(&b
, function
->impl
);
94 nir_foreach_block(block
, function
->impl
) {
95 nir_foreach_instr_safe(instr
, block
) {
96 if (instr
->type
== nir_instr_type_intrinsic
) {
97 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
99 switch (intr
->intrinsic
) {
100 case nir_intrinsic_load_front_face
: {
101 /* HW front_face is 0.0/1.0, not 0/~0u for bool
102 * lower with a comparison with 0
104 intr
->dest
.ssa
.bit_size
= 32;
106 b
.cursor
= nir_after_instr(instr
);
108 nir_ssa_def
*ssa
= nir_ine(&b
, &intr
->dest
.ssa
, nir_imm_int(&b
, 0));
/* front_ccw inverts the sense of the comparison. */
109 if (v
->key
.front_ccw
)
110 nir_instr_as_alu(ssa
->parent_instr
)->op
= nir_op_ieq
;
112 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
,
113 nir_src_for_ssa(ssa
),
116 case nir_intrinsic_store_output
: {
117 if (!rb_swap
|| nir_intrinsic_base(intr
) != color_location
)
119 b
.cursor
= nir_before_instr(instr
);
/* Insert a mov whose swizzle exchanges x and z (red <-> blue). */
121 nir_ssa_def
*ssa
= nir_mov(&b
, intr
->src
[0].ssa
);
122 nir_alu_instr
*alu
= nir_instr_as_alu(ssa
->parent_instr
);
123 alu
->src
[0].swizzle
[0] = 2;
124 alu
->src
[0].swizzle
[2] = 0;
125 nir_instr_rewrite_src(instr
, &intr
->src
[0], nir_src_for_ssa(ssa
));
127 case nir_intrinsic_load_uniform
: {
128 /* multiply by 16 and convert to int */
129 b
.cursor
= nir_before_instr(instr
);
130 nir_ssa_def
*ssa
= nir_imul(&b
, intr
->src
[0].ssa
, nir_imm_int(&b
, 16));
131 nir_instr_rewrite_src(instr
, &intr
->src
[0], nir_src_for_ssa(ssa
));
/* Everything below handles texture instructions only. */
138 if (instr
->type
!= nir_instr_type_tex
)
141 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
142 nir_src
*coord
= NULL
;
143 nir_src
*lod_bias
= NULL
;
144 unsigned lod_bias_idx
;
146 assert(tex
->sampler_index
== tex
->texture_index
);
/* Locate the coord and lod/bias sources of this tex instruction. */
148 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
149 switch (tex
->src
[i
].src_type
) {
150 case nir_tex_src_coord
:
151 coord
= &tex
->src
[i
].src
;
153 case nir_tex_src_bias
:
154 case nir_tex_src_lod
:
156 lod_bias
= &tex
->src
[i
].src
;
165 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
166 /* use a dummy load_uniform here to represent texcoord scale */
167 b
.cursor
= nir_before_instr(instr
);
168 nir_intrinsic_instr
*load
=
169 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_uniform
);
/* ~sampler_index as base marks this as the texcoord-scale pseudo uniform. */
170 nir_intrinsic_set_base(load
, ~tex
->sampler_index
);
171 load
->num_components
= 2;
172 load
->src
[0] = nir_src_for_ssa(nir_imm_float(&b
, 0.0f
));
173 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 2, 32, NULL
);
174 nir_intrinsic_set_type(load
, nir_type_float
);
176 nir_builder_instr_insert(&b
, &load
->instr
);
178 nir_ssa_def
*new_coord
= nir_fmul(&b
, coord
->ssa
, &load
->dest
.ssa
);
179 nir_instr_rewrite_src(&tex
->instr
, coord
, nir_src_for_ssa(new_coord
));
182 /* pre HALTI5 needs texture sources in a single source */
184 if (!lod_bias
|| v
->shader
->specs
->halti
>= 5)
187 assert(coord
&& lod_bias
&& tex
->coord_components
< 4);
/* Build vec4(coord.xyz..., lod_bias) replacing the two separate sources. */
189 nir_alu_instr
*vec
= nir_alu_instr_create(shader
, nir_op_vec4
);
190 for (unsigned i
= 0; i
< tex
->coord_components
; i
++) {
191 vec
->src
[i
].src
= nir_src_for_ssa(coord
->ssa
);
192 vec
->src
[i
].swizzle
[0] = i
;
194 for (unsigned i
= tex
->coord_components
; i
< 4; i
++)
195 vec
->src
[i
].src
= nir_src_for_ssa(lod_bias
->ssa
);
197 vec
->dest
.write_mask
= 0xf;
198 nir_ssa_dest_init(&vec
->instr
, &vec
->dest
.dest
, 4, 32, NULL
);
200 nir_tex_instr_remove_src(tex
, lod_bias_idx
);
201 nir_instr_rewrite_src(&tex
->instr
, coord
, nir_src_for_ssa(&vec
->dest
.dest
.ssa
));
202 tex
->coord_components
= 4;
204 nir_instr_insert_before(&tex
->instr
, &vec
->instr
);
/* Filter callback for nir_lower_alu_to_scalar: selects which ALU instructions
 * must be scalarized for this chip. Vector all/any compares are (at least)
 * scalarized when the HW lacks halti2 instructions.
 * NOTE(review): extraction gaps — the surrounding switch header, returns and
 * several cases are not visible; tokens kept verbatim. */
211 etna_alu_to_scalar_filter_cb(const nir_instr
*instr
, const void *data
)
/* 'data' is the etna_specs pointer passed at the nir_lower_alu_to_scalar call. */
213 const struct etna_specs
*specs
= data
;
215 if (instr
->type
!= nir_instr_type_alu
)
218 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
230 /* TODO: can do better than alu_to_scalar for vector compares */
231 case nir_op_b32all_fequal2
:
232 case nir_op_b32all_fequal3
:
233 case nir_op_b32all_fequal4
:
234 case nir_op_b32any_fnequal2
:
235 case nir_op_b32any_fnequal3
:
236 case nir_op_b32any_fnequal4
:
237 case nir_op_b32all_iequal2
:
238 case nir_op_b32all_iequal3
:
239 case nir_op_b32all_iequal4
:
240 case nir_op_b32any_inequal2
:
241 case nir_op_b32any_inequal3
:
242 case nir_op_b32any_inequal4
:
245 if (!specs
->has_halti2_instructions
)
/* Per-impl ALU fixups:
 *  - pre-multiply sin/cos operands by 2/PI (1/PI on HW with the "new
 *    transcendentals"), since the HW expects a pre-scaled argument;
 *  - on new-transcendental HW, widen fdiv/flog2/fsin/fcos results to two
 *    components and append a scalar fmul of the two components, moving the
 *    saturate flag onto that final mul.
 * NOTE(review): extraction gaps (missing braces/continues per the numeric
 * prefixes); visible tokens kept verbatim. */
256 etna_lower_alu_impl(nir_function_impl
*impl
, struct etna_compile
*c
)
258 nir_shader
*shader
= impl
->function
->shader
;
261 nir_builder_init(&b
, impl
);
263 /* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */
264 nir_foreach_block(block
, impl
) {
265 nir_foreach_instr_safe(instr
, block
) {
266 if (instr
->type
!= nir_instr_type_alu
)
269 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
270 /* multiply sin/cos src by constant
271 * TODO: do this earlier (but it breaks const_prop opt)
273 if (alu
->op
== nir_op_fsin
|| alu
->op
== nir_op_fcos
) {
274 b
.cursor
= nir_before_instr(instr
);
/* Scale factor depends on the transcendental unit generation. */
276 nir_ssa_def
*imm
= c
->specs
->has_new_transcendentals
?
277 nir_imm_float(&b
, 1.0 / M_PI
) :
278 nir_imm_float(&b
, 2.0 / M_PI
);
280 nir_instr_rewrite_src(instr
, &alu
->src
[0].src
,
281 nir_src_for_ssa(nir_fmul(&b
, alu
->src
[0].src
.ssa
, imm
)));
284 /* change transcendental ops to vec2 and insert vec1 mul for the result
285 * TODO: do this earlier (but it breaks with optimizations)
287 if (c
->specs
->has_new_transcendentals
&& (
288 alu
->op
== nir_op_fdiv
|| alu
->op
== nir_op_flog2
||
289 alu
->op
== nir_op_fsin
|| alu
->op
== nir_op_fcos
)) {
290 nir_ssa_def
*ssa
= &alu
->dest
.dest
.ssa
;
292 assert(ssa
->num_components
== 1);
/* fmul of the op's two result components produces the final value. */
294 nir_alu_instr
*mul
= nir_alu_instr_create(shader
, nir_op_fmul
);
295 mul
->src
[0].src
= mul
->src
[1].src
= nir_src_for_ssa(ssa
);
296 mul
->src
[1].swizzle
[0] = 1;
298 mul
->dest
.write_mask
= 1;
299 nir_ssa_dest_init(&mul
->instr
, &mul
->dest
.dest
, 1, 32, NULL
);
/* The transcendental now produces a vec2. */
301 ssa
->num_components
= 2;
/* Move the saturate flag from the op onto the final multiply. */
303 mul
->dest
.saturate
= alu
->dest
.saturate
;
304 alu
->dest
.saturate
= 0;
306 nir_instr_insert_after(instr
, &mul
->instr
);
308 nir_ssa_def_rewrite_uses_after(ssa
, nir_src_for_ssa(&mul
->dest
.dest
.ssa
), &mul
->instr
);
/* Run etna_lower_alu_impl over every function impl in the shader. */
314 static void etna_lower_alu(nir_shader
*shader
, struct etna_compile
*c
)
316 nir_foreach_function(function
, shader
) {
318 etna_lower_alu_impl(function
->impl
, c
);
/* Append one machine instruction to the compile's code buffer and advance
 * the instruction pointer. */
323 emit_inst(struct etna_compile
*c
, struct etna_inst
*inst
)
325 c
->code
[c
->inst_ptr
++] = *inst
;
/* NIR -> etnaviv instruction translation tables.
 * SRC_* encode, in 2-bit fields, which nir src index feeds each of the three
 * etna_inst source slots (value 3 == 'X' means unused). etna_ops maps each
 * nir opcode to an HW opcode, source routing, condition and type; entries
 * left at 0xff are unsupported.
 * NOTE(review): extraction gaps (enum header, macro continuation lines and
 * the table's closing brace are missing); visible tokens kept verbatim. */
328 /* to map nir srcs should to etna_inst srcs */
330 SRC_0_1_2
= (0 << 0) | (1 << 2) | (2 << 4),
331 SRC_0_1_X
= (0 << 0) | (1 << 2) | (3 << 4),
332 SRC_0_X_X
= (0 << 0) | (3 << 2) | (3 << 4),
333 SRC_0_X_1
= (0 << 0) | (3 << 2) | (1 << 4),
334 SRC_0_1_0
= (0 << 0) | (1 << 2) | (0 << 4),
335 SRC_X_X_0
= (3 << 0) | (3 << 2) | (0 << 4),
336 SRC_0_X_0
= (0 << 0) | (3 << 2) | (0 << 4),
339 /* info to translate a nir op to etna_inst */
340 struct etna_op_info
{
341 uint8_t opcode
; /* INST_OPCODE_ */
342 uint8_t src
; /* SRC_ enum */
343 uint8_t cond
; /* INST_CONDITION_ */
344 uint8_t type
; /* INST_TYPE_ */
347 static const struct etna_op_info etna_ops
[] = {
348 [0 ... nir_num_opcodes
- 1] = {0xff},
351 #define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \
354 INST_CONDITION_##cond, \
357 #define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32)
358 #define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32)
359 #define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32)
360 #define OP(nir, op, src) OPC(nir, op, src, TRUE)
361 #define IOP(nir, op, src) IOPC(nir, op, src, TRUE)
362 #define UOP(nir, op, src) UOPC(nir, op, src, TRUE)
363 OP(mov
, MOV
, X_X_0
), OP(fneg
, MOV
, X_X_0
), OP(fabs
, MOV
, X_X_0
), OP(fsat
, MOV
, X_X_0
),
364 OP(fmul
, MUL
, 0_1_X
), OP(fadd
, ADD
, 0_X_1
), OP(ffma
, MAD
, 0_1_2
),
365 OP(fdot2
, DP2
, 0_1_X
), OP(fdot3
, DP3
, 0_1_X
), OP(fdot4
, DP4
, 0_1_X
),
366 OPC(fmin
, SELECT
, 0_1_0
, GT
), OPC(fmax
, SELECT
, 0_1_0
, LT
),
367 OP(ffract
, FRC
, X_X_0
), OP(frcp
, RCP
, X_X_0
), OP(frsq
, RSQ
, X_X_0
),
368 OP(fsqrt
, SQRT
, X_X_0
), OP(fsin
, SIN
, X_X_0
), OP(fcos
, COS
, X_X_0
),
369 OP(fsign
, SIGN
, X_X_0
), OP(ffloor
, FLOOR
, X_X_0
), OP(fceil
, CEIL
, X_X_0
),
370 OP(flog2
, LOG
, X_X_0
), OP(fexp2
, EXP
, X_X_0
),
371 OPC(seq
, SET
, 0_1_X
, EQ
), OPC(sne
, SET
, 0_1_X
, NE
), OPC(sge
, SET
, 0_1_X
, GE
), OPC(slt
, SET
, 0_1_X
, LT
),
372 OPC(fcsel
, SELECT
, 0_1_2
, NZ
),
373 OP(fdiv
, DIV
, 0_1_X
),
374 OP(fddx
, DSX
, 0_X_0
), OP(fddy
, DSY
, 0_X_0
),
377 IOP(i2f32
, I2F
, 0_X_X
),
378 UOP(u2f32
, I2F
, 0_X_X
),
379 IOP(f2i32
, F2I
, 0_X_X
),
380 UOP(f2u32
, F2I
, 0_X_X
),
381 UOP(b2f32
, AND
, 0_X_X
), /* AND with fui(1.0f) */
382 UOP(b2i32
, AND
, 0_X_X
), /* AND with 1 */
383 OPC(f2b32
, CMP
, 0_X_X
, NE
), /* != 0.0 */
384 UOPC(i2b32
, CMP
, 0_X_X
, NE
), /* != 0 */
387 IOP(iadd
, ADD
, 0_X_1
),
388 IOP(imul
, IMULLO0
, 0_1_X
),
389 /* IOP(imad, IMADLO0, 0_1_2), */
390 IOP(ineg
, ADD
, X_X_0
), /* ADD 0, -x */
391 IOP(iabs
, IABS
, X_X_0
),
392 IOP(isign
, SIGN
, X_X_0
),
393 IOPC(imin
, SELECT
, 0_1_0
, GT
),
394 IOPC(imax
, SELECT
, 0_1_0
, LT
),
395 UOPC(umin
, SELECT
, 0_1_0
, GT
),
396 UOPC(umax
, SELECT
, 0_1_0
, LT
),
399 UOPC(b32csel
, SELECT
, 0_1_2
, NZ
),
401 /* compare with int result */
402 OPC(feq32
, CMP
, 0_1_X
, EQ
),
403 OPC(fne32
, CMP
, 0_1_X
, NE
),
404 OPC(fge32
, CMP
, 0_1_X
, GE
),
405 OPC(flt32
, CMP
, 0_1_X
, LT
),
406 IOPC(ieq32
, CMP
, 0_1_X
, EQ
),
407 IOPC(ine32
, CMP
, 0_1_X
, NE
),
408 IOPC(ige32
, CMP
, 0_1_X
, GE
),
409 IOPC(ilt32
, CMP
, 0_1_X
, LT
),
410 UOPC(uge32
, CMP
, 0_1_X
, GE
),
411 UOPC(ult32
, CMP
, 0_1_X
, LT
),
415 IOP(iand
, AND
, 0_X_1
),
416 IOP(ixor
, XOR
, 0_X_1
),
417 IOP(inot
, NOT
, X_X_0
),
418 IOP(ishl
, LSHIFT
, 0_X_1
),
419 IOP(ishr
, RSHIFT
, 0_X_1
),
420 UOP(ushr
, RSHIFT
, 0_X_1
),
/* Record the instruction index at which NIR block 'block' begins, so branch
 * targets can be fixed up after emission. */
424 etna_emit_block_start(struct etna_compile
*c
, unsigned block
)
426 c
->block_ptr
[block
] = c
->inst_ptr
;
/* Emit one ALU instruction: look up the nir op in etna_ops, broadcast scalar
 * sources to the written component, synthesize immediate operands for ops
 * without a 1:1 HW mapping (b2f/b2i AND masks, ineg's zero, CMP's -1 "true"
 * value), then route the nir sources into HW source slots per ei.src.
 * NOTE(review): extraction gaps — the inst initializer body and several
 * switch cases are missing; visible tokens kept verbatim. */
430 etna_emit_alu(struct etna_compile
*c
, nir_op op
, struct etna_inst_dst dst
,
431 struct etna_inst_src src
[3], bool saturate
)
433 struct etna_op_info ei
= etna_ops
[op
];
434 unsigned swiz_scalar
= INST_SWIZ_BROADCAST(ffs(dst
.write_mask
) - 1);
/* 0xff marks a nir op with no translation table entry. */
436 assert(ei
.opcode
!= 0xff);
438 struct etna_inst inst
= {
451 if (c
->specs
->has_new_transcendentals
)
459 /* scalar instructions we want src to be in x component */
460 src
[0].swiz
= inst_swiz_compose(src
[0].swiz
, swiz_scalar
);
461 src
[1].swiz
= inst_swiz_compose(src
[1].swiz
, swiz_scalar
);
463 /* deal with instructions which don't have 1:1 mapping */
465 inst
.src
[2] = etna_immediate_float(1.0f
);
468 inst
.src
[2] = etna_immediate_int(1);
471 inst
.src
[1] = etna_immediate_float(0.0f
);
474 inst
.src
[1] = etna_immediate_int(0);
477 inst
.src
[0] = etna_immediate_int(0);
484 /* set the "true" value for CMP instructions */
485 if (inst
.opcode
== INST_OPCODE_CMP
)
486 inst
.src
[2] = etna_immediate_int(-1);
/* Route nir srcs into HW source slots using the 2-bit fields of ei.src. */
488 for (unsigned j
= 0; j
< 3; j
++) {
489 unsigned i
= ((ei
.src
>> j
*2) & 3);
491 inst
.src
[j
] = src
[i
];
/* Emit a texture sample instruction. Vertex-stage samplers are offset by the
 * chip's vertex_sampler_offset; the nir_texop selects TEXLD/TEXLDB/TEXLDL.
 * NOTE(review): extraction gaps — parts of the initializer and switch are
 * missing; visible tokens kept verbatim. */
498 etna_emit_tex(struct etna_compile
*c
, nir_texop op
, unsigned texid
, unsigned dst_swiz
,
499 struct etna_inst_dst dst
, struct etna_inst_src coord
,
500 struct etna_inst_src lod_bias
)
502 struct etna_inst inst
= {
504 .tex
.id
= texid
+ (is_fs(c
) ? 0 : c
->specs
->vertex_sampler_offset
),
505 .tex
.swiz
= dst_swiz
,
510 inst
.src
[1] = lod_bias
;
513 case nir_texop_tex
: inst
.opcode
= INST_OPCODE_TEXLD
; break;
514 case nir_texop_txb
: inst
.opcode
= INST_OPCODE_TEXLDB
; break;
515 case nir_texop_txl
: inst
.opcode
= INST_OPCODE_TEXLDL
; break;
/* Emit a branch to 'block' (the block index is later translated to an
 * instruction address via block_ptr). Unconditional when condition.use is
 * clear; otherwise a NOT-conditioned branch on the broadcast condition.
 * NOTE(review): extraction gaps; visible tokens kept verbatim. */
524 etna_emit_jump(struct etna_compile
*c
, unsigned block
, struct etna_inst_src condition
)
526 if (!condition
.use
) {
527 emit_inst(c
, &(struct etna_inst
) {.opcode
= INST_OPCODE_BRANCH
, .imm
= block
});
531 struct etna_inst inst
= {
532 .opcode
= INST_OPCODE_BRANCH
,
533 .cond
= INST_CONDITION_NOT
,
534 .type
= INST_TYPE_U32
,
/* Broadcast the condition's first swizzle component across all lanes. */
538 inst
.src
[0].swiz
= INST_SWIZ_BROADCAST(inst
.src
[0].swiz
& 3);
/* Emit a fragment discard (TEXKILL). Unconditional when condition.use is
 * clear; otherwise killed when the (broadcast) condition is non-zero. The
 * compare type is F32 on pre-halti2 chips, U32 otherwise.
 * NOTE(review): extraction gaps; visible tokens kept verbatim. */
543 etna_emit_discard(struct etna_compile
*c
, struct etna_inst_src condition
)
545 if (!condition
.use
) {
546 emit_inst(c
, &(struct etna_inst
) { .opcode
= INST_OPCODE_TEXKILL
});
550 struct etna_inst inst
= {
551 .opcode
= INST_OPCODE_TEXKILL
,
552 .cond
= INST_CONDITION_NZ
,
553 .type
= (c
->specs
->halti
< 2) ? INST_TYPE_F32
: INST_TYPE_U32
,
556 inst
.src
[0].swiz
= INST_SWIZ_BROADCAST(inst
.src
[0].swiz
& 3);
/* Record which temp register holds the output at driver_location 'index'. */
561 etna_emit_output(struct etna_compile
*c
, unsigned index
, struct etna_inst_src src
)
563 c
->output_reg
[index
] = src
.reg
;
/* Emit a UBO load: on pre-halti2 chips the offset arrives as a float and is
 * first converted back to an integer with F2I, then a LOAD reads from the
 * computed address.
 * NOTE(review): extraction gaps — parts of both instruction initializers are
 * missing; visible tokens kept verbatim. */
567 etna_emit_load_ubo(struct etna_compile
*c
, struct etna_inst_dst dst
,
568 struct etna_inst_src src
, struct etna_inst_src base
)
570 /* convert float offset back to integer */
571 if (c
->specs
->halti
< 2) {
572 emit_inst(c
, &(struct etna_inst
) {
573 .opcode
= INST_OPCODE_F2I
,
574 .type
= INST_TYPE_U32
,
579 emit_inst(c
, &(struct etna_inst
) {
580 .opcode
= INST_OPCODE_LOAD
,
581 .type
= INST_TYPE_U32
,
585 .rgroup
= INST_RGROUP_TEMP
,
/* Broadcast from the first written component of the destination. */
587 .swiz
= INST_SWIZ_BROADCAST(ffs(dst
.write_mask
) - 1)
595 emit_inst(c
, &(struct etna_inst
) {
596 .opcode
= INST_OPCODE_LOAD
,
597 .type
= INST_TYPE_U32
,
/* OPT runs a NIR pass and yields whether it made progress; OPT_V runs a pass
 * with no progress result. (OPT's closing lines are outside this view.) */
604 #define OPT(nir, pass, ...) ({ \
605 bool this_progress = false; \
606 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
609 #define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
/* Standard NIR optimization loop: repeatedly run the usual cleanup passes
 * until no pass reports progress.
 * NOTE(review): the loop header/footer lines are outside this view; visible
 * tokens kept verbatim. */
612 etna_optimize_loop(nir_shader
*s
)
618 OPT_V(s
, nir_lower_vars_to_ssa
);
619 progress
|= OPT(s
, nir_opt_copy_prop_vars
);
620 progress
|= OPT(s
, nir_copy_prop
);
621 progress
|= OPT(s
, nir_opt_dce
);
622 progress
|= OPT(s
, nir_opt_cse
);
623 progress
|= OPT(s
, nir_opt_peephole_select
, 16, true, true);
624 progress
|= OPT(s
, nir_opt_intrinsics
);
625 progress
|= OPT(s
, nir_opt_algebraic
);
626 progress
|= OPT(s
, nir_opt_constant_folding
);
627 progress
|= OPT(s
, nir_opt_dead_cf
);
628 if (OPT(s
, nir_opt_trivial_continues
)) {
630 /* If nir_opt_trivial_continues makes progress, then we need to clean
631 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
634 OPT(s
, nir_copy_prop
);
637 progress
|= OPT(s
, nir_opt_loop_unroll
, nir_var_all
);
638 progress
|= OPT(s
, nir_opt_if
, false);
639 progress
|= OPT(s
, nir_opt_remove_phis
);
640 progress
|= OPT(s
, nir_opt_undef
);
/* glsl type-size callback for nir_lower_io: one slot per attribute slot
 * (vec4 granularity); 'bindless' is ignored. */
646 etna_glsl_type_size(const struct glsl_type
*type
, bool bindless
)
648 return glsl_count_attribute_slots(type
, false);
/* Copy the compiler's packed immediate table into the shader variant: each
 * 64-bit entry holds the 32-bit value in the low half and its content tag in
 * the high half ('count' is in vec4 units, hence count * 4 scalars).
 * NOTE(review): MALLOC results are not checked here — matches the driver's
 * existing allocation style. */
652 copy_uniform_state_to_shader(struct etna_shader_variant
*sobj
, uint64_t *consts
, unsigned count
)
654 struct etna_shader_uniform_info
*uinfo
= &sobj
->uniforms
;
656 uinfo
->imm_count
= count
* 4;
657 uinfo
->imm_data
= MALLOC(uinfo
->imm_count
* sizeof(*uinfo
->imm_data
));
658 uinfo
->imm_contents
= MALLOC(uinfo
->imm_count
* sizeof(*uinfo
->imm_contents
));
660 for (unsigned i
= 0; i
< uinfo
->imm_count
; i
++) {
661 uinfo
->imm_data
[i
] = consts
[i
];
662 uinfo
->imm_contents
[i
] = consts
[i
] >> 32;
665 etna_set_shader_uniforms_dirty_flags(sobj
);
668 #include "etnaviv_compiler_nir_emit.h"
/* Main entry point: compile a shader variant's NIR into etnaviv machine code.
 * Clones the NIR, sets up input linking, runs the lowering/optimization
 * pipeline, emits and assembles instructions (fixing up branch targets),
 * copies out uniforms, then fills in stage-specific output registers and the
 * VS load-balancing value.
 * NOTE(review): extraction gaps throughout (missing braces, breaks, returns
 * and some statements, per the numeric prefixes); visible tokens verbatim. */
671 etna_compile_shader_nir(struct etna_shader_variant
*v
)
676 struct etna_compile
*c
= CALLOC_STRUCT(etna_compile
);
681 c
->specs
= v
->shader
->specs
;
/* Work on a clone so the shader's original NIR stays untouched. */
682 c
->nir
= nir_shader_clone(NULL
, v
->shader
->nir
);
684 nir_shader
*s
= c
->nir
;
685 const struct etna_specs
*specs
= c
->specs
;
687 v
->stage
= s
->info
.stage
;
688 v
->num_loops
= 0; /* TODO */
689 v
->vs_id_in_reg
= -1;
690 v
->vs_pos_out_reg
= -1;
691 v
->vs_pointsize_out_reg
= -1;
692 v
->ps_color_out_reg
= 0; /* 0 for shader that doesn't write fragcolor.. */
693 v
->ps_depth_out_reg
= -1;
695 /* setup input linking */
696 struct etna_shader_io_file
*sf
= &v
->infile
;
697 if (s
->info
.stage
== MESA_SHADER_VERTEX
) {
698 nir_foreach_variable(var
, &s
->inputs
) {
699 unsigned idx
= var
->data
.driver_location
;
700 sf
->reg
[idx
].reg
= idx
;
701 sf
->reg
[idx
].slot
= var
->data
.location
;
702 sf
->reg
[idx
].num_components
= glsl_get_components(var
->type
);
703 sf
->num_reg
= MAX2(sf
->num_reg
, idx
+1);
/* Fragment stage: inputs start at register 1 (register 0 is reserved). */
707 nir_foreach_variable(var
, &s
->inputs
) {
708 unsigned idx
= var
->data
.driver_location
;
709 sf
->reg
[idx
].reg
= idx
+ 1;
710 sf
->reg
[idx
].slot
= var
->data
.location
;
711 sf
->reg
[idx
].num_components
= glsl_get_components(var
->type
);
712 sf
->num_reg
= MAX2(sf
->num_reg
, idx
+1);
715 assert(sf
->num_reg
== count
);
/* Lowering pipeline. */
718 NIR_PASS_V(s
, nir_lower_io
, nir_var_all
, etna_glsl_type_size
,
719 (nir_lower_io_options
)0);
721 OPT_V(s
, nir_lower_regs_to_ssa
);
722 OPT_V(s
, nir_lower_vars_to_ssa
);
723 OPT_V(s
, nir_lower_indirect_derefs
, nir_var_all
);
724 OPT_V(s
, nir_lower_tex
, &(struct nir_lower_tex_options
) { .lower_txp
= ~0u });
725 OPT_V(s
, nir_lower_alu_to_scalar
, etna_alu_to_scalar_filter_cb
, specs
);
727 etna_optimize_loop(s
);
729 OPT_V(s
, etna_lower_io
, v
);
731 /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */
732 if (c
->specs
->halti
< 2) {
733 /* use opt_algebraic between int_to_float and boot_to_float because
734 * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
736 OPT_V(s
, nir_lower_int_to_float
);
737 OPT_V(s
, nir_opt_algebraic
);
738 OPT_V(s
, nir_lower_bool_to_float
);
740 OPT_V(s
, nir_lower_idiv
);
741 OPT_V(s
, nir_lower_bool_to_int32
);
744 etna_optimize_loop(s
);
746 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS
))
747 nir_print_shader(s
, stdout
);
/* Re-vectorize after optimization, then scalarize what the chip requires. */
749 while( OPT(s
, nir_opt_vectorize
) );
750 OPT_V(s
, nir_lower_alu_to_scalar
, etna_alu_to_scalar_filter_cb
, specs
);
752 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
);
753 NIR_PASS_V(s
, nir_opt_algebraic_late
);
755 NIR_PASS_V(s
, nir_move_vec_src_uses_to_dest
);
756 NIR_PASS_V(s
, nir_copy_prop
);
757 /* only HW supported integer source mod is ineg for iadd instruction (?) */
758 NIR_PASS_V(s
, nir_lower_to_source_mods
, ~nir_lower_int_source_mods
);
759 /* need copy prop after uses_to_dest, and before src mods: see
760 * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
763 NIR_PASS_V(s
, nir_opt_dce
);
765 NIR_PASS_V(s
, etna_lower_alu
, c
);
767 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS
))
768 nir_print_shader(s
, stdout
);
770 uint64_t consts
[ETNA_MAX_IMM
] = {};
/* VLA sized by the entrypoint's block count; holds per-block inst indices. */
772 unsigned block_ptr
[nir_shader_get_entrypoint(s
)->num_blocks
];
773 c
->block_ptr
= block_ptr
;
774 struct emit_options options
= {
775 .max_temps
= ETNA_MAX_TEMPS
,
776 .max_consts
= ETNA_MAX_IMM
/ 4,
777 .id_reg
= sf
->num_reg
,
778 .single_const_src
= c
->specs
->halti
< 5,
779 .etna_new_transcendentals
= c
->specs
->has_new_transcendentals
,
780 .no_integers
= c
->specs
->halti
< 2,
786 ASSERTED
bool ok
= emit_shader(c
->nir
, &options
, &v
->num_temps
, &num_consts
);
789 /* empty shader, emit NOP */
791 emit_inst(c
, &(struct etna_inst
) { .opcode
= INST_OPCODE_NOP
});
793 /* assemble instructions, fixing up labels */
794 uint32_t *code
= MALLOC(c
->inst_ptr
* 16 + 1024);
795 for (unsigned i
= 0; i
< c
->inst_ptr
; i
++) {
796 struct etna_inst
*inst
= &c
->code
[i
];
/* Branch imm currently holds a block index; translate to inst address. */
797 if (inst
->opcode
== INST_OPCODE_BRANCH
)
798 inst
->imm
= block_ptr
[inst
->imm
];
800 inst
->halti5
= specs
->halti
>= 5;
801 etna_assemble(&code
[i
* 4], inst
);
804 v
->code_size
= c
->inst_ptr
* 4;
806 v
->needs_icache
= c
->inst_ptr
> specs
->max_instructions
;
808 copy_uniform_state_to_shader(v
, consts
, num_consts
);
810 if (s
->info
.stage
== MESA_SHADER_FRAGMENT
) {
811 v
->input_count_unk8
= 31; /* XXX what is this */
813 nir_foreach_variable(var
, &s
->outputs
) {
814 unsigned reg
= c
->output_reg
[var
->data
.driver_location
];
815 switch (var
->data
.location
) {
816 case FRAG_RESULT_COLOR
:
817 case FRAG_RESULT_DATA0
: /* DATA0 is used by gallium shaders for color */
818 v
->ps_color_out_reg
= reg
;
820 case FRAG_RESULT_DEPTH
:
821 v
->ps_depth_out_reg
= reg
;
824 compile_error(c
, "Unsupported fs output %s\n", gl_frag_result_name(var
->data
.location
));
827 assert(v
->ps_depth_out_reg
<= 0);
828 v
->outfile
.num_reg
= 0;
/* Vertex stage: publish outputs/varyings into the variant's outfile. */
834 v
->input_count_unk8
= DIV_ROUND_UP(v
->infile
.num_reg
+ 4, 16); /* XXX what is this */
838 nir_foreach_variable(var
, &s
->outputs
) {
839 unsigned native
= c
->output_reg
[var
->data
.driver_location
];
841 if (var
->data
.location
== VARYING_SLOT_POS
) {
842 v
->vs_pos_out_reg
= native
;
846 if (var
->data
.location
== VARYING_SLOT_PSIZ
) {
847 v
->vs_pointsize_out_reg
= native
;
851 sf
->reg
[sf
->num_reg
].reg
= native
;
852 sf
->reg
[sf
->num_reg
].slot
= var
->data
.location
;
853 sf
->reg
[sf
->num_reg
].num_components
= glsl_get_components(var
->type
);
857 /* fill in "mystery meat" load balancing value. This value determines how
858 * work is scheduled between VS and PS
859 * in the unified shader architecture. More precisely, it is determined from
860 * the number of VS outputs, as well as chip-specific
861 * vertex output buffer size, vertex cache size, and the number of shader
864 * XXX this is a conservative estimate, the "optimal" value is only known for
865 * sure at link time because some
866 * outputs may be unused and thus unmapped. Then again, in the general use
867 * case with GLSL the vertex and fragment
868 * shaders are linked already before submitting to Gallium, thus all outputs
871 * note: TGSI compiler counts all outputs (including position and pointsize), here
872 * v->outfile.num_reg only counts varyings, +1 to compensate for the position output
873 * TODO: might have a problem that we don't count pointsize when it is used
876 int half_out
= v
->outfile
.num_reg
/ 2 + 1;
879 uint32_t b
= ((20480 / (specs
->vertex_output_buffer_size
-
880 2 * half_out
* specs
->vertex_cache_size
)) +
883 uint32_t a
= (b
+ 256 / (specs
->shader_core_count
* half_out
)) / 2;
884 v
->vs_load_balancing
= VIVS_VS_LOAD_BALANCING_A(MIN2(a
, 255)) |
885 VIVS_VS_LOAD_BALANCING_B(MIN2(b
, 255)) |
886 VIVS_VS_LOAD_BALANCING_C(0x3f) |
887 VIVS_VS_LOAD_BALANCING_D(0x0f);
/* Free the per-variant uniform tables allocated by
 * copy_uniform_state_to_shader. (Other frees may exist in lines missing
 * from this view.) */
895 etna_destroy_shader_nir(struct etna_shader_variant
*shader
)
900 FREE(shader
->uniforms
.imm_data
);
901 FREE(shader
->uniforms
.imm_contents
);
905 extern const char *tgsi_swizzle_names
[];
/* Debug dump of a compiled variant: disassembly, immediates, inputs,
 * outputs and the stage-specific special registers.
 * NOTE(review): extraction gaps (some printf headers/braces missing);
 * visible tokens, including all format strings, kept verbatim. */
907 etna_dump_shader_nir(const struct etna_shader_variant
*shader
)
909 if (shader
->stage
== MESA_SHADER_VERTEX
)
914 etna_disasm(shader
->code
, shader
->code_size
, PRINT_RAW
);
916 printf("num loops: %i\n", shader
->num_loops
);
917 printf("num temps: %i\n", shader
->num_temps
);
918 printf("immediates:\n");
919 for (int idx
= 0; idx
< shader
->uniforms
.imm_count
; ++idx
) {
920 printf(" [%i].%s = %f (0x%08x) (%d)\n",
922 tgsi_swizzle_names
[idx
% 4],
923 *((float *)&shader
->uniforms
.imm_data
[idx
]),
924 shader
->uniforms
.imm_data
[idx
],
925 shader
->uniforms
.imm_contents
[idx
]);
928 for (int idx
= 0; idx
< shader
->infile
.num_reg
; ++idx
) {
929 printf(" [%i] name=%s comps=%i\n", shader
->infile
.reg
[idx
].reg
,
930 (shader
->stage
== MESA_SHADER_VERTEX
) ?
931 gl_vert_attrib_name(shader
->infile
.reg
[idx
].slot
) :
932 gl_varying_slot_name(shader
->infile
.reg
[idx
].slot
),
933 shader
->infile
.reg
[idx
].num_components
);
935 printf("outputs:\n");
936 for (int idx
= 0; idx
< shader
->outfile
.num_reg
; ++idx
) {
937 printf(" [%i] name=%s comps=%i\n", shader
->outfile
.reg
[idx
].reg
,
938 (shader
->stage
== MESA_SHADER_VERTEX
) ?
939 gl_varying_slot_name(shader
->outfile
.reg
[idx
].slot
) :
940 gl_frag_result_name(shader
->outfile
.reg
[idx
].slot
),
941 shader
->outfile
.reg
[idx
].num_components
);
943 printf("special:\n");
944 if (shader
->stage
== MESA_SHADER_VERTEX
) {
945 printf(" vs_pos_out_reg=%i\n", shader
->vs_pos_out_reg
);
946 printf(" vs_pointsize_out_reg=%i\n", shader
->vs_pointsize_out_reg
);
947 printf(" vs_load_balancing=0x%08x\n", shader
->vs_load_balancing
);
949 printf(" ps_color_out_reg=%i\n", shader
->ps_color_out_reg
);
950 printf(" ps_depth_out_reg=%i\n", shader
->ps_depth_out_reg
);
952 printf(" input_count_unk8=0x%08x\n", shader
->input_count_unk8
);
/* Find the VS output whose slot matches the given FS input 'in'; the
 * not-found return path is outside this view. */
955 static const struct etna_shader_inout
*
956 etna_shader_vs_lookup(const struct etna_shader_variant
*sobj
,
957 const struct etna_shader_inout
*in
)
959 for (int i
= 0; i
< sobj
->outfile
.num_reg
; i
++)
960 if (sobj
->outfile
.reg
[i
].slot
== in
->slot
)
961 return &sobj
->outfile
.reg
[i
];
967 etna_link_shader_nir(struct etna_shader_link_info
*info
,
968 const struct etna_shader_variant
*vs
,
969 const struct etna_shader_variant
*fs
)
972 /* For each fragment input we need to find the associated vertex shader
973 * output, which can be found by matching on semantic name and index. A
974 * binary search could be used because the vs outputs are sorted by their
975 * semantic index and grouped by semantic type by fill_in_vs_outputs.
977 assert(fs
->infile
.num_reg
< ETNA_NUM_INPUTS
);
978 info
->pcoord_varying_comp_ofs
= -1;
980 for (int idx
= 0; idx
< fs
->infile
.num_reg
; ++idx
) {
981 const struct etna_shader_inout
*fsio
= &fs
->infile
.reg
[idx
];
982 const struct etna_shader_inout
*vsio
= etna_shader_vs_lookup(vs
, fsio
);
983 struct etna_varying
*varying
;
984 bool interpolate_always
= true;
986 assert(fsio
->reg
> 0 && fsio
->reg
<= ARRAY_SIZE(info
->varyings
));
988 if (fsio
->reg
> info
->num_varyings
)
989 info
->num_varyings
= fsio
->reg
;
991 varying
= &info
->varyings
[fsio
->reg
- 1];
992 varying
->num_components
= fsio
->num_components
;
994 if (!interpolate_always
) /* colors affected by flat shading */
995 varying
->pa_attributes
= 0x200;
996 else /* texture coord or other bypasses flat shading */
997 varying
->pa_attributes
= 0x2f1;
999 varying
->use
[0] = VARYING_COMPONENT_USE_UNUSED
;
1000 varying
->use
[1] = VARYING_COMPONENT_USE_UNUSED
;
1001 varying
->use
[2] = VARYING_COMPONENT_USE_UNUSED
;
1002 varying
->use
[3] = VARYING_COMPONENT_USE_UNUSED
;
1004 /* point coord is an input to the PS without matching VS output,
1005 * so it gets a varying slot without being assigned a VS register.
1007 if (fsio
->slot
== VARYING_SLOT_PNTC
) {
1008 varying
->use
[0] = VARYING_COMPONENT_USE_POINTCOORD_X
;
1009 varying
->use
[1] = VARYING_COMPONENT_USE_POINTCOORD_Y
;
1011 info
->pcoord_varying_comp_ofs
= comp_ofs
;
1013 if (vsio
== NULL
) { /* not found -- link error */
1014 BUG("Semantic value not found in vertex shader outputs\n");
1017 varying
->reg
= vsio
->reg
;
1020 comp_ofs
+= varying
->num_components
;
1023 assert(info
->num_varyings
== fs
->infile
.num_reg
);