2 * Copyright (c) 2012-2019 Etnaviv Project
3 * Copyright (c) 2019 Zodiac Inflight Innovations
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sub license,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Jonathan Marek <jonathan@marek.ca>
26 * Wladimir J. van der Laan <laanwj@gmail.com>
29 #include "etnaviv_compiler.h"
30 #include "etnaviv_compiler_nir.h"
31 #include "etnaviv_asm.h"
32 #include "etnaviv_context.h"
33 #include "etnaviv_debug.h"
34 #include "etnaviv_disasm.h"
35 #include "etnaviv_nir.h"
36 #include "etnaviv_uniforms.h"
37 #include "etnaviv_util.h"
40 #include "util/u_memory.h"
41 #include "util/register_allocate.h"
42 #include "compiler/nir/nir_builder.h"
44 #include "tgsi/tgsi_strings.h"
45 #include "util/u_half.h"
48 etna_alu_to_scalar_filter_cb(const nir_instr
*instr
, const void *data
)
50 const struct etna_specs
*specs
= data
;
52 if (instr
->type
!= nir_instr_type_alu
)
55 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
67 /* TODO: can do better than alu_to_scalar for vector compares */
68 case nir_op_b32all_fequal2
:
69 case nir_op_b32all_fequal3
:
70 case nir_op_b32all_fequal4
:
71 case nir_op_b32any_fnequal2
:
72 case nir_op_b32any_fnequal3
:
73 case nir_op_b32any_fnequal4
:
74 case nir_op_b32all_iequal2
:
75 case nir_op_b32all_iequal3
:
76 case nir_op_b32all_iequal4
:
77 case nir_op_b32any_inequal2
:
78 case nir_op_b32any_inequal3
:
79 case nir_op_b32any_inequal4
:
82 if (!specs
->has_halti2_instructions
)
93 emit_inst(struct etna_compile
*c
, struct etna_inst
*inst
)
95 c
->code
[c
->inst_ptr
++] = *inst
;
/* How to map nir srcs to etna_inst srcs: each 2-bit field j selects which
 * nir source feeds etna source j (value 3 = no source), decoded in
 * etna_emit_alu via ((src >> j*2) & 3).
 */
enum {
   SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4),
   SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4),
   SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4),
   SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4),
   SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4),
   SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4),
   SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4),
};
/* info to translate a nir op to etna_inst */
struct etna_op_info {
   uint8_t opcode; /* INST_OPCODE_ */
   uint8_t src;    /* SRC_ enum */
   uint8_t cond;   /* INST_CONDITION_ */
   uint8_t type;   /* INST_TYPE_ */
};
117 static const struct etna_op_info etna_ops
[] = {
118 [0 ... nir_num_opcodes
- 1] = {0xff},
121 #define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \
124 INST_CONDITION_##cond, \
127 #define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32)
128 #define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32)
129 #define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32)
130 #define OP(nir, op, src) OPC(nir, op, src, TRUE)
131 #define IOP(nir, op, src) IOPC(nir, op, src, TRUE)
132 #define UOP(nir, op, src) UOPC(nir, op, src, TRUE)
133 OP(mov
, MOV
, X_X_0
), OP(fneg
, MOV
, X_X_0
), OP(fabs
, MOV
, X_X_0
), OP(fsat
, MOV
, X_X_0
),
134 OP(fmul
, MUL
, 0_1_X
), OP(fadd
, ADD
, 0_X_1
), OP(ffma
, MAD
, 0_1_2
),
135 OP(fdot2
, DP2
, 0_1_X
), OP(fdot3
, DP3
, 0_1_X
), OP(fdot4
, DP4
, 0_1_X
),
136 OPC(fmin
, SELECT
, 0_1_0
, GT
), OPC(fmax
, SELECT
, 0_1_0
, LT
),
137 OP(ffract
, FRC
, X_X_0
), OP(frcp
, RCP
, X_X_0
), OP(frsq
, RSQ
, X_X_0
),
138 OP(fsqrt
, SQRT
, X_X_0
), OP(fsin
, SIN
, X_X_0
), OP(fcos
, COS
, X_X_0
),
139 OP(fsign
, SIGN
, X_X_0
), OP(ffloor
, FLOOR
, X_X_0
), OP(fceil
, CEIL
, X_X_0
),
140 OP(flog2
, LOG
, X_X_0
), OP(fexp2
, EXP
, X_X_0
),
141 OPC(seq
, SET
, 0_1_X
, EQ
), OPC(sne
, SET
, 0_1_X
, NE
), OPC(sge
, SET
, 0_1_X
, GE
), OPC(slt
, SET
, 0_1_X
, LT
),
142 OPC(fcsel
, SELECT
, 0_1_2
, NZ
),
143 OP(fdiv
, DIV
, 0_1_X
),
144 OP(fddx
, DSX
, 0_X_0
), OP(fddy
, DSY
, 0_X_0
),
147 IOP(i2f32
, I2F
, 0_X_X
),
148 UOP(u2f32
, I2F
, 0_X_X
),
149 IOP(f2i32
, F2I
, 0_X_X
),
150 UOP(f2u32
, F2I
, 0_X_X
),
151 UOP(b2f32
, AND
, 0_X_X
), /* AND with fui(1.0f) */
152 UOP(b2i32
, AND
, 0_X_X
), /* AND with 1 */
153 OPC(f2b32
, CMP
, 0_X_X
, NE
), /* != 0.0 */
154 UOPC(i2b32
, CMP
, 0_X_X
, NE
), /* != 0 */
157 IOP(iadd
, ADD
, 0_X_1
),
158 IOP(imul
, IMULLO0
, 0_1_X
),
159 /* IOP(imad, IMADLO0, 0_1_2), */
160 IOP(ineg
, ADD
, X_X_0
), /* ADD 0, -x */
161 IOP(iabs
, IABS
, X_X_0
),
162 IOP(isign
, SIGN
, X_X_0
),
163 IOPC(imin
, SELECT
, 0_1_0
, GT
),
164 IOPC(imax
, SELECT
, 0_1_0
, LT
),
165 UOPC(umin
, SELECT
, 0_1_0
, GT
),
166 UOPC(umax
, SELECT
, 0_1_0
, LT
),
169 UOPC(b32csel
, SELECT
, 0_1_2
, NZ
),
171 /* compare with int result */
172 OPC(feq32
, CMP
, 0_1_X
, EQ
),
173 OPC(fne32
, CMP
, 0_1_X
, NE
),
174 OPC(fge32
, CMP
, 0_1_X
, GE
),
175 OPC(flt32
, CMP
, 0_1_X
, LT
),
176 IOPC(ieq32
, CMP
, 0_1_X
, EQ
),
177 IOPC(ine32
, CMP
, 0_1_X
, NE
),
178 IOPC(ige32
, CMP
, 0_1_X
, GE
),
179 IOPC(ilt32
, CMP
, 0_1_X
, LT
),
180 UOPC(uge32
, CMP
, 0_1_X
, GE
),
181 UOPC(ult32
, CMP
, 0_1_X
, LT
),
185 IOP(iand
, AND
, 0_X_1
),
186 IOP(ixor
, XOR
, 0_X_1
),
187 IOP(inot
, NOT
, X_X_0
),
188 IOP(ishl
, LSHIFT
, 0_X_1
),
189 IOP(ishr
, RSHIFT
, 0_X_1
),
190 UOP(ushr
, RSHIFT
, 0_X_1
),
194 etna_emit_block_start(struct etna_compile
*c
, unsigned block
)
196 c
->block_ptr
[block
] = c
->inst_ptr
;
200 etna_emit_alu(struct etna_compile
*c
, nir_op op
, struct etna_inst_dst dst
,
201 struct etna_inst_src src
[3], bool saturate
)
203 struct etna_op_info ei
= etna_ops
[op
];
204 unsigned swiz_scalar
= INST_SWIZ_BROADCAST(ffs(dst
.write_mask
) - 1);
206 if (ei
.opcode
== 0xff)
207 compile_error(c
, "Unhandled ALU op: %s\n", nir_op_infos
[op
].name
);
209 struct etna_inst inst
= {
222 if (c
->specs
->has_new_transcendentals
)
230 /* scalar instructions we want src to be in x component */
231 src
[0].swiz
= inst_swiz_compose(src
[0].swiz
, swiz_scalar
);
232 src
[1].swiz
= inst_swiz_compose(src
[1].swiz
, swiz_scalar
);
234 /* deal with instructions which don't have 1:1 mapping */
236 inst
.src
[2] = etna_immediate_float(1.0f
);
239 inst
.src
[2] = etna_immediate_int(1);
242 inst
.src
[1] = etna_immediate_float(0.0f
);
245 inst
.src
[1] = etna_immediate_int(0);
248 inst
.src
[0] = etna_immediate_int(0);
255 /* set the "true" value for CMP instructions */
256 if (inst
.opcode
== INST_OPCODE_CMP
)
257 inst
.src
[2] = etna_immediate_int(-1);
259 for (unsigned j
= 0; j
< 3; j
++) {
260 unsigned i
= ((ei
.src
>> j
*2) & 3);
262 inst
.src
[j
] = src
[i
];
269 etna_emit_tex(struct etna_compile
*c
, nir_texop op
, unsigned texid
, unsigned dst_swiz
,
270 struct etna_inst_dst dst
, struct etna_inst_src coord
,
271 struct etna_inst_src lod_bias
, struct etna_inst_src compare
)
273 struct etna_inst inst
= {
275 .tex
.id
= texid
+ (is_fs(c
) ? 0 : c
->specs
->vertex_sampler_offset
),
276 .tex
.swiz
= dst_swiz
,
281 inst
.src
[1] = lod_bias
;
284 inst
.src
[2] = compare
;
287 case nir_texop_tex
: inst
.opcode
= INST_OPCODE_TEXLD
; break;
288 case nir_texop_txb
: inst
.opcode
= INST_OPCODE_TEXLDB
; break;
289 case nir_texop_txl
: inst
.opcode
= INST_OPCODE_TEXLDL
; break;
291 compile_error(c
, "Unhandled NIR tex type: %d\n", op
);
298 etna_emit_jump(struct etna_compile
*c
, unsigned block
, struct etna_inst_src condition
)
300 if (!condition
.use
) {
301 emit_inst(c
, &(struct etna_inst
) {.opcode
= INST_OPCODE_BRANCH
, .imm
= block
});
305 struct etna_inst inst
= {
306 .opcode
= INST_OPCODE_BRANCH
,
307 .cond
= INST_CONDITION_NOT
,
308 .type
= INST_TYPE_U32
,
312 inst
.src
[0].swiz
= INST_SWIZ_BROADCAST(inst
.src
[0].swiz
& 3);
317 etna_emit_discard(struct etna_compile
*c
, struct etna_inst_src condition
)
319 if (!condition
.use
) {
320 emit_inst(c
, &(struct etna_inst
) { .opcode
= INST_OPCODE_TEXKILL
});
324 struct etna_inst inst
= {
325 .opcode
= INST_OPCODE_TEXKILL
,
326 .cond
= INST_CONDITION_NZ
,
327 .type
= (c
->specs
->halti
< 2) ? INST_TYPE_F32
: INST_TYPE_U32
,
330 inst
.src
[0].swiz
= INST_SWIZ_BROADCAST(inst
.src
[0].swiz
& 3);
335 etna_emit_output(struct etna_compile
*c
, nir_variable
*var
, struct etna_inst_src src
)
337 struct etna_shader_io_file
*sf
= &c
->variant
->outfile
;
340 switch (var
->data
.location
) {
341 case FRAG_RESULT_COLOR
:
342 case FRAG_RESULT_DATA0
: /* DATA0 is used by gallium shaders for color */
343 c
->variant
->ps_color_out_reg
= src
.reg
;
345 case FRAG_RESULT_DEPTH
:
346 c
->variant
->ps_depth_out_reg
= src
.reg
;
349 unreachable("Unsupported fs output");
354 switch (var
->data
.location
) {
355 case VARYING_SLOT_POS
:
356 c
->variant
->vs_pos_out_reg
= src
.reg
;
358 case VARYING_SLOT_PSIZ
:
359 c
->variant
->vs_pointsize_out_reg
= src
.reg
;
362 sf
->reg
[sf
->num_reg
].reg
= src
.reg
;
363 sf
->reg
[sf
->num_reg
].slot
= var
->data
.location
;
364 sf
->reg
[sf
->num_reg
].num_components
= glsl_get_components(var
->type
);
/* Run a NIR pass and evaluate to whether it made progress (GCC statement
 * expression, usable inside a condition).
 * NOTE(review): the trailing `this_progress; })` lines were dropped by the
 * extraction and restored here — verify before merging.
 */
#define OPT(nir, pass, ...) ({ \
   bool this_progress = false; \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
   this_progress; \
})
377 etna_optimize_loop(nir_shader
*s
)
383 NIR_PASS_V(s
, nir_lower_vars_to_ssa
);
384 progress
|= OPT(s
, nir_opt_copy_prop_vars
);
385 progress
|= OPT(s
, nir_copy_prop
);
386 progress
|= OPT(s
, nir_opt_dce
);
387 progress
|= OPT(s
, nir_opt_cse
);
388 progress
|= OPT(s
, nir_opt_peephole_select
, 16, true, true);
389 progress
|= OPT(s
, nir_opt_intrinsics
);
390 progress
|= OPT(s
, nir_opt_algebraic
);
391 progress
|= OPT(s
, nir_opt_constant_folding
);
392 progress
|= OPT(s
, nir_opt_dead_cf
);
393 if (OPT(s
, nir_opt_trivial_continues
)) {
395 /* If nir_opt_trivial_continues makes progress, then we need to clean
396 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
399 OPT(s
, nir_copy_prop
);
402 progress
|= OPT(s
, nir_opt_loop_unroll
, nir_var_all
);
403 progress
|= OPT(s
, nir_opt_if
, false);
404 progress
|= OPT(s
, nir_opt_remove_phis
);
405 progress
|= OPT(s
, nir_opt_undef
);
411 etna_glsl_type_size(const struct glsl_type
*type
, bool bindless
)
413 return glsl_count_attribute_slots(type
, false);
417 copy_uniform_state_to_shader(struct etna_shader_variant
*sobj
, uint64_t *consts
, unsigned count
)
419 struct etna_shader_uniform_info
*uinfo
= &sobj
->uniforms
;
421 uinfo
->imm_count
= count
* 4;
422 uinfo
->imm_data
= MALLOC(uinfo
->imm_count
* sizeof(*uinfo
->imm_data
));
423 uinfo
->imm_contents
= MALLOC(uinfo
->imm_count
* sizeof(*uinfo
->imm_contents
));
425 for (unsigned i
= 0; i
< uinfo
->imm_count
; i
++) {
426 uinfo
->imm_data
[i
] = consts
[i
];
427 uinfo
->imm_contents
[i
] = consts
[i
] >> 32;
430 etna_set_shader_uniforms_dirty_flags(sobj
);
433 #include "etnaviv_compiler_nir_emit.h"
436 etna_compile_check_limits(struct etna_shader_variant
*v
)
438 const struct etna_specs
*specs
= v
->shader
->specs
;
439 int max_uniforms
= (v
->stage
== MESA_SHADER_VERTEX
)
440 ? specs
->max_vs_uniforms
441 : specs
->max_ps_uniforms
;
443 if (!specs
->has_icache
&& v
->needs_icache
) {
444 DBG("Number of instructions (%d) exceeds maximum %d", v
->code_size
/ 4,
445 specs
->max_instructions
);
449 if (v
->num_temps
> specs
->max_registers
) {
450 DBG("Number of registers (%d) exceeds maximum %d", v
->num_temps
,
451 specs
->max_registers
);
455 if (v
->uniforms
.imm_count
/ 4 > max_uniforms
) {
456 DBG("Number of uniforms (%d) exceeds maximum %d",
457 v
->uniforms
.imm_count
/ 4, max_uniforms
);
465 fill_vs_mystery(struct etna_shader_variant
*v
)
467 const struct etna_specs
*specs
= v
->shader
->specs
;
469 v
->input_count_unk8
= DIV_ROUND_UP(v
->infile
.num_reg
+ 4, 16); /* XXX what is this */
471 /* fill in "mystery meat" load balancing value. This value determines how
472 * work is scheduled between VS and PS
473 * in the unified shader architecture. More precisely, it is determined from
474 * the number of VS outputs, as well as chip-specific
475 * vertex output buffer size, vertex cache size, and the number of shader
478 * XXX this is a conservative estimate, the "optimal" value is only known for
479 * sure at link time because some
480 * outputs may be unused and thus unmapped. Then again, in the general use
481 * case with GLSL the vertex and fragment
482 * shaders are linked already before submitting to Gallium, thus all outputs
485 * note: TGSI compiler counts all outputs (including position and pointsize), here
486 * v->outfile.num_reg only counts varyings, +1 to compensate for the position output
487 * TODO: might have a problem that we don't count pointsize when it is used
490 int half_out
= v
->outfile
.num_reg
/ 2 + 1;
493 uint32_t b
= ((20480 / (specs
->vertex_output_buffer_size
-
494 2 * half_out
* specs
->vertex_cache_size
)) +
497 uint32_t a
= (b
+ 256 / (specs
->shader_core_count
* half_out
)) / 2;
498 v
->vs_load_balancing
= VIVS_VS_LOAD_BALANCING_A(MIN2(a
, 255)) |
499 VIVS_VS_LOAD_BALANCING_B(MIN2(b
, 255)) |
500 VIVS_VS_LOAD_BALANCING_C(0x3f) |
501 VIVS_VS_LOAD_BALANCING_D(0x0f);
505 etna_compile_shader_nir(struct etna_shader_variant
*v
)
510 struct etna_compile
*c
= CALLOC_STRUCT(etna_compile
);
515 c
->specs
= v
->shader
->specs
;
516 c
->nir
= nir_shader_clone(NULL
, v
->shader
->nir
);
518 nir_shader
*s
= c
->nir
;
519 const struct etna_specs
*specs
= c
->specs
;
521 v
->stage
= s
->info
.stage
;
522 v
->num_loops
= 0; /* TODO */
523 v
->vs_id_in_reg
= -1;
524 v
->vs_pos_out_reg
= -1;
525 v
->vs_pointsize_out_reg
= -1;
526 v
->ps_color_out_reg
= 0; /* 0 for shader that doesn't write fragcolor.. */
527 v
->ps_depth_out_reg
= -1;
529 /* setup input linking */
530 struct etna_shader_io_file
*sf
= &v
->infile
;
531 if (s
->info
.stage
== MESA_SHADER_VERTEX
) {
532 nir_foreach_variable(var
, &s
->inputs
) {
533 unsigned idx
= var
->data
.driver_location
;
534 sf
->reg
[idx
].reg
= idx
;
535 sf
->reg
[idx
].slot
= var
->data
.location
;
536 sf
->reg
[idx
].num_components
= glsl_get_components(var
->type
);
537 sf
->num_reg
= MAX2(sf
->num_reg
, idx
+1);
541 nir_foreach_variable(var
, &s
->inputs
) {
542 unsigned idx
= var
->data
.driver_location
;
543 sf
->reg
[idx
].reg
= idx
+ 1;
544 sf
->reg
[idx
].slot
= var
->data
.location
;
545 sf
->reg
[idx
].num_components
= glsl_get_components(var
->type
);
546 sf
->num_reg
= MAX2(sf
->num_reg
, idx
+1);
549 assert(sf
->num_reg
== count
);
552 NIR_PASS_V(s
, nir_lower_io
, ~nir_var_shader_out
, etna_glsl_type_size
,
553 (nir_lower_io_options
)0);
555 NIR_PASS_V(s
, nir_lower_regs_to_ssa
);
556 NIR_PASS_V(s
, nir_lower_vars_to_ssa
);
557 NIR_PASS_V(s
, nir_lower_indirect_derefs
, nir_var_all
);
558 NIR_PASS_V(s
, nir_lower_tex
, &(struct nir_lower_tex_options
) { .lower_txp
= ~0u });
559 NIR_PASS_V(s
, nir_lower_alu_to_scalar
, etna_alu_to_scalar_filter_cb
, specs
);
561 etna_optimize_loop(s
);
563 NIR_PASS_V(s
, etna_lower_io
, v
);
565 if (v
->shader
->specs
->vs_need_z_div
)
566 NIR_PASS_V(s
, nir_lower_clip_halfz
);
568 /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */
569 if (c
->specs
->halti
< 2) {
570 /* use opt_algebraic between int_to_float and boot_to_float because
571 * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
573 NIR_PASS_V(s
, nir_lower_int_to_float
);
574 NIR_PASS_V(s
, nir_opt_algebraic
);
575 NIR_PASS_V(s
, nir_lower_bool_to_float
);
577 NIR_PASS_V(s
, nir_lower_idiv
, nir_lower_idiv_fast
);
578 NIR_PASS_V(s
, nir_lower_bool_to_int32
);
581 etna_optimize_loop(s
);
583 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS
))
584 nir_print_shader(s
, stdout
);
586 while( OPT(s
, nir_opt_vectorize
) );
587 NIR_PASS_V(s
, nir_lower_alu_to_scalar
, etna_alu_to_scalar_filter_cb
, specs
);
589 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
590 NIR_PASS_V(s
, nir_opt_algebraic_late
);
592 NIR_PASS_V(s
, nir_move_vec_src_uses_to_dest
);
593 NIR_PASS_V(s
, nir_copy_prop
);
594 /* only HW supported integer source mod is ineg for iadd instruction (?) */
595 NIR_PASS_V(s
, nir_lower_to_source_mods
, ~nir_lower_int_source_mods
);
596 /* need copy prop after uses_to_dest, and before src mods: see
597 * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
600 NIR_PASS_V(s
, nir_opt_dce
);
602 NIR_PASS_V(s
, etna_lower_alu
, c
->specs
->has_new_transcendentals
);
604 if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS
))
605 nir_print_shader(s
, stdout
);
607 unsigned block_ptr
[nir_shader_get_entrypoint(s
)->num_blocks
];
608 c
->block_ptr
= block_ptr
;
611 ASSERTED
bool ok
= emit_shader(c
, &v
->num_temps
, &num_consts
);
614 /* empty shader, emit NOP */
616 emit_inst(c
, &(struct etna_inst
) { .opcode
= INST_OPCODE_NOP
});
618 /* assemble instructions, fixing up labels */
619 uint32_t *code
= MALLOC(c
->inst_ptr
* 16);
620 for (unsigned i
= 0; i
< c
->inst_ptr
; i
++) {
621 struct etna_inst
*inst
= &c
->code
[i
];
622 if (inst
->opcode
== INST_OPCODE_BRANCH
)
623 inst
->imm
= block_ptr
[inst
->imm
];
625 inst
->halti5
= specs
->halti
>= 5;
626 etna_assemble(&code
[i
* 4], inst
);
629 v
->code_size
= c
->inst_ptr
* 4;
631 v
->needs_icache
= c
->inst_ptr
> specs
->max_instructions
;
633 copy_uniform_state_to_shader(v
, c
->consts
, num_consts
);
635 if (s
->info
.stage
== MESA_SHADER_FRAGMENT
) {
636 v
->input_count_unk8
= 31; /* XXX what is this */
637 assert(v
->ps_depth_out_reg
<= 0);
642 bool result
= etna_compile_check_limits(v
);
649 etna_destroy_shader_nir(struct etna_shader_variant
*shader
)
654 FREE(shader
->uniforms
.imm_data
);
655 FREE(shader
->uniforms
.imm_contents
);
659 extern const char *tgsi_swizzle_names
[];
661 etna_dump_shader_nir(const struct etna_shader_variant
*shader
)
663 if (shader
->stage
== MESA_SHADER_VERTEX
)
668 etna_disasm(shader
->code
, shader
->code_size
, PRINT_RAW
);
670 printf("num loops: %i\n", shader
->num_loops
);
671 printf("num temps: %i\n", shader
->num_temps
);
672 printf("immediates:\n");
673 for (int idx
= 0; idx
< shader
->uniforms
.imm_count
; ++idx
) {
674 printf(" [%i].%s = %f (0x%08x) (%d)\n",
676 tgsi_swizzle_names
[idx
% 4],
677 *((float *)&shader
->uniforms
.imm_data
[idx
]),
678 shader
->uniforms
.imm_data
[idx
],
679 shader
->uniforms
.imm_contents
[idx
]);
682 for (int idx
= 0; idx
< shader
->infile
.num_reg
; ++idx
) {
683 printf(" [%i] name=%s comps=%i\n", shader
->infile
.reg
[idx
].reg
,
684 (shader
->stage
== MESA_SHADER_VERTEX
) ?
685 gl_vert_attrib_name(shader
->infile
.reg
[idx
].slot
) :
686 gl_varying_slot_name(shader
->infile
.reg
[idx
].slot
),
687 shader
->infile
.reg
[idx
].num_components
);
689 printf("outputs:\n");
690 for (int idx
= 0; idx
< shader
->outfile
.num_reg
; ++idx
) {
691 printf(" [%i] name=%s comps=%i\n", shader
->outfile
.reg
[idx
].reg
,
692 (shader
->stage
== MESA_SHADER_VERTEX
) ?
693 gl_varying_slot_name(shader
->outfile
.reg
[idx
].slot
) :
694 gl_frag_result_name(shader
->outfile
.reg
[idx
].slot
),
695 shader
->outfile
.reg
[idx
].num_components
);
697 printf("special:\n");
698 if (shader
->stage
== MESA_SHADER_VERTEX
) {
699 printf(" vs_pos_out_reg=%i\n", shader
->vs_pos_out_reg
);
700 printf(" vs_pointsize_out_reg=%i\n", shader
->vs_pointsize_out_reg
);
701 printf(" vs_load_balancing=0x%08x\n", shader
->vs_load_balancing
);
703 printf(" ps_color_out_reg=%i\n", shader
->ps_color_out_reg
);
704 printf(" ps_depth_out_reg=%i\n", shader
->ps_depth_out_reg
);
706 printf(" input_count_unk8=0x%08x\n", shader
->input_count_unk8
);
709 static const struct etna_shader_inout
*
710 etna_shader_vs_lookup(const struct etna_shader_variant
*sobj
,
711 const struct etna_shader_inout
*in
)
713 for (int i
= 0; i
< sobj
->outfile
.num_reg
; i
++)
714 if (sobj
->outfile
.reg
[i
].slot
== in
->slot
)
715 return &sobj
->outfile
.reg
[i
];
721 etna_link_shader_nir(struct etna_shader_link_info
*info
,
722 const struct etna_shader_variant
*vs
,
723 const struct etna_shader_variant
*fs
)
726 /* For each fragment input we need to find the associated vertex shader
727 * output, which can be found by matching on semantic name and index. A
728 * binary search could be used because the vs outputs are sorted by their
729 * semantic index and grouped by semantic type by fill_in_vs_outputs.
731 assert(fs
->infile
.num_reg
< ETNA_NUM_INPUTS
);
732 info
->pcoord_varying_comp_ofs
= -1;
734 for (int idx
= 0; idx
< fs
->infile
.num_reg
; ++idx
) {
735 const struct etna_shader_inout
*fsio
= &fs
->infile
.reg
[idx
];
736 const struct etna_shader_inout
*vsio
= etna_shader_vs_lookup(vs
, fsio
);
737 struct etna_varying
*varying
;
738 bool interpolate_always
= true;
740 assert(fsio
->reg
> 0 && fsio
->reg
<= ARRAY_SIZE(info
->varyings
));
742 if (fsio
->reg
> info
->num_varyings
)
743 info
->num_varyings
= fsio
->reg
;
745 varying
= &info
->varyings
[fsio
->reg
- 1];
746 varying
->num_components
= fsio
->num_components
;
748 if (!interpolate_always
) /* colors affected by flat shading */
749 varying
->pa_attributes
= 0x200;
750 else /* texture coord or other bypasses flat shading */
751 varying
->pa_attributes
= 0x2f1;
753 varying
->use
[0] = VARYING_COMPONENT_USE_UNUSED
;
754 varying
->use
[1] = VARYING_COMPONENT_USE_UNUSED
;
755 varying
->use
[2] = VARYING_COMPONENT_USE_UNUSED
;
756 varying
->use
[3] = VARYING_COMPONENT_USE_UNUSED
;
758 /* point coord is an input to the PS without matching VS output,
759 * so it gets a varying slot without being assigned a VS register.
761 if (fsio
->slot
== VARYING_SLOT_PNTC
) {
762 varying
->use
[0] = VARYING_COMPONENT_USE_POINTCOORD_X
;
763 varying
->use
[1] = VARYING_COMPONENT_USE_POINTCOORD_Y
;
765 info
->pcoord_varying_comp_ofs
= comp_ofs
;
767 if (vsio
== NULL
) { /* not found -- link error */
768 BUG("Semantic value not found in vertex shader outputs\n");
771 varying
->reg
= vsio
->reg
;
774 comp_ofs
+= varying
->num_components
;
777 assert(info
->num_varyings
== fs
->infile
.num_reg
);