/*
 * Copyright (c) 2012-2019 Etnaviv Project
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

29 #include "etnaviv_compiler.h"
30 #include "etnaviv_compiler_nir.h"
31 #include "etnaviv_asm.h"
32 #include "etnaviv_context.h"
33 #include "etnaviv_debug.h"
34 #include "etnaviv_disasm.h"
35 #include "etnaviv_nir.h"
36 #include "etnaviv_uniforms.h"
37 #include "etnaviv_util.h"
40 #include "util/u_memory.h"
41 #include "util/register_allocate.h"
42 #include "compiler/nir/nir_builder.h"
44 #include "tgsi/tgsi_strings.h"
45 #include "util/u_half.h"
static bool
etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
{
   const struct etna_specs *specs = data;

   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_frsq:
   case nir_op_frcp:
   case nir_op_flog2:
   case nir_op_fexp2:
   case nir_op_fsqrt:
   case nir_op_fcos:
   case nir_op_fsin:
   case nir_op_fdiv:
   case nir_op_imul:
      return true;
   /* TODO: can do better than alu_to_scalar for vector compares */
   case nir_op_b32all_fequal2:
   case nir_op_b32all_fequal3:
   case nir_op_b32all_fequal4:
   case nir_op_b32any_fnequal2:
   case nir_op_b32any_fnequal3:
   case nir_op_b32any_fnequal4:
   case nir_op_b32all_iequal2:
   case nir_op_b32all_iequal3:
   case nir_op_b32all_iequal4:
   case nir_op_b32any_inequal2:
   case nir_op_b32any_inequal3:
   case nir_op_b32any_inequal4:
      return true;
   case nir_op_fdot2:
      if (!specs->has_halti2_instructions)
         return true;
      break;
   default:
      break;
   }

   return false;
}

static void
etna_emit_block_start(struct etna_compile *c, unsigned block)
{
   c->block_ptr[block] = c->inst_ptr;
}

static void
etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src src)
{
   struct etna_shader_io_file *sf = &c->variant->outfile;

   if (is_fs(c)) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0: /* DATA0 is used by gallium shaders for color */
         c->variant->ps_color_out_reg = src.reg;
         break;
      case FRAG_RESULT_DEPTH:
         c->variant->ps_depth_out_reg = src.reg;
         break;
      default:
         unreachable("Unsupported fs output");
      }
      return;
   }

   switch (var->data.location) {
   case VARYING_SLOT_POS:
      c->variant->vs_pos_out_reg = src.reg;
      break;
   case VARYING_SLOT_PSIZ:
      c->variant->vs_pointsize_out_reg = src.reg;
      break;
   default:
      sf->reg[sf->num_reg].reg = src.reg;
      sf->reg[sf->num_reg].slot = var->data.location;
      sf->reg[sf->num_reg].num_components = glsl_get_components(var->type);
      sf->num_reg++;
      break;
   }
}

#define OPT(nir, pass, ...) ({                             \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   this_progress;                                          \
})

static void
etna_optimize_loop(nir_shader *s)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      progress |= OPT(s, nir_opt_copy_prop_vars);
      progress |= OPT(s, nir_copy_prop);
      progress |= OPT(s, nir_opt_dce);
      progress |= OPT(s, nir_opt_cse);
      progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
      progress |= OPT(s, nir_opt_intrinsics);
      progress |= OPT(s, nir_opt_algebraic);
      progress |= OPT(s, nir_opt_constant_folding);
      progress |= OPT(s, nir_opt_dead_cf);
      if (OPT(s, nir_opt_trivial_continues)) {
         progress = true;
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(s, nir_copy_prop);
         OPT(s, nir_opt_dce);
      }
      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
      progress |= OPT(s, nir_opt_if, false);
      progress |= OPT(s, nir_opt_remove_phis);
      progress |= OPT(s, nir_opt_undef);
   } while (progress);
}

static int
etna_glsl_type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static void
copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count)
{
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->imm_count = count * 4;
   uinfo->imm_data = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_data));
   uinfo->imm_contents = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_contents));

   for (unsigned i = 0; i < uinfo->imm_count; i++) {
      uinfo->imm_data[i] = consts[i];
      uinfo->imm_contents[i] = consts[i] >> 32;
   }

   etna_set_shader_uniforms_dirty_flags(sobj);
}

#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
#define SRC_DISABLE ((hw_src){})
#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s})
#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s})

typedef struct etna_inst_dst hw_dst;
typedef struct etna_inst_src hw_src;

static inline hw_src
src_swizzle(hw_src src, unsigned swizzle)
{
   if (src.rgroup != INST_RGROUP_IMMEDIATE)
      src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}
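
/* Illustrative note (added, not from the original source): composing here
 * means applying the requested swizzle on top of whatever swizzle the
 * register allocator already attached to the source. E.g. if a scalar lives
 * in the .y channel of its physical register (swiz = YYYY) and the
 * instruction requests .xxxx, the composed swizzle still reads .yyyy, so the
 * physical placement stays transparent to the instruction being emitted.
 */
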
/* constants are represented as 64-bit ints
 * 32-bit for the value and 32-bit for the type (imm, uniform, etc)
 */

#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
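
/* Worked example (added for illustration): CONST(0x3f800000) yields a
 * nir_const_value whose u64 is
 *    (uint64_t)ETNA_IMMEDIATE_CONSTANT << 32 | 0x3f800000,
 * so `value >> 32` recovers the contents tag and the low 32 bits recover the
 * payload (here the bit pattern of 1.0f). copy_uniform_state_to_shader()
 * above splits these two halves into imm_data and imm_contents respectively.
 */
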
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}
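
/* Illustrative example (added): starting from an empty vec4 slot
 * {0, 0, 0, 0}, const_add(c, A) returns 0 and stores A in c[0]; a second
 * call with the same A returns 0 again (deduplicated), while const_add(c, B)
 * returns 1. Once all four components hold distinct values, new values
 * return -1 and the caller retries with the next vec4 slot. A zero entry is
 * treated as free, which works out because real constants carry a non-zero
 * ETNA_IMMEDIATE_* tag in the upper 32 bits.
 */
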
static hw_src
const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_IMMEDIATE_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &c->consts[i*4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   c->const_count = MAX2(c->const_count, i);

   return SRC_CONST(i - 1, swiz);
}
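
/* Worked examples for the inline-immediate paths above (illustration only,
 * assuming halti >= 2 and an ETNA_IMMEDIATE_CONSTANT tag):
 *    bits = 0x3f800000 (1.0f): the low 12 bits are zero, so it encodes as
 *       the "float" kind with payload 0x3f800000 >> 12 = 0x3f800;
 *    bits = 17: not a float pattern but < 1 << 20, so it encodes as the
 *       raw "unsigned" kind;
 *    bits = 0xfffffffe (-2): >= 0xfff80000, so it fits a sign-extended
 *       20-bit "signed" immediate.
 * Anything else falls back to the uniform-file allocation loop in the
 * function above.
 */
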
/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};
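
/* Example (added for clarity): REG_TYPE_VIRT_VEC2_ZW describes a virtual
 * vec2 that the register allocator packed into the .z/.w channels of a
 * physical vec4. Reading it as a source therefore needs SWIZZLE(Z, W, Z, W)
 * so that logical components 0/1 come from the physical z/w channels.
 */
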
/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};
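
/* Example (added for clarity): this table is the inverse view of reg_swiz
 * above. For REG_TYPE_VIRT_VEC2_ZW the write lands in the physical .z/.w
 * channels, so SWIZZLE(X, X, X, Y) routes logical component 0 into z and
 * component 1 into w once ra_dest() below composes it with the
 * instruction's own destination swizzle and write mask.
 */
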
/* nir_src to allocated register */
static hw_src
ra_src(struct etna_compile *c, nir_src *src)
{
   unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
   return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
}

static hw_src
get_src(struct etna_compile *c, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(c, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(c, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      default:
         compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(c, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}

static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         return true;
      default:
         break;
      }
   }
   return false;
}

/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(c, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}

static void
emit_alu(struct etna_compile *c, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}

static void
emit_tex(struct etna_compile *c, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
         compare = &tex->src[i].src;
         break;
      default:
         compile_error(c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
                 lod_bias ? get_src(c, lod_bias) : SRC_DISABLE,
                 compare ? get_src(c, compare) : SRC_DISABLE);
}

static void
emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      etna_emit_discard(c, get_src(c, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      etna_emit_discard(c, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(c, &intr->src[0]),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(c, &intr->dest, &dst_swiz),
         .src[0] = get_src(c, &intr->src[1]),
         .src[1] = const_src(c, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
      break;
   default:
      compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}

static void
emit_instr(struct etna_compile *c, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}

static void
emit_block(struct etna_compile *c, nir_block * block)
{
   etna_emit_block_start(c, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(c, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      etna_emit_jump(c, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list);

static void
emit_if(struct etna_compile *c, nir_if * nif)
{
   etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
   emit_cf_list(c, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(c, &nif->else_list);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(c, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(c, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(c, "Unknown NIR node type\n");
         break;
      }
   }
}

/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}
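
/* Illustrative before/after for insert_vec_mov() (added, not from the
 * original source). Given a vec4 whose x/y components read the same SSA
 * value:
 *
 *    vec4 %d = vec4 %a.y, %a.x, %b.x, %c.x
 *
 * insert_vec_mov(vec, 0, shader) emits
 *
 *    mov %m = %a.yx
 *    vec4 %d = vec4 %m.x, %m.y, %b.x, %c.x
 *
 * and returns write_mask 0x3 so the caller can skip those components.
 */
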
/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants (pre-HALTI5)
 */
static void
lower_alu(struct etna_compile *c, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, c->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      /* pre-GC7000L can only have 1 uniform src per instruction */
      if (c->specs->halti >= 5)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         /* fallthrough */
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, c->nir);
   }
}
static bool
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;
   c->impl = nir_shader_get_entrypoint(shader);

   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, c->impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(c, nir_instr_as_alu(instr));
            break;
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
               have_indirect_uniform = true;
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
               if (nir_intrinsic_base(intr) < 0)
                  value[i] = TEXSCALE(~nir_intrinsic_base(intr), i);
               else
                  value[i] = UNIFORM(base * 4 + i);
            }

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      c->const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   etna_ra_assign(c, shader);

   emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = etna_ra_finish(c);
   *num_consts = c->const_count;
   return true;
}

static bool
etna_compile_check_limits(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;
   int max_uniforms = (v->stage == MESA_SHADER_VERTEX)
                         ? specs->max_vs_uniforms
                         : specs->max_ps_uniforms;

   if (!specs->has_icache && v->needs_icache) {
      DBG("Number of instructions (%d) exceeds maximum %d", v->code_size / 4,
          specs->max_instructions);
      return false;
   }

   if (v->num_temps > specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", v->num_temps,
          specs->max_registers);
      return false;
   }

   if (v->uniforms.imm_count / 4 > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d",
          v->uniforms.imm_count / 4, max_uniforms);
      return false;
   }

   return true;
}

static void
fill_vs_mystery(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;

   v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    *
    * note: TGSI compiler counts all outputs (including position and pointsize), here
    * v->outfile.num_reg only counts varyings, +1 to compensate for the position output
    * TODO: might have a problem that we don't count pointsize when it is used
    */

   int half_out = v->outfile.num_reg / 2 + 1;
   assert(half_out);

   uint32_t b = ((20480 / (specs->vertex_output_buffer_size -
                           2 * half_out * specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2;
   v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                          VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                          VIVS_VS_LOAD_BALANCING_C(0x3f) |
                          VIVS_VS_LOAD_BALANCING_D(0x0f);
}
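
/* Worked example with hypothetical numbers (illustration only, not measured
 * hardware values): with vertex_output_buffer_size = 512,
 * vertex_cache_size = 16, shader_core_count = 4 and 8 varyings,
 * half_out = 8 / 2 + 1 = 5, so
 *    b = ((20480 / (512 - 2 * 5 * 16)) + 9) / 10 = (58 + 9) / 10 = 6
 *    a = (6 + 256 / (4 * 5)) / 2 = (6 + 12) / 2 = 9
 * giving VS_LOAD_BALANCING A=9, B=6 alongside the fixed C=0x3f, D=0x0f.
 */
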
bool
etna_compile_shader_nir(struct etna_shader_variant *v)
{
   if (unlikely(!v))
      return false;

   struct etna_compile *c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   c->variant = v;
   c->specs = v->shader->specs;
   c->nir = nir_shader_clone(NULL, v->shader->nir);

   nir_shader *s = c->nir;
   const struct etna_specs *specs = c->specs;

   v->stage = s->info.stage;
   v->num_loops = 0; /* TODO */
   v->vs_id_in_reg = -1;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */
   v->ps_depth_out_reg = -1;

   /* setup input linking */
   struct etna_shader_io_file *sf = &v->infile;
   if (s->info.stage == MESA_SHADER_VERTEX) {
      nir_foreach_shader_in_variable(var, s) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = glsl_get_components(var->type);
         sf->num_reg = MAX2(sf->num_reg, idx+1);
      }
   } else {
      unsigned count = 0;
      nir_foreach_shader_in_variable(var, s) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx + 1;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = glsl_get_components(var->type);
         sf->num_reg = MAX2(sf->num_reg, idx+1);
         count++;
      }
      assert(sf->num_reg == count);
   }

   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_uniform, etna_glsl_type_size,
            (nir_lower_io_options)0);

   NIR_PASS_V(s, nir_lower_regs_to_ssa);
   NIR_PASS_V(s, nir_lower_vars_to_ssa);
   NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all);
   NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
   NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);

   etna_optimize_loop(s);

   NIR_PASS_V(s, etna_lower_io, v);

   if (v->shader->specs->vs_need_z_div)
      NIR_PASS_V(s, nir_lower_clip_halfz);

   /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */
   if (c->specs->halti < 2) {
      /* use opt_algebraic between int_to_float and boot_to_float because
       * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
       */
      NIR_PASS_V(s, nir_lower_int_to_float);
      NIR_PASS_V(s, nir_opt_algebraic);
      NIR_PASS_V(s, nir_lower_bool_to_float);
   } else {
      NIR_PASS_V(s, nir_lower_idiv, nir_lower_idiv_fast);
      NIR_PASS_V(s, nir_lower_bool_to_int32);
   }

   etna_optimize_loop(s);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
      nir_print_shader(s, stdout);

   while( OPT(s, nir_opt_vectorize, NULL, NULL) );
   NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);

   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(s, nir_opt_algebraic_late);

   NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
   NIR_PASS_V(s, nir_copy_prop);
   /* only HW supported integer source mod is ineg for iadd instruction (?) */
   NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods);
   /* need copy prop after uses_to_dest, and before src mods: see
    * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
    */

   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, nir_lower_bool_to_bitsize);
   NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
      nir_print_shader(s, stdout);

   unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks];
   c->block_ptr = block_ptr;

   unsigned num_consts;
   ASSERTED bool ok = emit_shader(c, &v->num_temps, &num_consts);
   assert(ok);

   /* empty shader, emit NOP */
   if (!c->inst_ptr)
      emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP });

   /* assemble instructions, fixing up labels */
   uint32_t *code = MALLOC(c->inst_ptr * 16);
   for (unsigned i = 0; i < c->inst_ptr; i++) {
      struct etna_inst *inst = &c->code[i];
      if (inst->opcode == INST_OPCODE_BRANCH)
         inst->imm = block_ptr[inst->imm];

      inst->halti5 = specs->halti >= 5;
      etna_assemble(&code[i * 4], inst);
   }

   v->code_size = c->inst_ptr * 4;
   v->code = code;
   v->needs_icache = c->inst_ptr > specs->max_instructions;

   copy_uniform_state_to_shader(v, c->consts, num_consts);

   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      v->input_count_unk8 = 31; /* XXX what is this */
      assert(v->ps_depth_out_reg <= 0);
   } else {
      fill_vs_mystery(v);
   }

   bool result = etna_compile_check_limits(v);
   ralloc_free(c->nir);
   FREE(c);

   return result;
}

void
etna_destroy_shader_nir(struct etna_shader_variant *shader)
{
   assert(shader);

   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader);
}

extern const char *tgsi_swizzle_names[];

void
etna_dump_shader_nir(const struct etna_shader_variant *shader)
{
   if (shader->stage == MESA_SHADER_VERTEX)
      printf("VERT\n");
   else
      printf("FRAG\n");

   etna_disasm(shader->code, shader->code_size, PRINT_RAW);

   printf("num loops: %i\n", shader->num_loops);
   printf("num temps: %i\n", shader->num_temps);
   printf("immediates:\n");
   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
      printf(" [%i].%s = %f (0x%08x) (%d)\n",
             idx / 4,
             tgsi_swizzle_names[idx % 4],
             *((float *)&shader->uniforms.imm_data[idx]),
             shader->uniforms.imm_data[idx],
             shader->uniforms.imm_contents[idx]);
   }
   printf("inputs:\n");
   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
      printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg,
             (shader->stage == MESA_SHADER_VERTEX) ?
              gl_vert_attrib_name(shader->infile.reg[idx].slot) :
              gl_varying_slot_name(shader->infile.reg[idx].slot),
             shader->infile.reg[idx].num_components);
   }
   printf("outputs:\n");
   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
      printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg,
             (shader->stage == MESA_SHADER_VERTEX) ?
              gl_varying_slot_name(shader->outfile.reg[idx].slot) :
              gl_frag_result_name(shader->outfile.reg[idx].slot),
             shader->outfile.reg[idx].num_components);
   }
   printf("special:\n");
   if (shader->stage == MESA_SHADER_VERTEX) {
      printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
      printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
      printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
   } else {
      printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
      printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
   }
   printf("  input_count_unk8=0x%08x\n", shader->input_count_unk8);
}

static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   for (int i = 0; i < sobj->outfile.num_reg; i++)
      if (sobj->outfile.reg[i].slot == in->slot)
         return &sobj->outfile.reg[i];

   return NULL;
}

bool
etna_link_shader_nir(struct etna_shader_link_info *info,
                     const struct etna_shader_variant *vs,
                     const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      bool interpolate_always = true;

      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;

      /* point coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (fsio->slot == VARYING_SLOT_PNTC) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;

         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic value not found in vertex shader outputs\n");
            return true;
         }
         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}