/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
27 #include "etnaviv_nir.h"
29 /* io related lowering
30 * run after lower_int_to_float because it adds i2f/f2i ops
33 etna_lower_io(nir_shader
*shader
, struct etna_shader_variant
*v
)
35 nir_foreach_function(function
, shader
) {
37 nir_builder_init(&b
, function
->impl
);
39 nir_foreach_block(block
, function
->impl
) {
40 nir_foreach_instr_safe(instr
, block
) {
41 if (instr
->type
== nir_instr_type_intrinsic
) {
42 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
44 switch (intr
->intrinsic
) {
45 case nir_intrinsic_load_front_face
: {
46 /* HW front_face is 0.0/1.0, not 0/~0u for bool
47 * lower with a comparison with 0
49 intr
->dest
.ssa
.bit_size
= 32;
51 b
.cursor
= nir_after_instr(instr
);
53 nir_ssa_def
*ssa
= nir_ine(&b
, &intr
->dest
.ssa
, nir_imm_int(&b
, 0));
55 nir_instr_as_alu(ssa
->parent_instr
)->op
= nir_op_ieq
;
57 nir_ssa_def_rewrite_uses_after(&intr
->dest
.ssa
,
61 case nir_intrinsic_store_deref
: {
62 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
63 if (shader
->info
.stage
!= MESA_SHADER_FRAGMENT
|| !v
->key
.frag_rb_swap
)
66 assert(deref
->deref_type
== nir_deref_type_var
);
68 if (deref
->var
->data
.location
!= FRAG_RESULT_COLOR
&&
69 deref
->var
->data
.location
!= FRAG_RESULT_DATA0
)
72 b
.cursor
= nir_before_instr(instr
);
74 nir_ssa_def
*ssa
= nir_mov(&b
, intr
->src
[1].ssa
);
75 nir_alu_instr
*alu
= nir_instr_as_alu(ssa
->parent_instr
);
76 alu
->src
[0].swizzle
[0] = 2;
77 alu
->src
[0].swizzle
[2] = 0;
78 nir_instr_rewrite_src(instr
, &intr
->src
[1], nir_src_for_ssa(ssa
));
80 case nir_intrinsic_load_uniform
: {
81 /* convert indirect load_uniform to load_ubo when possible
82 * this is required on HALTI5+ because address register is not implemented
83 * address register loads also arent done optimally
85 if (v
->shader
->specs
->halti
< 2 || nir_src_is_const(intr
->src
[0]))
88 nir_intrinsic_instr
*load_ubo
=
89 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_ubo
);
90 load_ubo
->num_components
= intr
->num_components
;
91 nir_intrinsic_set_align(load_ubo
, intr
->dest
.ssa
.bit_size
/ 8, 0);
92 nir_ssa_dest_init(&load_ubo
->instr
, &load_ubo
->dest
,
93 load_ubo
->num_components
, 32, NULL
);
95 b
.cursor
= nir_before_instr(instr
);
96 load_ubo
->src
[0] = nir_src_for_ssa(nir_imm_int(&b
, 0));
97 load_ubo
->src
[1] = nir_src_for_ssa(nir_iadd(&b
,
98 nir_imul(&b
, intr
->src
[0].ssa
, nir_imm_int(&b
, 16)),
99 nir_imm_int(&b
, nir_intrinsic_base(intr
) * 16)));
100 nir_builder_instr_insert(&b
, &load_ubo
->instr
);
101 nir_ssa_def_rewrite_uses(&intr
->dest
.ssa
,
102 nir_src_for_ssa(&load_ubo
->dest
.ssa
));
103 nir_instr_remove(&intr
->instr
);
105 case nir_intrinsic_load_ubo
: {
106 nir_const_value
*idx
= nir_src_as_const_value(intr
->src
[0]);
108 /* offset index by 1, index 0 is used for converted load_uniform */
109 b
.cursor
= nir_before_instr(instr
);
110 nir_instr_rewrite_src(instr
, &intr
->src
[0],
111 nir_src_for_ssa(nir_imm_int(&b
, idx
[0].u32
+ 1)));
113 case nir_intrinsic_load_vertex_id
:
114 case nir_intrinsic_load_instance_id
:
115 /* detect use of vertex_id/instance_id */
116 v
->vs_id_in_reg
= v
->infile
.num_reg
;
123 if (instr
->type
!= nir_instr_type_tex
)
126 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
127 nir_src
*coord
= NULL
;
128 nir_src
*lod_bias
= NULL
;
129 unsigned lod_bias_idx
;
131 assert(tex
->sampler_index
== tex
->texture_index
);
133 for (unsigned i
= 0; i
< tex
->num_srcs
; i
++) {
134 switch (tex
->src
[i
].src_type
) {
135 case nir_tex_src_coord
:
136 coord
= &tex
->src
[i
].src
;
138 case nir_tex_src_bias
:
139 case nir_tex_src_lod
:
141 lod_bias
= &tex
->src
[i
].src
;
144 case nir_tex_src_comparator
:
152 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_RECT
) {
153 /* use a dummy load_uniform here to represent texcoord scale */
154 b
.cursor
= nir_before_instr(instr
);
155 nir_intrinsic_instr
*load
=
156 nir_intrinsic_instr_create(b
.shader
, nir_intrinsic_load_uniform
);
157 nir_intrinsic_set_base(load
, ~tex
->sampler_index
);
158 load
->num_components
= 2;
159 load
->src
[0] = nir_src_for_ssa(nir_imm_float(&b
, 0.0f
));
160 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 2, 32, NULL
);
161 nir_intrinsic_set_type(load
, nir_type_float
);
163 nir_builder_instr_insert(&b
, &load
->instr
);
165 nir_ssa_def
*new_coord
= nir_fmul(&b
, coord
->ssa
, &load
->dest
.ssa
);
166 nir_instr_rewrite_src(&tex
->instr
, coord
, nir_src_for_ssa(new_coord
));
169 /* pre HALTI5 needs texture sources in a single source */
171 if (!lod_bias
|| v
->shader
->specs
->halti
>= 5)
174 assert(coord
&& lod_bias
&& tex
->coord_components
< 4);
176 nir_alu_instr
*vec
= nir_alu_instr_create(shader
, nir_op_vec4
);
177 for (unsigned i
= 0; i
< tex
->coord_components
; i
++) {
178 vec
->src
[i
].src
= nir_src_for_ssa(coord
->ssa
);
179 vec
->src
[i
].swizzle
[0] = i
;
181 for (unsigned i
= tex
->coord_components
; i
< 4; i
++)
182 vec
->src
[i
].src
= nir_src_for_ssa(lod_bias
->ssa
);
184 vec
->dest
.write_mask
= 0xf;
185 nir_ssa_dest_init(&vec
->instr
, &vec
->dest
.dest
, 4, 32, NULL
);
187 nir_tex_instr_remove_src(tex
, lod_bias_idx
);
188 nir_instr_rewrite_src(&tex
->instr
, coord
, nir_src_for_ssa(&vec
->dest
.dest
.ssa
));
189 tex
->coord_components
= 4;
191 nir_instr_insert_before(&tex
->instr
, &vec
->instr
);
198 etna_lower_alu_impl(nir_function_impl
*impl
, bool has_new_transcendentals
)
200 nir_shader
*shader
= impl
->function
->shader
;
203 nir_builder_init(&b
, impl
);
205 /* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */
206 nir_foreach_block(block
, impl
) {
207 nir_foreach_instr_safe(instr
, block
) {
208 if (instr
->type
!= nir_instr_type_alu
)
211 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
212 /* multiply sin/cos src by constant
213 * TODO: do this earlier (but it breaks const_prop opt)
215 if (alu
->op
== nir_op_fsin
|| alu
->op
== nir_op_fcos
) {
216 b
.cursor
= nir_before_instr(instr
);
218 nir_ssa_def
*imm
= has_new_transcendentals
?
219 nir_imm_float(&b
, 1.0 / M_PI
) :
220 nir_imm_float(&b
, 2.0 / M_PI
);
222 nir_instr_rewrite_src(instr
, &alu
->src
[0].src
,
223 nir_src_for_ssa(nir_fmul(&b
, alu
->src
[0].src
.ssa
, imm
)));
226 /* change transcendental ops to vec2 and insert vec1 mul for the result
227 * TODO: do this earlier (but it breaks with optimizations)
229 if (has_new_transcendentals
&& (
230 alu
->op
== nir_op_fdiv
|| alu
->op
== nir_op_flog2
||
231 alu
->op
== nir_op_fsin
|| alu
->op
== nir_op_fcos
)) {
232 nir_ssa_def
*ssa
= &alu
->dest
.dest
.ssa
;
234 assert(ssa
->num_components
== 1);
236 nir_alu_instr
*mul
= nir_alu_instr_create(shader
, nir_op_fmul
);
237 mul
->src
[0].src
= mul
->src
[1].src
= nir_src_for_ssa(ssa
);
238 mul
->src
[1].swizzle
[0] = 1;
240 mul
->dest
.write_mask
= 1;
241 nir_ssa_dest_init(&mul
->instr
, &mul
->dest
.dest
, 1, 32, NULL
);
243 ssa
->num_components
= 2;
245 mul
->dest
.saturate
= alu
->dest
.saturate
;
246 alu
->dest
.saturate
= 0;
248 nir_instr_insert_after(instr
, &mul
->instr
);
250 nir_ssa_def_rewrite_uses_after(ssa
, nir_src_for_ssa(&mul
->dest
.dest
.ssa
), &mul
->instr
);
257 etna_lower_alu(nir_shader
*shader
, bool has_new_transcendentals
)
259 nir_foreach_function(function
, shader
) {
261 etna_lower_alu_impl(function
->impl
, has_new_transcendentals
);