/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * Implements most of the fixed function fragment pipeline in shader code.
 *
 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
 * or color mask.  Instead, you read the current contents of the destination
 * from the tile buffer after having waited for the scoreboard (which is
 * handled by vc4_qpu_emit.c), then do math using your output color and that
 * destination value, and update the output color appropriately.
 */

/**
 * Lowers fixed-function blending to a load of the destination color and a
 * series of ALU operations before the store of the output.
 */
38 #include "util/u_format.h"
40 #include "glsl/nir/nir_builder.h"
41 #include "nir/tgsi_to_nir.h"
42 #include "vc4_context.h"
44 /** Emits a load of the previous fragment color from the tile buffer. */
46 vc4_nir_get_dst_color(nir_builder
*b
)
48 nir_intrinsic_instr
*load
=
49 nir_intrinsic_instr_create(b
->shader
,
50 nir_intrinsic_load_input
);
51 load
->num_components
= 1;
52 load
->const_index
[0] = VC4_NIR_TLB_COLOR_READ_INPUT
;
53 nir_ssa_dest_init(&load
->instr
, &load
->dest
, 1, NULL
);
54 nir_builder_instr_insert(b
, &load
->instr
);
55 return &load
->dest
.ssa
;
59 vc4_nir_srgb_decode(nir_builder
*b
, nir_ssa_def
*srgb
)
61 nir_ssa_def
*is_low
= nir_flt(b
, srgb
, nir_imm_float(b
, 0.04045));
62 nir_ssa_def
*low
= nir_fmul(b
, srgb
, nir_imm_float(b
, 1.0 / 12.92));
63 nir_ssa_def
*high
= nir_fpow(b
,
66 nir_imm_float(b
, 0.055)),
67 nir_imm_float(b
, 1.0 / 1.055)),
68 nir_imm_float(b
, 2.4));
70 return nir_bcsel(b
, is_low
, low
, high
);
74 vc4_nir_srgb_encode(nir_builder
*b
, nir_ssa_def
*linear
)
76 nir_ssa_def
*is_low
= nir_flt(b
, linear
, nir_imm_float(b
, 0.0031308));
77 nir_ssa_def
*low
= nir_fmul(b
, linear
, nir_imm_float(b
, 12.92));
78 nir_ssa_def
*high
= nir_fsub(b
,
80 nir_imm_float(b
, 1.055),
83 nir_imm_float(b
, 0.41666))),
84 nir_imm_float(b
, 0.055));
86 return nir_bcsel(b
, is_low
, low
, high
);
/*
 * vc4_blend_channel: returns the blend factor value for one channel as a
 * NIR SSA value, selected by a PIPE_BLENDFACTOR_* case.
 *
 * What the visible lines establish:
 *   ONE / ZERO               -> immediate 1.0 / 0.0
 *   CONST_COLOR              -> state uniform QUNIFORM_BLEND_CONST_COLOR_X + channel
 *   CONST_ALPHA              -> state uniform QUNIFORM_BLEND_CONST_COLOR_W
 *   INV_SRC_COLOR/INV_SRC_ALPHA -> 1.0 - src[channel] / 1.0 - src[3]
 *   INV_DST_COLOR/INV_DST_ALPHA -> 1.0 - dst[channel] / 1.0 - dst[3]
 *   INV_CONST_COLOR/_ALPHA   -> 1.0 - the matching blend-constant uniform
 *   SRC1_* / INV_SRC1_*      -> followed by the "Unknown blend factor"
 *                               message and an immediate 1.0
 *
 * NOTE(review): this listing has lost lines.  The rest of the parameter
 * list, the switch header, the bodies of SRC_COLOR, SRC_ALPHA, DST_ALPHA and
 * DST_COLOR, and most of the SRC_ALPHA_SATURATE body are not visible here;
 * restore them from the real file before relying on this block.
 */
90 vc4_blend_channel(nir_builder
*b
,
97 case PIPE_BLENDFACTOR_ONE
:
98 return nir_imm_float(b
, 1.0);
99 case PIPE_BLENDFACTOR_SRC_COLOR
:
101 case PIPE_BLENDFACTOR_SRC_ALPHA
:
103 case PIPE_BLENDFACTOR_DST_ALPHA
:
105 case PIPE_BLENDFACTOR_DST_COLOR
:
107 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
112 nir_imm_float(b
, 1.0),
115 return nir_imm_float(b
, 1.0);
117 case PIPE_BLENDFACTOR_CONST_COLOR
:
118 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_X
+ channel
);
119 case PIPE_BLENDFACTOR_CONST_ALPHA
:
120 return vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_W
);
121 case PIPE_BLENDFACTOR_ZERO
:
122 return nir_imm_float(b
, 0.0);
123 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
124 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[channel
]);
125 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
126 return nir_fsub(b
, nir_imm_float(b
, 1.0), src
[3]);
127 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
128 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[3]);
129 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
130 return nir_fsub(b
, nir_imm_float(b
, 1.0), dst
[channel
]);
131 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
132 return nir_fsub(b
, nir_imm_float(b
, 1.0),
133 vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_X
+ channel
));
134 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
135 return nir_fsub(b
, nir_imm_float(b
, 1.0),
136 vc4_nir_get_state_uniform(b
, QUNIFORM_BLEND_CONST_COLOR_W
));
139 case PIPE_BLENDFACTOR_SRC1_COLOR
:
140 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
141 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
142 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
144 fprintf(stderr
, "Unknown blend factor %d\n", factor
);
145 return nir_imm_float(b
, 1.0);
/*
 * vc4_blend_func: combines an already-weighted source term and destination
 * term with the selected blend equation and returns the NIR SSA result.
 *
 * Visible here: a src + dst result, PIPE_BLEND_SUBTRACT -> src - dst,
 * PIPE_BLEND_REVERSE_SUBTRACT -> dst - src, an fmin(src, dst) result, an
 * fmax(src, dst) result, and an "Unknown blend func" diagnostic on stderr.
 *
 * NOTE(review): this listing has lost lines.  The final parameter, the
 * switch header, the case labels that precede the fadd / fmin / fmax returns
 * (presumably the add / min / max equations -- confirm), and whatever is
 * returned after the diagnostic are not visible here.
 */
150 vc4_blend_func(nir_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*dst
,
155 return nir_fadd(b
, src
, dst
);
156 case PIPE_BLEND_SUBTRACT
:
157 return nir_fsub(b
, src
, dst
);
158 case PIPE_BLEND_REVERSE_SUBTRACT
:
159 return nir_fsub(b
, dst
, src
);
161 return nir_fmin(b
, src
, dst
);
163 return nir_fmax(b
, src
, dst
);
167 fprintf(stderr
, "Unknown blend func %d\n", func
);
/*
 * vc4_do_blending: fills result[0..3] with the blended color for the blend
 * state found at c->fs_key->blend.
 *
 * Steps visible in this block:
 *   1. With blend_enable clear, result[] is a straight copy of src_color[].
 *   2. Otherwise each src_color[] entry is overwritten with its nir_fsat()
 *      (note: this modifies the caller's array).
 *   3. Per channel, the rgb_* factor is used for channels 0-2 and the
 *      alpha_* factor for channel 3; src_blend[] / dst_blend[] are products
 *      of the color channel with a factor term.
 *   4. result[i] comes from vc4_blend_func() on src_blend[i] / dst_blend[i],
 *      with rgb_func selected for channels 0-2.
 *
 * NOTE(review): this listing has lost lines.  The early-return after the
 * copy loop, the calls that produce the factor operands of the two nir_fmul
 * calls, and the channel-3 operand of the blend-func selection are not
 * visible here; restore them from the real file.
 */
174 vc4_do_blending(struct vc4_compile
*c
, nir_builder
*b
, nir_ssa_def
**result
,
175 nir_ssa_def
**src_color
, nir_ssa_def
**dst_color
)
177 struct pipe_rt_blend_state
*blend
= &c
->fs_key
->blend
;
179 if (!blend
->blend_enable
) {
180 for (int i
= 0; i
< 4; i
++)
181 result
[i
] = src_color
[i
];
185 /* Clamp the src color to [0, 1]. Dest is already clamped. */
186 for (int i
= 0; i
< 4; i
++)
187 src_color
[i
] = nir_fsat(b
, src_color
[i
]);
189 nir_ssa_def
*src_blend
[4], *dst_blend
[4];
190 for (int i
= 0; i
< 4; i
++) {
191 int src_factor
= ((i
!= 3) ? blend
->rgb_src_factor
:
192 blend
->alpha_src_factor
);
193 int dst_factor
= ((i
!= 3) ? blend
->rgb_dst_factor
:
194 blend
->alpha_dst_factor
);
195 src_blend
[i
] = nir_fmul(b
, src_color
[i
],
197 src_color
, dst_color
,
199 dst_blend
[i
] = nir_fmul(b
, dst_color
[i
],
201 src_color
, dst_color
,
205 for (int i
= 0; i
< 4; i
++) {
206 result
[i
] = vc4_blend_func(b
, src_blend
[i
], dst_blend
[i
],
207 ((i
!= 3) ? blend
->rgb_func
:
213 vc4_logicop(nir_builder
*b
, int logicop_func
,
214 nir_ssa_def
*src
, nir_ssa_def
*dst
)
216 switch (logicop_func
) {
217 case PIPE_LOGICOP_CLEAR
:
218 return nir_imm_int(b
, 0);
219 case PIPE_LOGICOP_NOR
:
220 return nir_inot(b
, nir_ior(b
, src
, dst
));
221 case PIPE_LOGICOP_AND_INVERTED
:
222 return nir_iand(b
, nir_inot(b
, src
), dst
);
223 case PIPE_LOGICOP_COPY_INVERTED
:
224 return nir_inot(b
, src
);
225 case PIPE_LOGICOP_AND_REVERSE
:
226 return nir_iand(b
, src
, nir_inot(b
, dst
));
227 case PIPE_LOGICOP_INVERT
:
228 return nir_inot(b
, dst
);
229 case PIPE_LOGICOP_XOR
:
230 return nir_ixor(b
, src
, dst
);
231 case PIPE_LOGICOP_NAND
:
232 return nir_inot(b
, nir_iand(b
, src
, dst
));
233 case PIPE_LOGICOP_AND
:
234 return nir_iand(b
, src
, dst
);
235 case PIPE_LOGICOP_EQUIV
:
236 return nir_inot(b
, nir_ixor(b
, src
, dst
));
237 case PIPE_LOGICOP_NOOP
:
239 case PIPE_LOGICOP_OR_INVERTED
:
240 return nir_ior(b
, nir_inot(b
, src
), dst
);
241 case PIPE_LOGICOP_OR_REVERSE
:
242 return nir_ior(b
, src
, nir_inot(b
, dst
));
243 case PIPE_LOGICOP_OR
:
244 return nir_ior(b
, src
, dst
);
245 case PIPE_LOGICOP_SET
:
246 return nir_imm_int(b
, ~0);
248 fprintf(stderr
, "Unknown logic op %d\n", logicop_func
);
250 case PIPE_LOGICOP_COPY
:
256 vc4_nir_pipe_compare_func(nir_builder
*b
, int func
,
257 nir_ssa_def
*src0
, nir_ssa_def
*src1
)
261 fprintf(stderr
, "Unknown compare func %d\n", func
);
263 case PIPE_FUNC_NEVER
:
264 return nir_imm_int(b
, 0);
265 case PIPE_FUNC_ALWAYS
:
266 return nir_imm_int(b
, ~0);
267 case PIPE_FUNC_EQUAL
:
268 return nir_feq(b
, src0
, src1
);
269 case PIPE_FUNC_NOTEQUAL
:
270 return nir_fne(b
, src0
, src1
);
271 case PIPE_FUNC_GREATER
:
272 return nir_flt(b
, src1
, src0
);
273 case PIPE_FUNC_GEQUAL
:
274 return nir_fge(b
, src0
, src1
);
276 return nir_flt(b
, src0
, src1
);
277 case PIPE_FUNC_LEQUAL
:
278 return nir_fge(b
, src1
, src0
);
283 vc4_nir_emit_alpha_test_discard(struct vc4_compile
*c
, nir_builder
*b
,
286 if (!c
->fs_key
->alpha_test
)
289 nir_ssa_def
*alpha_ref
=
290 vc4_nir_get_state_uniform(b
, QUNIFORM_ALPHA_REF
);
291 nir_ssa_def
*condition
=
292 vc4_nir_pipe_compare_func(b
, c
->fs_key
->alpha_test_func
,
295 nir_intrinsic_instr
*discard
=
296 nir_intrinsic_instr_create(b
->shader
,
297 nir_intrinsic_discard_if
);
298 discard
->num_components
= 1;
299 discard
->src
[0] = nir_src_for_ssa(nir_inot(b
, condition
));
300 nir_builder_instr_insert(b
, &discard
->instr
);
/*
 * vc4_nir_lower_blend_instr: rewrites one color store so that it writes a
 * single packed value that already has the fixed-function fragment
 * operations applied.
 *
 * Order of operations visible in this block:
 *   1. Load the packed destination color and unpack it with
 *      nir_unpack_unorm_4x8; split both it and the store's source
 *      (intr->src[0]) into four scalar channels.
 *   2. Build dst_color[] through vc4_nir_get_swizzled_channel().
 *   3. Emit the alpha-test discard using src_color[3].
 *   4. For sRGB formats, decode dst_color[0..2] to linear.
 *   5. Blend via vc4_do_blending() into blend_color[].
 *   6. For sRGB formats, encode blend_color[0..2] again.
 *   7. Swizzle the outputs and pack them with nir_pack_unorm_4x8.
 *   8. Apply the logic op against the packed destination color.
 *   9. Build a byte mask from the color mask and format swizzle, then merge
 *      packed_color and packed_dst_color with it.
 *  10. Rewrite the store's source to the packed value and set
 *      num_components to 1.
 *
 * NOTE(review): this listing has lost lines.  The remaining arguments of
 * both vc4_nir_get_swizzled_channel() calls, the vector built for
 * nir_pack_unorm_4x8, several closing braces, and the terminator of the
 * color-mask comment are not visible here; restore them from the real file.
 */
304 vc4_nir_lower_blend_instr(struct vc4_compile
*c
, nir_builder
*b
,
305 nir_intrinsic_instr
*intr
)
307 enum pipe_format color_format
= c
->fs_key
->color_format
;
308 const uint8_t *format_swiz
= vc4_get_format_swizzle(color_format
);
310 /* Pull out the float src/dst color components. */
311 nir_ssa_def
*packed_dst_color
= vc4_nir_get_dst_color(b
);
312 nir_ssa_def
*dst_vec4
= nir_unpack_unorm_4x8(b
, packed_dst_color
);
313 nir_ssa_def
*src_color
[4], *unpacked_dst_color
[4];
314 for (unsigned i
= 0; i
< 4; i
++) {
315 src_color
[i
] = nir_swizzle(b
, intr
->src
[0].ssa
, &i
, 1, false);
316 unpacked_dst_color
[i
] = nir_swizzle(b
, dst_vec4
, &i
, 1, false);
319 /* Unswizzle the destination color. */
320 nir_ssa_def
*dst_color
[4];
321 for (unsigned i
= 0; i
< 4; i
++) {
322 dst_color
[i
] = vc4_nir_get_swizzled_channel(b
,
327 vc4_nir_emit_alpha_test_discard(c
, b
, src_color
[3]);
329 /* Turn dst color to linear. */
330 if (util_format_is_srgb(color_format
)) {
331 for (int i
= 0; i
< 3; i
++)
332 dst_color
[i
] = vc4_nir_srgb_decode(b
, dst_color
[i
]);
335 nir_ssa_def
*blend_color
[4];
336 vc4_do_blending(c
, b
, blend_color
, src_color
, dst_color
);
338 /* sRGB encode the output color */
339 if (util_format_is_srgb(color_format
)) {
340 for (int i
= 0; i
< 3; i
++)
341 blend_color
[i
] = vc4_nir_srgb_encode(b
, blend_color
[i
]);
344 nir_ssa_def
*swizzled_outputs
[4];
345 for (int i
= 0; i
< 4; i
++) {
346 swizzled_outputs
[i
] =
347 vc4_nir_get_swizzled_channel(b
, blend_color
,
351 nir_ssa_def
*packed_color
=
352 nir_pack_unorm_4x8(b
,
357 swizzled_outputs
[3]));
359 packed_color
= vc4_logicop(b
, c
->fs_key
->logicop_func
,
360 packed_color
, packed_dst_color
);
362 /* If the bit isn't set in the color mask, then just return the
363 * original dst color, instead.
365 uint32_t colormask
= 0xffffffff;
366 for (int i
= 0; i
< 4; i
++) {
367 if (format_swiz
[i
] < 4 &&
368 !(c
->fs_key
->blend
.colormask
& (1 << format_swiz
[i
]))) {
369 colormask
&= ~(0xff << (i
* 8));
372 packed_color
= nir_ior(b
,
373 nir_iand(b
, packed_color
,
374 nir_imm_int(b
, colormask
)),
375 nir_iand(b
, packed_dst_color
,
376 nir_imm_int(b
, ~colormask
)));
378 /* Turn the old vec4 output into a store of the packed color. */
379 nir_instr_rewrite_src(&intr
->instr
, &intr
->src
[0],
380 nir_src_for_ssa(packed_color
));
381 intr
->num_components
= 1;
/*
 * vc4_nir_lower_blend_block: per-block callback (handed to
 * nir_foreach_block by vc4_nir_lower_blend) that finds color output stores
 * and lowers each one with vc4_nir_lower_blend_instr().
 *
 * "state" is the struct vc4_compile being processed.  For every instruction
 * in the block the visible code:
 *   - filters on intrinsic instructions of kind nir_intrinsic_store_output;
 *   - searches c->s->outputs for the variable whose driver_location equals
 *     the store's const_index[0];
 *   - maps that variable's location to a TGSI semantic and filters on
 *     TGSI_SEMANTIC_COLOR;
 *   - positions a builder just before the store and runs the lowering.
 *
 * NOTE(review): this listing has lost lines.  The statements taken when a
 * filter fails, the assignment of output_var inside the search loop, the
 * declaration of the builder "b", and the function's return statement are
 * not visible here; restore them from the real file.
 */
385 vc4_nir_lower_blend_block(nir_block
*block
, void *state
)
387 struct vc4_compile
*c
= state
;
389 nir_foreach_instr(block
, instr
) {
390 if (instr
->type
!= nir_instr_type_intrinsic
)
392 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
393 if (intr
->intrinsic
!= nir_intrinsic_store_output
)
396 nir_variable
*output_var
= NULL
;
397 foreach_list_typed(nir_variable
, var
, node
, &c
->s
->outputs
) {
398 if (var
->data
.driver_location
== intr
->const_index
[0]) {
404 unsigned semantic_name
, semantic_index
;
406 varying_slot_to_tgsi_semantic(output_var
->data
.location
,
407 &semantic_name
, &semantic_index
);
409 if (semantic_name
!= TGSI_SEMANTIC_COLOR
)
412 nir_function_impl
*impl
=
413 nir_cf_node_get_function(&block
->cf_node
);
415 nir_builder_init(&b
, impl
);
416 b
.cursor
= nir_before_instr(&intr
->instr
);
417 vc4_nir_lower_blend_instr(c
, &b
, intr
);
423 vc4_nir_lower_blend(struct vc4_compile
*c
)
425 nir_foreach_overload(c
->s
, overload
) {
426 if (overload
->impl
) {
427 nir_foreach_block(overload
->impl
,
428 vc4_nir_lower_blend_block
, c
);
430 nir_metadata_preserve(overload
->impl
,
431 nir_metadata_block_index
|
432 nir_metadata_dominance
);