gallium/ttn: Convert to using VARYING_SLOT_* / FRAG_RESULT_*.
[mesa.git] / src / gallium / drivers / vc4 / vc4_nir_lower_blend.c
1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * Implements most of the fixed function fragment pipeline in shader code.
26 *
27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28 * or color mask. Instead, you read the current contents of the destination
29 * from the tile buffer after having waited for the scoreboard (which is
30 * handled by vc4_qpu_emit.c), then do math using your output color and that
31 * destination value, and update the output color appropriately.
32 */
33
34 /**
35 * Lowers fixed-function blending to a load of the destination color and a
36 * series of ALU operations before the store of the output.
37 */
38 #include "util/u_format.h"
39 #include "vc4_qir.h"
40 #include "glsl/nir/nir_builder.h"
41 #include "nir/tgsi_to_nir.h"
42 #include "vc4_context.h"
43
44 /** Emits a load of the previous fragment color from the tile buffer. */
45 static nir_ssa_def *
46 vc4_nir_get_dst_color(nir_builder *b)
47 {
48 nir_intrinsic_instr *load =
49 nir_intrinsic_instr_create(b->shader,
50 nir_intrinsic_load_input);
51 load->num_components = 1;
52 load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
53 nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
54 nir_builder_instr_insert(b, &load->instr);
55 return &load->dest.ssa;
56 }
57
58 static nir_ssa_def *
59 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
60 {
61 nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
62 nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
63 nir_ssa_def *high = nir_fpow(b,
64 nir_fmul(b,
65 nir_fadd(b, srgb,
66 nir_imm_float(b, 0.055)),
67 nir_imm_float(b, 1.0 / 1.055)),
68 nir_imm_float(b, 2.4));
69
70 return nir_bcsel(b, is_low, low, high);
71 }
72
73 static nir_ssa_def *
74 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
75 {
76 nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
77 nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
78 nir_ssa_def *high = nir_fsub(b,
79 nir_fmul(b,
80 nir_imm_float(b, 1.055),
81 nir_fpow(b,
82 linear,
83 nir_imm_float(b, 0.41666))),
84 nir_imm_float(b, 0.055));
85
86 return nir_bcsel(b, is_low, low, high);
87 }
88
89 static nir_ssa_def *
90 vc4_blend_channel(nir_builder *b,
91 nir_ssa_def **src,
92 nir_ssa_def **dst,
93 unsigned factor,
94 int channel)
95 {
96 switch(factor) {
97 case PIPE_BLENDFACTOR_ONE:
98 return nir_imm_float(b, 1.0);
99 case PIPE_BLENDFACTOR_SRC_COLOR:
100 return src[channel];
101 case PIPE_BLENDFACTOR_SRC_ALPHA:
102 return src[3];
103 case PIPE_BLENDFACTOR_DST_ALPHA:
104 return dst[3];
105 case PIPE_BLENDFACTOR_DST_COLOR:
106 return dst[channel];
107 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
108 if (channel != 3) {
109 return nir_fmin(b,
110 src[3],
111 nir_fsub(b,
112 nir_imm_float(b, 1.0),
113 dst[3]));
114 } else {
115 return nir_imm_float(b, 1.0);
116 }
117 case PIPE_BLENDFACTOR_CONST_COLOR:
118 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
119 case PIPE_BLENDFACTOR_CONST_ALPHA:
120 return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
121 case PIPE_BLENDFACTOR_ZERO:
122 return nir_imm_float(b, 0.0);
123 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
124 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
125 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
126 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
127 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
128 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
129 case PIPE_BLENDFACTOR_INV_DST_COLOR:
130 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
131 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
132 return nir_fsub(b, nir_imm_float(b, 1.0),
133 vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
134 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
135 return nir_fsub(b, nir_imm_float(b, 1.0),
136 vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
137
138 default:
139 case PIPE_BLENDFACTOR_SRC1_COLOR:
140 case PIPE_BLENDFACTOR_SRC1_ALPHA:
141 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
142 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
143 /* Unsupported. */
144 fprintf(stderr, "Unknown blend factor %d\n", factor);
145 return nir_imm_float(b, 1.0);
146 }
147 }
148
149 static nir_ssa_def *
150 vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
151 unsigned func)
152 {
153 switch (func) {
154 case PIPE_BLEND_ADD:
155 return nir_fadd(b, src, dst);
156 case PIPE_BLEND_SUBTRACT:
157 return nir_fsub(b, src, dst);
158 case PIPE_BLEND_REVERSE_SUBTRACT:
159 return nir_fsub(b, dst, src);
160 case PIPE_BLEND_MIN:
161 return nir_fmin(b, src, dst);
162 case PIPE_BLEND_MAX:
163 return nir_fmax(b, src, dst);
164
165 default:
166 /* Unsupported. */
167 fprintf(stderr, "Unknown blend func %d\n", func);
168 return src;
169
170 }
171 }
172
173 static void
174 vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
175 nir_ssa_def **src_color, nir_ssa_def **dst_color)
176 {
177 struct pipe_rt_blend_state *blend = &c->fs_key->blend;
178
179 if (!blend->blend_enable) {
180 for (int i = 0; i < 4; i++)
181 result[i] = src_color[i];
182 return;
183 }
184
185 /* Clamp the src color to [0, 1]. Dest is already clamped. */
186 for (int i = 0; i < 4; i++)
187 src_color[i] = nir_fsat(b, src_color[i]);
188
189 nir_ssa_def *src_blend[4], *dst_blend[4];
190 for (int i = 0; i < 4; i++) {
191 int src_factor = ((i != 3) ? blend->rgb_src_factor :
192 blend->alpha_src_factor);
193 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
194 blend->alpha_dst_factor);
195 src_blend[i] = nir_fmul(b, src_color[i],
196 vc4_blend_channel(b,
197 src_color, dst_color,
198 src_factor, i));
199 dst_blend[i] = nir_fmul(b, dst_color[i],
200 vc4_blend_channel(b,
201 src_color, dst_color,
202 dst_factor, i));
203 }
204
205 for (int i = 0; i < 4; i++) {
206 result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
207 ((i != 3) ? blend->rgb_func :
208 blend->alpha_func));
209 }
210 }
211
212 static nir_ssa_def *
213 vc4_logicop(nir_builder *b, int logicop_func,
214 nir_ssa_def *src, nir_ssa_def *dst)
215 {
216 switch (logicop_func) {
217 case PIPE_LOGICOP_CLEAR:
218 return nir_imm_int(b, 0);
219 case PIPE_LOGICOP_NOR:
220 return nir_inot(b, nir_ior(b, src, dst));
221 case PIPE_LOGICOP_AND_INVERTED:
222 return nir_iand(b, nir_inot(b, src), dst);
223 case PIPE_LOGICOP_COPY_INVERTED:
224 return nir_inot(b, src);
225 case PIPE_LOGICOP_AND_REVERSE:
226 return nir_iand(b, src, nir_inot(b, dst));
227 case PIPE_LOGICOP_INVERT:
228 return nir_inot(b, dst);
229 case PIPE_LOGICOP_XOR:
230 return nir_ixor(b, src, dst);
231 case PIPE_LOGICOP_NAND:
232 return nir_inot(b, nir_iand(b, src, dst));
233 case PIPE_LOGICOP_AND:
234 return nir_iand(b, src, dst);
235 case PIPE_LOGICOP_EQUIV:
236 return nir_inot(b, nir_ixor(b, src, dst));
237 case PIPE_LOGICOP_NOOP:
238 return dst;
239 case PIPE_LOGICOP_OR_INVERTED:
240 return nir_ior(b, nir_inot(b, src), dst);
241 case PIPE_LOGICOP_OR_REVERSE:
242 return nir_ior(b, src, nir_inot(b, dst));
243 case PIPE_LOGICOP_OR:
244 return nir_ior(b, src, dst);
245 case PIPE_LOGICOP_SET:
246 return nir_imm_int(b, ~0);
247 default:
248 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
249 /* FALLTHROUGH */
250 case PIPE_LOGICOP_COPY:
251 return src;
252 }
253 }
254
255 static nir_ssa_def *
256 vc4_nir_pipe_compare_func(nir_builder *b, int func,
257 nir_ssa_def *src0, nir_ssa_def *src1)
258 {
259 switch (func) {
260 default:
261 fprintf(stderr, "Unknown compare func %d\n", func);
262 /* FALLTHROUGH */
263 case PIPE_FUNC_NEVER:
264 return nir_imm_int(b, 0);
265 case PIPE_FUNC_ALWAYS:
266 return nir_imm_int(b, ~0);
267 case PIPE_FUNC_EQUAL:
268 return nir_feq(b, src0, src1);
269 case PIPE_FUNC_NOTEQUAL:
270 return nir_fne(b, src0, src1);
271 case PIPE_FUNC_GREATER:
272 return nir_flt(b, src1, src0);
273 case PIPE_FUNC_GEQUAL:
274 return nir_fge(b, src0, src1);
275 case PIPE_FUNC_LESS:
276 return nir_flt(b, src0, src1);
277 case PIPE_FUNC_LEQUAL:
278 return nir_fge(b, src1, src0);
279 }
280 }
281
282 static void
283 vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
284 nir_ssa_def *alpha)
285 {
286 if (!c->fs_key->alpha_test)
287 return;
288
289 nir_ssa_def *alpha_ref =
290 vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
291 nir_ssa_def *condition =
292 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
293 alpha, alpha_ref);
294
295 nir_intrinsic_instr *discard =
296 nir_intrinsic_instr_create(b->shader,
297 nir_intrinsic_discard_if);
298 discard->num_components = 1;
299 discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
300 nir_builder_instr_insert(b, &discard->instr);
301 }
302
303 static void
304 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
305 nir_intrinsic_instr *intr)
306 {
307 enum pipe_format color_format = c->fs_key->color_format;
308 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
309
310 /* Pull out the float src/dst color components. */
311 nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
312 nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
313 nir_ssa_def *src_color[4], *unpacked_dst_color[4];
314 for (unsigned i = 0; i < 4; i++) {
315 src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
316 unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
317 }
318
319 /* Unswizzle the destination color. */
320 nir_ssa_def *dst_color[4];
321 for (unsigned i = 0; i < 4; i++) {
322 dst_color[i] = vc4_nir_get_swizzled_channel(b,
323 unpacked_dst_color,
324 format_swiz[i]);
325 }
326
327 vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
328
329 /* Turn dst color to linear. */
330 if (util_format_is_srgb(color_format)) {
331 for (int i = 0; i < 3; i++)
332 dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
333 }
334
335 nir_ssa_def *blend_color[4];
336 vc4_do_blending(c, b, blend_color, src_color, dst_color);
337
338 /* sRGB encode the output color */
339 if (util_format_is_srgb(color_format)) {
340 for (int i = 0; i < 3; i++)
341 blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
342 }
343
344 nir_ssa_def *swizzled_outputs[4];
345 for (int i = 0; i < 4; i++) {
346 swizzled_outputs[i] =
347 vc4_nir_get_swizzled_channel(b, blend_color,
348 format_swiz[i]);
349 }
350
351 nir_ssa_def *packed_color =
352 nir_pack_unorm_4x8(b,
353 nir_vec4(b,
354 swizzled_outputs[0],
355 swizzled_outputs[1],
356 swizzled_outputs[2],
357 swizzled_outputs[3]));
358
359 packed_color = vc4_logicop(b, c->fs_key->logicop_func,
360 packed_color, packed_dst_color);
361
362 /* If the bit isn't set in the color mask, then just return the
363 * original dst color, instead.
364 */
365 uint32_t colormask = 0xffffffff;
366 for (int i = 0; i < 4; i++) {
367 if (format_swiz[i] < 4 &&
368 !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
369 colormask &= ~(0xff << (i * 8));
370 }
371 }
372 packed_color = nir_ior(b,
373 nir_iand(b, packed_color,
374 nir_imm_int(b, colormask)),
375 nir_iand(b, packed_dst_color,
376 nir_imm_int(b, ~colormask)));
377
378 /* Turn the old vec4 output into a store of the packed color. */
379 nir_instr_rewrite_src(&intr->instr, &intr->src[0],
380 nir_src_for_ssa(packed_color));
381 intr->num_components = 1;
382 }
383
384 static bool
385 vc4_nir_lower_blend_block(nir_block *block, void *state)
386 {
387 struct vc4_compile *c = state;
388
389 nir_foreach_instr(block, instr) {
390 if (instr->type != nir_instr_type_intrinsic)
391 continue;
392 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
393 if (intr->intrinsic != nir_intrinsic_store_output)
394 continue;
395
396 nir_variable *output_var = NULL;
397 foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
398 if (var->data.driver_location == intr->const_index[0]) {
399 output_var = var;
400 break;
401 }
402 }
403 assert(output_var);
404 unsigned semantic_name, semantic_index;
405
406 varying_slot_to_tgsi_semantic(output_var->data.location,
407 &semantic_name, &semantic_index);
408
409 if (semantic_name != TGSI_SEMANTIC_COLOR)
410 continue;
411
412 nir_function_impl *impl =
413 nir_cf_node_get_function(&block->cf_node);
414 nir_builder b;
415 nir_builder_init(&b, impl);
416 b.cursor = nir_before_instr(&intr->instr);
417 vc4_nir_lower_blend_instr(c, &b, intr);
418 }
419 return true;
420 }
421
422 void
423 vc4_nir_lower_blend(struct vc4_compile *c)
424 {
425 nir_foreach_overload(c->s, overload) {
426 if (overload->impl) {
427 nir_foreach_block(overload->impl,
428 vc4_nir_lower_blend_block, c);
429
430 nir_metadata_preserve(overload->impl,
431 nir_metadata_block_index |
432 nir_metadata_dominance);
433 }
434 }
435 }