panfrost: Explicitly convert to 32-bit for logic-ops
[mesa.git] / src / gallium / drivers / panfrost / nir / nir_lower_blend.c
1 /*
2 * Copyright (C) 2019 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file
26 *
27 * Implements the fragment pipeline (blending and writeout) in software, to be
28 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
29 * shader variant on typical GPUs. This pass is useful if hardware lacks
30 * fixed-function blending in part or in full.
31 */
32
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "compiler/nir/nir_format_convert.h"
36 #include "nir_lower_blend.h"
37
38 /* Given processed factors, combine them per a blend function */
39
40 static nir_ssa_def *
41 nir_blend_func(
42 nir_builder *b,
43 enum blend_func func,
44 nir_ssa_def *src, nir_ssa_def *dst)
45 {
46 switch (func) {
47 case BLEND_FUNC_ADD:
48 return nir_fadd(b, src, dst);
49 case BLEND_FUNC_SUBTRACT:
50 return nir_fsub(b, src, dst);
51 case BLEND_FUNC_REVERSE_SUBTRACT:
52 return nir_fsub(b, dst, src);
53 case BLEND_FUNC_MIN:
54 return nir_fmin(b, src, dst);
55 case BLEND_FUNC_MAX:
56 return nir_fmax(b, src, dst);
57 }
58
59 unreachable("Invalid blend function");
60 }
61
62 /* Does this blend function multiply by a blend factor? */
63
64 static bool
65 nir_blend_factored(enum blend_func func)
66 {
67 switch (func) {
68 case BLEND_FUNC_ADD:
69 case BLEND_FUNC_SUBTRACT:
70 case BLEND_FUNC_REVERSE_SUBTRACT:
71 return true;
72 default:
73 return false;
74 }
75 }
76
77 /* Compute a src_alpha_saturate factor */
78 static nir_ssa_def *
79 nir_alpha_saturate(
80 nir_builder *b,
81 nir_ssa_def *src, nir_ssa_def *dst,
82 unsigned chan,
83 bool half)
84 {
85 nir_ssa_def *Asrc = nir_channel(b, src, 3);
86 nir_ssa_def *Adst = nir_channel(b, dst, 3);
87 nir_ssa_def *one = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
88 nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89
90 return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91 }
92
93 /* Returns a scalar single factor, unmultiplied */
94
95 static nir_ssa_def *
96 nir_blend_factor_value(
97 nir_builder *b,
98 nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst,
99 unsigned chan,
100 enum blend_factor factor,
101 bool half)
102 {
103 switch (factor) {
104 case BLEND_FACTOR_ZERO:
105 return half ? nir_imm_float16(b, 0.0) : nir_imm_float(b, 0.0);
106 case BLEND_FACTOR_SRC_COLOR:
107 return nir_channel(b, src, chan);
108 case BLEND_FACTOR_DST_COLOR:
109 return nir_channel(b, dst, chan);
110 case BLEND_FACTOR_SRC_ALPHA:
111 return nir_channel(b, src, 3);
112 case BLEND_FACTOR_DST_ALPHA:
113 return nir_channel(b, dst, 3);
114 case BLEND_FACTOR_CONSTANT_COLOR:
115 return nir_channel(b, bconst, chan);
116 case BLEND_FACTOR_CONSTANT_ALPHA:
117 return nir_channel(b, bconst, 3);
118 case BLEND_FACTOR_SRC_ALPHA_SATURATE:
119 return nir_alpha_saturate(b, src, dst, chan, half);
120 }
121
122 unreachable("Invalid blend factor");
123 }
124
125 static nir_ssa_def *
126 nir_blend_factor(
127 nir_builder *b,
128 nir_ssa_def *raw_scalar,
129 nir_ssa_def *src, nir_ssa_def *dst, nir_ssa_def *bconst,
130 unsigned chan,
131 enum blend_factor factor,
132 bool inverted,
133 bool half)
134 {
135 nir_ssa_def *f =
136 nir_blend_factor_value(b, src, dst, bconst, chan, factor, half);
137
138 nir_ssa_def *unity = half ? nir_imm_float16(b, 1.0) : nir_imm_float(b, 1.0);
139
140 if (inverted)
141 f = nir_fsub(b, unity, f);
142
143 return nir_fmul(b, raw_scalar, f);
144 }
145
146 /* Given a colormask, "blend" with the destination */
147
148 static nir_ssa_def *
149 nir_color_mask(
150 nir_builder *b,
151 unsigned mask,
152 nir_ssa_def *src,
153 nir_ssa_def *dst)
154 {
155 nir_ssa_def *masked[4];
156
157 for (unsigned c = 0; c < 4; ++c) {
158 bool enab = (mask & (1 << c));
159 masked[c] = enab ? nir_channel(b, src, c) : nir_channel(b, dst, c);
160 }
161
162 return nir_vec(b, masked, 4);
163 }
164
165 static nir_ssa_def *
166 nir_logicop_func(
167 nir_builder *b,
168 unsigned func,
169 nir_ssa_def *src, nir_ssa_def *dst)
170 {
171 switch (func) {
172 case PIPE_LOGICOP_CLEAR:
173 return nir_imm_ivec4(b, 0, 0, 0, 0);
174 case PIPE_LOGICOP_NOR:
175 return nir_inot(b, nir_ior(b, src, dst));
176 case PIPE_LOGICOP_AND_INVERTED:
177 return nir_iand(b, nir_inot(b, src), dst);
178 case PIPE_LOGICOP_COPY_INVERTED:
179 return nir_inot(b, src);
180 case PIPE_LOGICOP_AND_REVERSE:
181 return nir_iand(b, src, nir_inot(b, dst));
182 case PIPE_LOGICOP_INVERT:
183 return nir_inot(b, dst);
184 case PIPE_LOGICOP_XOR:
185 return nir_ixor(b, src, dst);
186 case PIPE_LOGICOP_NAND:
187 return nir_inot(b, nir_iand(b, src, dst));
188 case PIPE_LOGICOP_AND:
189 return nir_iand(b, src, dst);
190 case PIPE_LOGICOP_EQUIV:
191 return nir_inot(b, nir_ixor(b, src, dst));
192 case PIPE_LOGICOP_NOOP:
193 return dst;
194 case PIPE_LOGICOP_OR_INVERTED:
195 return nir_ior(b, nir_inot(b, src), dst);
196 case PIPE_LOGICOP_COPY:
197 return src;
198 case PIPE_LOGICOP_OR_REVERSE:
199 return nir_ior(b, src, nir_inot(b, dst));
200 case PIPE_LOGICOP_OR:
201 return nir_ior(b, src, dst);
202 case PIPE_LOGICOP_SET:
203 return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
204 }
205
206 unreachable("Invalid logciop function");
207 }
208
209 static nir_ssa_def *
210 nir_blend_logicop(
211 nir_builder *b,
212 nir_lower_blend_options options,
213 nir_ssa_def *src, nir_ssa_def *dst)
214 {
215 const struct util_format_description *format_desc =
216 util_format_description(options.format);
217
218 if (options.half) {
219 src = nir_f2f32(b, src);
220 dst = nir_f2f32(b, dst);
221 }
222
223 assert(src->num_components <= 4);
224 assert(dst->num_components <= 4);
225
226 unsigned bits[4];
227 for (int i = 0; i < 4; ++i)
228 bits[i] = format_desc->channel[i].size;
229
230 src = nir_format_float_to_unorm(b, src, bits);
231 dst = nir_format_float_to_unorm(b, dst, bits);
232
233 nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
234
235 if (bits[0] < 32) {
236 nir_const_value mask[4];
237 for (int i = 0; i < 4; ++i)
238 mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
239
240 out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
241 }
242
243 out = nir_format_unorm_to_float(b, out, bits);
244
245 if (options.half)
246 out = nir_f2f16(b, out);
247
248 return out;
249 }
250
251 /* Given a blend state, the source color, and the destination color,
252 * return the blended color
253 */
254
255 static nir_ssa_def *
256 nir_blend(
257 nir_builder *b,
258 nir_lower_blend_options options,
259 nir_ssa_def *src, nir_ssa_def *dst)
260 {
261 if (options.logicop_enable)
262 return nir_blend_logicop(b, options, src, dst);
263
264 /* Grab the blend constant ahead of time */
265 nir_ssa_def *bconst = nir_load_blend_const_color_rgba(b);
266
267 if (options.half)
268 bconst = nir_f2f16(b, bconst);
269
270 /* We blend per channel and recombine later */
271 nir_ssa_def *channels[4];
272
273 for (unsigned c = 0; c < 4; ++c) {
274 /* Decide properties based on channel */
275 nir_lower_blend_channel chan =
276 (c < 3) ? options.rgb : options.alpha;
277
278 nir_ssa_def *psrc = nir_channel(b, src, c);
279 nir_ssa_def *pdst = nir_channel(b, dst, c);
280
281 if (nir_blend_factored(chan.func)) {
282 psrc = nir_blend_factor(
283 b, psrc,
284 src, dst, bconst, c,
285 chan.src_factor, chan.invert_src_factor, options.half);
286
287 pdst = nir_blend_factor(
288 b, pdst,
289 src, dst, bconst, c,
290 chan.dst_factor, chan.invert_dst_factor, options.half);
291 }
292
293 channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
294 }
295
296 /* Then just recombine with an applied colormask */
297 nir_ssa_def *blended = nir_vec(b, channels, 4);
298 return nir_color_mask(b, options.colormask, blended, dst);
299 }
300
301 static bool
302 nir_is_blend_channel_replace(nir_lower_blend_channel chan)
303 {
304 return
305 (chan.src_factor == BLEND_FACTOR_ZERO) &&
306 (chan.dst_factor == BLEND_FACTOR_ZERO) &&
307 (chan.invert_src_factor && !chan.invert_dst_factor) &&
308 (chan.func == BLEND_FUNC_ADD || chan.func == BLEND_FUNC_SUBTRACT || chan.func == BLEND_FUNC_MAX);
309 }
310
311 static bool
312 nir_is_blend_replace(nir_lower_blend_options options)
313 {
314 return
315 nir_is_blend_channel_replace(options.rgb) &&
316 nir_is_blend_channel_replace(options.alpha);
317 }
318
319 void
320 nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
321 {
322 /* Blend shaders are represented as special fragment shaders */
323 assert(shader->info.stage == MESA_SHADER_FRAGMENT);
324
325 /* Special case replace, since there's nothing to do and we don't want to
326 * degrade intermediate precision (e.g. for non-blendable R32F targets) */
327 if (nir_is_blend_replace(options))
328 return;
329
330 nir_foreach_function(func, shader) {
331 nir_foreach_block(block, func->impl) {
332 nir_foreach_instr_safe(instr, block) {
333 if (instr->type != nir_instr_type_intrinsic)
334 continue;
335
336 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
337 if (intr->intrinsic != nir_intrinsic_store_deref)
338 continue;
339
340 /* TODO: Extending to MRT */
341 nir_variable *var = nir_intrinsic_get_var(intr, 0);
342 if (var->data.location != FRAG_RESULT_COLOR)
343 continue;
344
345 nir_builder b;
346 nir_builder_init(&b, func->impl);
347 b.cursor = nir_before_instr(instr);
348
349 /* Grab the input color */
350 nir_ssa_def *src = nir_ssa_for_src(&b, intr->src[1], 4);
351
352 /* Grab the tilebuffer color - io lowered to load_output */
353 nir_ssa_def *dst = nir_load_var(&b, var);
354
355 /* Blend the two colors per the passed options */
356 nir_ssa_def *blended = nir_blend(&b, options, src, dst);
357
358 /* Write out the final color instead of the input */
359 nir_instr_rewrite_src(instr, &intr->src[1],
360 nir_src_for_ssa(blended));
361
362 }
363 }
364
365 nir_metadata_preserve(func->impl, nir_metadata_block_index |
366 nir_metadata_dominance);
367 }
368 }