c85b39bcaa343691b4c83737a77f9fb562988e02
[mesa.git] / src / compiler / glsl / lower_blend_equation_advanced.cpp
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "ir.h"
25 #include "ir_builder.h"
26 #include "ir_optimization.h"
27 #include "ir_hierarchical_visitor.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_statevars.h"
30 #include "util/bitscan.h"
31 #include "builtin_functions.h"
32 #include "main/mtypes.h"
33
34 using namespace ir_builder;
35
/* Shorthand for float constants with 1 or 3 components.  Both macros
 * allocate from a variable named mem_ctx, which must be in scope at the
 * point of use.
 */
#define imm1(x) (new(mem_ctx) ir_constant(static_cast<float>(x), 1))
#define imm3(x) (new(mem_ctx) ir_constant(static_cast<float>(x), 3))
38
39 static ir_rvalue *
40 blend_multiply(ir_variable *src, ir_variable *dst)
41 {
42 /* f(Cs,Cd) = Cs*Cd */
43 return mul(src, dst);
44 }
45
46 static ir_rvalue *
47 blend_screen(ir_variable *src, ir_variable *dst)
48 {
49 /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
50 return sub(add(src, dst), mul(src, dst));
51 }
52
53 static ir_rvalue *
54 blend_overlay(ir_variable *src, ir_variable *dst)
55 {
56 void *mem_ctx = ralloc_parent(src);
57
58 /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
59 * 1-2*(1-Cs)*(1-Cd), otherwise
60 */
61 ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
62 ir_rvalue *rule_2 =
63 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
64 return csel(lequal(dst, imm3(0.5f)), rule_1, rule_2);
65 }
66
67 static ir_rvalue *
68 blend_darken(ir_variable *src, ir_variable *dst)
69 {
70 /* f(Cs,Cd) = min(Cs,Cd) */
71 return min2(src, dst);
72 }
73
74 static ir_rvalue *
75 blend_lighten(ir_variable *src, ir_variable *dst)
76 {
77 /* f(Cs,Cd) = max(Cs,Cd) */
78 return max2(src, dst);
79 }
80
81 static ir_rvalue *
82 blend_colordodge(ir_variable *src, ir_variable *dst)
83 {
84 void *mem_ctx = ralloc_parent(src);
85
86 /* f(Cs,Cd) =
87 * 0, if Cd <= 0
88 * min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
89 * 1, if Cd > 0 and Cs >= 1
90 */
91 return csel(lequal(dst, imm3(0)), imm3(0),
92 csel(gequal(src, imm3(1)), imm3(1),
93 min2(imm3(1), div(dst, sub(imm3(1), src)))));
94 }
95
96 static ir_rvalue *
97 blend_colorburn(ir_variable *src, ir_variable *dst)
98 {
99 void *mem_ctx = ralloc_parent(src);
100
101 /* f(Cs,Cd) =
102 * 1, if Cd >= 1
103 * 1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
104 * 0, if Cd < 1 and Cs <= 0
105 */
106 return csel(gequal(dst, imm3(1)), imm3(1),
107 csel(lequal(src, imm3(0)), imm3(0),
108 sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst), src)))));
109 }
110
111 static ir_rvalue *
112 blend_hardlight(ir_variable *src, ir_variable *dst)
113 {
114 void *mem_ctx = ralloc_parent(src);
115
116 /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
117 * 1-2*(1-Cs)*(1-Cd), otherwise
118 */
119 ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
120 ir_rvalue *rule_2 =
121 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
122 return csel(lequal(src, imm3(0.5f)), rule_1, rule_2);
123 }
124
125 static ir_rvalue *
126 blend_softlight(ir_variable *src, ir_variable *dst)
127 {
128 void *mem_ctx = ralloc_parent(src);
129
130 /* f(Cs,Cd) =
131 * Cd-(1-2*Cs)*Cd*(1-Cd),
132 * if Cs <= 0.5
133 * Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
134 * if Cs > 0.5 and Cd <= 0.25
135 * Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
136 * if Cs > 0.5 and Cd > 0.25
137 *
138 * We can simplify this to
139 *
140 * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
141 * g(Cs,Cd) = Cd*Cd-Cd if Cs <= 0.5
142 * Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
143 * sqrt(Cd)-Cd, otherwise
144 */
145 ir_rvalue *factor_1 = mul(dst, sub(imm3(1), dst));
146 ir_rvalue *factor_2 =
147 mul(dst, add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3)));
148 ir_rvalue *factor_3 = sub(sqrt(dst), dst);
149 ir_rvalue *factor = csel(lequal(src, imm3(0.5f)), factor_1,
150 csel(lequal(dst, imm3(0.25f)),
151 factor_2, factor_3));
152 return add(dst, mul(sub(mul(imm3(2), src), imm3(1)), factor));
153 }
154
155 static ir_rvalue *
156 blend_difference(ir_variable *src, ir_variable *dst)
157 {
158 return abs(sub(dst, src));
159 }
160
161 static ir_rvalue *
162 blend_exclusion(ir_variable *src, ir_variable *dst)
163 {
164 void *mem_ctx = ralloc_parent(src);
165
166 return add(src, sub(dst, mul(imm3(2), mul(src, dst))));
167 }
168
169 /* Return the minimum of a vec3's components */
170 static ir_rvalue *
171 minv3(ir_variable *v)
172 {
173 return min2(min2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
174 }
175
176 /* Return the maximum of a vec3's components */
177 static ir_rvalue *
178 maxv3(ir_variable *v)
179 {
180 return max2(max2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
181 }
182
183 static ir_rvalue *
184 lumv3(ir_variable *c)
185 {
186 ir_constant_data data;
187 data.f[0] = 0.30;
188 data.f[1] = 0.59;
189 data.f[2] = 0.11;
190
191 void *mem_ctx = ralloc_parent(c);
192
193 /* dot(c, vec3(0.30, 0.59, 0.11)) */
194 return dot(c, new(mem_ctx) ir_constant(glsl_type::vec3_type, &data));
195 }
196
197 static ir_rvalue *
198 satv3(ir_variable *c)
199 {
200 return sub(maxv3(c), minv3(c));
201 }
202
203 /* Take the base RGB color <cbase> and override its luminosity with that
204 * of the RGB color <clum>.
205 *
206 * This follows the equations given in the ES 3.2 (June 15th, 2016)
207 * specification. Revision 16 of GL_KHR_blend_equation_advanced and
208 * revision 9 of GL_NV_blend_equation_advanced specify a different set
209 * of equations. Older revisions match ES 3.2's text, and dEQP expects
210 * the ES 3.2 rules implemented here.
211 */
212 static void
213 set_lum(ir_factory *f,
214 ir_variable *color,
215 ir_variable *cbase,
216 ir_variable *clum)
217 {
218 void *mem_ctx = f->mem_ctx;
219 f->emit(assign(color, add(cbase, sub(lumv3(clum), lumv3(cbase)))));
220
221 ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
222 ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
223 ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");
224
225 f->emit(assign(llum, lumv3(color)));
226 f->emit(assign(mincol, minv3(color)));
227 f->emit(assign(maxcol, maxv3(color)));
228
229 f->emit(if_tree(less(mincol, imm1(0)),
230 assign(color, add(llum, div(mul(sub(color, llum), llum),
231 sub(llum, mincol)))),
232 if_tree(greater(maxcol, imm1(1)),
233 assign(color, add(llum, div(mul(sub(color, llum),
234 sub(imm3(1), llum)),
235 sub(maxcol, llum)))))));
236
237 }
238
239 /* Take the base RGB color <cbase> and override its saturation with
240 * that of the RGB color <csat>. The override the luminosity of the
241 * result with that of the RGB color <clum>.
242 */
243 static void
244 set_lum_sat(ir_factory *f,
245 ir_variable *color,
246 ir_variable *cbase,
247 ir_variable *csat,
248 ir_variable *clum)
249 {
250 void *mem_ctx = f->mem_ctx;
251
252 ir_rvalue *minbase = minv3(cbase);
253 ir_rvalue *ssat = satv3(csat);
254
255 ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
256 f->emit(assign(sbase, satv3(cbase)));
257
258 /* Equivalent (modulo rounding errors) to setting the
259 * smallest (R,G,B) component to 0, the largest to <ssat>,
260 * and interpolating the "middle" component based on its
261 * original value relative to the smallest/largest.
262 */
263 f->emit(if_tree(greater(sbase, imm1(0)),
264 assign(color, div(mul(sub(cbase, minbase), ssat), sbase)),
265 assign(color, imm3(0))));
266 set_lum(f, color, color, clum);
267 }
268
269 static ir_rvalue *
270 is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
271 {
272 return equal(mode, new(ralloc_parent(mode)) ir_constant(unsigned(q)));
273 }
274
275 static ir_variable *
276 calc_blend_result(ir_factory f,
277 ir_variable *mode,
278 ir_variable *fb,
279 ir_rvalue *blend_src,
280 GLbitfield blend_qualifiers)
281 {
282 void *mem_ctx = f.mem_ctx;
283 ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");
284
285 /* Save blend_src to a temporary so we can reference it multiple times. */
286 ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
287 f.emit(assign(src, blend_src));
288
289 /* If we're not doing advanced blending, just write the original value. */
290 ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
291 f.emit(if_blending);
292 if_blending->then_instructions.push_tail(assign(result, src));
293
294 f.instructions = &if_blending->else_instructions;
295
296 /* (Rs', Gs', Bs') =
297 * (0, 0, 0), if As == 0
298 * (Rs/As, Gs/As, Bs/As), otherwise
299 */
300 ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
301 ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");
302
303 /* (Rd', Gd', Bd') =
304 * (0, 0, 0), if Ad == 0
305 * (Rd/Ad, Gd/Ad, Bd/Ad), otherwise
306 */
307 ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
308 ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");
309
310 f.emit(assign(dst_alpha, swizzle_w(fb)));
311 f.emit(if_tree(equal(dst_alpha, imm1(0)),
312 assign(dst_rgb, imm3(0)),
313 assign(dst_rgb, csel(equal(swizzle_xyz(fb),
314 swizzle(fb, SWIZZLE_WWWW, 3)),
315 imm3(1),
316 div(swizzle_xyz(fb), dst_alpha)))));
317
318 f.emit(assign(src_alpha, swizzle_w(src)));
319 f.emit(if_tree(equal(src_alpha, imm1(0)),
320 assign(src_rgb, imm3(0)),
321 assign(src_rgb, csel(equal(swizzle_xyz(src),
322 swizzle(src, SWIZZLE_WWWW, 3)),
323 imm3(1),
324 div(swizzle_xyz(src), src_alpha)))));
325
326 ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");
327
328 ir_factory casefactory = f;
329
330 unsigned choices = blend_qualifiers;
331 while (choices) {
332 enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)
333 (1u << u_bit_scan(&choices));
334
335 ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
336 casefactory.emit(iff);
337 casefactory.instructions = &iff->then_instructions;
338
339 ir_rvalue *val = NULL;
340
341 switch (choice) {
342 case BLEND_MULTIPLY:
343 val = blend_multiply(src_rgb, dst_rgb);
344 break;
345 case BLEND_SCREEN:
346 val = blend_screen(src_rgb, dst_rgb);
347 break;
348 case BLEND_OVERLAY:
349 val = blend_overlay(src_rgb, dst_rgb);
350 break;
351 case BLEND_DARKEN:
352 val = blend_darken(src_rgb, dst_rgb);
353 break;
354 case BLEND_LIGHTEN:
355 val = blend_lighten(src_rgb, dst_rgb);
356 break;
357 case BLEND_COLORDODGE:
358 val = blend_colordodge(src_rgb, dst_rgb);
359 break;
360 case BLEND_COLORBURN:
361 val = blend_colorburn(src_rgb, dst_rgb);
362 break;
363 case BLEND_HARDLIGHT:
364 val = blend_hardlight(src_rgb, dst_rgb);
365 break;
366 case BLEND_SOFTLIGHT:
367 val = blend_softlight(src_rgb, dst_rgb);
368 break;
369 case BLEND_DIFFERENCE:
370 val = blend_difference(src_rgb, dst_rgb);
371 break;
372 case BLEND_EXCLUSION:
373 val = blend_exclusion(src_rgb, dst_rgb);
374 break;
375 case BLEND_HSL_HUE:
376 set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
377 break;
378 case BLEND_HSL_SATURATION:
379 set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
380 break;
381 case BLEND_HSL_COLOR:
382 set_lum(&casefactory, factor, src_rgb, dst_rgb);
383 break;
384 case BLEND_HSL_LUMINOSITY:
385 set_lum(&casefactory, factor, dst_rgb, src_rgb);
386 break;
387 case BLEND_NONE:
388 case BLEND_ALL:
389 unreachable("not real cases");
390 }
391
392 if (val)
393 casefactory.emit(assign(factor, val));
394
395 casefactory.instructions = &iff->else_instructions;
396 }
397
398 /* p0(As,Ad) = As*Ad
399 * p1(As,Ad) = As*(1-Ad)
400 * p2(As,Ad) = Ad*(1-As)
401 */
402 ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
403 ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
404 ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");
405
406 f.emit(assign(p0, mul(src_alpha, dst_alpha)));
407 f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
408 f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));
409
410 /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
411 * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
412 * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
413 * A = X*p0(As,Ad) + Y*p1(As,Ad) + Z*p2(As,Ad)
414 *
415 * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
416 *
417 * In vector form, this is:
418 * RGB = factor * p0 + Cs * p1 + Cd * p2
419 * A = p0 + p1 + p2
420 */
421 f.emit(assign(result,
422 add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
423 WRITEMASK_XYZ));
424 f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));
425
426 return result;
427 }
428
429 /**
430 * Dereference var, or var[0] if it's an array.
431 */
432 static ir_dereference *
433 deref_output(ir_variable *var)
434 {
435 void *mem_ctx = ralloc_parent(var);
436
437 ir_dereference *val = new(mem_ctx) ir_dereference_variable(var);
438 if (val->type->is_array()) {
439 ir_constant *index = new(mem_ctx) ir_constant(0);
440 val = new(mem_ctx) ir_dereference_array(val, index);
441 }
442
443 return val;
444 }
445
446 static ir_function_signature *
447 get_main(gl_linked_shader *sh)
448 {
449 ir_function_signature *sig = NULL;
450 /* We can't use _mesa_get_main_function_signature() because we don't
451 * have a symbol table at this point. Just go find main() by hand.
452 */
453 foreach_in_list(ir_instruction, ir, sh->ir) {
454 ir_function *f = ir->as_function();
455 if (f && strcmp(f->name, "main") == 0) {
456 exec_list void_parameters;
457 sig = f->matching_signature(NULL, &void_parameters, false);
458 break;
459 }
460 }
461 assert(sig != NULL); /* main() must exist */
462 return sig;
463 }
464
465 bool
466 lower_blend_equation_advanced(struct gl_linked_shader *sh, bool coherent)
467 {
468 if (sh->Program->sh.fs.BlendSupport == 0)
469 return false;
470
471 /* Lower early returns in main() so there's a single exit point
472 * where we can insert our lowering code.
473 */
474 do_lower_jumps(sh->ir, false, false, true, false, false);
475
476 void *mem_ctx = ralloc_parent(sh->ir);
477
478 ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
479 "__blend_fb_fetch",
480 ir_var_shader_out);
481 fb->data.location = FRAG_RESULT_DATA0;
482 fb->data.read_only = 1;
483 fb->data.fb_fetch_output = 1;
484 fb->data.memory_coherent = coherent;
485 fb->data.how_declared = ir_var_hidden;
486
487 ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
488 "gl_AdvancedBlendModeMESA",
489 ir_var_uniform);
490 mode->data.how_declared = ir_var_hidden;
491 mode->allocate_state_slots(1);
492 ir_state_slot *slot0 = &mode->get_state_slots()[0];
493 slot0->swizzle = SWIZZLE_XXXX;
494 slot0->tokens[0] = STATE_INTERNAL;
495 slot0->tokens[1] = STATE_ADVANCED_BLENDING_MODE;
496 for (int i = 2; i < STATE_LENGTH; i++)
497 slot0->tokens[i] = 0;
498
499 sh->ir->push_head(fb);
500 sh->ir->push_head(mode);
501
502 /* Gather any output variables referring to render target 0.
503 *
504 * ARB_enhanced_layouts irritatingly allows the shader to specify
505 * multiple output variables for the same render target, each of
506 * which writes a subset of the components, starting at location_frac.
507 * The variables can't overlap, thankfully.
508 */
509 ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
510 foreach_in_list(ir_instruction, ir, sh->ir) {
511 ir_variable *var = ir->as_variable();
512 if (!var || var->data.mode != ir_var_shader_out)
513 continue;
514
515 if (var->data.location == FRAG_RESULT_DATA0 ||
516 var->data.location == FRAG_RESULT_COLOR) {
517 const int components = var->type->without_array()->vector_elements;
518
519 for (int i = 0; i < components; i++) {
520 outputs[var->data.location_frac + i] = var;
521 }
522 }
523 }
524
525 /* Combine values written to outputs into a single RGBA blend source.
526 * We assign <0, 0, 0, 1> to any components with no corresponding output.
527 */
528 ir_rvalue *blend_source;
529 if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
530 blend_source = deref_output(outputs[0]);
531 } else {
532 ir_rvalue *blend_comps[4];
533 for (int i = 0; i < 4; i++) {
534 ir_variable *var = outputs[i];
535 if (var) {
536 blend_comps[i] = swizzle(deref_output(outputs[i]),
537 i - outputs[i]->data.location_frac, 1);
538 } else {
539 blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
540 }
541 }
542
543 blend_source =
544 new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
545 blend_comps[0], blend_comps[1],
546 blend_comps[2], blend_comps[3]);
547 }
548
549 ir_function_signature *main = get_main(sh);
550 ir_factory f(&main->body, mem_ctx);
551
552 ir_variable *result_dest =
553 calc_blend_result(f, mode, fb, blend_source,
554 sh->Program->sh.fs.BlendSupport);
555
556 /* Copy the result back to the original values. It would be simpler
557 * to demote the program's output variables, and create a new vec4
558 * output for our result, but this pass runs before we create the
559 * ARB_program_interface_query resource list. So we have to leave
560 * the original outputs in place and use them.
561 */
562 for (int i = 0; i < 4; i++) {
563 if (!outputs[i])
564 continue;
565
566 f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
567 1 << i));
568 }
569
570 validate_ir_tree(sh->ir);
571 return true;
572 }