2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "ir_builder.h"
26 #include "ir_optimization.h"
27 #include "ir_hierarchical_visitor.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_statevars.h"
30 #include "util/bitscan.h"
31 #include "builtin_functions.h"
33 using namespace ir_builder
;
// imm1(x) and imm3(x) allocate a float ir_constant holding x. The second
// constructor argument (1 or 3) is presumably the vector component count.
// Both expand to new(mem_ctx), so a variable named mem_ctx must be in scope
// at every expansion site.
35 #define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
36 #define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)
// Multiply blend function. Takes the source and destination colour
// variables; the formula comment below gives the result as the
// component-wise product of the two.
39 blend_multiply(ir_variable
*src
, ir_variable
*dst
)
41 /* f(Cs,Cd) = Cs*Cd */
// Screen blend function. Returns the IR expression (src + dst) - (src * dst).
46 blend_screen(ir_variable
*src
, ir_variable
*dst
)
48 /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
49 return sub(add(src
, dst
), mul(src
, dst
));
// Overlay blend function. Builds both candidate expressions and picks
// between them per component with csel, keyed on dst <= 0.5:
//   rule_1 = 2 * src * dst
//   rule_2 = 1 - 2 * (1 - src) * (1 - dst)
// mem_ctx is taken from the ralloc parent of src because the imm3 macro
// allocates its constants from a variable of that name.
53 blend_overlay(ir_variable
*src
, ir_variable
*dst
)
55 void *mem_ctx
= ralloc_parent(src
);
57 /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
58 * 1-2*(1-Cs)*(1-Cd), otherwise
60 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
62 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
63 return csel(lequal(dst
, imm3(0.5f
)), rule_1
, rule_2
);
// Darken blend function. Returns the IR expression min2(src, dst).
67 blend_darken(ir_variable
*src
, ir_variable
*dst
)
69 /* f(Cs,Cd) = min(Cs,Cd) */
70 return min2(src
, dst
);
// Lighten blend function. Returns the IR expression max2(src, dst).
74 blend_lighten(ir_variable
*src
, ir_variable
*dst
)
76 /* f(Cs,Cd) = max(Cs,Cd) */
77 return max2(src
, dst
);
// Color-dodge blend function. The nested csel chain evaluates, in order:
//   dst <= 0  -> 0
//   src >= 1  -> 1
//   otherwise -> min(1, dst / (1 - src))
// Testing src >= 1 before the division keeps the zero denominator out of
// the selected result. mem_ctx is required by the imm3 macro.
81 blend_colordodge(ir_variable
*src
, ir_variable
*dst
)
83 void *mem_ctx
= ralloc_parent(src
);
87 * min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
88 * 1, if Cd > 0 and Cs >= 1
90 return csel(lequal(dst
, imm3(0)), imm3(0),
91 csel(gequal(src
, imm3(1)), imm3(1),
92 min2(imm3(1), div(dst
, sub(imm3(1), src
)))));
// Color-burn blend function. The nested csel chain evaluates, in order:
//   dst >= 1  -> 1
//   src <= 0  -> 0
//   otherwise -> 1 - min(1, (1 - dst) / src)
// Testing src <= 0 before the division keeps the zero denominator out of
// the selected result. mem_ctx is required by the imm3 macro.
96 blend_colorburn(ir_variable
*src
, ir_variable
*dst
)
98 void *mem_ctx
= ralloc_parent(src
);
102 * 1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
103 * 0, if Cd < 1 and Cs <= 0
105 return csel(gequal(dst
, imm3(1)), imm3(1),
106 csel(lequal(src
, imm3(0)), imm3(0),
107 sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst
), src
)))));
// Hard-light blend function. Uses the same two candidate expressions as
// blend_overlay, but the csel condition tests src <= 0.5 instead of dst:
//   rule_1 = 2 * src * dst
//   rule_2 = 1 - 2 * (1 - src) * (1 - dst)
// mem_ctx is required by the imm3 macro.
111 blend_hardlight(ir_variable
*src
, ir_variable
*dst
)
113 void *mem_ctx
= ralloc_parent(src
);
115 /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
116 * 1-2*(1-Cs)*(1-Cd), otherwise
118 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
120 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
121 return csel(lequal(src
, imm3(0.5f
)), rule_1
, rule_2
);
// Soft-light blend function. Returns dst + (2 * src - 1) * factor, where
// factor is chosen per component by two nested csel calls:
//   src <= 0.5                 -> factor_1 = dst * (1 - dst)
//   src > 0.5 and dst <= 0.25  -> factor_2 = dst * ((16 * dst - 12) * dst + 3)
//   otherwise                  -> factor_3 = sqrt(dst) - dst
// mem_ctx is required by the imm3 macro.
125 blend_softlight(ir_variable
*src
, ir_variable
*dst
)
127 void *mem_ctx
= ralloc_parent(src
);
130 * Cd-(1-2*Cs)*Cd*(1-Cd),
132 * Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
133 * if Cs > 0.5 and Cd <= 0.25
134 * Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
135 * if Cs > 0.5 and Cd > 0.25
137 * We can simplify this to
139 * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
140 * g(Cs,Cd) = Cd-Cd*Cd if Cs <= 0.5
141 * Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
142 * sqrt(Cd)-Cd, otherwise
144 ir_rvalue
*factor_1
= mul(dst
, sub(imm3(1), dst
));
145 ir_rvalue
*factor_2
=
146 mul(dst
, add(mul(sub(mul(imm3(16), dst
), imm3(12)), dst
), imm3(3)));
147 ir_rvalue
*factor_3
= sub(sqrt(dst
), dst
);
148 ir_rvalue
*factor
= csel(lequal(src
, imm3(0.5f
)), factor_1
,
149 csel(lequal(dst
, imm3(0.25f
)),
150 factor_2
, factor_3
));
151 return add(dst
, mul(sub(mul(imm3(2), src
), imm3(1)), factor
));
// Difference blend function. Returns the IR expression abs(dst - src).
155 blend_difference(ir_variable
*src
, ir_variable
*dst
)
157 return abs(sub(dst
, src
));
// Exclusion blend function. Returns the IR expression
// src + (dst - 2 * src * dst). mem_ctx is required by the imm3 macro.
161 blend_exclusion(ir_variable
*src
, ir_variable
*dst
)
163 void *mem_ctx
= ralloc_parent(src
);
165 return add(src
, sub(dst
, mul(imm3(2), mul(src
, dst
))));
168 /* Return the minimum of a vec3's components */
// Built as min2(min2(v.x, v.y), v.z) from single-component swizzles.
170 minv3(ir_variable
*v
)
172 return min2(min2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
175 /* Return the maximum of a vec3's components */
// Built as max2(max2(v.x, v.y), v.z) from single-component swizzles.
177 maxv3(ir_variable
*v
)
179 return max2(max2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
// Luminosity of an RGB colour: the dot product of c with a vec3 constant
// built from the local ir_constant_data. The initialisation of that data
// is not visible in this listing; the comment below gives the weights as
// (0.30, 0.59, 0.11). The constant is allocated from the ralloc parent of c.
183 lumv3(ir_variable
*c
)
185 ir_constant_data data
;
190 void *mem_ctx
= ralloc_parent(c
);
192 /* dot(c, vec3(0.30, 0.59, 0.11)) */
193 return dot(c
, new(mem_ctx
) ir_constant(glsl_type::vec3_type
, &data
));
// Saturation of an RGB colour: largest component minus smallest component,
// computed as maxv3(c) - minv3(c).
197 satv3(ir_variable
*c
)
199 return sub(maxv3(c
), minv3(c
));
202 /* Take the base RGB color <cbase> and override its luminosity with that
203 * of the RGB color <clum>.
205 * This follows the equations given in the ES 3.2 (June 15th, 2016)
206 * specification. Revision 16 of GL_KHR_blend_equation_advanced and
207 * revision 9 of GL_NV_blend_equation_advanced specify a different set
208 * of equations. Older revisions match ES 3.2's text, and dEQP expects
209 * the ES 3.2 rules implemented here.
// Emits its instructions through the factory f and writes the outcome into
// the variable color (callers pass it as the second argument). Steps:
//   1. color = cbase + (lumv3(clum) - lumv3(cbase))
//   2. llum, mincol and maxcol temporaries are computed from that color
//   3. if mincol < 0, color is rescaled around llum using (llum - mincol)
//      as divisor; else if maxcol > 1, it is rescaled using (maxcol - llum)
// mem_ctx is required by the imm1 macro.
212 set_lum(ir_factory
*f
,
217 void *mem_ctx
= f
->mem_ctx
;
218 f
->emit(assign(color
, add(cbase
, sub(lumv3(clum
), lumv3(cbase
)))));
220 ir_variable
*llum
= f
->make_temp(glsl_type::float_type
, "__blend_lum");
221 ir_variable
*mincol
= f
->make_temp(glsl_type::float_type
, "__blend_mincol");
222 ir_variable
*maxcol
= f
->make_temp(glsl_type::float_type
, "__blend_maxcol");
224 f
->emit(assign(llum
, lumv3(color
)));
225 f
->emit(assign(mincol
, minv3(color
)));
226 f
->emit(assign(maxcol
, maxv3(color
)));
// Clamp step: pull out-of-range colours back toward llum.
228 f
->emit(if_tree(less(mincol
, imm1(0)),
229 assign(color
, add(llum
, div(mul(sub(color
, llum
), llum
),
230 sub(llum
, mincol
)))),
231 if_tree(greater(maxcol
, imm1(1)),
232 assign(color
, add(llum
, div(mul(sub(color
, llum
),
234 sub(maxcol
, llum
)))))));
238 /* Take the base RGB color <cbase> and override its saturation with
239 * that of the RGB color <csat>. Then override the luminosity of the
240 * result with that of the RGB color <clum>.
// Emits its instructions through the factory f and writes into color.
// sbase (the saturation of cbase) is stored in a temporary; when it is
// greater than zero, color = (cbase - min(cbase)) * sat(csat) / sbase,
// otherwise color = 0, which avoids dividing by a zero saturation.
// The final set_lum call passes color as both destination and base.
// mem_ctx is required by the imm1 and imm3 macros.
243 set_lum_sat(ir_factory
*f
,
249 void *mem_ctx
= f
->mem_ctx
;
251 ir_rvalue
*minbase
= minv3(cbase
);
252 ir_rvalue
*ssat
= satv3(csat
);
254 ir_variable
*sbase
= f
->make_temp(glsl_type::float_type
, "__blend_sbase");
255 f
->emit(assign(sbase
, satv3(cbase
)));
257 /* Equivalent (modulo rounding errors) to setting the
258 * smallest (R,G,B) component to 0, the largest to <ssat>,
259 * and interpolating the "middle" component based on its
260 * original value relative to the smallest/largest.
262 f
->emit(if_tree(greater(sbase
, imm1(0)),
263 assign(color
, div(mul(sub(cbase
, minbase
), ssat
), sbase
)),
264 assign(color
, imm3(0))));
265 set_lum(f
, color
, color
, clum
);
// Build the IR comparison mode == q. The enum value q is converted to
// unsigned and wrapped in an ir_constant allocated from the ralloc parent
// of mode.
269 is_mode(ir_variable
*mode
, enum gl_advanced_blend_mode q
)
271 return equal(mode
, new(ralloc_parent(mode
)) ir_constant(unsigned(q
)));
// Emit the IR that blends blend_src against the framebuffer value fb
// according to the runtime value of mode, and write it to the
// __blend_result temporary. The caller assigns the return value to an
// ir_variable pointer, so this presumably returns that temporary; the
// return statement is not visible in this listing.
//
// blend_qualifiers is a bitfield of the blend modes to generate code for;
// one if/else arm is emitted per set bit.
//
// The factory f is taken by value, so retargeting f.instructions here does
// not affect the factory object held by the caller.
275 calc_blend_result(ir_factory f
,
278 ir_rvalue
*blend_src
,
279 GLbitfield blend_qualifiers
)
281 void *mem_ctx
= f
.mem_ctx
;
282 ir_variable
*result
= f
.make_temp(glsl_type::vec4_type
, "__blend_result");
284 /* Save blend_src to a temporary so we can reference it multiple times. */
285 ir_variable
*src
= f
.make_temp(glsl_type::vec4_type
, "__blend_src");
286 f
.emit(assign(src
, blend_src
));
288 /* If we're not doing advanced blending, just write the original value. */
289 ir_if
*if_blending
= new(mem_ctx
) ir_if(is_mode(mode
, BLEND_NONE
));
291 if_blending
->then_instructions
.push_tail(assign(result
, src
));
// Everything emitted through f from here on lands in the else branch,
// that is, it only runs when mode is not BLEND_NONE.
293 f
.instructions
= &if_blending
->else_instructions
;
296 * (0, 0, 0), if As == 0
297 * (Rs/As, Gs/As, Bs/As), otherwise
299 ir_variable
*src_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_src_rgb");
300 ir_variable
*src_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_src_a");
303 * (0, 0, 0), if Ad == 0
304 * (Rd/Ad, Gd/Ad, Bd/Ad), otherwise
306 ir_variable
*dst_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_dst_rgb");
307 ir_variable
*dst_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_dst_a");
// Destination: rgb is zero when alpha is zero, otherwise a csel keyed on
// fb.xyz == fb.www chooses between a value not visible in this listing
// and fb.xyz / alpha.
309 f
.emit(assign(dst_alpha
, swizzle_w(fb
)));
310 f
.emit(if_tree(equal(dst_alpha
, imm1(0)),
311 assign(dst_rgb
, imm3(0)),
312 assign(dst_rgb
, csel(equal(swizzle_xyz(fb
),
313 swizzle(fb
, SWIZZLE_WWWW
, 3)),
315 div(swizzle_xyz(fb
), dst_alpha
)))));
// Source: same treatment as the destination above.
317 f
.emit(assign(src_alpha
, swizzle_w(src
)));
318 f
.emit(if_tree(equal(src_alpha
, imm1(0)),
319 assign(src_rgb
, imm3(0)),
320 assign(src_rgb
, csel(equal(swizzle_xyz(src
),
321 swizzle(src
, SWIZZLE_WWWW
, 3)),
323 div(swizzle_xyz(src
), src_alpha
)))));
325 ir_variable
*factor
= f
.make_temp(glsl_type::vec3_type
, "__blend_factor");
// casefactory is a separate copy used to build the chain of per-mode
// if/else arms, leaving the insertion point of f untouched.
327 ir_factory casefactory
= f
;
329 unsigned choices
= blend_qualifiers
;
// u_bit_scan extracts one set bit index from choices per call; the bit is
// turned back into the matching enum value.
331 enum gl_advanced_blend_mode choice
= (enum gl_advanced_blend_mode
)
332 (1u << u_bit_scan(&choices
));
334 ir_if
*iff
= new(mem_ctx
) ir_if(is_mode(mode
, choice
));
335 casefactory
.emit(iff
);
336 casefactory
.instructions
= &iff
->then_instructions
;
// The simple modes return an rvalue in val, which is assigned to factor
// further down. The HSL modes (set_lum, set_lum_sat) write factor
// themselves through casefactory.
338 ir_rvalue
*val
= NULL
;
342 val
= blend_multiply(src_rgb
, dst_rgb
);
345 val
= blend_screen(src_rgb
, dst_rgb
);
348 val
= blend_overlay(src_rgb
, dst_rgb
);
351 val
= blend_darken(src_rgb
, dst_rgb
);
354 val
= blend_lighten(src_rgb
, dst_rgb
);
356 case BLEND_COLORDODGE
:
357 val
= blend_colordodge(src_rgb
, dst_rgb
);
359 case BLEND_COLORBURN
:
360 val
= blend_colorburn(src_rgb
, dst_rgb
);
362 case BLEND_HARDLIGHT
:
363 val
= blend_hardlight(src_rgb
, dst_rgb
);
365 case BLEND_SOFTLIGHT
:
366 val
= blend_softlight(src_rgb
, dst_rgb
);
368 case BLEND_DIFFERENCE
:
369 val
= blend_difference(src_rgb
, dst_rgb
);
371 case BLEND_EXCLUSION
:
372 val
= blend_exclusion(src_rgb
, dst_rgb
);
375 set_lum_sat(&casefactory
, factor
, src_rgb
, dst_rgb
, dst_rgb
);
377 case BLEND_HSL_SATURATION
:
378 set_lum_sat(&casefactory
, factor
, dst_rgb
, src_rgb
, dst_rgb
);
380 case BLEND_HSL_COLOR
:
381 set_lum(&casefactory
, factor
, src_rgb
, dst_rgb
);
383 case BLEND_HSL_LUMINOSITY
:
384 set_lum(&casefactory
, factor
, dst_rgb
, src_rgb
);
388 unreachable("not real cases");
392 casefactory
.emit(assign(factor
, val
));
// The next mode test is emitted into the else branch of this one, which
// produces an if / else-if chain over the enabled modes.
394 casefactory
.instructions
= &iff
->else_instructions
;
398 * p1(As,Ad) = As*(1-Ad)
399 * p2(As,Ad) = Ad*(1-As)
401 ir_variable
*p0
= f
.make_temp(glsl_type::float_type
, "__blend_p0");
402 ir_variable
*p1
= f
.make_temp(glsl_type::float_type
, "__blend_p1");
403 ir_variable
*p2
= f
.make_temp(glsl_type::float_type
, "__blend_p2");
// p0 = As * Ad, p1 = As * (1 - Ad), p2 = Ad * (1 - As)
405 f
.emit(assign(p0
, mul(src_alpha
, dst_alpha
)));
406 f
.emit(assign(p1
, mul(src_alpha
, sub(imm1(1), dst_alpha
))));
407 f
.emit(assign(p2
, mul(dst_alpha
, sub(imm1(1), src_alpha
))));
409 /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
410 * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
411 * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
412 * A = X*p0(As,Ad) + Y*p1(As,Ad) + Z*p2(As,Ad)
414 * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
416 * In vector form, this is:
417 * RGB = factor * p0 + Cs * p1 + Cd * p2
// First assignment writes the colour sum (its write mask is not visible in
// this listing); the second writes alpha only, via WRITEMASK_W.
420 f
.emit(assign(result
,
421 add(add(mul(factor
, p0
), mul(src_rgb
, p1
)), mul(dst_rgb
, p2
)),
423 f
.emit(assign(result
, add(add(p0
, p1
), p2
), WRITEMASK_W
));
429 * Dereference var, or var[0] if it's an array.
// The dereference nodes and the index constant 0 are allocated from the
// ralloc parent of var.
431 static ir_dereference
*
432 deref_output(ir_variable
*var
)
434 void *mem_ctx
= ralloc_parent(var
);
436 ir_dereference
*val
= new(mem_ctx
) ir_dereference_variable(var
);
437 if (val
->type
->is_array()) {
438 ir_constant
*index
= new(mem_ctx
) ir_constant(0);
439 val
= new(mem_ctx
) ir_dereference_array(val
, index
);
// Locate the signature of main() in the IR list of the linked shader sh.
// Walks sh->ir looking for an ir_function named main and asks it for the
// signature matching an empty parameter list. Asserts that one was found.
445 static ir_function_signature
*
446 get_main(gl_linked_shader
*sh
)
448 ir_function_signature
*sig
= NULL
;
449 /* We can't use _mesa_get_main_function_signature() because we don't
450 * have a symbol table at this point. Just go find main() by hand.
452 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
453 ir_function
*f
= ir
->as_function();
454 if (f
&& strcmp(f
->name
, "main") == 0) {
// An empty exec_list stands for a parameter list with no parameters.
455 exec_list void_parameters
;
456 sig
= f
->matching_signature(NULL
, &void_parameters
, false);
460 assert(sig
!= NULL
); /* main() must exist */
// Entry point of the pass. For the linked shader sh it:
//   1. tests BlendSupport (presumably an early-out when it is zero; the
//      guarded statement is not visible in this listing)
//   2. lowers jumps so main() has a single exit point
//   3. adds a hidden framebuffer-fetch variable fb and a hidden uniform-like
//      state variable gl_AdvancedBlendModeMESA at the head of the IR
//   4. gathers the outputs that target render target 0 and combines them
//      into one RGBA blend source
//   5. emits the blend computation via calc_blend_result into main()
//   6. copies the blended components back into the original outputs
//   7. validates the IR tree
465 lower_blend_equation_advanced(struct gl_linked_shader
*sh
)
467 if (sh
->Program
->sh
.fs
.BlendSupport
== 0)
470 /* Lower early returns in main() so there's a single exit point
471 * where we can insert our lowering code.
473 do_lower_jumps(sh
->ir
, false, false, true, false, false);
475 void *mem_ctx
= ralloc_parent(sh
->ir
);
// fb: read-only vec4 at FRAG_RESULT_DATA0, flagged as a framebuffer-fetch
// output and declared hidden.
477 ir_variable
*fb
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
480 fb
->data
.location
= FRAG_RESULT_DATA0
;
481 fb
->data
.read_only
= 1;
482 fb
->data
.fb_fetch_output
= 1;
483 fb
->data
.how_declared
= ir_var_hidden
;
// mode: hidden uint backed by one state slot whose tokens are
// STATE_INTERNAL, STATE_ADVANCED_BLENDING_MODE, then zeros.
485 ir_variable
*mode
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
486 "gl_AdvancedBlendModeMESA",
488 mode
->data
.how_declared
= ir_var_hidden
;
489 mode
->allocate_state_slots(1);
490 ir_state_slot
*slot0
= &mode
->get_state_slots()[0];
491 slot0
->swizzle
= SWIZZLE_XXXX
;
492 slot0
->tokens
[0] = STATE_INTERNAL
;
493 slot0
->tokens
[1] = STATE_ADVANCED_BLENDING_MODE
;
494 for (int i
= 2; i
< STATE_LENGTH
; i
++)
495 slot0
->tokens
[i
] = 0;
497 sh
->ir
->push_head(fb
);
498 sh
->ir
->push_head(mode
);
500 /* Gather any output variables referring to render target 0.
502 * ARB_enhanced_layouts irritatingly allows the shader to specify
503 * multiple output variables for the same render target, each of
504 * which writes a subset of the components, starting at location_frac.
505 * The variables can't overlap, thankfully.
// outputs[c] records which variable supplies component c of the target.
507 ir_variable
*outputs
[4] = { NULL
, NULL
, NULL
, NULL
};
508 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
509 ir_variable
*var
= ir
->as_variable();
510 if (!var
|| var
->data
.mode
!= ir_var_shader_out
)
513 if (var
->data
.location
== FRAG_RESULT_DATA0
||
514 var
->data
.location
== FRAG_RESULT_COLOR
) {
515 const int components
= var
->type
->without_array()->vector_elements
;
// NOTE(review): no bounds check is visible here; this relies on
// location_frac + components never exceeding 4 - confirm that an
// earlier validation step guarantees it.
517 for (int i
= 0; i
< components
; i
++) {
518 outputs
[var
->data
.location_frac
+ i
] = var
;
523 /* Combine values written to outputs into a single RGBA blend source.
524 * We assign <0, 0, 0, 1> to any components with no corresponding output.
526 ir_rvalue
*blend_source
;
// Fast path: a single four-component output can be dereferenced directly.
527 if (outputs
[0] && outputs
[0]->type
->without_array()->vector_elements
== 4) {
528 blend_source
= deref_output(outputs
[0]);
530 ir_rvalue
*blend_comps
[4];
531 for (int i
= 0; i
< 4; i
++) {
532 ir_variable
*var
= outputs
[i
];
// Component i of the target is component (i - location_frac) of the
// variable that owns it.
534 blend_comps
[i
] = swizzle(deref_output(outputs
[i
]),
535 i
- outputs
[i
]->data
.location_frac
, 1);
537 blend_comps
[i
] = new(mem_ctx
) ir_constant(i
< 3 ? 0.0f
: 1.0f
);
542 new(mem_ctx
) ir_expression(ir_quadop_vector
, glsl_type::vec4_type
,
543 blend_comps
[0], blend_comps
[1],
544 blend_comps
[2], blend_comps
[3]);
547 ir_function_signature
*main
= get_main(sh
);
548 ir_factory
f(&main
->body
, mem_ctx
);
550 ir_variable
*result_dest
=
551 calc_blend_result(f
, mode
, fb
, blend_source
,
552 sh
->Program
->sh
.fs
.BlendSupport
);
554 /* Copy the result back to the original values. It would be simpler
555 * to demote the program's output variables, and create a new vec4
556 * output for our result, but this pass runs before we create the
557 * ARB_program_interface_query resource list. So we have to leave
558 * the original outputs in place and use them.
560 for (int i
= 0; i
< 4; i
++) {
564 f
.emit(assign(deref_output(outputs
[i
]), swizzle(result_dest
, i
, 1),
568 validate_ir_tree(sh
->ir
);