2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "ir_builder.h"
26 #include "ir_optimization.h"
27 #include "ir_hierarchical_visitor.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_statevars.h"
30 #include "util/bitscan.h"
31 #include "builtin_functions.h"
32 #include "main/mtypes.h"
/* File-wide helpers.
 *
 * The using-directive makes the contents of namespace ir_builder available
 * unqualified (presumably the mul/add/sub/csel/assign/swizzle helpers used
 * throughout this file come from there -- confirm against ir_builder.h).
 *
 * imm1(x) / imm3(x) allocate a float ir_constant from a variable named
 * `mem_ctx`, which must therefore be in scope wherever the macros are
 * expanded.  The second constructor argument is 1 or 3; presumably this is
 * the number of vector components (scalar vs. vec3) -- TODO confirm against
 * the ir_constant constructor.
 */
34 using namespace ir_builder
;
36 #define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
37 #define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)
/* Multiply blend function for a source/destination colour pair.
 *
 * calc_blend_result calls this with its __blend_src_rgb / __blend_dst_rgb
 * temporaries and stores the returned value in `val`.
 *
 * NOTE(review): only the formula comment is visible in this view of the
 * file; the return statement that builds the product is not shown here.
 */
40 blend_multiply(ir_variable
*src
, ir_variable
*dst
)
42 /* f(Cs,Cd) = Cs*Cd */
/* Screen blend function.
 *
 * Returns the IR expression (src + dst) - (src * dst), i.e. the formula
 * given in the inline comment.  No constants are needed, so no mem_ctx is
 * looked up here.
 */
47 blend_screen(ir_variable
*src
, ir_variable
*dst
)
49 /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
50 return sub(add(src
, dst
), mul(src
, dst
));
/* Overlay blend function.
 *
 * Builds two candidate expressions and picks between them with csel():
 *   rule_1 = 2 * src * dst              -- chosen where dst <= 0.5
 *   rule_2                              -- chosen otherwise
 * The selector tests the DESTINATION colour (compare blend_hardlight, which
 * uses the same two rules but tests the source).
 *
 * mem_ctx is taken from ralloc_parent(src) because the imm3() macro needs a
 * variable of that name in scope.
 *
 * NOTE(review): the declaration of rule_2 is not visible in this view; the
 * expression 1 - 2*(1-src)*(1-dst) that follows rule_1 is presumably its
 * initialiser.
 */
54 blend_overlay(ir_variable
*src
, ir_variable
*dst
)
56 void *mem_ctx
= ralloc_parent(src
);
58 /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
59 * 1-2*(1-Cs)*(1-Cd), otherwise
61 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
63 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
64 return csel(lequal(dst
, imm3(0.5f
)), rule_1
, rule_2
);
/* Darken blend function: returns min2(src, dst). */
68 blend_darken(ir_variable
*src
, ir_variable
*dst
)
70 /* f(Cs,Cd) = min(Cs,Cd) */
71 return min2(src
, dst
);
/* Lighten blend function: returns max2(src, dst). */
75 blend_lighten(ir_variable
*src
, ir_variable
*dst
)
77 /* f(Cs,Cd) = max(Cs,Cd) */
78 return max2(src
, dst
);
/* Color-dodge blend function.
 *
 * Returns a nested csel() that evaluates, in this priority order:
 *   dst <= 0  ->  0
 *   src >= 1  ->  1
 *   otherwise ->  min(1, dst / (1 - src))
 * The src >= 1 test selects the constant 1 in place of the quotient whose
 * divisor (1 - src) would be zero or negative.
 *
 * mem_ctx comes from ralloc_parent(src) and is required by imm3().
 */
82 blend_colordodge(ir_variable
*src
, ir_variable
*dst
)
84 void *mem_ctx
= ralloc_parent(src
);
88 * min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
89 * 1, if Cd > 0 and Cs >= 1
91 return csel(lequal(dst
, imm3(0)), imm3(0),
92 csel(gequal(src
, imm3(1)), imm3(1),
93 min2(imm3(1), div(dst
, sub(imm3(1), src
)))));
/* Color-burn blend function.
 *
 * Returns a nested csel() that evaluates, in this priority order:
 *   dst >= 1  ->  1
 *   src <= 0  ->  0
 *   otherwise ->  1 - min(1, (1 - dst) / src)
 * The src <= 0 test selects the constant 0 in place of the quotient whose
 * divisor (src) would be zero or negative.
 *
 * mem_ctx comes from ralloc_parent(src) and is required by imm3().
 */
97 blend_colorburn(ir_variable
*src
, ir_variable
*dst
)
99 void *mem_ctx
= ralloc_parent(src
);
103 * 1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
104 * 0, if Cd < 1 and Cs <= 0
106 return csel(gequal(dst
, imm3(1)), imm3(1),
107 csel(lequal(src
, imm3(0)), imm3(0),
108 sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst
), src
)))));
/* Hard-light blend function.
 *
 * Uses the same two candidate expressions as blend_overlay, but the csel()
 * selector tests the SOURCE colour:
 *   rule_1 = 2 * src * dst              -- chosen where src <= 0.5
 *   rule_2                              -- chosen otherwise
 *
 * mem_ctx comes from ralloc_parent(src) and is required by imm3().
 *
 * NOTE(review): the declaration of rule_2 is not visible in this view; the
 * expression 1 - 2*(1-src)*(1-dst) that follows rule_1 is presumably its
 * initialiser.
 */
112 blend_hardlight(ir_variable
*src
, ir_variable
*dst
)
114 void *mem_ctx
= ralloc_parent(src
);
116 /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
117 * 1-2*(1-Cs)*(1-Cd), otherwise
119 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
121 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
122 return csel(lequal(src
, imm3(0.5f
)), rule_1
, rule_2
);
/* Soft-light blend function.
 *
 * The result is built as  dst + (2*src - 1) * factor,  where factor is
 * chosen by a nested csel():
 *   factor_1 = dst * (1 - dst)                      where src <= 0.5
 *   factor_2 = dst * ((16*dst - 12)*dst + 3)        else where dst <= 0.25
 *   factor_3 = sqrt(dst) - dst                      otherwise
 *
 * NOTE(review): the "simplified" form in the body's comment lists the first
 * case as Cd*Cd-Cd, which has the opposite sign to factor_1 as coded
 * (Cd*(1-Cd) = Cd-Cd*Cd).  The code agrees with the unsimplified formula
 * quoted just above it, Cd-(1-2*Cs)*Cd*(1-Cd), so it is the comment text,
 * not the code, that looks wrong.
 *
 * mem_ctx comes from ralloc_parent(src) and is required by imm3().
 */
126 blend_softlight(ir_variable
*src
, ir_variable
*dst
)
128 void *mem_ctx
= ralloc_parent(src
);
131 * Cd-(1-2*Cs)*Cd*(1-Cd),
133 * Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
134 * if Cs > 0.5 and Cd <= 0.25
135 * Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
136 * if Cs > 0.5 and Cd > 0.25
138 * We can simplify this to
140 * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
141 * g(Cs,Cd) = Cd*Cd-Cd if Cs <= 0.5
142 * Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
143 * sqrt(Cd)-Cd, otherwise
145 ir_rvalue
*factor_1
= mul(dst
, sub(imm3(1), dst
));
146 ir_rvalue
*factor_2
=
147 mul(dst
, add(mul(sub(mul(imm3(16), dst
), imm3(12)), dst
), imm3(3)));
148 ir_rvalue
*factor_3
= sub(sqrt(dst
), dst
);
149 ir_rvalue
*factor
= csel(lequal(src
, imm3(0.5f
)), factor_1
,
150 csel(lequal(dst
, imm3(0.25f
)),
151 factor_2
, factor_3
));
152 return add(dst
, mul(sub(mul(imm3(2), src
), imm3(1)), factor
));
/* Difference blend function: returns abs(dst - src). */
156 blend_difference(ir_variable
*src
, ir_variable
*dst
)
158 return abs(sub(dst
, src
));
/* Exclusion blend function: returns src + (dst - 2 * src * dst).
 *
 * mem_ctx comes from ralloc_parent(src) and is required by imm3().
 */
162 blend_exclusion(ir_variable
*src
, ir_variable
*dst
)
164 void *mem_ctx
= ralloc_parent(src
);
166 return add(src
, sub(dst
, mul(imm3(2), mul(src
, dst
))));
/* Built as min2(min2(v.x, v.y), v.z) using the swizzle_x/y/z helpers. */
169 /* Return the minimum of a vec3's components */
171 minv3(ir_variable
*v
)
173 return min2(min2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
/* Built as max2(max2(v.x, v.y), v.z) using the swizzle_x/y/z helpers. */
176 /* Return the maximum of a vec3's components */
178 maxv3(ir_variable
*v
)
180 return max2(max2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
/* Luminosity of an RGB colour.
 *
 * Returns dot(c, K), where K is a vec3 ir_constant constructed from the
 * local `data` and allocated from ralloc_parent(c).
 *
 * NOTE(review): the statements that fill `data` are not visible in this
 * view; per the inline comment the weights are presumably
 * (0.30, 0.59, 0.11) -- confirm against the full file.
 */
184 lumv3(ir_variable
*c
)
186 ir_constant_data data
;
191 void *mem_ctx
= ralloc_parent(c
);
193 /* dot(c, vec3(0.30, 0.59, 0.11)) */
194 return dot(c
, new(mem_ctx
) ir_constant(glsl_type::vec3_type
, &data
));
/* Saturation of an RGB colour: returns maxv3(c) - minv3(c). */
198 satv3(ir_variable
*c
)
200 return sub(maxv3(c
), minv3(c
));
203 /* Take the base RGB color <cbase> and override its luminosity with that
204 * of the RGB color <clum>.
206 * This follows the equations given in the ES 3.2 (June 15th, 2016)
207 * specification. Revision 16 of GL_KHR_blend_equation_advanced and
208 * revision 9 of GL_NV_blend_equation_advanced specify a different set
209 * of equations. Older revisions match ES 3.2's text, and dEQP expects
210 * the ES 3.2 rules implemented here.
/* Emits, through factory `f`, the instructions that write `color`:
 *
 *   1. color = cbase + (lumv3(clum) - lumv3(cbase))
 *   2. llum / mincol / maxcol temporaries receive lumv3(color),
 *      minv3(color) and maxv3(color).
 *   3. An if/else-if re-scales color around llum:
 *        if mincol < 0:       color = llum + ((color - llum) * llum)
 *                                            / (llum - mincol)
 *        else if maxcol > 1:  color = llum + ((color - llum) * <operand>)
 *                                            / (maxcol - llum)
 *
 * mem_ctx is taken from f->mem_ctx because imm1() needs it in scope.
 *
 * NOTE(review): only the first parameter (`ir_factory *f`) is visible in
 * this view.  Judging from the body and the call sites
 * (set_lum(f, color, color, clum) and
 * set_lum(&casefactory, factor, src_rgb, dst_rgb)) the remaining parameters
 * are presumably `color`, `cbase`, `clum` in that order -- confirm.  The
 * second multiplicand of the maxcol > 1 branch is likewise on a line that
 * is not visible here.
 */
213 set_lum(ir_factory
*f
,
218 void *mem_ctx
= f
->mem_ctx
;
219 f
->emit(assign(color
, add(cbase
, sub(lumv3(clum
), lumv3(cbase
)))));
221 ir_variable
*llum
= f
->make_temp(glsl_type::float_type
, "__blend_lum");
222 ir_variable
*mincol
= f
->make_temp(glsl_type::float_type
, "__blend_mincol");
223 ir_variable
*maxcol
= f
->make_temp(glsl_type::float_type
, "__blend_maxcol");
225 f
->emit(assign(llum
, lumv3(color
)));
226 f
->emit(assign(mincol
, minv3(color
)));
227 f
->emit(assign(maxcol
, maxv3(color
)));
229 f
->emit(if_tree(less(mincol
, imm1(0)),
230 assign(color
, add(llum
, div(mul(sub(color
, llum
), llum
),
231 sub(llum
, mincol
)))),
232 if_tree(greater(maxcol
, imm1(1)),
233 assign(color
, add(llum
, div(mul(sub(color
, llum
),
235 sub(maxcol
, llum
)))))));
239 /* Take the base RGB color <cbase> and override its saturation with
240 * that of the RGB color <csat>. Then override the luminosity of the
241 * result with that of the RGB color <clum>.
/* Emits, through factory `f`, the instructions that write `color`:
 *
 *   sbase (temporary) = satv3(cbase)
 *   if sbase > 0:  color = (cbase - minv3(cbase)) * satv3(csat) / sbase
 *   else:          color = 0
 *
 * and then calls set_lum(f, color, color, clum) so the value just written
 * becomes the base colour of the luminosity step.
 *
 * The sbase > 0 test keeps the division by sbase out of the path taken when
 * the base colour has zero saturation.
 *
 * NOTE(review): only the first parameter (`ir_factory *f`) is visible in
 * this view.  From the body and the call sites the others are presumably
 * `color`, `cbase`, `csat`, `clum` in that order -- confirm.
 */
244 set_lum_sat(ir_factory
*f
,
250 void *mem_ctx
= f
->mem_ctx
;
252 ir_rvalue
*minbase
= minv3(cbase
);
253 ir_rvalue
*ssat
= satv3(csat
);
255 ir_variable
*sbase
= f
->make_temp(glsl_type::float_type
, "__blend_sbase");
256 f
->emit(assign(sbase
, satv3(cbase
)));
258 /* Equivalent (modulo rounding errors) to setting the
259 * smallest (R,G,B) component to 0, the largest to <ssat>,
260 * and interpolating the "middle" component based on its
261 * original value relative to the smallest/largest.
263 f
->emit(if_tree(greater(sbase
, imm1(0)),
264 assign(color
, div(mul(sub(cbase
, minbase
), ssat
), sbase
)),
265 assign(color
, imm3(0))));
266 set_lum(f
, color
, color
, clum
);
/* Returns the IR comparison  mode == unsigned(q).
 *
 * The constant is allocated from ralloc_parent(mode); q is converted to
 * unsigned before the ir_constant is constructed.
 */
270 is_mode(ir_variable
*mode
, enum gl_advanced_blend_mode q
)
272 return equal(mode
, new(ralloc_parent(mode
)) ir_constant(unsigned(q
)));
/* Emits the IR that computes the blended colour and writes it to a new
 * vec4 temporary, "__blend_result".
 *
 * The factory `f` is received BY VALUE, so re-pointing f.instructions below
 * only affects this function's copy.
 *
 * Visible steps:
 *   - copy blend_src into the "__blend_src" temporary;
 *   - build  if (mode == BLEND_NONE) result = src;  and redirect f into the
 *     else-branch, so everything emitted afterwards runs only when blending;
 *   - derive dst_rgb/dst_alpha from `fb` and src_rgb/src_alpha from `src`
 *     (rgb is 0 when alpha == 0, otherwise rgb divided by alpha, with a
 *     csel() special case for components equal to alpha);
 *   - for each bit taken from blend_qualifiers via u_bit_scan, emit
 *     if (mode == choice) and compute that mode's `factor`, then continue
 *     the chain in the else-branch;
 *   - combine:  rgb = factor*p0 + src_rgb*p1 + dst_rgb*p2,
 *               a   = p0 + p1 + p2   (WRITEMASK_W),
 *     with p0 = As*Ad, p1 = As*(1-Ad), p2 = Ad*(1-As).
 *
 * NOTE(review): several lines are not visible in this view -- the `mode`
 * and `fb` parameters, the loop and switch headers, the first five case
 * labels, the break statements, the operand selected by the csel() calls,
 * the write mask of the RGB assignment, and the return statement.  The
 * summary above covers only what the visible lines establish.
 */
276 calc_blend_result(ir_factory f
,
279 ir_rvalue
*blend_src
,
280 GLbitfield blend_qualifiers
)
282 void *mem_ctx
= f
.mem_ctx
;
283 ir_variable
*result
= f
.make_temp(glsl_type::vec4_type
, "__blend_result");
285 /* Save blend_src to a temporary so we can reference it multiple times. */
286 ir_variable
*src
= f
.make_temp(glsl_type::vec4_type
, "__blend_src");
287 f
.emit(assign(src
, blend_src
));
289 /* If we're not doing advanced blending, just write the original value. */
290 ir_if
*if_blending
= new(mem_ctx
) ir_if(is_mode(mode
, BLEND_NONE
));
292 if_blending
->then_instructions
.push_tail(assign(result
, src
));
294 f
.instructions
= &if_blending
->else_instructions
;
297 * (0, 0, 0), if As == 0
298 * (Rs/As, Gs/As, Bs/As), otherwise
300 ir_variable
*src_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_src_rgb");
301 ir_variable
*src_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_src_a");
304 * (0, 0, 0), if Ad == 0
305 * (Rd/Ad, Gd/Ad, Bd/Ad), otherwise
307 ir_variable
*dst_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_dst_rgb");
308 ir_variable
*dst_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_dst_a");
310 f
.emit(assign(dst_alpha
, swizzle_w(fb
)));
311 f
.emit(if_tree(equal(dst_alpha
, imm1(0)),
312 assign(dst_rgb
, imm3(0)),
313 assign(dst_rgb
, csel(equal(swizzle_xyz(fb
),
314 swizzle(fb
, SWIZZLE_WWWW
, 3)),
316 div(swizzle_xyz(fb
), dst_alpha
)))));
318 f
.emit(assign(src_alpha
, swizzle_w(src
)));
319 f
.emit(if_tree(equal(src_alpha
, imm1(0)),
320 assign(src_rgb
, imm3(0)),
321 assign(src_rgb
, csel(equal(swizzle_xyz(src
),
322 swizzle(src
, SWIZZLE_WWWW
, 3)),
324 div(swizzle_xyz(src
), src_alpha
)))));
326 ir_variable
*factor
= f
.make_temp(glsl_type::vec3_type
, "__blend_factor");
/* casefactory starts as a copy of f.  Each mode test below is emitted
 * through it, after which it is re-pointed first at that test's
 * then-branch (to hold the mode's computation) and finally at its
 * else-branch, so successive tests nest as an if / else-if chain.
 */
328 ir_factory casefactory
= f
;
330 unsigned choices
= blend_qualifiers
;
332 enum gl_advanced_blend_mode choice
= (enum gl_advanced_blend_mode
)
333 (1u << u_bit_scan(&choices
));
335 ir_if
*iff
= new(mem_ctx
) ir_if(is_mode(mode
, choice
));
336 casefactory
.emit(iff
);
337 casefactory
.instructions
= &iff
->then_instructions
;
/* The separable modes return an expression in `val`; the HSL modes
 * instead emit their assignments to `factor` directly via
 * set_lum / set_lum_sat and leave `val` NULL.
 * NOTE(review): the assign(factor, val) further down is presumably guarded
 * by a NULL check on a line not visible here -- confirm.
 */
339 ir_rvalue
*val
= NULL
;
343 val
= blend_multiply(src_rgb
, dst_rgb
);
346 val
= blend_screen(src_rgb
, dst_rgb
);
349 val
= blend_overlay(src_rgb
, dst_rgb
);
352 val
= blend_darken(src_rgb
, dst_rgb
);
355 val
= blend_lighten(src_rgb
, dst_rgb
);
357 case BLEND_COLORDODGE
:
358 val
= blend_colordodge(src_rgb
, dst_rgb
);
360 case BLEND_COLORBURN
:
361 val
= blend_colorburn(src_rgb
, dst_rgb
);
363 case BLEND_HARDLIGHT
:
364 val
= blend_hardlight(src_rgb
, dst_rgb
);
366 case BLEND_SOFTLIGHT
:
367 val
= blend_softlight(src_rgb
, dst_rgb
);
369 case BLEND_DIFFERENCE
:
370 val
= blend_difference(src_rgb
, dst_rgb
);
372 case BLEND_EXCLUSION
:
373 val
= blend_exclusion(src_rgb
, dst_rgb
);
376 set_lum_sat(&casefactory
, factor
, src_rgb
, dst_rgb
, dst_rgb
);
378 case BLEND_HSL_SATURATION
:
379 set_lum_sat(&casefactory
, factor
, dst_rgb
, src_rgb
, dst_rgb
);
381 case BLEND_HSL_COLOR
:
382 set_lum(&casefactory
, factor
, src_rgb
, dst_rgb
);
384 case BLEND_HSL_LUMINOSITY
:
385 set_lum(&casefactory
, factor
, dst_rgb
, src_rgb
);
389 unreachable("not real cases");
393 casefactory
.emit(assign(factor
, val
));
395 casefactory
.instructions
= &iff
->else_instructions
;
399 * p1(As,Ad) = As*(1-Ad)
400 * p2(As,Ad) = Ad*(1-As)
402 ir_variable
*p0
= f
.make_temp(glsl_type::float_type
, "__blend_p0");
403 ir_variable
*p1
= f
.make_temp(glsl_type::float_type
, "__blend_p1");
404 ir_variable
*p2
= f
.make_temp(glsl_type::float_type
, "__blend_p2");
406 f
.emit(assign(p0
, mul(src_alpha
, dst_alpha
)));
407 f
.emit(assign(p1
, mul(src_alpha
, sub(imm1(1), dst_alpha
))));
408 f
.emit(assign(p2
, mul(dst_alpha
, sub(imm1(1), src_alpha
))));
410 /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
411 * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
412 * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
413 * A = X*p0(As,Ad) + Y*p1(As,Ad) + Z*p2(As,Ad)
415 * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
417 * In vector form, this is:
418 * RGB = factor * p0 + Cs * p1 + Cd * p2
421 f
.emit(assign(result
,
422 add(add(mul(factor
, p0
), mul(src_rgb
, p1
)), mul(dst_rgb
, p2
)),
424 f
.emit(assign(result
, add(add(p0
, p1
), p2
), WRITEMASK_W
));
430 * Dereference var, or var[0] if it's an array.
/* Builds a dereference of `var`, allocated from ralloc_parent(var).
 *
 * A plain ir_dereference_variable is created first; when its type is an
 * array it is wrapped in an ir_dereference_array with constant index 0, so
 * the caller always gets a non-array value.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably `val` is returned.
 */
432 static ir_dereference
*
433 deref_output(ir_variable
*var
)
435 void *mem_ctx
= ralloc_parent(var
);
437 ir_dereference
*val
= new(mem_ctx
) ir_dereference_variable(var
);
438 if (val
->type
->is_array()) {
439 ir_constant
*index
= new(mem_ctx
) ir_constant(0);
440 val
= new(mem_ctx
) ir_dereference_array(val
, index
);
/* Locates the signature of main() in a linked shader.
 *
 * Walks sh->ir, and for an ir_function whose name is "main" asks for the
 * signature matching an empty parameter list.  Asserts that a signature was
 * found.
 *
 * NOTE(review): the loop exit and the return statement are not visible in
 * this view; presumably `sig` is returned.
 */
446 static ir_function_signature
*
447 get_main(gl_linked_shader
*sh
)
449 ir_function_signature
*sig
= NULL
;
450 /* We can't use _mesa_get_main_function_signature() because we don't
451 * have a symbol table at this point. Just go find main() by hand.
453 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
454 ir_function
*f
= ir
->as_function();
455 if (f
&& strcmp(f
->name
, "main") == 0) {
456 exec_list void_parameters
;
457 sig
= f
->matching_signature(NULL
, &void_parameters
, false);
461 assert(sig
!= NULL
); /* main() must exist */
/* Entry point of the pass: rewrites a linked shader so that the advanced
 * blend equation is evaluated in the shader itself.
 *
 * Visible steps, in order:
 *   1. Test sh->Program->sh.fs.BlendSupport == 0 (the statement taken in
 *      that case is not visible here; presumably an early return).
 *   2. Run do_lower_jumps() on sh->ir so main() has a single exit point.
 *   3. Create `fb`, a hidden, read-only vec4 at FRAG_RESULT_DATA0 flagged
 *      fb_fetch_output, with memory_coherent taken from `coherent`.
 *   4. Create `mode`, a hidden uint named "gl_AdvancedBlendModeMESA" with
 *      one state slot whose tokens are
 *      { STATE_INTERNAL, STATE_ADVANCED_BLENDING_MODE, 0, ... } and whose
 *      swizzle is SWIZZLE_XXXX.
 *   5. Push both variables at the head of sh->ir.
 *   6. Collect shader outputs located at FRAG_RESULT_DATA0 or
 *      FRAG_RESULT_COLOR into outputs[location_frac + i], one entry per
 *      component the variable covers.
 *   7. Build blend_source: the whole of outputs[0] when it is 4 components
 *      wide, otherwise a vec4 assembled per component, using constants
 *      0, 0, 0, 1 for the fallback values.
 *   8. Call calc_blend_result() with a factory appending to main()'s body.
 *   9. Copy each component of the result back into the original outputs,
 *      then run validate_ir_tree().
 *
 * NOTE(review): outputs[] has 4 entries and is indexed by
 * location_frac + i with no bound check in the visible code; presumably
 * earlier validation guarantees location_frac + vector_elements <= 4 --
 * confirm.  Several lines (remaining ir_variable constructor arguments,
 * `continue`, the guards on NULL outputs[i], the assignment target of the
 * ir_quadop_vector expression, and the write masks of the final copies) are
 * not visible in this view.
 */
466 lower_blend_equation_advanced(struct gl_linked_shader
*sh
, bool coherent
)
468 if (sh
->Program
->sh
.fs
.BlendSupport
== 0)
471 /* Lower early returns in main() so there's a single exit point
472 * where we can insert our lowering code.
474 do_lower_jumps(sh
->ir
, false, false, true, false, false);
476 void *mem_ctx
= ralloc_parent(sh
->ir
);
478 ir_variable
*fb
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
481 fb
->data
.location
= FRAG_RESULT_DATA0
;
482 fb
->data
.read_only
= 1;
483 fb
->data
.fb_fetch_output
= 1;
484 fb
->data
.memory_coherent
= coherent
;
485 fb
->data
.how_declared
= ir_var_hidden
;
487 ir_variable
*mode
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
488 "gl_AdvancedBlendModeMESA",
490 mode
->data
.how_declared
= ir_var_hidden
;
491 mode
->allocate_state_slots(1);
492 ir_state_slot
*slot0
= &mode
->get_state_slots()[0];
493 slot0
->swizzle
= SWIZZLE_XXXX
;
494 slot0
->tokens
[0] = STATE_INTERNAL
;
495 slot0
->tokens
[1] = STATE_ADVANCED_BLENDING_MODE
;
496 for (int i
= 2; i
< STATE_LENGTH
; i
++)
497 slot0
->tokens
[i
] = 0;
499 sh
->ir
->push_head(fb
);
500 sh
->ir
->push_head(mode
);
502 /* Gather any output variables referring to render target 0.
504 * ARB_enhanced_layouts irritatingly allows the shader to specify
505 * multiple output variables for the same render target, each of
506 * which writes a subset of the components, starting at location_frac.
507 * The variables can't overlap, thankfully.
509 ir_variable
*outputs
[4] = { NULL
, NULL
, NULL
, NULL
};
510 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
511 ir_variable
*var
= ir
->as_variable();
512 if (!var
|| var
->data
.mode
!= ir_var_shader_out
)
515 if (var
->data
.location
== FRAG_RESULT_DATA0
||
516 var
->data
.location
== FRAG_RESULT_COLOR
) {
517 const int components
= var
->type
->without_array()->vector_elements
;
519 for (int i
= 0; i
< components
; i
++) {
520 outputs
[var
->data
.location_frac
+ i
] = var
;
525 /* Combine values written to outputs into a single RGBA blend source.
526 * We assign <0, 0, 0, 1> to any components with no corresponding output.
528 ir_rvalue
*blend_source
;
529 if (outputs
[0] && outputs
[0]->type
->without_array()->vector_elements
== 4) {
530 blend_source
= deref_output(outputs
[0]);
532 ir_rvalue
*blend_comps
[4];
533 for (int i
= 0; i
< 4; i
++) {
534 ir_variable
*var
= outputs
[i
];
536 blend_comps
[i
] = swizzle(deref_output(outputs
[i
]),
537 i
- outputs
[i
]->data
.location_frac
, 1);
539 blend_comps
[i
] = new(mem_ctx
) ir_constant(i
< 3 ? 0.0f
: 1.0f
);
544 new(mem_ctx
) ir_expression(ir_quadop_vector
, glsl_type::vec4_type
,
545 blend_comps
[0], blend_comps
[1],
546 blend_comps
[2], blend_comps
[3]);
549 ir_function_signature
*main
= get_main(sh
);
550 ir_factory
f(&main
->body
, mem_ctx
);
552 ir_variable
*result_dest
=
553 calc_blend_result(f
, mode
, fb
, blend_source
,
554 sh
->Program
->sh
.fs
.BlendSupport
);
556 /* Copy the result back to the original values. It would be simpler
557 * to demote the program's output variables, and create a new vec4
558 * output for our result, but this pass runs before we create the
559 * ARB_program_interface_query resource list. So we have to leave
560 * the original outputs in place and use them.
562 for (int i
= 0; i
< 4; i
++) {
566 f
.emit(assign(deref_output(outputs
[i
]), swizzle(result_dest
, i
, 1),
570 validate_ir_tree(sh
->ir
);