/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
25 #include "ir_builder.h"
26 #include "ir_optimization.h"
27 #include "ir_hierarchical_visitor.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_statevars.h"
30 #include "util/bitscan.h"
32 using namespace ir_builder
;
34 #define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
35 #define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)
38 blend_multiply(ir_variable
*src
, ir_variable
*dst
)
40 /* f(Cs,Cd) = Cs*Cd */
45 blend_screen(ir_variable
*src
, ir_variable
*dst
)
47 /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
48 return sub(add(src
, dst
), mul(src
, dst
));
52 blend_overlay(ir_variable
*src
, ir_variable
*dst
)
54 void *mem_ctx
= ralloc_parent(src
);
56 /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
57 * 1-2*(1-Cs)*(1-Cd), otherwise
59 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
61 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
62 return csel(lequal(dst
, imm3(0.5f
)), rule_1
, rule_2
);
66 blend_darken(ir_variable
*src
, ir_variable
*dst
)
68 /* f(Cs,Cd) = min(Cs,Cd) */
69 return min2(src
, dst
);
73 blend_lighten(ir_variable
*src
, ir_variable
*dst
)
75 /* f(Cs,Cd) = max(Cs,Cd) */
76 return max2(src
, dst
);
80 blend_colordodge(ir_variable
*src
, ir_variable
*dst
)
82 void *mem_ctx
= ralloc_parent(src
);
86 * min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
87 * 1, if Cd > 0 and Cs >= 1
89 return csel(lequal(dst
, imm3(0)), imm3(0),
90 csel(gequal(src
, imm3(1)), imm3(1),
91 min2(imm3(1), div(dst
, sub(imm3(1), src
)))));
95 blend_colorburn(ir_variable
*src
, ir_variable
*dst
)
97 void *mem_ctx
= ralloc_parent(src
);
101 * 1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
102 * 0, if Cd < 1 and Cs <= 0
104 return csel(gequal(dst
, imm3(1)), imm3(1),
105 csel(lequal(src
, imm3(0)), imm3(0),
106 sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst
), src
)))));
110 blend_hardlight(ir_variable
*src
, ir_variable
*dst
)
112 void *mem_ctx
= ralloc_parent(src
);
114 /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
115 * 1-2*(1-Cs)*(1-Cd), otherwise
117 ir_rvalue
*rule_1
= mul(imm3(2), mul(src
, dst
));
119 sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src
), sub(imm3(1), dst
))));
120 return csel(lequal(src
, imm3(0.5f
)), rule_1
, rule_2
);
124 blend_softlight(ir_variable
*src
, ir_variable
*dst
)
126 void *mem_ctx
= ralloc_parent(src
);
129 * Cd-(1-2*Cs)*Cd*(1-Cd),
131 * Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
132 * if Cs > 0.5 and Cd <= 0.25
133 * Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
134 * if Cs > 0.5 and Cd > 0.25
136 * We can simplify this to
138 * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
139 * g(Cs,Cd) = Cd*Cd-Cd if Cs <= 0.5
140 * Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
141 * sqrt(Cd)-Cd, otherwise
143 ir_rvalue
*factor_1
= mul(dst
, sub(imm3(1), dst
));
144 ir_rvalue
*factor_2
=
145 mul(dst
, add(mul(sub(mul(imm3(16), dst
), imm3(12)), dst
), imm3(3)));
146 ir_rvalue
*factor_3
= sub(sqrt(dst
), dst
);
147 ir_rvalue
*factor
= csel(lequal(src
, imm3(0.5f
)), factor_1
,
148 csel(lequal(dst
, imm3(0.25f
)),
149 factor_2
, factor_3
));
150 return add(dst
, mul(sub(mul(imm3(2), src
), imm3(1)), factor
));
154 blend_difference(ir_variable
*src
, ir_variable
*dst
)
156 return abs(sub(dst
, src
));
160 blend_exclusion(ir_variable
*src
, ir_variable
*dst
)
162 void *mem_ctx
= ralloc_parent(src
);
164 return add(src
, sub(dst
, mul(imm3(2), mul(src
, dst
))));
167 /* Return the minimum of a vec3's components */
169 minv3(ir_variable
*v
)
171 return min2(min2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
174 /* Return the maximum of a vec3's components */
176 maxv3(ir_variable
*v
)
178 return max2(max2(swizzle_x(v
), swizzle_y(v
)), swizzle_z(v
));
182 lumv3(ir_variable
*c
)
184 ir_constant_data data
;
189 void *mem_ctx
= ralloc_parent(c
);
191 /* dot(c, vec3(0.30, 0.59, 0.11)) */
192 return dot(c
, new(mem_ctx
) ir_constant(glsl_type::vec3_type
, &data
));
196 satv3(ir_variable
*c
)
198 return sub(maxv3(c
), minv3(c
));
201 /* Take the base RGB color <cbase> and override its luminosity with that
202 * of the RGB color <clum>.
204 * This follows the equations given in the ES 3.2 (June 15th, 2016)
205 * specification. Revision 16 of GL_KHR_blend_equation_advanced and
206 * revision 9 of GL_NV_blend_equation_advanced specify a different set
207 * of equations. Older revisions match ES 3.2's text, and dEQP expects
208 * the ES 3.2 rules implemented here.
211 set_lum(ir_factory
*f
,
216 void *mem_ctx
= f
->mem_ctx
;
217 f
->emit(assign(color
, add(cbase
, sub(lumv3(clum
), lumv3(cbase
)))));
219 ir_variable
*llum
= f
->make_temp(glsl_type::float_type
, "__blend_lum");
220 ir_variable
*mincol
= f
->make_temp(glsl_type::float_type
, "__blend_mincol");
221 ir_variable
*maxcol
= f
->make_temp(glsl_type::float_type
, "__blend_maxcol");
223 f
->emit(assign(llum
, lumv3(color
)));
224 f
->emit(assign(mincol
, minv3(color
)));
225 f
->emit(assign(maxcol
, maxv3(color
)));
227 f
->emit(if_tree(less(mincol
, imm1(0)),
228 assign(color
, add(llum
, div(mul(sub(color
, llum
), llum
),
229 sub(llum
, mincol
)))),
230 if_tree(greater(maxcol
, imm1(1)),
231 assign(color
, add(llum
, div(mul(sub(color
, llum
),
233 sub(maxcol
, llum
)))))));
237 /* Take the base RGB color <cbase> and override its saturation with
238 * that of the RGB color <csat>. The override the luminosity of the
239 * result with that of the RGB color <clum>.
242 set_lum_sat(ir_factory
*f
,
248 void *mem_ctx
= f
->mem_ctx
;
250 ir_rvalue
*minbase
= minv3(cbase
);
251 ir_rvalue
*ssat
= satv3(csat
);
253 ir_variable
*sbase
= f
->make_temp(glsl_type::float_type
, "__blend_sbase");
254 f
->emit(assign(sbase
, satv3(cbase
)));
256 /* Equivalent (modulo rounding errors) to setting the
257 * smallest (R,G,B) component to 0, the largest to <ssat>,
258 * and interpolating the "middle" component based on its
259 * original value relative to the smallest/largest.
261 f
->emit(if_tree(greater(sbase
, imm1(0)),
262 assign(color
, div(mul(sub(cbase
, minbase
), ssat
), sbase
)),
263 assign(color
, imm3(0))));
264 set_lum(f
, color
, color
, clum
);
268 is_mode(ir_variable
*mode
, enum gl_advanced_blend_mode q
)
270 return equal(mode
, new(ralloc_parent(mode
)) ir_constant(unsigned(q
)));
274 calc_blend_result(ir_factory f
,
277 ir_rvalue
*blend_src
,
278 GLbitfield blend_qualifiers
)
280 void *mem_ctx
= f
.mem_ctx
;
281 ir_variable
*result
= f
.make_temp(glsl_type::vec4_type
, "__blend_result");
283 /* Save blend_src to a temporary so we can reference it multiple times. */
284 ir_variable
*src
= f
.make_temp(glsl_type::vec4_type
, "__blend_src");
285 f
.emit(assign(src
, blend_src
));
287 /* If we're not doing advanced blending, just write the original value. */
288 ir_if
*if_blending
= new(mem_ctx
) ir_if(is_mode(mode
, BLEND_NONE
));
290 if_blending
->then_instructions
.push_tail(assign(result
, src
));
292 f
.instructions
= &if_blending
->else_instructions
;
295 * (0, 0, 0), if As == 0
296 * (Rs/As, Gs/As, Bs/As), otherwise
298 ir_variable
*src_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_src_rgb");
299 ir_variable
*src_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_src_a");
302 * (0, 0, 0), if Ad == 0
303 * (Rd/Ad, Gd/Ad, Bd/Ad), otherwise
305 ir_variable
*dst_rgb
= f
.make_temp(glsl_type::vec3_type
, "__blend_dst_rgb");
306 ir_variable
*dst_alpha
= f
.make_temp(glsl_type::float_type
, "__blend_dst_a");
308 f
.emit(assign(dst_alpha
, swizzle_w(fb
)));
309 f
.emit(if_tree(equal(dst_alpha
, imm1(0)),
310 assign(dst_rgb
, imm3(0)),
311 assign(dst_rgb
, div(swizzle_xyz(fb
), dst_alpha
))));
313 f
.emit(assign(src_alpha
, swizzle_w(src
)));
314 f
.emit(if_tree(equal(src_alpha
, imm1(0)),
315 assign(src_rgb
, imm3(0)),
316 assign(src_rgb
, div(swizzle_xyz(src
), src_alpha
))));
318 ir_variable
*factor
= f
.make_temp(glsl_type::vec3_type
, "__blend_factor");
320 ir_factory casefactory
= f
;
322 unsigned choices
= blend_qualifiers
;
324 enum gl_advanced_blend_mode choice
= (enum gl_advanced_blend_mode
)
325 (1u << u_bit_scan(&choices
));
327 ir_if
*iff
= new(mem_ctx
) ir_if(is_mode(mode
, choice
));
328 casefactory
.emit(iff
);
329 casefactory
.instructions
= &iff
->then_instructions
;
331 ir_rvalue
*val
= NULL
;
335 val
= blend_multiply(src_rgb
, dst_rgb
);
338 val
= blend_screen(src_rgb
, dst_rgb
);
341 val
= blend_overlay(src_rgb
, dst_rgb
);
344 val
= blend_darken(src_rgb
, dst_rgb
);
347 val
= blend_lighten(src_rgb
, dst_rgb
);
349 case BLEND_COLORDODGE
:
350 val
= blend_colordodge(src_rgb
, dst_rgb
);
352 case BLEND_COLORBURN
:
353 val
= blend_colorburn(src_rgb
, dst_rgb
);
355 case BLEND_HARDLIGHT
:
356 val
= blend_hardlight(src_rgb
, dst_rgb
);
358 case BLEND_SOFTLIGHT
:
359 val
= blend_softlight(src_rgb
, dst_rgb
);
361 case BLEND_DIFFERENCE
:
362 val
= blend_difference(src_rgb
, dst_rgb
);
364 case BLEND_EXCLUSION
:
365 val
= blend_exclusion(src_rgb
, dst_rgb
);
368 set_lum_sat(&casefactory
, factor
, src_rgb
, dst_rgb
, dst_rgb
);
370 case BLEND_HSL_SATURATION
:
371 set_lum_sat(&casefactory
, factor
, dst_rgb
, src_rgb
, dst_rgb
);
373 case BLEND_HSL_COLOR
:
374 set_lum(&casefactory
, factor
, src_rgb
, dst_rgb
);
376 case BLEND_HSL_LUMINOSITY
:
377 set_lum(&casefactory
, factor
, dst_rgb
, src_rgb
);
381 unreachable("not real cases");
385 casefactory
.emit(assign(factor
, val
));
387 casefactory
.instructions
= &iff
->else_instructions
;
391 * p1(As,Ad) = As*(1-Ad)
392 * p2(As,Ad) = Ad*(1-As)
394 ir_variable
*p0
= f
.make_temp(glsl_type::float_type
, "__blend_p0");
395 ir_variable
*p1
= f
.make_temp(glsl_type::float_type
, "__blend_p1");
396 ir_variable
*p2
= f
.make_temp(glsl_type::float_type
, "__blend_p2");
398 f
.emit(assign(p0
, mul(src_alpha
, dst_alpha
)));
399 f
.emit(assign(p1
, mul(src_alpha
, sub(imm1(1), dst_alpha
))));
400 f
.emit(assign(p2
, mul(dst_alpha
, sub(imm1(1), src_alpha
))));
402 /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
403 * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
404 * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
405 * A = X*p0(As,Ad) + Y*p1(As,Ad) + Z*p2(As,Ad)
407 * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
409 * In vector form, this is:
410 * RGB = factor * p0 + Cs * p1 + Cd * p2
413 f
.emit(assign(result
,
414 add(add(mul(factor
, p0
), mul(src_rgb
, p1
)), mul(dst_rgb
, p2
)),
416 f
.emit(assign(result
, add(add(p0
, p1
), p2
), WRITEMASK_W
));
422 * Dereference var, or var[0] if it's an array.
424 static ir_dereference
*
425 deref_output(ir_variable
*var
)
427 void *mem_ctx
= ralloc_parent(var
);
429 ir_dereference
*val
= new(mem_ctx
) ir_dereference_variable(var
);
430 if (val
->type
->is_array()) {
431 ir_constant
*index
= new(mem_ctx
) ir_constant(0);
432 val
= new(mem_ctx
) ir_dereference_array(val
, index
);
438 static ir_function_signature
*
439 get_main(gl_linked_shader
*sh
)
441 ir_function_signature
*sig
= NULL
;
442 /* We can't use _mesa_get_main_function_signature() because we don't
443 * have a symbol table at this point. Just go find main() by hand.
445 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
446 ir_function
*f
= ir
->as_function();
447 if (f
&& strcmp(f
->name
, "main") == 0) {
448 exec_list void_parameters
;
449 sig
= f
->matching_signature(NULL
, &void_parameters
, false);
453 assert(sig
!= NULL
); /* main() must exist */
458 lower_blend_equation_advanced(struct gl_linked_shader
*sh
)
460 if (sh
->info
.BlendSupport
== 0)
463 /* Lower early returns in main() so there's a single exit point
464 * where we can insert our lowering code.
466 do_lower_jumps(sh
->ir
, false, false, true, false, false);
468 void *mem_ctx
= ralloc_parent(sh
->ir
);
470 ir_variable
*fb
= new(mem_ctx
) ir_variable(glsl_type::vec4_type
,
473 fb
->data
.location
= FRAG_RESULT_DATA0
;
474 fb
->data
.read_only
= 1;
475 fb
->data
.fb_fetch_output
= 1;
476 fb
->data
.how_declared
= ir_var_hidden
;
478 ir_variable
*mode
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
479 "gl_AdvancedBlendModeMESA",
481 mode
->data
.how_declared
= ir_var_hidden
;
482 mode
->allocate_state_slots(1);
483 ir_state_slot
*slot0
= &mode
->get_state_slots()[0];
484 slot0
->swizzle
= SWIZZLE_XXXX
;
485 slot0
->tokens
[0] = STATE_INTERNAL
;
486 slot0
->tokens
[1] = STATE_ADVANCED_BLENDING_MODE
;
487 for (int i
= 2; i
< STATE_LENGTH
; i
++)
488 slot0
->tokens
[i
] = 0;
490 sh
->ir
->push_head(fb
);
491 sh
->ir
->push_head(mode
);
493 /* Gather any output variables referring to render target 0.
495 * ARB_enhanced_layouts irritatingly allows the shader to specify
496 * multiple output variables for the same render target, each of
497 * which writes a subset of the components, starting at location_frac.
498 * The variables can't overlap, thankfully.
500 ir_variable
*outputs
[4] = { NULL
, NULL
, NULL
, NULL
};
501 foreach_in_list(ir_instruction
, ir
, sh
->ir
) {
502 ir_variable
*var
= ir
->as_variable();
503 if (!var
|| var
->data
.mode
!= ir_var_shader_out
)
506 if (var
->data
.location
== FRAG_RESULT_DATA0
||
507 var
->data
.location
== FRAG_RESULT_COLOR
) {
508 const int components
= var
->type
->without_array()->vector_elements
;
510 for (int i
= 0; i
< components
; i
++) {
511 outputs
[var
->data
.location_frac
+ i
] = var
;
516 /* Combine values written to outputs into a single RGBA blend source.
517 * We assign <0, 0, 0, 1> to any components with no corresponding output.
519 ir_rvalue
*blend_source
;
520 if (outputs
[0] && outputs
[0]->type
->without_array()->vector_elements
== 4) {
521 blend_source
= deref_output(outputs
[0]);
523 ir_rvalue
*blend_comps
[4];
524 for (int i
= 0; i
< 4; i
++) {
525 ir_variable
*var
= outputs
[i
];
527 blend_comps
[i
] = swizzle(deref_output(outputs
[i
]),
528 i
- outputs
[i
]->data
.location_frac
, 1);
530 blend_comps
[i
] = new(mem_ctx
) ir_constant(i
< 3 ? 0.0f
: 1.0f
);
535 new(mem_ctx
) ir_expression(ir_quadop_vector
, glsl_type::vec4_type
,
536 blend_comps
[0], blend_comps
[1],
537 blend_comps
[2], blend_comps
[3]);
540 ir_function_signature
*main
= get_main(sh
);
541 ir_factory
f(&main
->body
, mem_ctx
);
543 ir_variable
*result_dest
=
544 calc_blend_result(f
, mode
, fb
, blend_source
, sh
->info
.BlendSupport
);
546 /* Copy the result back to the original values. It would be simpler
547 * to demote the program's output variables, and create a new vec4
548 * output for our result, but this pass runs before we create the
549 * ARB_program_interface_query resource list. So we have to leave
550 * the original outputs in place and use them.
552 for (int i
= 0; i
< 4; i
++) {
556 f
.emit(assign(deref_output(outputs
[i
]), swizzle(result_dest
, i
, 1),
560 validate_ir_tree(sh
->ir
);