b31b3ce14c8c749e7873e76b43218b6ecef63101
[mesa.git] / src / gallium / drivers / panfrost / pan_blend_shaders.c
1 /*
2 * © Copyright 2018 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
#include <stdio.h>
#include "pan_blend_shaders.h"
#include "pan_util.h"
#include "midgard/midgard_compile.h"
#include "compiler/nir/nir_builder.h"
#include "nir/nir_lower_blend.h"
#include "gallium/auxiliary/util/u_blend.h"
#include "util/ralloc.h"
#include "util/u_memory.h"
33
34 /*
35 * Implements the command stream portion of programmatic blend shaders.
36 *
37 * On Midgard, common blending operations are accelerated by the fixed-function
38 * blending pipeline. Panfrost supports this fast path via the code in
39 * pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly
40 * simple modes present in ES2) require "blend shaders", a special internal
41 * shader type used for programmable blending.
42 *
43 * Blend shaders operate during the normal blending time, but they bypass the
44 * fixed-function blending pipeline and instead go straight to the Midgard
45 * shader cores. The shaders themselves are essentially just fragment shaders,
46 * making heavy use of uint8 arithmetic to manipulate RGB values for the
47 * framebuffer.
48 *
49 * As is typical with Midgard, shader binaries must be accompanied by
50 * information about the first tag (ORed with the bottom nibble of address,
51 * like usual) and work registers. Work register count is specified in the
 * blend descriptor, as well as in the corresponding fragment shader's work
53 * count. This suggests that blend shader invocation is tied to fragment shader
54 * execution.
55 *
56 * ---
57 *
58 * As for blend shaders, they use the standard ISA.
59 *
60 * The source pixel colour, including alpha, is preloaded into r0 as a vec4 of
61 * float32.
62 *
63 * The destination pixel colour must be loaded explicitly via load/store ops.
64 * TODO: Investigate.
65 *
66 * They use fragment shader writeout; however, instead of writing a vec4 of
 * float32 for RGBA encoding, we write out a vec4 of uint8, using 8-bit imov
68 * instead of 32-bit fmov. The net result is that r0 encodes a single uint32
69 * containing all four channels of the color. Accordingly, the blend shader
70 * epilogue has to scale all four channels by 255 and then type convert to a
71 * uint8.
72 *
73 * ---
74 *
75 * Blend shaders hardcode constants. Naively, this requires recompilation each
76 * time the blend color changes, which is a performance risk. Accordingly, we
77 * 'cheat' a bit: instead of loading the constant, we compile a shader with a
78 * dummy constant, exporting the offset to the immediate in the shader binary,
79 * storing this generic binary and metadata in the CSO itself at CSO create
80 * time.
81 *
82 * We then hot patch in the color into this shader at attachment / color change
83 * time, allowing for CSO create to be the only expensive operation
84 * (compilation).
85 */
86
87 static nir_lower_blend_options
88 nir_make_options(const struct pipe_blend_state *blend, unsigned nr_cbufs)
89 {
90 nir_lower_blend_options options;
91
92 for (unsigned i = 0; i < nr_cbufs; ++i) {
93 /* If blend is disabled, we just use replace mode */
94
95 nir_lower_blend_channel rgb = {
96 .func = BLEND_FUNC_ADD,
97 .src_factor = BLEND_FACTOR_ZERO,
98 .invert_src_factor = true,
99 .dst_factor = BLEND_FACTOR_ZERO,
100 .invert_dst_factor = false
101 };
102
103 nir_lower_blend_channel alpha = rgb;
104
105 if (blend->rt[i].blend_enable) {
106 rgb.func = util_blend_func_to_shader(blend->rt[i].rgb_func);
107 rgb.src_factor = util_blend_factor_to_shader(blend->rt[i].rgb_src_factor);
108 rgb.dst_factor = util_blend_factor_to_shader(blend->rt[i].rgb_dst_factor);
109 rgb.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_src_factor);
110 rgb.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].rgb_dst_factor);
111
112 alpha.func = util_blend_func_to_shader(blend->rt[i].alpha_func);
113 alpha.src_factor = util_blend_factor_to_shader(blend->rt[i].alpha_src_factor);
114 alpha.dst_factor = util_blend_factor_to_shader(blend->rt[i].alpha_dst_factor);
115 alpha.invert_src_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_src_factor);
116 alpha.invert_dst_factor = util_blend_factor_is_inverted(blend->rt[i].alpha_dst_factor);
117 }
118
119 options.rt[i].rgb = rgb;
120 options.rt[i].alpha = alpha;
121
122 options.rt[i].colormask = blend->rt[i].colormask;
123 }
124
125 return options;
126 }
127
128 struct panfrost_blend_shader
129 panfrost_compile_blend_shader(
130 struct panfrost_context *ctx,
131 struct pipe_blend_state *cso,
132 enum pipe_format format)
133 {
134 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
135 struct panfrost_blend_shader res;
136
137 res.ctx = ctx;
138
139 /* Build the shader */
140
141 nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
142 nir_function *fn = nir_function_create(shader, "main");
143 nir_function_impl *impl = nir_function_impl_create(fn);
144
145 /* Create the blend variables */
146
147 nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_Color");
148 nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(GLSL_TYPE_FLOAT, 4), "gl_FragColor");
149
150 c_src->data.location = VARYING_SLOT_COL0;
151 c_out->data.location = FRAG_RESULT_COLOR;
152
153 /* Setup nir_builder */
154
155 nir_builder _b;
156 nir_builder *b = &_b;
157 nir_builder_init(b, impl);
158 b->cursor = nir_before_block(nir_start_block(impl));
159
160 /* Setup inputs */
161
162 nir_ssa_def *s_src = nir_load_var(b, c_src);
163
164 /* Build a trivial blend shader */
165 nir_store_var(b, c_out, s_src, 0xFF);
166
167 nir_lower_blend_options options =
168 nir_make_options(cso, 1);
169 NIR_PASS_V(shader, nir_lower_blend, options);
170
171 NIR_PASS_V(shader, nir_lower_framebuffer, format, screen->gpu_id);
172
173 /* Compile the built shader */
174
175 midgard_program program;
176 midgard_compile_shader_nir(&ctx->compiler, shader, &program, true, screen->gpu_id);
177
178 /* At least two work registers are needed due to an encoding quirk */
179 res.work_count = MAX2(program.work_register_count, 2);
180
181 /* Allow us to patch later */
182 res.patch_index = program.blend_patch_offset;
183 res.first_tag = program.first_tag;
184 res.size = program.compiled.size;
185 res.buffer = program.compiled.data;
186
187 return res;
188 }