panfrost: Extend the panfrost_batch_add_bo() API to pass access flags
[mesa.git] / src / gallium / drivers / panfrost / pan_blend_cso.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 *
26 */
27
28 #include <stdio.h>
29 #include "util/u_memory.h"
30 #include "pan_blend_shaders.h"
31 #include "pan_blending.h"
32 #include "pan_bo.h"
33
34 /* A given Gallium blend state can be encoded to the hardware in numerous,
35 * dramatically divergent ways due to the interactions of blending with
36 * framebuffer formats. Conceptually, there are two modes:
37 *
38 * - Fixed-function blending (for suitable framebuffer formats, suitable blend
39 * state, and suitable blend constant)
40 *
41 * - Blend shaders (for everything else)
42 *
43 * A given Gallium blend configuration will compile to exactly one
44 * fixed-function blend state, if it compiles to any, although the constant
45 * will vary across runs as that is tracked outside of the Gallium CSO.
46 *
47 * However, that same blend configuration will compile to many different blend
48 * shaders, depending on the framebuffer formats active. The rationale is that
49 * blend shaders override not just fixed-function blending but also
50 * fixed-function format conversion. As such, each blend shader must be
51 * hardcoded to a particular framebuffer format to correctly pack/unpack it. As
52 * a concrete example, to the hardware there is no difference (!) between RG16F
53 * and RG16UI -- both are simply 4-byte-per-pixel chunks. Thus both formats
54 * require a blend shader (even when blending is totally disabled!), required
55 * to do conversion as necessary (if necessary).
56 *
57 * All of this state is encapsulated in the panfrost_blend_state struct
58 * (our subclass of pipe_blend_state).
59 */
60
61 /* Given an initialized CSO and a particular framebuffer format, grab a
62 * blend shader, generating and compiling it if it doesn't exist
63 * (lazy-loading in a way). This routine, when the cache hits, should
64 * befast, suitable for calling every draw to avoid wacky dirty
65 * tracking paths. If the cache hits, boom, done. */
66
67 static struct panfrost_blend_shader *
68 panfrost_get_blend_shader(
69 struct panfrost_context *ctx,
70 struct panfrost_blend_state *blend,
71 enum pipe_format fmt,
72 unsigned rt)
73 {
74 /* Prevent NULL collision issues.. */
75 assert(fmt != 0);
76
77 /* Check the cache */
78 struct hash_table_u64 *shaders = blend->rt[rt].shaders;
79
80 struct panfrost_blend_shader *shader =
81 _mesa_hash_table_u64_search(shaders, fmt);
82
83 if (shader)
84 return shader;
85
86 /* Cache miss. Build one instead, cache it, and go */
87
88 struct panfrost_blend_shader generated =
89 panfrost_compile_blend_shader(ctx, &blend->base, fmt);
90
91 shader = mem_dup(&generated, sizeof(generated));
92 _mesa_hash_table_u64_insert(shaders, fmt, shader);
93 return shader;
94 }
95
96 /* Create a blend CSO. Essentially, try to compile a fixed-function
97 * expression and initialize blend shaders */
98
99 static void *
100 panfrost_create_blend_state(struct pipe_context *pipe,
101 const struct pipe_blend_state *blend)
102 {
103 struct panfrost_context *ctx = pan_context(pipe);
104 struct panfrost_blend_state *so = rzalloc(ctx, struct panfrost_blend_state);
105 so->base = *blend;
106
107 /* TODO: The following features are not yet implemented */
108 assert(!blend->logicop_enable);
109 assert(!blend->alpha_to_coverage);
110 assert(!blend->alpha_to_one);
111
112 for (unsigned c = 0; c < PIPE_MAX_COLOR_BUFS; ++c) {
113 struct panfrost_blend_rt *rt = &so->rt[c];
114
115 /* There are two paths. First, we would like to try a
116 * fixed-function if we can */
117
118 /* Without indep blending, the first RT settings replicate */
119
120 unsigned g =
121 blend->independent_blend_enable ? c : 0;
122
123 rt->has_fixed_function =
124 panfrost_make_fixed_blend_mode(
125 &blend->rt[g],
126 &rt->equation,
127 &rt->constant_mask,
128 blend->rt[g].colormask);
129
130 /* Regardless if that works, we also need to initialize
131 * the blend shaders */
132
133 rt->shaders = _mesa_hash_table_u64_create(so);
134 }
135
136 return so;
137 }
138
139 static void
140 panfrost_bind_blend_state(struct pipe_context *pipe,
141 void *cso)
142 {
143 struct panfrost_context *ctx = pan_context(pipe);
144 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
145 struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
146 struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
147 ctx->blend = pblend;
148
149 if (!blend)
150 return;
151
152 if (screen->require_sfbd) {
153 SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
154 }
155
156 /* Shader itself is not dirty, but the shader core is */
157 ctx->dirty |= PAN_DIRTY_FS;
158 }
159
160 static void
161 panfrost_delete_blend_shader(struct hash_entry *entry)
162 {
163 struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *)entry->data;
164 free(shader->buffer);
165 free(shader);
166 }
167
168 static void
169 panfrost_delete_blend_state(struct pipe_context *pipe,
170 void *cso)
171 {
172 struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;
173
174 for (unsigned c = 0; c < 4; ++c) {
175 struct panfrost_blend_rt *rt = &blend->rt[c];
176 _mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
177 }
178 ralloc_free(blend);
179 }
180
181 static void
182 panfrost_set_blend_color(struct pipe_context *pipe,
183 const struct pipe_blend_color *blend_color)
184 {
185 struct panfrost_context *ctx = pan_context(pipe);
186
187 if (blend_color)
188 ctx->blend_color = *blend_color;
189 }
190
/* Try to reduce a vector of constants to a single scalar. Bit i of `mask`
 * selects component in[i]; the reduction succeeds when all selected
 * components are equal.
 *
 * On success, *out receives the common value (0.0 when no component is
 * selected) and true is returned. On failure, *out is set to 0.0 and false is
 * returned. */

static bool
panfrost_blend_constant(float *out, float *in, unsigned mask)
{
        /* With no components in use any constant works. Store a dummy value
         * anyway so the caller never reads uninitialized memory. */
        if (mask == 0) {
                *out = 0.0;
                return true;
        }

        /* The lowest selected component is the candidate constant */
        unsigned lowest = ffs(mask) - 1;
        float candidate = in[lowest];

        /* Visit the remaining selected components from the lowest bit up,
         * dropping each bit once it has been checked */
        for (unsigned rest = mask ^ (1 << lowest); rest; rest &= rest - 1) {
                unsigned idx = ffs(rest) - 1;

                if (in[idx] != candidate) {
                        *out = 0.0;
                        return false;
                }
        }

        /* Every selected component agrees */
        *out = candidate;
        return true;
}
224
225 /* Create a final blend given the context */
226
227 struct panfrost_blend_final
228 panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
229 {
230 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
231
232 /* Grab the format, falling back gracefully if called invalidly (which
233 * has to happen for no-color-attachment FBOs, for instance) */
234 struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
235 enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
236
237 if ((fb->nr_cbufs > rti) && fb->cbufs[rti])
238 fmt = fb->cbufs[rti]->format;
239
240 /* Grab the blend state */
241 struct panfrost_blend_state *blend = ctx->blend;
242 assert(blend);
243
244 struct panfrost_blend_rt *rt = &blend->rt[rti];
245
246 struct panfrost_blend_final final;
247
248 /* First, we'll try a fixed function path */
249 if (rt->has_fixed_function && panfrost_can_fixed_blend(fmt)) {
250 if (panfrost_blend_constant(
251 &final.equation.constant,
252 ctx->blend_color.color,
253 rt->constant_mask)) {
254 /* There's an equation and suitable constant, so we're good to go */
255 final.is_shader = false;
256 final.equation.equation = &rt->equation;
257
258 final.no_blending =
259 (rt->equation.rgb_mode == 0x122) &&
260 (rt->equation.alpha_mode == 0x122) &&
261 (rt->equation.color_mask == 0xf);
262
263 return final;
264 }
265 }
266
267 /* Otherwise, we need to grab a shader */
268 struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);
269 final.is_shader = true;
270 final.no_blending = false;
271 final.shader.work_count = shader->work_count;
272 final.shader.first_tag = shader->first_tag;
273
274 /* Upload the shader */
275 final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
276 PAN_BO_EXECUTE,
277 PAN_BO_ACCESS_PRIVATE |
278 PAN_BO_ACCESS_READ |
279 PAN_BO_ACCESS_VERTEX_TILER |
280 PAN_BO_ACCESS_FRAGMENT);
281 memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
282
283 if (shader->patch_index) {
284 /* We have to specialize the blend shader to use constants, so
285 * patch in the current constants */
286
287 float *patch = (float *) (final.shader.bo->cpu + shader->patch_index);
288 memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
289 }
290
291 return final;
292 }
293
294 void
295 panfrost_blend_context_init(struct pipe_context *pipe)
296 {
297 pipe->create_blend_state = panfrost_create_blend_state;
298 pipe->bind_blend_state = panfrost_bind_blend_state;
299 pipe->delete_blend_state = panfrost_delete_blend_state;
300
301 pipe->set_blend_color = panfrost_set_blend_color;
302 }