panfrost: XMLify stencil op
[mesa.git] / src / panfrost / lib / pan_blit.c
1 /*
2 * Copyright (C) 2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include <math.h>
28 #include <stdio.h>
29 #include "pan_encoder.h"
30 #include "pan_pool.h"
31 #include "pan_scoreboard.h"
32 #include "pan_texture.h"
33 #include "panfrost-quirks.h"
34 #include "../midgard/midgard_compile.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "util/u_math.h"
37
38 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
39 * missing in many cases. We instead use software paths as fallbacks to
40 * implement blits, which are done as TILER jobs. No vertex shader is
41 * necessary since we can supply screen-space coordinates directly.
42 *
43 * This is primarily designed as a fallback for preloads but could be extended
44 * for other clears/blits if needed in the future. */
45
46 static void
47 panfrost_build_blit_shader(panfrost_program *program, unsigned gpu_id, gl_frag_result loc, nir_alu_type T, bool ms)
48 {
49 bool is_colour = loc >= FRAG_RESULT_DATA0;
50
51 nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
52 nir_function *fn = nir_function_create(shader, "main");
53 nir_function_impl *impl = nir_function_impl_create(fn);
54
55 nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "coord");
56 nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(
57 GLSL_TYPE_FLOAT, is_colour ? 4 : 1), "out");
58
59 c_src->data.location = VARYING_SLOT_TEX0;
60 c_out->data.location = loc;
61
62 nir_builder _b;
63 nir_builder *b = &_b;
64 nir_builder_init(b, impl);
65 b->cursor = nir_before_block(nir_start_block(impl));
66
67 nir_ssa_def *coord = nir_load_var(b, c_src);
68
69 nir_tex_instr *tex = nir_tex_instr_create(shader, ms ? 3 : 1);
70
71 tex->dest_type = T;
72
73 if (ms) {
74 tex->src[0].src_type = nir_tex_src_coord;
75 tex->src[0].src = nir_src_for_ssa(nir_f2i32(b, coord));
76 tex->coord_components = 2;
77
78 tex->src[1].src_type = nir_tex_src_ms_index;
79 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
80
81 tex->src[2].src_type = nir_tex_src_lod;
82 tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
83 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
84 tex->op = nir_texop_txf_ms;
85 } else {
86 tex->op = nir_texop_tex;
87
88 tex->src[0].src_type = nir_tex_src_coord;
89 tex->src[0].src = nir_src_for_ssa(coord);
90 tex->coord_components = 2;
91
92 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
93 }
94
95 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
96 nir_builder_instr_insert(b, &tex->instr);
97
98 if (is_colour)
99 nir_store_var(b, c_out, &tex->dest.ssa, 0xFF);
100 else
101 nir_store_var(b, c_out, nir_channel(b, &tex->dest.ssa, 0), 0xFF);
102
103 midgard_compile_shader_nir(shader, program, false, 0, gpu_id, false, true);
104 }
105
106 /* Compile and upload all possible blit shaders ahead-of-time to reduce draw
107 * time overhead. There's only ~30 of them at the moment, so this is fine */
108
109 void
110 panfrost_init_blit_shaders(struct panfrost_device *dev)
111 {
112 static const struct {
113 gl_frag_result loc;
114 unsigned types;
115 } shader_descs[] = {
116 { FRAG_RESULT_DEPTH, 1 << PAN_BLIT_FLOAT },
117 { FRAG_RESULT_STENCIL, 1 << PAN_BLIT_UINT },
118 { FRAG_RESULT_DATA0, ~0 },
119 { FRAG_RESULT_DATA1, ~0 },
120 { FRAG_RESULT_DATA2, ~0 },
121 { FRAG_RESULT_DATA3, ~0 },
122 { FRAG_RESULT_DATA4, ~0 },
123 { FRAG_RESULT_DATA5, ~0 },
124 { FRAG_RESULT_DATA6, ~0 },
125 { FRAG_RESULT_DATA7, ~0 }
126 };
127
128 nir_alu_type nir_types[PAN_BLIT_NUM_TYPES] = {
129 nir_type_float,
130 nir_type_uint,
131 nir_type_int
132 };
133
134 /* Total size = # of shaders * bytes per shader. There are
135 * shaders for each RT (so up to DATA7 -- overestimate is
136 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
137 * variants. These shaders are simple enough that they should be less
138 * than 8 quadwords each (again, overestimate is fine). */
139
140 unsigned offset = 0;
141 unsigned total_size = (FRAG_RESULT_DATA7 * PAN_BLIT_NUM_TYPES)
142 * (8 * 16) * 2;
143
144 dev->blit_shaders.bo = panfrost_bo_create(dev, total_size, PAN_BO_EXECUTE);
145
146 /* Don't bother generating multisampling variants if we don't actually
147 * support multisampling */
148 bool has_ms = !(dev->quirks & MIDGARD_SFBD);
149
150 for (unsigned ms = 0; ms <= has_ms; ++ms) {
151 for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
152 unsigned loc = shader_descs[i].loc;
153
154 for (enum pan_blit_type T = 0; T < PAN_BLIT_NUM_TYPES; ++T) {
155 if (!(shader_descs[i].types & (1 << T)))
156 continue;
157
158 panfrost_program program;
159 panfrost_build_blit_shader(&program, dev->gpu_id, loc,
160 nir_types[T], ms);
161
162 assert(offset + program.compiled.size < total_size);
163 memcpy(dev->blit_shaders.bo->cpu + offset, program.compiled.data, program.compiled.size);
164
165 dev->blit_shaders.loads[loc][T][ms] = (dev->blit_shaders.bo->gpu + offset) | program.first_tag;
166 offset += ALIGN_POT(program.compiled.size, 64);
167 util_dynarray_fini(&program.compiled);
168 }
169 }
170 }
171 }
172
173 /* Add a shader-based load on Midgard (draw-time for GL). Shaders are
174 * precached */
175
176 void
177 panfrost_load_midg(
178 struct pan_pool *pool,
179 struct pan_scoreboard *scoreboard,
180 mali_ptr blend_shader,
181 mali_ptr fbd,
182 mali_ptr coordinates, unsigned vertex_count,
183 struct pan_image *image,
184 unsigned loc)
185 {
186 unsigned width = u_minify(image->width0, image->first_level);
187 unsigned height = u_minify(image->height0, image->first_level);
188
189 struct mali_viewport viewport = {
190 .clip_minx = -INFINITY,
191 .clip_miny = -INFINITY,
192 .clip_maxx = INFINITY,
193 .clip_maxy = INFINITY,
194 .clip_minz = 0.0,
195 .clip_maxz = 1.0,
196
197 .viewport0 = { 0, 0 },
198 .viewport1 = { MALI_POSITIVE(width), MALI_POSITIVE(height) }
199 };
200
201 union mali_attr varying = {
202 .elements = coordinates | MALI_ATTR_LINEAR,
203 .stride = 4 * sizeof(float),
204 .size = 4 * sizeof(float) * vertex_count,
205 };
206
207 struct mali_attr_meta varying_meta = {
208 .index = 0,
209 .unknown1 = 2,
210 .swizzle = (MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3),
211 .format = MALI_RGBA32F
212 };
213
214 struct mali_stencil_test stencil = {
215 .mask = 0xFF,
216 .func = MALI_FUNC_ALWAYS,
217 .sfail = MALI_STENCIL_OP_REPLACE,
218 .dpfail = MALI_STENCIL_OP_REPLACE,
219 .dppass = MALI_STENCIL_OP_REPLACE,
220 };
221
222 union midgard_blend replace = {
223 .equation = {
224 .rgb_mode = 0x122,
225 .alpha_mode = 0x122,
226 .color_mask = MALI_MASK_R | MALI_MASK_G | MALI_MASK_B | MALI_MASK_A,
227 }
228 };
229
230 if (blend_shader)
231 replace.shader = blend_shader;
232
233 /* Determine the sampler type needed. Stencil is always sampled as
234 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
235
236 enum pan_blit_type T =
237 (loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
238 (util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
239 (util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
240 PAN_BLIT_FLOAT;
241
242 bool ms = image->nr_samples > 1;
243
244 struct mali_shader_meta shader_meta = {
245 .shader = pool->dev->blit_shaders.loads[loc][T][ms],
246 .sampler_count = 1,
247 .texture_count = 1,
248 .varying_count = 1,
249 .midgard1 = {
250 .flags_lo = 0x20,
251 .work_count = 4,
252 },
253 .coverage_mask = 0xF,
254 .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10,
255 .unknown2_4 = 0x4e0,
256 .stencil_mask_front = ~0,
257 .stencil_mask_back = ~0,
258 .stencil_front = stencil,
259 .stencil_back = stencil,
260 .blend = {
261 .shader = blend_shader
262 }
263 };
264
265 if (ms)
266 shader_meta.unknown2_3 |= MALI_HAS_MSAA | MALI_PER_SAMPLE;
267 else
268 shader_meta.unknown2_4 |= MALI_NO_MSAA;
269
270 assert(shader_meta.shader);
271
272 if (pool->dev->quirks & MIDGARD_SFBD) {
273 shader_meta.unknown2_4 |= (0x10 | MALI_NO_DITHER);
274 shader_meta.blend = replace;
275
276 if (loc < FRAG_RESULT_DATA0)
277 shader_meta.blend.equation.color_mask = 0x0;
278 }
279
280 if (loc == FRAG_RESULT_DEPTH) {
281 shader_meta.midgard1.flags_lo |= MALI_WRITES_Z;
282 shader_meta.unknown2_3 |= MALI_DEPTH_WRITEMASK;
283 } else if (loc == FRAG_RESULT_STENCIL) {
284 shader_meta.midgard1.flags_hi |= MALI_WRITES_S;
285 shader_meta.unknown2_4 |= MALI_STENCIL_TEST;
286 } else {
287 shader_meta.midgard1.flags_lo |= MALI_EARLY_Z;
288 }
289
290 /* Create the texture descriptor. We partially compute the base address
291 * ourselves to account for layer, such that the texture descriptor
292 * itself is for a 2D texture with array size 1 even for 3D/array
293 * textures, removing the need to separately key the blit shaders for
294 * 2D and 3D variants */
295
296 struct panfrost_transfer texture_t = panfrost_pool_alloc(pool, sizeof(struct mali_texture_descriptor) + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1));
297
298 panfrost_new_texture(texture_t.cpu,
299 image->width0, image->height0,
300 MAX2(image->nr_samples, 1), 1,
301 image->format, MALI_TEX_2D,
302 image->modifier,
303 image->first_level, image->last_level,
304 0, 0,
305 image->nr_samples,
306 0,
307 (MALI_CHANNEL_RED << 0) | (MALI_CHANNEL_GREEN << 3) | (MALI_CHANNEL_BLUE << 6) | (MALI_CHANNEL_ALPHA << 9),
308 image->bo->gpu + image->first_layer *
309 panfrost_get_layer_stride(image->slices,
310 image->type == MALI_TEX_3D,
311 image->cubemap_stride, image->first_level),
312 image->slices);
313
314 struct mali_sampler_descriptor sampler = {
315 .filter_mode = MALI_SAMP_MAG_NEAREST | MALI_SAMP_MIN_NEAREST,
316 .wrap_s = MALI_WRAP_CLAMP_TO_EDGE,
317 .wrap_t = MALI_WRAP_CLAMP_TO_EDGE,
318 .wrap_r = MALI_WRAP_CLAMP_TO_EDGE,
319 };
320
321 struct panfrost_transfer shader_meta_t = panfrost_pool_alloc(pool, sizeof(shader_meta) + 8 * sizeof(struct midgard_blend_rt));
322 memcpy(shader_meta_t.cpu, &shader_meta, sizeof(shader_meta));
323
324 for (unsigned i = 0; i < 8; ++i) {
325 void *dest = shader_meta_t.cpu + sizeof(shader_meta) + sizeof(struct midgard_blend_rt) * i;
326
327 if (loc == (FRAG_RESULT_DATA0 + i)) {
328 struct midgard_blend_rt blend_rt = {
329 .flags = 0x200 | MALI_BLEND_NO_DITHER,
330 .blend = replace,
331 };
332
333 if (util_format_is_srgb(image->format))
334 blend_rt.flags |= MALI_BLEND_SRGB;
335
336 if (blend_shader) {
337 blend_rt.flags |= MALI_BLEND_MRT_SHADER;
338 blend_rt.blend.shader = blend_shader;
339 }
340
341 memcpy(dest, &blend_rt, sizeof(struct midgard_blend_rt));
342 } else {
343 memset(dest, 0x0, sizeof(struct midgard_blend_rt));
344 }
345 }
346
347 struct midgard_payload_vertex_tiler payload = {
348 .prefix = {
349 .draw_mode = MALI_DRAW_MODE_TRIANGLES,
350 .unknown_draw = 0x3000,
351 .index_count = MALI_POSITIVE(vertex_count)
352 },
353 .postfix = {
354 .gl_enables = 0x7,
355 .position_varying = coordinates,
356 .textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu)),
357 .sampler_descriptor = panfrost_pool_upload(pool, &sampler, sizeof(sampler)),
358 .shader = shader_meta_t.gpu,
359 .varyings = panfrost_pool_upload(pool, &varying, sizeof(varying)),
360 .varying_meta = panfrost_pool_upload(pool, &varying_meta, sizeof(varying_meta)),
361 .viewport = panfrost_pool_upload(pool, &viewport, sizeof(viewport)),
362 .shared_memory = fbd
363 }
364 };
365
366 panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
367 payload.prefix.workgroups_x_shift_3 = 6;
368
369 panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
370 }