panfrost: Add a helper to update the occlusion query part of a tiler job desc
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/macros.h"

#include "panfrost-quirks.h"

#include "pan_allocate.h"
#include "pan_bo.h"
#include "pan_cmdstream.h"
#include "pan_context.h"
#include "pan_job.h"

/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
 * framebuffer */

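/* On first use in a batch, allocate transient space for the framebuffer
 * descriptor (SFBD or MFBD depending on hardware quirks) and point the
 * vertex/tiler payload's shared memory field at it */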
void
panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
                               struct midgard_payload_vertex_tiler *vt)
{
        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        /* If we haven't already, reserve space for the framebuffer */

        if (!batch->framebuffer.gpu) {
                unsigned size = (screen->quirks & MIDGARD_SFBD) ?
                                sizeof(struct mali_single_framebuffer) :
                                sizeof(struct mali_framebuffer);

                batch->framebuffer = panfrost_allocate_transient(batch, size);

                /* Tag the pointer */
                if (!(screen->quirks & MIDGARD_SFBD))
                        batch->framebuffer.gpu |= MALI_MFBD;
        }

        vt->postfix.shared_memory = batch->framebuffer.gpu;
}

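/* Updates the occlusion query part of a tiler job payload: toggles the
 * MALI_OCCLUSION_QUERY bit and points the occlusion counter at the active
 * query's BO, or clears it when no query is bound */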
void
panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
                                   struct midgard_payload_vertex_tiler *tp)
{
        SET_BIT(tp->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
        if (ctx->occlusion_query)
                tp->postfix.occlusion_counter = ctx->occlusion_query->bo->gpu;
        else
                tp->postfix.occlusion_counter = 0;
}

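/* Uploads the shader descriptor for the given stage to transient memory and
 * references it from the payload, keeping the shader BO resident for the
 * batch; a missing shader state leaves the payload pointing at 0 */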
void
panfrost_emit_shader_meta(struct panfrost_batch *batch,
                          enum pipe_shader_type st,
                          struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);

        if (!ss) {
                vtp->postfix.shader = 0;
                return;
        }

        /* Add the shader BO to the batch. */
        panfrost_batch_add_bo(batch, ss->bo,
                              PAN_BO_ACCESS_PRIVATE |
                              PAN_BO_ACCESS_READ |
                              panfrost_bo_access_for_stage(st));

        vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
                                                        sizeof(*ss->tripipe));
}

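/* Builds a hardware viewport descriptor from the Gallium viewport and
 * scissor state: the viewport rectangle, optionally intersected with the
 * scissor, is ordered, clamped to the framebuffer and encoded along with the
 * depth range */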
static void
panfrost_mali_viewport_init(struct panfrost_context *ctx,
                            struct mali_viewport *mvp)
{
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */

        const struct pipe_scissor_state *ss = &ctx->scissor;

        memset(mvp, 0, sizeof(*mvp));

        /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
         * each direction. Clipping to the viewport in theory should work, but
         * in practice causes issues when we're not explicitly trying to
         * scissor */

        *mvp = (struct mali_viewport) {
                .clip_minx = -INFINITY,
                .clip_miny = -INFINITY,
                .clip_maxx = INFINITY,
                .clip_maxy = INFINITY,
        };

        /* Always scissor to the viewport by default. */
        float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
        float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));

        float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
        float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));

        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));

        /* Apply the scissor test */

        unsigned minx, miny, maxx, maxy;

        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
                minx = MAX2(ss->minx, vp_minx);
                miny = MAX2(ss->miny, vp_miny);
                maxx = MIN2(ss->maxx, vp_maxx);
                maxy = MIN2(ss->maxy, vp_maxy);
        } else {
                minx = vp_minx;
                miny = vp_miny;
                maxx = vp_maxx;
                maxy = vp_maxy;
        }

        /* Hardware needs the min/max to be strictly ordered, so flip if we
         * need to. The viewport transformation in the vertex shader will
         * handle the negatives if we don't */

        if (miny > maxy) {
                unsigned temp = miny;
                miny = maxy;
                maxy = temp;
        }

        if (minx > maxx) {
                unsigned temp = minx;
                minx = maxx;
                maxx = temp;
        }

        if (minz > maxz) {
                float temp = minz;
                minz = maxz;
                maxz = temp;
        }

        /* Clamp to the framebuffer size as a last check */

        minx = MIN2(ctx->pipe_framebuffer.width, minx);
        maxx = MIN2(ctx->pipe_framebuffer.width, maxx);

        miny = MIN2(ctx->pipe_framebuffer.height, miny);
        maxy = MIN2(ctx->pipe_framebuffer.height, maxy);

        /* Upload */

        mvp->viewport0[0] = minx;
        mvp->viewport1[0] = MALI_POSITIVE(maxx);

        mvp->viewport0[1] = miny;
        mvp->viewport1[1] = MALI_POSITIVE(maxy);

        mvp->clip_minz = minz;
        mvp->clip_maxz = maxz;
}

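/* Uploads the viewport descriptor for the current draw and grows the batch's
 * scissor union to cover it */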
void
panfrost_emit_viewport(struct panfrost_batch *batch,
                       struct midgard_payload_vertex_tiler *tp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct mali_viewport mvp;

        panfrost_mali_viewport_init(batch->ctx, &mvp);

        /* Update the job, unless we're doing wallpapering (whose lack of
         * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
         * just... be faster :) */

        if (!ctx->wallpaper_batch)
                panfrost_batch_union_scissor(batch, mvp.viewport0[0],
                                             mvp.viewport0[1],
                                             mvp.viewport1[0] + 1,
                                             mvp.viewport1[1] + 1);

        tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
                                                         sizeof(mvp));
}

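/* Returns a GPU address for UBO #index: either pointing directly into the
 * backing resource's BO, or by uploading a user buffer to transient memory */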
static mali_ptr
panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
                                 enum pipe_shader_type st,
                                 struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc) {
                panfrost_batch_add_bo(batch, rsrc->bo,
                                      PAN_BO_ACCESS_SHARED |
                                      PAN_BO_ACCESS_READ |
                                      panfrost_bo_access_for_stage(st));

                /* Alignment guaranteed by
                 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
                return rsrc->bo->gpu + cb->buffer_offset;
        } else if (cb->user_buffer) {
                return panfrost_upload_transient(batch,
                                                 cb->user_buffer +
                                                 cb->buffer_offset,
                                                 cb->buffer_size);
        } else {
                unreachable("No constant buffer");
        }
}

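/* Each sysval occupies a single 16-byte (vec4) uniform slot, viewed as
 * floats, (u)ints, or 64-bit addresses depending on the sysval type */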
struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};

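/* Sysval: the xyz viewport scale factors */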
static void
panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
                                      struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->scale[0];
        uniform->f[1] = vp->scale[1];
        uniform->f[2] = vp->scale[2];
}

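/* Sysval: the xyz viewport translation */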
static void
panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->translate[0];
        uniform->f[1] = vp->translate[1];
        uniform->f[2] = vp->translate[2];
}

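/* Sysval: texture size (txs), with the texture index, dimensionality and
 * arrayness packed into the sysval ID; dimensions are minified to the view's
 * first level */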
static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
                                       enum pipe_shader_type st,
                                       unsigned int sysvalid,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;

        assert(dim);
        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);

        if (dim > 1)
                uniform->i[1] = u_minify(tex->texture->height0,
                                         tex->u.tex.first_level);

        if (dim > 2)
                uniform->i[2] = u_minify(tex->texture->depth0,
                                         tex->u.tex.first_level);

        if (is_array)
                uniform->i[dim] = tex->texture->array_size;
}

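/* Sysval: SSBO address (64-bit) and size, with the BO added to the batch for
 * read/write access */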
static void
panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
                            enum pipe_shader_type st,
                            unsigned ssbo_id,
                            struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];

        /* Compute address */
        struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;

        panfrost_batch_add_bo(batch, bo,
                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
                              panfrost_bo_access_for_stage(st));

        /* Upload address and size as sysval */
        uniform->du[0] = bo->gpu + sb.buffer_offset;
        uniform->u[2] = sb.buffer_size;
}

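/* Sysval: the sampler's LOD clamps and bias */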
static void
panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
                               enum pipe_shader_type st,
                               unsigned samp_idx,
                               struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;

        uniform->f[0] = sampl->min_lod;
        uniform->f[1] = sampl->max_lod;
        uniform->f[2] = sampl->lod_bias;

        /* Even without any errata, Midgard represents "no mipmapping" as
         * fixing the LOD with the clamps; keep behaviour consistent. c.f.
         * panfrost_create_sampler_state which also explains our choice of
         * epsilon value (again to keep behaviour consistent) */

        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                uniform->f[1] = uniform->f[0] + (1.0/256.0);
}

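/* Sysval: the compute grid dimensions (number of work groups per axis) */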
static void
panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->grid[0];
        uniform->u[1] = ctx->compute_grid->grid[1];
        uniform->u[2] = ctx->compute_grid->grid[2];
}

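/* Walks the shader's sysval table and fills in one uniform slot per sysval */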
static void
panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
                        struct panfrost_shader_state *ss,
                        enum pipe_shader_type st)
{
        struct sysval_uniform *uniforms = (void *)buf;

        for (unsigned i = 0; i < ss->sysval_count; ++i) {
                int sysval = ss->sysval[i];

                switch (PAN_SYSVAL_TYPE(sysval)) {
                case PAN_SYSVAL_VIEWPORT_SCALE:
                        panfrost_upload_viewport_scale_sysval(batch,
                                                              &uniforms[i]);
                        break;
                case PAN_SYSVAL_VIEWPORT_OFFSET:
                        panfrost_upload_viewport_offset_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_TEXTURE_SIZE:
                        panfrost_upload_txs_sysval(batch, st,
                                                   PAN_SYSVAL_ID(sysval),
                                                   &uniforms[i]);
                        break;
                case PAN_SYSVAL_SSBO:
                        panfrost_upload_ssbo_sysval(batch, st,
                                                    PAN_SYSVAL_ID(sysval),
                                                    &uniforms[i]);
                        break;
                case PAN_SYSVAL_NUM_WORK_GROUPS:
                        panfrost_upload_num_work_groups_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_SAMPLER:
                        panfrost_upload_sampler_sysval(batch, st,
                                                       PAN_SYSVAL_ID(sysval),
                                                       &uniforms[i]);
                        break;
                default:
                        assert(0);
                }
        }
}

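/* CPU-side counterpart of panfrost_map_constant_buffer_gpu, used below to
 * copy UBO #0's contents into the transient uniform area */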
static const void *
panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc)
                return rsrc->bo->cpu;
        else if (cb->user_buffer)
                return cb->user_buffer;
        else
                unreachable("No constant buffer");
}

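/* Uploads the requested sysvals followed by the UBO #0 uniforms into a
 * single transient buffer, then builds the UBO descriptor table for the
 * remaining constant buffers */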
void
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        if (!all)
                return;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];

        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Uniforms are implicitly UBO #0 */
        bool has_uniforms = buf->enabled_mask & (1 << 0);

        /* Allocate room for the sysvals and the uniforms */
        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
        size_t size = sys_size + uniform_size;
        struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
                                                                        size);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);

        /* Upload uniforms */
        if (has_uniforms && uniform_size) {
                const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
        }

        struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;

        /* Next up, attach UBOs. UBO #0 is the uniforms we just
         * uploaded */

        unsigned ubo_count = panfrost_ubo_count(ctx, stage);
        assert(ubo_count >= 1);

        size_t sz = sizeof(uint64_t) * ubo_count;
        uint64_t ubos[PAN_MAX_CONST_BUFFERS];
        int uniform_count = ss->uniform_count;

        /* Upload uniforms as a UBO */
        ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);

        /* The rest are honest-to-goodness UBOs */

        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
                size_t usz = buf->cb[ubo].buffer_size;
                bool enabled = buf->enabled_mask & (1 << ubo);
                bool empty = usz == 0;

                if (!enabled || empty) {
                        /* Stub out disabled UBOs to catch accesses */
                        ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                        continue;
                }

                mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
                                                                buf, ubo);

                unsigned bytes_per_field = 16;
                unsigned aligned = ALIGN_POT(usz, bytes_per_field);
                ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
        }

        mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
        postfix->uniforms = transfer.gpu;
        postfix->uniform_buffers = ubufs;

        buf->dirty_mask = 0;
}

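/* Allocates workgroup-local storage for a compute dispatch and emits the
 * shared memory descriptor referenced by the compute job */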
void
panfrost_emit_shared_memory(struct panfrost_batch *batch,
                            const struct pipe_grid_info *info,
                            struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
                                                           128));
        unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
                               info->grid[2] * 4;
        struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
                                                                  shared_size,
                                                                  1);

        struct mali_shared_memory shared = {
                .shared_memory = bo->gpu,
                .shared_workgroup_count =
                        util_logbase2_ceil(info->grid[0]) +
                        util_logbase2_ceil(info->grid[1]) +
                        util_logbase2_ceil(info->grid[2]),
                .shared_unk1 = 0x2,
                .shared_shift = util_logbase2(single_size) - 1
        };

        vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
                                                               sizeof(shared));
}