panfrost: Add a helper to update the rasterizer part of a tiler job desc
[mesa.git] src/gallium/drivers/panfrost/pan_cmdstream.c
/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/macros.h"

#include "panfrost-quirks.h"

#include "pan_allocate.h"
#include "pan_bo.h"
#include "pan_cmdstream.h"
#include "pan_context.h"
#include "pan_job.h"

/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
 * framebuffer */

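/* Points a vertex/tiler payload at the batch's framebuffer descriptor,
 * allocating transient space for the descriptor on first use and tagging the
 * pointer with MALI_MFBD when the multi-framebuffer descriptor is in use. */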
void
panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
                               struct midgard_payload_vertex_tiler *vt)
{
        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        /* If we haven't already, reserve space for the framebuffer */

        if (!batch->framebuffer.gpu) {
                unsigned size = (screen->quirks & MIDGARD_SFBD) ?
                                sizeof(struct mali_single_framebuffer) :
                                sizeof(struct mali_framebuffer);

                batch->framebuffer = panfrost_allocate_transient(batch, size);

                /* Tag the pointer */
                if (!(screen->quirks & MIDGARD_SFBD))
                        batch->framebuffer.gpu |= MALI_MFBD;
        }

        vt->postfix.shared_memory = batch->framebuffer.gpu;
}

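/* Updates the rasterizer-dependent bits of a vertex/tiler payload from the
 * bound rasterizer state: front-face winding, face culling, the provoking
 * vertex for flat shading, and the constant point size / line width when the
 * vertex shader does not write gl_PointSize.
 *
 * A minimal usage sketch from a draw path (assuming the per-stage payloads
 * live in ctx->payloads, as elsewhere in the driver):
 *
 *    panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
 */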
void
panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
                              struct midgard_payload_vertex_tiler *tp)
{
        struct panfrost_rasterizer *rasterizer = ctx->rasterizer;

        tp->gl_enables |= 0x7;
        SET_BIT(tp->gl_enables, MALI_FRONT_CCW_TOP,
                rasterizer && rasterizer->base.front_ccw);
        SET_BIT(tp->gl_enables, MALI_CULL_FACE_FRONT,
                rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
        SET_BIT(tp->gl_enables, MALI_CULL_FACE_BACK,
                rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
        SET_BIT(tp->prefix.unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
                rasterizer && rasterizer->base.flatshade_first);

        if (!panfrost_writes_point_size(ctx)) {
                bool points = tp->prefix.draw_mode == MALI_POINTS;
                float val = 0.0f;

                if (rasterizer)
                        val = points ?
                              rasterizer->base.point_size :
                              rasterizer->base.line_width;

                tp->primitive_size.constant = val;
        }
}

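/* Toggles the occlusion-query bit on the payload and points the counter at
 * the query BO when a query is active, clearing it otherwise. */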
void
panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
                                   struct midgard_payload_vertex_tiler *tp)
{
        SET_BIT(tp->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
        if (ctx->occlusion_query)
                tp->postfix.occlusion_counter = ctx->occlusion_query->bo->gpu;
        else
                tp->postfix.occlusion_counter = 0;
}

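/* Uploads the shader descriptor for the given stage and attaches the shader
 * BO to the batch so it stays resident while the job runs. */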
void
panfrost_emit_shader_meta(struct panfrost_batch *batch,
                          enum pipe_shader_type st,
                          struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);

        if (!ss) {
                vtp->postfix.shader = 0;
                return;
        }

        /* Add the shader BO to the batch. */
        panfrost_batch_add_bo(batch, ss->bo,
                              PAN_BO_ACCESS_PRIVATE |
                              PAN_BO_ACCESS_READ |
                              panfrost_bo_access_for_stage(st));

        vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
                                                        sizeof(*ss->tripipe));
}

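/* Fills a mali_viewport from the current viewport/scissor state: clip bounds
 * are left wide open while the integer viewport rectangle is derived from the
 * viewport transform, intersected with the scissor when enabled, and clamped
 * to the framebuffer dimensions. */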
static void
panfrost_mali_viewport_init(struct panfrost_context *ctx,
                            struct mali_viewport *mvp)
{
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */

        const struct pipe_scissor_state *ss = &ctx->scissor;

        memset(mvp, 0, sizeof(*mvp));

        /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
         * each direction. Clipping to the viewport in theory should work, but
         * in practice causes issues when we're not explicitly trying to
         * scissor */

        *mvp = (struct mali_viewport) {
                .clip_minx = -INFINITY,
                .clip_miny = -INFINITY,
                .clip_maxx = INFINITY,
                .clip_maxy = INFINITY,
        };

        /* Always scissor to the viewport by default. */
        float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
        float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));

        float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
        float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));

        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));

        /* Apply the scissor test */

        unsigned minx, miny, maxx, maxy;

        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
                minx = MAX2(ss->minx, vp_minx);
                miny = MAX2(ss->miny, vp_miny);
                maxx = MIN2(ss->maxx, vp_maxx);
                maxy = MIN2(ss->maxy, vp_maxy);
        } else {
                minx = vp_minx;
                miny = vp_miny;
                maxx = vp_maxx;
                maxy = vp_maxy;
        }

        /* Hardware needs the min/max to be strictly ordered, so flip if we
         * need to. The viewport transformation in the vertex shader will
         * handle the negatives if we don't */

        if (miny > maxy) {
                unsigned temp = miny;
                miny = maxy;
                maxy = temp;
        }

        if (minx > maxx) {
                unsigned temp = minx;
                minx = maxx;
                maxx = temp;
        }

        if (minz > maxz) {
                float temp = minz;
                minz = maxz;
                maxz = temp;
        }

        /* Clamp to the framebuffer size as a last check */

        minx = MIN2(ctx->pipe_framebuffer.width, minx);
        maxx = MIN2(ctx->pipe_framebuffer.width, maxx);

        miny = MIN2(ctx->pipe_framebuffer.height, miny);
        maxy = MIN2(ctx->pipe_framebuffer.height, maxy);

        /* Upload */

        mvp->viewport0[0] = minx;
        mvp->viewport1[0] = MALI_POSITIVE(maxx);

        mvp->viewport0[1] = miny;
        mvp->viewport1[1] = MALI_POSITIVE(maxy);

        mvp->clip_minz = minz;
        mvp->clip_maxz = maxz;
}

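/* Builds the viewport descriptor for the draw, folds it into the batch's
 * scissor tracking (except for wallpaper draws), and uploads it transiently. */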
void
panfrost_emit_viewport(struct panfrost_batch *batch,
                       struct midgard_payload_vertex_tiler *tp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct mali_viewport mvp;

        panfrost_mali_viewport_init(batch->ctx, &mvp);

        /* Update the job, unless we're doing wallpapering (whose lack of
         * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
         * just... be faster :) */

        if (!ctx->wallpaper_batch)
                panfrost_batch_union_scissor(batch, mvp.viewport0[0],
                                             mvp.viewport0[1],
                                             mvp.viewport1[0] + 1,
                                             mvp.viewport1[1] + 1);

        tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
                                                         sizeof(mvp));
}

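/* Returns a GPU pointer for a constant buffer slot, either by pointing into
 * the backing resource (adding its BO to the batch) or by transiently
 * uploading a user buffer. */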
static mali_ptr
panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
                                 enum pipe_shader_type st,
                                 struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc) {
                panfrost_batch_add_bo(batch, rsrc->bo,
                                      PAN_BO_ACCESS_SHARED |
                                      PAN_BO_ACCESS_READ |
                                      panfrost_bo_access_for_stage(st));

                /* Alignment guaranteed by
                 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
                return rsrc->bo->gpu + cb->buffer_offset;
        } else if (cb->user_buffer) {
                return panfrost_upload_transient(batch,
                                                 cb->user_buffer +
                                                 cb->buffer_offset,
                                                 cb->buffer_size);
        } else {
                unreachable("No constant buffer");
        }
}

struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};

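/* Each helper below fills a single 16-byte sysval slot from the relevant
 * piece of driver state; panfrost_upload_sysvals() dispatches on the sysval
 * types recorded in the shader. */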
static void
panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
                                      struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->scale[0];
        uniform->f[1] = vp->scale[1];
        uniform->f[2] = vp->scale[2];
}

static void
panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        uniform->f[0] = vp->translate[0];
        uniform->f[1] = vp->translate[1];
        uniform->f[2] = vp->translate[2];
}

static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
                                       enum pipe_shader_type st,
                                       unsigned int sysvalid,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;

        assert(dim);
        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);

        if (dim > 1)
                uniform->i[1] = u_minify(tex->texture->height0,
                                         tex->u.tex.first_level);

        if (dim > 2)
                uniform->i[2] = u_minify(tex->texture->depth0,
                                         tex->u.tex.first_level);

        if (is_array)
                uniform->i[dim] = tex->texture->array_size;
}

static void
panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
                            enum pipe_shader_type st,
                            unsigned ssbo_id,
                            struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];

        /* Compute address */
        struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;

        panfrost_batch_add_bo(batch, bo,
                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
                              panfrost_bo_access_for_stage(st));

        /* Upload address and size as sysval */
        uniform->du[0] = bo->gpu + sb.buffer_offset;
        uniform->u[2] = sb.buffer_size;
}

static void
panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
                               enum pipe_shader_type st,
                               unsigned samp_idx,
                               struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;

        uniform->f[0] = sampl->min_lod;
        uniform->f[1] = sampl->max_lod;
        uniform->f[2] = sampl->lod_bias;

        /* Even without any errata, Midgard represents "no mipmapping" as
         * fixing the LOD with the clamps; keep behaviour consistent. cf.
         * panfrost_create_sampler_state which also explains our choice of
         * epsilon value (again to keep behaviour consistent) */

        if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
                uniform->f[1] = uniform->f[0] + (1.0/256.0);
}

static void
panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        uniform->u[0] = ctx->compute_grid->grid[0];
        uniform->u[1] = ctx->compute_grid->grid[1];
        uniform->u[2] = ctx->compute_grid->grid[2];
}

static void
panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
                        struct panfrost_shader_state *ss,
                        enum pipe_shader_type st)
{
        struct sysval_uniform *uniforms = (void *)buf;

        for (unsigned i = 0; i < ss->sysval_count; ++i) {
                int sysval = ss->sysval[i];

                switch (PAN_SYSVAL_TYPE(sysval)) {
                case PAN_SYSVAL_VIEWPORT_SCALE:
                        panfrost_upload_viewport_scale_sysval(batch,
                                                              &uniforms[i]);
                        break;
                case PAN_SYSVAL_VIEWPORT_OFFSET:
                        panfrost_upload_viewport_offset_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_TEXTURE_SIZE:
                        panfrost_upload_txs_sysval(batch, st,
                                                   PAN_SYSVAL_ID(sysval),
                                                   &uniforms[i]);
                        break;
                case PAN_SYSVAL_SSBO:
                        panfrost_upload_ssbo_sysval(batch, st,
                                                    PAN_SYSVAL_ID(sysval),
                                                    &uniforms[i]);
                        break;
                case PAN_SYSVAL_NUM_WORK_GROUPS:
                        panfrost_upload_num_work_groups_sysval(batch,
                                                               &uniforms[i]);
                        break;
                case PAN_SYSVAL_SAMPLER:
                        panfrost_upload_sampler_sysval(batch, st,
                                                       PAN_SYSVAL_ID(sysval),
                                                       &uniforms[i]);
                        break;
                default:
                        assert(0);
                }
        }
}

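/* CPU-side counterpart of panfrost_map_constant_buffer_gpu(), used when the
 * uniforms need to be copied into the transient sysval+uniform block. */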
static const void *
panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
                                 unsigned index)
{
        struct pipe_constant_buffer *cb = &buf->cb[index];
        struct panfrost_resource *rsrc = pan_resource(cb->buffer);

        if (rsrc)
                return rsrc->bo->cpu;
        else if (cb->user_buffer)
                return cb->user_buffer;
        else
                unreachable("No constant buffer");
}

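/* Uploads a stage's sysvals and uniforms as a single transient block (exposed
 * to the shader as UBO #0), builds descriptors for the remaining UBOs, and
 * points the payload's postfix at both uploads. */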
void
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        if (!all)
                return;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];

        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Uniforms are implicitly UBO #0 */
        bool has_uniforms = buf->enabled_mask & (1 << 0);

        /* Allocate room for the sysval and the uniforms */
        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
        size_t size = sys_size + uniform_size;
        struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
                                                                        size);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);

        /* Upload uniforms */
        if (has_uniforms && uniform_size) {
                const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
        }

        struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;

        /* Next up, attach UBOs. UBO #0 is the uniforms we just
         * uploaded */

        unsigned ubo_count = panfrost_ubo_count(ctx, stage);
        assert(ubo_count >= 1);

        size_t sz = sizeof(uint64_t) * ubo_count;
        uint64_t ubos[PAN_MAX_CONST_BUFFERS];
        int uniform_count = ss->uniform_count;

        /* Upload uniforms as a UBO */
        ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);

        /* The rest are honest-to-goodness UBOs */

        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
                size_t usz = buf->cb[ubo].buffer_size;
                bool enabled = buf->enabled_mask & (1 << ubo);
                bool empty = usz == 0;

                if (!enabled || empty) {
                        /* Stub out disabled UBOs to catch accesses */
                        ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                        continue;
                }

                mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
                                                                buf, ubo);

                unsigned bytes_per_field = 16;
                unsigned aligned = ALIGN_POT(usz, bytes_per_field);
                ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
        }

        mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
        postfix->uniforms = transfer.gpu;
        postfix->uniform_buffers = ubufs;

        buf->dirty_mask = 0;
}

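/* Allocates the shared/scratch memory a compute job needs (the shader's
 * declared shared size, clamped to at least 128 bytes, rounded up to a power
 * of two, and scaled by the grid size) and fills in the mali_shared_memory
 * descriptor. */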
void
panfrost_emit_shared_memory(struct panfrost_batch *batch,
                            const struct pipe_grid_info *info,
                            struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
                                                           128));
        unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
                               info->grid[2] * 4;
        struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
                                                                  shared_size,
                                                                  1);

        struct mali_shared_memory shared = {
                .shared_memory = bo->gpu,
                .shared_workgroup_count =
                        util_logbase2_ceil(info->grid[0]) +
                        util_logbase2_ceil(info->grid[1]) +
                        util_logbase2_ceil(info->grid[2]),
                .shared_unk1 = 0x2,
                .shared_shift = util_logbase2(single_size) - 1
        };

        vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
                                                               sizeof(shared));
}