panfrost: Move panfrost_attach_vt_framebuffer() to pan_cmdstream.c
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26
27 #include "panfrost-quirks.h"
28
29 #include "pan_allocate.h"
30 #include "pan_bo.h"
31 #include "pan_cmdstream.h"
32 #include "pan_context.h"
33 #include "pan_job.h"
34
35 /* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
36 * framebuffer */
37
38 void
39 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
40 struct midgard_payload_vertex_tiler *vt)
41 {
42 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
43 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
44
45 /* If we haven't, reserve space for the framebuffer */
46
47 if (!batch->framebuffer.gpu) {
48 unsigned size = (screen->quirks & MIDGARD_SFBD) ?
49 sizeof(struct mali_single_framebuffer) :
50 sizeof(struct mali_framebuffer);
51
52 batch->framebuffer = panfrost_allocate_transient(batch, size);
53
54 /* Tag the pointer */
55 if (!(screen->quirks & MIDGARD_SFBD))
56 batch->framebuffer.gpu |= MALI_MFBD;
57 }
58
59 vt->postfix.shared_memory = batch->framebuffer.gpu;
60 }
61
62 void
63 panfrost_emit_shader_meta(struct panfrost_batch *batch,
64 enum pipe_shader_type st,
65 struct midgard_payload_vertex_tiler *vtp)
66 {
67 struct panfrost_context *ctx = batch->ctx;
68 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
69
70 if (!ss) {
71 vtp->postfix.shader = 0;
72 return;
73 }
74
75 /* Add the shader BO to the batch. */
76 panfrost_batch_add_bo(batch, ss->bo,
77 PAN_BO_ACCESS_PRIVATE |
78 PAN_BO_ACCESS_READ |
79 panfrost_bo_access_for_stage(st));
80
81 vtp->postfix.shader = panfrost_upload_transient(batch, ss->tripipe,
82 sizeof(*ss->tripipe));
83 }
84
/* Fill *mvp from the context's viewport, scissor and framebuffer state:
 * unbounded clip planes, an integer scissor rectangle, and the depth range
 * derived from the viewport transform. */
static void
panfrost_mali_viewport_init(struct panfrost_context *ctx,
                            struct mali_viewport *mvp)
{
        const struct pipe_viewport_state *vp = &ctx->pipe_viewport;

        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */

        const struct pipe_scissor_state *ss = &ctx->scissor;

        memset(mvp, 0, sizeof(*mvp));

        /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
         * each direction. Clipping to the viewport in theory should work, but
         * in practice causes issues when we're not explicitly trying to
         * scissor */

        *mvp = (struct mali_viewport) {
                .clip_minx = -INFINITY,
                .clip_miny = -INFINITY,
                .clip_maxx = INFINITY,
                .clip_maxy = INFINITY,
        };

        /* Always scissor to the viewport by default. The (int) casts
         * truncate the viewport box to integer pixel coordinates before the
         * scissor merge below. */
        float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
        float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));

        float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
        float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));

        /* Depth range from the viewport transform; may arrive unordered and
         * is reordered further down */
        float minz = (vp->translate[2] - fabsf(vp->scale[2]));
        float maxz = (vp->translate[2] + fabsf(vp->scale[2]));

        /* Apply the scissor test */

        unsigned minx, miny, maxx, maxy;

        /* NOTE(review): ss points at &ctx->scissor and is never NULL, so the
         * "ss &&" term is always true; the effective condition is the
         * rasterizer's scissor enable. Also, vp_minx/vp_miny can be negative
         * and converting a negative float to unsigned is undefined behavior
         * in C — TODO confirm state tracking keeps the viewport box
         * non-negative here. */
        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
                minx = MAX2(ss->minx, vp_minx);
                miny = MAX2(ss->miny, vp_miny);
                maxx = MIN2(ss->maxx, vp_maxx);
                maxy = MIN2(ss->maxy, vp_maxy);
        } else {
                minx = vp_minx;
                miny = vp_miny;
                maxx = vp_maxx;
                maxy = vp_maxy;
        }

        /* Hardware needs the min/max to be strictly ordered, so flip if we
         * need to. The viewport transformation in the vertex shader will
         * handle the negatives if we don't */

        if (miny > maxy) {
                unsigned temp = miny;
                miny = maxy;
                maxy = temp;
        }

        if (minx > maxx) {
                unsigned temp = minx;
                minx = maxx;
                maxx = temp;
        }

        if (minz > maxz) {
                float temp = minz;
                minz = maxz;
                maxz = temp;
        }

        /* Clamp to the framebuffer size as a last check */

        minx = MIN2(ctx->pipe_framebuffer.width, minx);
        maxx = MIN2(ctx->pipe_framebuffer.width, maxx);

        miny = MIN2(ctx->pipe_framebuffer.height, miny);
        maxy = MIN2(ctx->pipe_framebuffer.height, maxy);

        /* Upload. Max coordinates go through the MALI_POSITIVE encoding;
         * callers compensate with +1 when reading them back (see
         * panfrost_emit_viewport). */

        mvp->viewport0[0] = minx;
        mvp->viewport1[0] = MALI_POSITIVE(maxx);

        mvp->viewport0[1] = miny;
        mvp->viewport1[1] = MALI_POSITIVE(maxy);

        mvp->clip_minz = minz;
        mvp->clip_maxz = maxz;
}
177
178 void
179 panfrost_emit_viewport(struct panfrost_batch *batch,
180 struct midgard_payload_vertex_tiler *tp)
181 {
182 struct panfrost_context *ctx = batch->ctx;
183 struct mali_viewport mvp;
184
185 panfrost_mali_viewport_init(batch->ctx, &mvp);
186
187 /* Update the job, unless we're doing wallpapering (whose lack of
188 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
189 * just... be faster :) */
190
191 if (!ctx->wallpaper_batch)
192 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
193 mvp.viewport0[1],
194 mvp.viewport1[0] + 1,
195 mvp.viewport1[1] + 1);
196
197 tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
198 sizeof(mvp));
199 }
200
201 static mali_ptr
202 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
203 enum pipe_shader_type st,
204 struct panfrost_constant_buffer *buf,
205 unsigned index)
206 {
207 struct pipe_constant_buffer *cb = &buf->cb[index];
208 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
209
210 if (rsrc) {
211 panfrost_batch_add_bo(batch, rsrc->bo,
212 PAN_BO_ACCESS_SHARED |
213 PAN_BO_ACCESS_READ |
214 panfrost_bo_access_for_stage(st));
215
216 /* Alignment gauranteed by
217 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
218 return rsrc->bo->gpu + cb->buffer_offset;
219 } else if (cb->user_buffer) {
220 return panfrost_upload_transient(batch,
221 cb->user_buffer +
222 cb->buffer_offset,
223 cb->buffer_size);
224 } else {
225 unreachable("No constant buffer");
226 }
227 }
228
/* One 16-byte system-value slot, viewable as four floats, four 32-bit
 * signed/unsigned ints, or two 64-bit unsigned ints (for GPU addresses). */
struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};
237
238 static void
239 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
240 struct sysval_uniform *uniform)
241 {
242 struct panfrost_context *ctx = batch->ctx;
243 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
244
245 uniform->f[0] = vp->scale[0];
246 uniform->f[1] = vp->scale[1];
247 uniform->f[2] = vp->scale[2];
248 }
249
250 static void
251 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
252 struct sysval_uniform *uniform)
253 {
254 struct panfrost_context *ctx = batch->ctx;
255 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
256
257 uniform->f[0] = vp->translate[0];
258 uniform->f[1] = vp->translate[1];
259 uniform->f[2] = vp->translate[2];
260 }
261
262 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
263 enum pipe_shader_type st,
264 unsigned int sysvalid,
265 struct sysval_uniform *uniform)
266 {
267 struct panfrost_context *ctx = batch->ctx;
268 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
269 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
270 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
271 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
272
273 assert(dim);
274 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
275
276 if (dim > 1)
277 uniform->i[1] = u_minify(tex->texture->height0,
278 tex->u.tex.first_level);
279
280 if (dim > 2)
281 uniform->i[2] = u_minify(tex->texture->depth0,
282 tex->u.tex.first_level);
283
284 if (is_array)
285 uniform->i[dim] = tex->texture->array_size;
286 }
287
288 static void
289 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
290 enum pipe_shader_type st,
291 unsigned ssbo_id,
292 struct sysval_uniform *uniform)
293 {
294 struct panfrost_context *ctx = batch->ctx;
295
296 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
297 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
298
299 /* Compute address */
300 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
301
302 panfrost_batch_add_bo(batch, bo,
303 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
304 panfrost_bo_access_for_stage(st));
305
306 /* Upload address and size as sysval */
307 uniform->du[0] = bo->gpu + sb.buffer_offset;
308 uniform->u[2] = sb.buffer_size;
309 }
310
311 static void
312 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
313 enum pipe_shader_type st,
314 unsigned samp_idx,
315 struct sysval_uniform *uniform)
316 {
317 struct panfrost_context *ctx = batch->ctx;
318 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
319
320 uniform->f[0] = sampl->min_lod;
321 uniform->f[1] = sampl->max_lod;
322 uniform->f[2] = sampl->lod_bias;
323
324 /* Even without any errata, Midgard represents "no mipmapping" as
325 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
326 * panfrost_create_sampler_state which also explains our choice of
327 * epsilon value (again to keep behaviour consistent) */
328
329 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
330 uniform->f[1] = uniform->f[0] + (1.0/256.0);
331 }
332
333 static void
334 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
335 struct sysval_uniform *uniform)
336 {
337 struct panfrost_context *ctx = batch->ctx;
338
339 uniform->u[0] = ctx->compute_grid->grid[0];
340 uniform->u[1] = ctx->compute_grid->grid[1];
341 uniform->u[2] = ctx->compute_grid->grid[2];
342 }
343
344 static void
345 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
346 struct panfrost_shader_state *ss,
347 enum pipe_shader_type st)
348 {
349 struct sysval_uniform *uniforms = (void *)buf;
350
351 for (unsigned i = 0; i < ss->sysval_count; ++i) {
352 int sysval = ss->sysval[i];
353
354 switch (PAN_SYSVAL_TYPE(sysval)) {
355 case PAN_SYSVAL_VIEWPORT_SCALE:
356 panfrost_upload_viewport_scale_sysval(batch,
357 &uniforms[i]);
358 break;
359 case PAN_SYSVAL_VIEWPORT_OFFSET:
360 panfrost_upload_viewport_offset_sysval(batch,
361 &uniforms[i]);
362 break;
363 case PAN_SYSVAL_TEXTURE_SIZE:
364 panfrost_upload_txs_sysval(batch, st,
365 PAN_SYSVAL_ID(sysval),
366 &uniforms[i]);
367 break;
368 case PAN_SYSVAL_SSBO:
369 panfrost_upload_ssbo_sysval(batch, st,
370 PAN_SYSVAL_ID(sysval),
371 &uniforms[i]);
372 break;
373 case PAN_SYSVAL_NUM_WORK_GROUPS:
374 panfrost_upload_num_work_groups_sysval(batch,
375 &uniforms[i]);
376 break;
377 case PAN_SYSVAL_SAMPLER:
378 panfrost_upload_sampler_sysval(batch, st,
379 PAN_SYSVAL_ID(sysval),
380 &uniforms[i]);
381 break;
382 default:
383 assert(0);
384 }
385 }
386 }
387
388 static const void *
389 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
390 unsigned index)
391 {
392 struct pipe_constant_buffer *cb = &buf->cb[index];
393 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
394
395 if (rsrc)
396 return rsrc->bo->cpu;
397 else if (cb->user_buffer)
398 return cb->user_buffer;
399 else
400 unreachable("No constant buffer");
401 }
402
/* Upload the stage's sysvals and uniforms into one transient allocation
 * (sysvals first, uniforms immediately after), build the UBO table with
 * that allocation as implicit UBO #0, and point the vertex/tiler postfix
 * at both. */
void
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        /* No shader bound at this stage: nothing to emit */
        if (!all)
                return;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];

        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Uniforms are implicitly UBO #0 */
        bool has_uniforms = buf->enabled_mask & (1 << 0);

        /* Allocate room for the sysval and the uniforms; each sysval
         * occupies one float4 (16 bytes) */
        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
        size_t size = sys_size + uniform_size;
        struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
                                                                       size);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);

        /* Upload uniforms, placed directly after the sysvals */
        if (has_uniforms && uniform_size) {
                const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
        }

        struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;

        /* Next up, attach UBOs. UBO #0 is the uniforms we just
         * uploaded */

        unsigned ubo_count = panfrost_ubo_count(ctx, stage);
        assert(ubo_count >= 1);

        size_t sz = sizeof(uint64_t) * ubo_count;
        uint64_t ubos[PAN_MAX_CONST_BUFFERS];
        int uniform_count = ss->uniform_count;

        /* Upload uniforms as a UBO */
        ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);

        /* The rest are honest-to-goodness UBOs */

        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
                size_t usz = buf->cb[ubo].buffer_size;
                bool enabled = buf->enabled_mask & (1 << ubo);
                bool empty = usz == 0;

                if (!enabled || empty) {
                        /* Stub out disabled UBOs to catch accesses */
                        ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                        continue;
                }

                mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
                                                                buf, ubo);

                /* Sizes are passed to MALI_MAKE_UBO in 16-byte units,
                 * rounded up */
                unsigned bytes_per_field = 16;
                unsigned aligned = ALIGN_POT(usz, bytes_per_field);
                ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
        }

        /* Upload the UBO table itself and hook everything into the job */
        mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
        postfix->uniforms = transfer.gpu;
        postfix->uniform_buffers = ubufs;

        buf->dirty_mask = 0;
}
479
480 void
481 panfrost_emit_shared_memory(struct panfrost_batch *batch,
482 const struct pipe_grid_info *info,
483 struct midgard_payload_vertex_tiler *vtp)
484 {
485 struct panfrost_context *ctx = batch->ctx;
486 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
487 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
488 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
489 128));
490 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
491 info->grid[2] * 4;
492 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
493 shared_size,
494 1);
495
496 struct mali_shared_memory shared = {
497 .shared_memory = bo->gpu,
498 .shared_workgroup_count =
499 util_logbase2_ceil(info->grid[0]) +
500 util_logbase2_ceil(info->grid[1]) +
501 util_logbase2_ceil(info->grid[2]),
502 .shared_unk1 = 0x2,
503 .shared_shift = util_logbase2(single_size) - 1
504 };
505
506 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
507 sizeof(shared));
508 }