panfrost: Move shared mem desc emission out of panfrost_launch_grid()
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "pan_allocate.h"
26 #include "pan_bo.h"
27 #include "pan_cmdstream.h"
28 #include "pan_context.h"
29 #include "pan_job.h"
30
31 static void
32 panfrost_mali_viewport_init(struct panfrost_context *ctx,
33 struct mali_viewport *mvp)
34 {
35 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
36
37 /* Clip bounds are encoded as floats. The viewport itself is encoded as
38 * (somewhat) asymmetric ints. */
39
40 const struct pipe_scissor_state *ss = &ctx->scissor;
41
42 memset(mvp, 0, sizeof(*mvp));
43
44 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
45 * each direction. Clipping to the viewport in theory should work, but
46 * in practice causes issues when we're not explicitly trying to
47 * scissor */
48
49 *mvp = (struct mali_viewport) {
50 .clip_minx = -INFINITY,
51 .clip_miny = -INFINITY,
52 .clip_maxx = INFINITY,
53 .clip_maxy = INFINITY,
54 };
55
56 /* Always scissor to the viewport by default. */
57 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
58 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
59
60 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
61 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
62
63 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
64 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
65
66 /* Apply the scissor test */
67
68 unsigned minx, miny, maxx, maxy;
69
70 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
71 minx = MAX2(ss->minx, vp_minx);
72 miny = MAX2(ss->miny, vp_miny);
73 maxx = MIN2(ss->maxx, vp_maxx);
74 maxy = MIN2(ss->maxy, vp_maxy);
75 } else {
76 minx = vp_minx;
77 miny = vp_miny;
78 maxx = vp_maxx;
79 maxy = vp_maxy;
80 }
81
82 /* Hardware needs the min/max to be strictly ordered, so flip if we
83 * need to. The viewport transformation in the vertex shader will
84 * handle the negatives if we don't */
85
86 if (miny > maxy) {
87 unsigned temp = miny;
88 miny = maxy;
89 maxy = temp;
90 }
91
92 if (minx > maxx) {
93 unsigned temp = minx;
94 minx = maxx;
95 maxx = temp;
96 }
97
98 if (minz > maxz) {
99 float temp = minz;
100 minz = maxz;
101 maxz = temp;
102 }
103
104 /* Clamp to the framebuffer size as a last check */
105
106 minx = MIN2(ctx->pipe_framebuffer.width, minx);
107 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
108
109 miny = MIN2(ctx->pipe_framebuffer.height, miny);
110 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
111
112 /* Upload */
113
114 mvp->viewport0[0] = minx;
115 mvp->viewport1[0] = MALI_POSITIVE(maxx);
116
117 mvp->viewport0[1] = miny;
118 mvp->viewport1[1] = MALI_POSITIVE(maxy);
119
120 mvp->clip_minz = minz;
121 mvp->clip_maxz = maxz;
122 }
123
124 void
125 panfrost_emit_viewport(struct panfrost_batch *batch,
126 struct midgard_payload_vertex_tiler *tp)
127 {
128 struct panfrost_context *ctx = batch->ctx;
129 struct mali_viewport mvp;
130
131 panfrost_mali_viewport_init(batch->ctx, &mvp);
132
133 /* Update the job, unless we're doing wallpapering (whose lack of
134 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
135 * just... be faster :) */
136
137 if (!ctx->wallpaper_batch)
138 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
139 mvp.viewport0[1],
140 mvp.viewport1[0] + 1,
141 mvp.viewport1[1] + 1);
142
143 tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
144 sizeof(mvp));
145 }
146
147 static mali_ptr
148 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
149 enum pipe_shader_type st,
150 struct panfrost_constant_buffer *buf,
151 unsigned index)
152 {
153 struct pipe_constant_buffer *cb = &buf->cb[index];
154 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
155
156 if (rsrc) {
157 panfrost_batch_add_bo(batch, rsrc->bo,
158 PAN_BO_ACCESS_SHARED |
159 PAN_BO_ACCESS_READ |
160 panfrost_bo_access_for_stage(st));
161
162 /* Alignment gauranteed by
163 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
164 return rsrc->bo->gpu + cb->buffer_offset;
165 } else if (cb->user_buffer) {
166 return panfrost_upload_transient(batch,
167 cb->user_buffer +
168 cb->buffer_offset,
169 cb->buffer_size);
170 } else {
171 unreachable("No constant buffer");
172 }
173 }
174
/* One sysval occupies a single 16-byte uniform slot; the union lets each
 * sysval type view that slot as floats, 32-bit signed/unsigned ints, or a
 * pair of 64-bit values (e.g. a GPU address plus a size). */
struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};
183
184 static void
185 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
186 struct sysval_uniform *uniform)
187 {
188 struct panfrost_context *ctx = batch->ctx;
189 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
190
191 uniform->f[0] = vp->scale[0];
192 uniform->f[1] = vp->scale[1];
193 uniform->f[2] = vp->scale[2];
194 }
195
196 static void
197 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
198 struct sysval_uniform *uniform)
199 {
200 struct panfrost_context *ctx = batch->ctx;
201 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
202
203 uniform->f[0] = vp->translate[0];
204 uniform->f[1] = vp->translate[1];
205 uniform->f[2] = vp->translate[2];
206 }
207
208 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
209 enum pipe_shader_type st,
210 unsigned int sysvalid,
211 struct sysval_uniform *uniform)
212 {
213 struct panfrost_context *ctx = batch->ctx;
214 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
215 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
216 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
217 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
218
219 assert(dim);
220 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
221
222 if (dim > 1)
223 uniform->i[1] = u_minify(tex->texture->height0,
224 tex->u.tex.first_level);
225
226 if (dim > 2)
227 uniform->i[2] = u_minify(tex->texture->depth0,
228 tex->u.tex.first_level);
229
230 if (is_array)
231 uniform->i[dim] = tex->texture->array_size;
232 }
233
234 static void
235 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
236 enum pipe_shader_type st,
237 unsigned ssbo_id,
238 struct sysval_uniform *uniform)
239 {
240 struct panfrost_context *ctx = batch->ctx;
241
242 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
243 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
244
245 /* Compute address */
246 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
247
248 panfrost_batch_add_bo(batch, bo,
249 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
250 panfrost_bo_access_for_stage(st));
251
252 /* Upload address and size as sysval */
253 uniform->du[0] = bo->gpu + sb.buffer_offset;
254 uniform->u[2] = sb.buffer_size;
255 }
256
257 static void
258 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
259 enum pipe_shader_type st,
260 unsigned samp_idx,
261 struct sysval_uniform *uniform)
262 {
263 struct panfrost_context *ctx = batch->ctx;
264 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
265
266 uniform->f[0] = sampl->min_lod;
267 uniform->f[1] = sampl->max_lod;
268 uniform->f[2] = sampl->lod_bias;
269
270 /* Even without any errata, Midgard represents "no mipmapping" as
271 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
272 * panfrost_create_sampler_state which also explains our choice of
273 * epsilon value (again to keep behaviour consistent) */
274
275 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
276 uniform->f[1] = uniform->f[0] + (1.0/256.0);
277 }
278
279 static void
280 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
281 struct sysval_uniform *uniform)
282 {
283 struct panfrost_context *ctx = batch->ctx;
284
285 uniform->u[0] = ctx->compute_grid->grid[0];
286 uniform->u[1] = ctx->compute_grid->grid[1];
287 uniform->u[2] = ctx->compute_grid->grid[2];
288 }
289
290 static void
291 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
292 struct panfrost_shader_state *ss,
293 enum pipe_shader_type st)
294 {
295 struct sysval_uniform *uniforms = (void *)buf;
296
297 for (unsigned i = 0; i < ss->sysval_count; ++i) {
298 int sysval = ss->sysval[i];
299
300 switch (PAN_SYSVAL_TYPE(sysval)) {
301 case PAN_SYSVAL_VIEWPORT_SCALE:
302 panfrost_upload_viewport_scale_sysval(batch,
303 &uniforms[i]);
304 break;
305 case PAN_SYSVAL_VIEWPORT_OFFSET:
306 panfrost_upload_viewport_offset_sysval(batch,
307 &uniforms[i]);
308 break;
309 case PAN_SYSVAL_TEXTURE_SIZE:
310 panfrost_upload_txs_sysval(batch, st,
311 PAN_SYSVAL_ID(sysval),
312 &uniforms[i]);
313 break;
314 case PAN_SYSVAL_SSBO:
315 panfrost_upload_ssbo_sysval(batch, st,
316 PAN_SYSVAL_ID(sysval),
317 &uniforms[i]);
318 break;
319 case PAN_SYSVAL_NUM_WORK_GROUPS:
320 panfrost_upload_num_work_groups_sysval(batch,
321 &uniforms[i]);
322 break;
323 case PAN_SYSVAL_SAMPLER:
324 panfrost_upload_sampler_sysval(batch, st,
325 PAN_SYSVAL_ID(sysval),
326 &uniforms[i]);
327 break;
328 default:
329 assert(0);
330 }
331 }
332 }
333
334 static const void *
335 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
336 unsigned index)
337 {
338 struct pipe_constant_buffer *cb = &buf->cb[index];
339 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
340
341 if (rsrc)
342 return rsrc->bo->cpu;
343 else if (cb->user_buffer)
344 return cb->user_buffer;
345 else
346 unreachable("No constant buffer");
347 }
348
/* Emits the uniform data and the UBO descriptor table for one shader stage:
 * allocates a transient buffer holding the shader's sysvals followed by the
 * raw uniform values (implicit UBO #0), builds a descriptor per enabled UBO,
 * and points the payload postfix at both uploads. */
void
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        struct midgard_payload_vertex_tiler *vtp)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        /* No shader bound for this stage: nothing to emit */
        if (!all)
                return;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];

        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Uniforms are implicitly UBO #0 */
        bool has_uniforms = buf->enabled_mask & (1 << 0);

        /* Allocate room for the sysval and the uniforms; each sysval takes
         * one vec4 slot, laid out before the uniform data */
        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
        size_t size = sys_size + uniform_size;
        struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
                                                                        size);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);

        /* Upload uniforms, packed right after the sysvals */
        if (has_uniforms && uniform_size) {
                const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
        }

        struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;

        /* Next up, attach UBOs. UBO #0 is the uniforms we just
         * uploaded */

        unsigned ubo_count = panfrost_ubo_count(ctx, stage);
        assert(ubo_count >= 1);

        size_t sz = sizeof(uint64_t) * ubo_count;
        uint64_t ubos[PAN_MAX_CONST_BUFFERS];
        int uniform_count = ss->uniform_count;

        /* Upload uniforms as a UBO.
         * NOTE(review): the "2 +" bias on the entry count is not explained
         * here — presumably a descriptor encoding quirk; confirm against
         * MALI_MAKE_UBO's definition. */
        ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);

        /* The rest are honest-to-goodness UBOs */

        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
                size_t usz = buf->cb[ubo].buffer_size;
                bool enabled = buf->enabled_mask & (1 << ubo);
                bool empty = usz == 0;

                if (!enabled || empty) {
                        /* Stub out disabled UBOs to catch accesses */
                        ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                        continue;
                }

                mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
                                                                buf, ubo);

                /* UBO sizes are encoded in 16-byte (vec4) units */
                unsigned bytes_per_field = 16;
                unsigned aligned = ALIGN_POT(usz, bytes_per_field);
                ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
        }

        /* Upload the descriptor table and hook everything into the payload */
        mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
        postfix->uniforms = transfer.gpu;
        postfix->uniform_buffers = ubufs;

        buf->dirty_mask = 0;
}
425
426 void
427 panfrost_emit_shared_memory(struct panfrost_batch *batch,
428 const struct pipe_grid_info *info,
429 struct midgard_payload_vertex_tiler *vtp)
430 {
431 struct panfrost_context *ctx = batch->ctx;
432 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
433 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
434 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
435 128));
436 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
437 info->grid[2] * 4;
438 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
439 shared_size,
440 1);
441
442 struct mali_shared_memory shared = {
443 .shared_memory = bo->gpu,
444 .shared_workgroup_count =
445 util_logbase2_ceil(info->grid[0]) +
446 util_logbase2_ceil(info->grid[1]) +
447 util_logbase2_ceil(info->grid[2]),
448 .shared_unk1 = 0x2,
449 .shared_shift = util_logbase2(single_size) - 1
450 };
451
452 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
453 sizeof(shared));
454 }