panfrost: Extract panfrost_batch_reserve_framebuffer
[mesa.git] src/gallium/drivers/panfrost/pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_pool.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
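/* The per-thread stack size is handed to the hardware as a shift;
 * panfrost_get_stack_shift derives it from the batch's worst-case
 * stack_size, and the same shift sizes the scratchpad BO below */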
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
75 postfix->shared_memory = panfrost_batch_reserve_framebuffer(batch);
76 }
77
78 static void
79 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
80 struct mali_vertex_tiler_prefix *prefix,
81 struct mali_vertex_tiler_postfix *postfix)
82 {
83 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
84
85 postfix->gl_enables |= 0x7;
86 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
87 rasterizer && rasterizer->base.front_ccw);
88 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
89 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
90 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
91 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
92 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
93 rasterizer && rasterizer->base.flatshade_first);
94 }
95
96 void
97 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
98 struct mali_vertex_tiler_prefix *prefix,
99 union midgard_primitive_size *primitive_size)
100 {
101 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
102
103 if (!panfrost_writes_point_size(ctx)) {
104 bool points = prefix->draw_mode == MALI_POINTS;
105 float val = 0.0f;
106
107 if (rasterizer)
108 val = points ?
109 rasterizer->base.point_size :
110 rasterizer->base.line_width;
111
112 primitive_size->constant = val;
113 }
114 }
115
116 static void
117 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
118 struct mali_vertex_tiler_postfix *postfix)
119 {
120 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
121 if (ctx->occlusion_query) {
122 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
123 panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
124 PAN_BO_ACCESS_SHARED |
125 PAN_BO_ACCESS_RW |
126 PAN_BO_ACCESS_FRAGMENT);
127 } else {
128 postfix->occlusion_counter = 0;
129 }
130 }
131
132 void
133 panfrost_vt_init(struct panfrost_context *ctx,
134 enum pipe_shader_type stage,
135 struct mali_vertex_tiler_prefix *prefix,
136 struct mali_vertex_tiler_postfix *postfix)
137 {
138 struct panfrost_device *device = pan_device(ctx->base.screen);
139
140 if (!ctx->shader[stage])
141 return;
142
143 memset(prefix, 0, sizeof(*prefix));
144 memset(postfix, 0, sizeof(*postfix));
145
146 if (device->quirks & IS_BIFROST) {
147 postfix->gl_enables = 0x2;
148 panfrost_vt_emit_shared_memory(ctx, postfix);
149 } else {
150 postfix->gl_enables = 0x6;
151 panfrost_vt_attach_framebuffer(ctx, postfix);
152 }
153
154 if (stage == PIPE_SHADER_FRAGMENT) {
155 panfrost_vt_update_occlusion_query(ctx, postfix);
156 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
157 }
158 }
159
160 static unsigned
161 panfrost_translate_index_size(unsigned size)
162 {
163 switch (size) {
164 case 1:
165 return MALI_DRAW_INDEXED_UINT8;
166
167 case 2:
168 return MALI_DRAW_INDEXED_UINT16;
169
170 case 4:
171 return MALI_DRAW_INDEXED_UINT32;
172
173 default:
174 unreachable("Invalid index size");
175 }
176 }
177
178 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
179 * good for the duration of the draw (transient), though it could last longer. Also gets
180 * the bounds on the index buffer for the range accessed by the draw. We do
181 * these operations together because there are natural optimizations which
182 * require them to be together. */
183
184 static mali_ptr
185 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
186 const struct pipe_draw_info *info,
187 unsigned *min_index, unsigned *max_index)
188 {
189 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
190 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
191 off_t offset = info->start * info->index_size;
192 bool needs_indices = true;
193 mali_ptr out = 0;
194
195 if (info->max_index != ~0u) {
196 *min_index = info->min_index;
197 *max_index = info->max_index;
198 needs_indices = false;
199 }
200
201 if (!info->has_user_indices) {
202 /* Only resources can be directly mapped */
203 panfrost_batch_add_bo(batch, rsrc->bo,
204 PAN_BO_ACCESS_SHARED |
205 PAN_BO_ACCESS_READ |
206 PAN_BO_ACCESS_VERTEX_TILER);
207 out = rsrc->bo->gpu + offset;
208
209 /* Check the cache */
210 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
211 info->start,
212 info->count,
213 min_index,
214 max_index);
215 } else {
216 /* Otherwise, we need to upload to transient memory */
217 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
218 out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
219 info->count *
220 info->index_size);
221 }
222
223 if (needs_indices) {
224 /* Fallback */
225 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
226
227 if (!info->has_user_indices)
228 panfrost_minmax_cache_add(rsrc->index_cache,
229 info->start, info->count,
230 *min_index, *max_index);
231 }
232
233 return out;
234 }
235
236 void
237 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
238 const struct pipe_draw_info *info,
239 enum mali_draw_mode draw_mode,
240 struct mali_vertex_tiler_postfix *vertex_postfix,
241 struct mali_vertex_tiler_prefix *tiler_prefix,
242 struct mali_vertex_tiler_postfix *tiler_postfix,
243 unsigned *vertex_count,
244 unsigned *padded_count)
245 {
246 tiler_prefix->draw_mode = draw_mode;
247
248 unsigned draw_flags = 0;
249
250 if (panfrost_writes_point_size(ctx))
251 draw_flags |= MALI_DRAW_VARYING_SIZE;
252
253 if (info->primitive_restart)
254 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
255
256 /* These don't make much sense */
257
258 draw_flags |= 0x3000;
259
260 if (info->index_size) {
261 unsigned min_index = 0, max_index = 0;
262
263 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
264 info,
265 &min_index,
266 &max_index);
267
268 /* Use the corresponding values */
269 *vertex_count = max_index - min_index + 1;
270 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
271 tiler_prefix->offset_bias_correction = -min_index;
272 tiler_prefix->index_count = MALI_POSITIVE(info->count);
273 draw_flags |= panfrost_translate_index_size(info->index_size);
274 } else {
275 tiler_prefix->indices = 0;
276 *vertex_count = ctx->vertex_count;
277 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
278 tiler_prefix->offset_bias_correction = 0;
279 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
280 }
281
282 tiler_prefix->unknown_draw = draw_flags;
283
284 /* Encode the padded vertex count */
285
286 if (info->instance_count > 1) {
287 *padded_count = panfrost_padded_vertex_count(*vertex_count);
288
289 unsigned shift = __builtin_ctz(ctx->padded_count);
290 unsigned k = ctx->padded_count >> (shift + 1);
291
292 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
293 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
294 } else {
295 *padded_count = *vertex_count;
296
297 /* Reset instancing state */
298 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
299 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
300 }
301 }
302
303 static void
304 panfrost_shader_meta_init(struct panfrost_context *ctx,
305 enum pipe_shader_type st,
306 struct mali_shader_meta *meta)
307 {
308 const struct panfrost_device *dev = pan_device(ctx->base.screen);
309 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
310
311 memset(meta, 0, sizeof(*meta));
312 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
313 meta->attribute_count = ss->attribute_count;
314 meta->varying_count = ss->varying_count;
315 meta->texture_count = ctx->sampler_view_count[st];
316 meta->sampler_count = ctx->sampler_count[st];
317
318 if (dev->quirks & IS_BIFROST) {
319 if (st == PIPE_SHADER_VERTEX)
320 meta->bifrost1.unk1 = 0x800000;
321 else {
322 /* First clause ATEST |= 0x4000000.
323 * Less than 32 regs |= 0x200 */
324 meta->bifrost1.unk1 = 0x950020;
325 }
326
327 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
328 if (st == PIPE_SHADER_VERTEX)
329 meta->bifrost2.preload_regs = 0xC0;
330 else {
331 meta->bifrost2.preload_regs = 0x1;
332 SET_BIT(meta->bifrost2.preload_regs, 0x10, ss->reads_frag_coord);
333 }
334
335 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
336 ss->uniform_cutoff);
337 } else {
338 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
339 ss->uniform_cutoff);
340 meta->midgard1.work_count = ss->work_reg_count;
341
342 /* TODO: This is not conformant on ES3 */
343 meta->midgard1.flags_hi = MALI_SUPPRESS_INF_NAN;
344
345 meta->midgard1.flags_lo = 0x20;
346 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
347
348 SET_BIT(meta->midgard1.flags_hi, MALI_WRITES_GLOBAL, ss->writes_global);
349 }
350 }
351
352 static unsigned
353 panfrost_translate_compare_func(enum pipe_compare_func in)
354 {
355 switch (in) {
356 case PIPE_FUNC_NEVER:
357 return MALI_FUNC_NEVER;
358
359 case PIPE_FUNC_LESS:
360 return MALI_FUNC_LESS;
361
362 case PIPE_FUNC_EQUAL:
363 return MALI_FUNC_EQUAL;
364
365 case PIPE_FUNC_LEQUAL:
366 return MALI_FUNC_LEQUAL;
367
368 case PIPE_FUNC_GREATER:
369 return MALI_FUNC_GREATER;
370
371 case PIPE_FUNC_NOTEQUAL:
372 return MALI_FUNC_NOTEQUAL;
373
374 case PIPE_FUNC_GEQUAL:
375 return MALI_FUNC_GEQUAL;
376
377 case PIPE_FUNC_ALWAYS:
378 return MALI_FUNC_ALWAYS;
379
380 default:
381 unreachable("Invalid func");
382 }
383 }
384
385 static unsigned
386 panfrost_translate_stencil_op(enum pipe_stencil_op in)
387 {
388 switch (in) {
389 case PIPE_STENCIL_OP_KEEP:
390 return MALI_STENCIL_KEEP;
391
392 case PIPE_STENCIL_OP_ZERO:
393 return MALI_STENCIL_ZERO;
394
395 case PIPE_STENCIL_OP_REPLACE:
396 return MALI_STENCIL_REPLACE;
397
398 case PIPE_STENCIL_OP_INCR:
399 return MALI_STENCIL_INCR;
400
401 case PIPE_STENCIL_OP_DECR:
402 return MALI_STENCIL_DECR;
403
404 case PIPE_STENCIL_OP_INCR_WRAP:
405 return MALI_STENCIL_INCR_WRAP;
406
407 case PIPE_STENCIL_OP_DECR_WRAP:
408 return MALI_STENCIL_DECR_WRAP;
409
410 case PIPE_STENCIL_OP_INVERT:
411 return MALI_STENCIL_INVERT;
412
413 default:
414 unreachable("Invalid stencil op");
415 }
416 }
417
418 static unsigned
419 translate_tex_wrap(enum pipe_tex_wrap w)
420 {
421 switch (w) {
422 case PIPE_TEX_WRAP_REPEAT:
423 return MALI_WRAP_REPEAT;
424
425 case PIPE_TEX_WRAP_CLAMP:
426 return MALI_WRAP_CLAMP;
427
428 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
429 return MALI_WRAP_CLAMP_TO_EDGE;
430
431 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
432 return MALI_WRAP_CLAMP_TO_BORDER;
433
434 case PIPE_TEX_WRAP_MIRROR_REPEAT:
435 return MALI_WRAP_MIRRORED_REPEAT;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP:
438 return MALI_WRAP_MIRRORED_CLAMP;
439
440 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
441 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
442
443 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
444 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
445
446 default:
447 unreachable("Invalid wrap");
448 }
449 }
450
451 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
452 struct mali_sampler_descriptor *hw)
453 {
454 unsigned func = panfrost_translate_compare_func(cso->compare_func);
455 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
456 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
457 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
458 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
459 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
460 unsigned mip_filter = mip_linear ?
461 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
462 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
463
464 *hw = (struct mali_sampler_descriptor) {
465 .filter_mode = min_filter | mag_filter | mip_filter |
466 normalized,
467 .wrap_s = translate_tex_wrap(cso->wrap_s),
468 .wrap_t = translate_tex_wrap(cso->wrap_t),
469 .wrap_r = translate_tex_wrap(cso->wrap_r),
470 .compare_func = panfrost_flip_compare_func(func),
471 .border_color = {
472 cso->border_color.f[0],
473 cso->border_color.f[1],
474 cso->border_color.f[2],
475 cso->border_color.f[3]
476 },
477 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
478 .max_lod = FIXED_16(cso->max_lod, false),
479 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
480 .seamless_cube_map = cso->seamless_cube_map,
481 };
482
483 /* If necessary, we disable mipmapping in the sampler descriptor by
484 * clamping the LOD as tight as possible (from 0 to epsilon,
485 * essentially -- remember these are fixed point numbers, so
486 * epsilon=1/256) */
487
488 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
489 hw->max_lod = hw->min_lod + 1;
490 }
491
492 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
493 struct bifrost_sampler_descriptor *hw)
494 {
495 *hw = (struct bifrost_sampler_descriptor) {
496 .unk1 = 0x1,
497 .wrap_s = translate_tex_wrap(cso->wrap_s),
498 .wrap_t = translate_tex_wrap(cso->wrap_t),
499 .wrap_r = translate_tex_wrap(cso->wrap_r),
500 .unk8 = 0x8,
501 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
502 .norm_coords = cso->normalized_coords,
503 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
504 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
505 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
506 .max_lod = FIXED_16(cso->max_lod, false),
507 };
508
509 /* If necessary, we disable mipmapping in the sampler descriptor by
510 * clamping the LOD as tight as possible (from 0 to epsilon,
511 * essentially -- remember these are fixed point numbers, so
512 * epsilon=1/256) */
513
514 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
515 hw->max_lod = hw->min_lod + 1;
516 }
517
518 static void
519 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
520 struct mali_stencil_test *out)
521 {
522 out->ref = 0; /* Gallium gets it from elsewhere */
523
524 out->mask = in->valuemask;
525 out->func = panfrost_translate_compare_func(in->func);
526 out->sfail = panfrost_translate_stencil_op(in->fail_op);
527 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
528 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
529 }
530
531 static void
532 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
533 struct mali_shader_meta *fragmeta)
534 {
535 if (!ctx->rasterizer) {
536 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
537 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
538 fragmeta->depth_units = 0.0f;
539 fragmeta->depth_factor = 0.0f;
540 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
541 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
542 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, true);
543 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, true);
544 return;
545 }
546
547 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
548
549 bool msaa = rast->multisample;
550
551 /* TODO: Sample size */
552 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
553 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
554
555 SET_BIT(fragmeta->unknown2_3, MALI_PER_SAMPLE,
556 msaa && ctx->min_samples > 1);
557
558 fragmeta->depth_units = rast->offset_units * 2.0f;
559 fragmeta->depth_factor = rast->offset_scale;
560
561 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
562
563 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, rast->offset_tri);
564 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, rast->offset_tri);
565
566 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, rast->depth_clip_near);
567 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, rast->depth_clip_far);
568 }
569
570 static void
571 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
572 struct mali_shader_meta *fragmeta)
573 {
574 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
575 int zfunc = PIPE_FUNC_ALWAYS;
576
577 if (!zsa) {
578 struct pipe_stencil_state default_stencil = {
579 .enabled = 0,
580 .func = PIPE_FUNC_ALWAYS,
581 .fail_op = MALI_STENCIL_KEEP,
582 .zfail_op = MALI_STENCIL_KEEP,
583 .zpass_op = MALI_STENCIL_KEEP,
584 .writemask = 0xFF,
585 .valuemask = 0xFF
586 };
587
588 panfrost_make_stencil_state(&default_stencil,
589 &fragmeta->stencil_front);
590 fragmeta->stencil_mask_front = default_stencil.writemask;
591 fragmeta->stencil_back = fragmeta->stencil_front;
592 fragmeta->stencil_mask_back = default_stencil.writemask;
593 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
594 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
595 } else {
596 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
597 zsa->stencil[0].enabled);
598 panfrost_make_stencil_state(&zsa->stencil[0],
599 &fragmeta->stencil_front);
600 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
601 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
602
603 /* If back-stencil is not enabled, use the front values */
604
605 if (zsa->stencil[1].enabled) {
606 panfrost_make_stencil_state(&zsa->stencil[1],
607 &fragmeta->stencil_back);
608 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
609 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
610 } else {
611 fragmeta->stencil_back = fragmeta->stencil_front;
612 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
613 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
614 }
615
616 if (zsa->depth.enabled)
617 zfunc = zsa->depth.func;
618
619 /* Depth state (TODO: Refactor) */
620
621 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
622 zsa->depth.writemask);
623 }
624
625 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
626 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
627 }
628
629 static bool
630 panfrost_fs_required(
631 struct panfrost_shader_state *fs,
632 struct panfrost_blend_final *blend,
633 unsigned rt_count)
634 {
635 /* If we generally have side effects */
636 if (fs->fs_sidefx)
637 return true;
638
639 /* If colour is written we need to execute */
640 for (unsigned i = 0; i < rt_count; ++i) {
641 if (!blend[i].no_colour)
642 return true;
643 }
644
645 /* If depth is written and not implied we need to execute.
646 * TODO: Predicate on Z/S writes being enabled */
647 return (fs->writes_depth || fs->writes_stencil);
648 }
649
650 static void
651 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
652 struct mali_shader_meta *fragmeta,
653 void *rts)
654 {
655 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
656 const struct panfrost_device *dev = pan_device(ctx->base.screen);
657 struct panfrost_shader_state *fs;
658 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
659
660 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
661 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
662 !ctx->blend->base.dither);
663
664 SET_BIT(fragmeta->unknown2_4, MALI_ALPHA_TO_COVERAGE,
665 ctx->blend->base.alpha_to_coverage);
666
667 /* Get blending setup */
668 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
669
670 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
671 unsigned shader_offset = 0;
672 struct panfrost_bo *shader_bo = NULL;
673
674 for (unsigned c = 0; c < rt_count; ++c)
675 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
676 &shader_offset);
677
678 /* Disable shader execution if we can */
679 if (dev->quirks & MIDGARD_SHADERLESS
680 && !panfrost_fs_required(fs, blend, rt_count)) {
681 fragmeta->shader = 0;
682 fragmeta->attribute_count = 0;
683 fragmeta->varying_count = 0;
684 fragmeta->texture_count = 0;
685 fragmeta->sampler_count = 0;
686
687 /* This feature is not known to work on Bifrost */
688 fragmeta->midgard1.work_count = 1;
689 fragmeta->midgard1.uniform_count = 0;
690 fragmeta->midgard1.uniform_buffer_count = 0;
691 }
692
693 /* If there is a blend shader, work registers are shared. We impose 8
694 * work registers as a limit for blend shaders. Should be lower XXX */
695
696 if (!(dev->quirks & IS_BIFROST)) {
697 for (unsigned c = 0; c < rt_count; ++c) {
698 if (blend[c].is_shader) {
699 fragmeta->midgard1.work_count =
700 MAX2(fragmeta->midgard1.work_count, 8);
701 }
702 }
703 }
704
705 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
706 * copied to the blend_meta appended (by convention), but this is the
707 * field actually read by the hardware. (Or maybe both are read...?).
708 * Specify the last RTi with a blend shader. */
709
710 fragmeta->blend.shader = 0;
711
712 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
713 if (!blend[rt].is_shader)
714 continue;
715
716 fragmeta->blend.shader = blend[rt].shader.gpu |
717 blend[rt].shader.first_tag;
718 break;
719 }
720
721 if (dev->quirks & MIDGARD_SFBD) {
722 /* On platforms with only a single render target, the blend
723 * information is inside the shader meta itself. We additionally
724 * need to signal CAN_DISCARD for nontrivial blend modes (so
725 * we're able to read back the destination buffer) */
726
727 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
728 blend[0].is_shader);
729
730 if (!blend[0].is_shader) {
731 fragmeta->blend.equation = *blend[0].equation.equation;
732 fragmeta->blend.constant = blend[0].equation.constant;
733 }
734
735 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
736 !blend[0].no_blending || fs->can_discard);
737
738 batch->draws |= PIPE_CLEAR_COLOR0;
739 return;
740 }
741
742 if (dev->quirks & IS_BIFROST) {
743 bool no_blend = true;
744
745 for (unsigned i = 0; i < rt_count; ++i)
746 no_blend &= (blend[i].no_blending | blend[i].no_colour);
747
748 SET_BIT(fragmeta->bifrost1.unk1, MALI_BIFROST_EARLY_Z,
749 !fs->can_discard && !fs->writes_depth && no_blend);
750 }
751
752 /* Additional blend descriptor tacked on for jobs using MFBD */
753
754 for (unsigned i = 0; i < rt_count; ++i) {
755 unsigned flags = 0;
756
757 if (ctx->pipe_framebuffer.nr_cbufs > i && !blend[i].no_colour) {
758 flags = 0x200;
759 batch->draws |= (PIPE_CLEAR_COLOR0 << i);
760
761 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
762 (ctx->pipe_framebuffer.cbufs[i]) &&
763 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
764
765 SET_BIT(flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
766 SET_BIT(flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
767 SET_BIT(flags, MALI_BLEND_SRGB, is_srgb);
768 SET_BIT(flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
769 }
770
771 if (dev->quirks & IS_BIFROST) {
772 struct bifrost_blend_rt *brts = rts;
773
774 brts[i].flags = flags;
775
776 if (blend[i].is_shader) {
777 /* The blend shader's address needs to be at
778 * the same top 32 bit as the fragment shader.
779 * TODO: Ensure that's always the case.
780 */
781 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
782 (fs->bo->gpu & (0xffffffffull << 32)));
783 brts[i].shader = blend[i].shader.gpu;
784 brts[i].unk2 = 0x0;
785 } else if (ctx->pipe_framebuffer.nr_cbufs > i) {
786 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
787 const struct util_format_description *format_desc;
788 format_desc = util_format_description(format);
789
790 brts[i].equation = *blend[i].equation.equation;
791
792 /* TODO: this is a bit more complicated */
793 brts[i].constant = blend[i].equation.constant;
794
795 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
796
797 /* 0x19 disables blending and forces REPLACE
798 * mode (equivalent to rgb_mode = alpha_mode =
799 * 0x122, colour mask = 0xF). 0x1a allows
800 * blending. */
801 brts[i].unk2 = blend[i].no_blending ? 0x19 : 0x1a;
802
803 brts[i].shader_type = fs->blend_types[i];
804 } else {
805 /* Dummy attachment for depth-only */
806 brts[i].unk2 = 0x3;
807 brts[i].shader_type = fs->blend_types[i];
808 }
809 } else {
810 struct midgard_blend_rt *mrts = rts;
811 mrts[i].flags = flags;
812
813 if (blend[i].is_shader) {
814 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
815 } else {
816 mrts[i].blend.equation = *blend[i].equation.equation;
817 mrts[i].blend.constant = blend[i].equation.constant;
818 }
819 }
820 }
821 }
822
823 static void
824 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
825 struct mali_shader_meta *fragmeta,
826 void *rts)
827 {
828 const struct panfrost_device *dev = pan_device(ctx->base.screen);
829 struct panfrost_shader_state *fs;
830
831 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
832
833 bool msaa = ctx->rasterizer && ctx->rasterizer->base.multisample;
834 fragmeta->coverage_mask = (msaa ? ctx->sample_mask : ~0) & 0xF;
835
836 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10;
837 fragmeta->unknown2_4 = 0x4e0;
838
839 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
840 * is required (independent of 32-bit/64-bit descriptors), or why it's
841 * not used on later GPU revisions. Otherwise, all shader jobs fault on
842 * these earlier chips (perhaps this is a chicken bit of some kind).
843 * More investigation is needed. */
844
845 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
846
847 if (dev->quirks & IS_BIFROST) {
848 /* TODO */
849 } else {
850 /* Depending on whether it's legal in the given shader, we try to
851 * enable early-z testing. TODO: respect e-z force */
852
853 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
854 !fs->can_discard && !fs->writes_global &&
855 !fs->writes_depth && !fs->writes_stencil &&
856 !ctx->blend->base.alpha_to_coverage);
857
858 /* Add the writes Z/S flags if needed. */
859 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
860 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
861
862 /* Any time texturing is used, derivatives are implicitly calculated,
863 * so we need to enable helper invocations */
864
865 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
866 fs->helper_invocations);
867
868 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
869
870 bool depth_enabled = fs->writes_depth ||
871 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
872
873 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_TILEBUFFER,
874 fs->outputs_read || (!depth_enabled && fs->can_discard));
875 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
876 }
877
878 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
879 panfrost_frag_meta_zsa_update(ctx, fragmeta);
880 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
881 }
882
883 void
884 panfrost_emit_shader_meta(struct panfrost_batch *batch,
885 enum pipe_shader_type st,
886 struct mali_vertex_tiler_postfix *postfix)
887 {
888 struct panfrost_context *ctx = batch->ctx;
889 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
890
891 if (!ss) {
892 postfix->shader = 0;
893 return;
894 }
895
896 struct mali_shader_meta meta;
897
898 panfrost_shader_meta_init(ctx, st, &meta);
899
900 /* Add the shader BO to the batch. */
901 panfrost_batch_add_bo(batch, ss->bo,
902 PAN_BO_ACCESS_PRIVATE |
903 PAN_BO_ACCESS_READ |
904 panfrost_bo_access_for_stage(st));
905
906 mali_ptr shader_ptr;
907
908 if (st == PIPE_SHADER_FRAGMENT) {
909 struct panfrost_device *dev = pan_device(ctx->base.screen);
910 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
911 size_t desc_size = sizeof(meta);
912 void *rts = NULL;
913 struct panfrost_transfer xfer;
914 unsigned rt_size;
915
916 if (dev->quirks & MIDGARD_SFBD)
917 rt_size = 0;
918 else if (dev->quirks & IS_BIFROST)
919 rt_size = sizeof(struct bifrost_blend_rt);
920 else
921 rt_size = sizeof(struct midgard_blend_rt);
922
923 desc_size += rt_size * rt_count;
924
925 if (rt_size)
926 rts = rzalloc_size(ctx, rt_size * rt_count);
927
928 panfrost_frag_shader_meta_init(ctx, &meta, rts);
929
930 xfer = panfrost_pool_alloc(&batch->pool, desc_size);
931
932 memcpy(xfer.cpu, &meta, sizeof(meta));
933 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
934
935 if (rt_size)
936 ralloc_free(rts);
937
938 shader_ptr = xfer.gpu;
939 } else {
940 shader_ptr = panfrost_pool_upload(&batch->pool, &meta,
941 sizeof(meta));
942 }
943
944 postfix->shader = shader_ptr;
945 }
946
947 static void
948 panfrost_mali_viewport_init(struct panfrost_context *ctx,
949 struct mali_viewport *mvp)
950 {
951 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
952
953 /* Clip bounds are encoded as floats. The viewport itself is encoded as
954 * (somewhat) asymmetric ints. */
955
956 const struct pipe_scissor_state *ss = &ctx->scissor;
957
958 memset(mvp, 0, sizeof(*mvp));
959
960 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
961 * each direction. Clipping to the viewport in theory should work, but
962 * in practice causes issues when we're not explicitly trying to
963 * scissor */
964
965 *mvp = (struct mali_viewport) {
966 .clip_minx = -INFINITY,
967 .clip_miny = -INFINITY,
968 .clip_maxx = INFINITY,
969 .clip_maxy = INFINITY,
970 };
971
972 /* Always scissor to the viewport by default. */
973 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
974 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
975
976 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
977 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
978
979 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
980 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
981
982 /* Apply the scissor test */
983
984 unsigned minx, miny, maxx, maxy;
985
986 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
987 minx = MAX2(ss->minx, vp_minx);
988 miny = MAX2(ss->miny, vp_miny);
989 maxx = MIN2(ss->maxx, vp_maxx);
990 maxy = MIN2(ss->maxy, vp_maxy);
991 } else {
992 minx = vp_minx;
993 miny = vp_miny;
994 maxx = vp_maxx;
995 maxy = vp_maxy;
996 }
997
998 /* Hardware needs the min/max to be strictly ordered, so flip if we
999 * need to. The viewport transformation in the vertex shader will
1000 * handle the negatives if we don't */
1001
1002 if (miny > maxy) {
1003 unsigned temp = miny;
1004 miny = maxy;
1005 maxy = temp;
1006 }
1007
1008 if (minx > maxx) {
1009 unsigned temp = minx;
1010 minx = maxx;
1011 maxx = temp;
1012 }
1013
1014 if (minz > maxz) {
1015 float temp = minz;
1016 minz = maxz;
1017 maxz = temp;
1018 }
1019
1020 /* Clamp to the framebuffer size as a last check */
1021
1022 minx = MIN2(ctx->pipe_framebuffer.width, minx);
1023 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1024
1025 miny = MIN2(ctx->pipe_framebuffer.height, miny);
1026 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1027
1028 /* Upload */
1029
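/* viewport0 is the inclusive minimum corner; viewport1 is the maximum,
 * stored off-by-one via MALI_POSITIVE (panfrost_emit_viewport adds the
 * one back when growing the batch scissor) */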
1030 mvp->viewport0[0] = minx;
1031 mvp->viewport1[0] = MALI_POSITIVE(maxx);
1032
1033 mvp->viewport0[1] = miny;
1034 mvp->viewport1[1] = MALI_POSITIVE(maxy);
1035
1036 bool clip_near = true;
1037 bool clip_far = true;
1038
1039 if (ctx->rasterizer) {
1040 clip_near = ctx->rasterizer->base.depth_clip_near;
1041 clip_far = ctx->rasterizer->base.depth_clip_far;
1042 }
1043
1044 mvp->clip_minz = clip_near ? minz : -INFINITY;
1045 mvp->clip_maxz = clip_far ? maxz : INFINITY;
1046 }
1047
1048 void
1049 panfrost_emit_viewport(struct panfrost_batch *batch,
1050 struct mali_vertex_tiler_postfix *tiler_postfix)
1051 {
1052 struct panfrost_context *ctx = batch->ctx;
1053 struct mali_viewport mvp;
1054
1055 panfrost_mali_viewport_init(batch->ctx, &mvp);
1056
1057 /* Update the job, unless we're doing wallpapering (whose lack of
1058 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1059 * just... be faster :) */
1060
1061 if (!ctx->wallpaper_batch)
1062 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
1063 mvp.viewport0[1],
1064 mvp.viewport1[0] + 1,
1065 mvp.viewport1[1] + 1);
1066
1067 tiler_postfix->viewport = panfrost_pool_upload(&batch->pool, &mvp,
1068 sizeof(mvp));
1069 }
1070
1071 static mali_ptr
1072 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1073 enum pipe_shader_type st,
1074 struct panfrost_constant_buffer *buf,
1075 unsigned index)
1076 {
1077 struct pipe_constant_buffer *cb = &buf->cb[index];
1078 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1079
1080 if (rsrc) {
1081 panfrost_batch_add_bo(batch, rsrc->bo,
1082 PAN_BO_ACCESS_SHARED |
1083 PAN_BO_ACCESS_READ |
1084 panfrost_bo_access_for_stage(st));
1085
1086 /* Alignment guaranteed by
1087 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1088 return rsrc->bo->gpu + cb->buffer_offset;
1089 } else if (cb->user_buffer) {
1090 return panfrost_pool_upload(&batch->pool,
1091 cb->user_buffer +
1092 cb->buffer_offset,
1093 cb->buffer_size);
1094 } else {
1095 unreachable("No constant buffer");
1096 }
1097 }
1098
1099 struct sysval_uniform {
1100 union {
1101 float f[4];
1102 int32_t i[4];
1103 uint32_t u[4];
1104 uint64_t du[2];
1105 };
1106 };
1107
1108 static void
1109 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1110 struct sysval_uniform *uniform)
1111 {
1112 struct panfrost_context *ctx = batch->ctx;
1113 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1114
1115 uniform->f[0] = vp->scale[0];
1116 uniform->f[1] = vp->scale[1];
1117 uniform->f[2] = vp->scale[2];
1118 }
1119
1120 static void
1121 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1122 struct sysval_uniform *uniform)
1123 {
1124 struct panfrost_context *ctx = batch->ctx;
1125 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1126
1127 uniform->f[0] = vp->translate[0];
1128 uniform->f[1] = vp->translate[1];
1129 uniform->f[2] = vp->translate[2];
1130 }
1131
1132 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1133 enum pipe_shader_type st,
1134 unsigned int sysvalid,
1135 struct sysval_uniform *uniform)
1136 {
1137 struct panfrost_context *ctx = batch->ctx;
1138 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1139 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1140 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1141 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1142
1143 assert(dim);
1144 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1145
1146 if (dim > 1)
1147 uniform->i[1] = u_minify(tex->texture->height0,
1148 tex->u.tex.first_level);
1149
1150 if (dim > 2)
1151 uniform->i[2] = u_minify(tex->texture->depth0,
1152 tex->u.tex.first_level);
1153
1154 if (is_array)
1155 uniform->i[dim] = tex->texture->array_size;
1156 }
1157
1158 static void
1159 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1160 enum pipe_shader_type st,
1161 unsigned ssbo_id,
1162 struct sysval_uniform *uniform)
1163 {
1164 struct panfrost_context *ctx = batch->ctx;
1165
1166 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1167 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1168
1169 /* Compute address */
1170 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1171
1172 panfrost_batch_add_bo(batch, bo,
1173 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1174 panfrost_bo_access_for_stage(st));
1175
1176 /* Upload address and size as sysval */
1177 uniform->du[0] = bo->gpu + sb.buffer_offset;
1178 uniform->u[2] = sb.buffer_size;
1179 }
1180
1181 static void
1182 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1183 enum pipe_shader_type st,
1184 unsigned samp_idx,
1185 struct sysval_uniform *uniform)
1186 {
1187 struct panfrost_context *ctx = batch->ctx;
1188 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1189
1190 uniform->f[0] = sampl->min_lod;
1191 uniform->f[1] = sampl->max_lod;
1192 uniform->f[2] = sampl->lod_bias;
1193
1194 /* Even without any errata, Midgard represents "no mipmapping" as
1195 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1196 * panfrost_create_sampler_state which also explains our choice of
1197 * epsilon value (again to keep behaviour consistent) */
1198
1199 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1200 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1201 }
1202
1203 static void
1204 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1205 struct sysval_uniform *uniform)
1206 {
1207 struct panfrost_context *ctx = batch->ctx;
1208
1209 uniform->u[0] = ctx->compute_grid->grid[0];
1210 uniform->u[1] = ctx->compute_grid->grid[1];
1211 uniform->u[2] = ctx->compute_grid->grid[2];
1212 }
1213
1214 static void
1215 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1216 struct panfrost_shader_state *ss,
1217 enum pipe_shader_type st)
1218 {
1219 struct sysval_uniform *uniforms = (void *)buf;
1220
1221 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1222 int sysval = ss->sysval[i];
1223
1224 switch (PAN_SYSVAL_TYPE(sysval)) {
1225 case PAN_SYSVAL_VIEWPORT_SCALE:
1226 panfrost_upload_viewport_scale_sysval(batch,
1227 &uniforms[i]);
1228 break;
1229 case PAN_SYSVAL_VIEWPORT_OFFSET:
1230 panfrost_upload_viewport_offset_sysval(batch,
1231 &uniforms[i]);
1232 break;
1233 case PAN_SYSVAL_TEXTURE_SIZE:
1234 panfrost_upload_txs_sysval(batch, st,
1235 PAN_SYSVAL_ID(sysval),
1236 &uniforms[i]);
1237 break;
1238 case PAN_SYSVAL_SSBO:
1239 panfrost_upload_ssbo_sysval(batch, st,
1240 PAN_SYSVAL_ID(sysval),
1241 &uniforms[i]);
1242 break;
1243 case PAN_SYSVAL_NUM_WORK_GROUPS:
1244 panfrost_upload_num_work_groups_sysval(batch,
1245 &uniforms[i]);
1246 break;
1247 case PAN_SYSVAL_SAMPLER:
1248 panfrost_upload_sampler_sysval(batch, st,
1249 PAN_SYSVAL_ID(sysval),
1250 &uniforms[i]);
1251 break;
1252 default:
1253 assert(0);
1254 }
1255 }
1256 }
1257
1258 static const void *
1259 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1260 unsigned index)
1261 {
1262 struct pipe_constant_buffer *cb = &buf->cb[index];
1263 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1264
1265 if (rsrc)
1266 return rsrc->bo->cpu;
1267 else if (cb->user_buffer)
1268 return cb->user_buffer;
1269 else
1270 unreachable("No constant buffer");
1271 }
1272
1273 void
1274 panfrost_emit_const_buf(struct panfrost_batch *batch,
1275 enum pipe_shader_type stage,
1276 struct mali_vertex_tiler_postfix *postfix)
1277 {
1278 struct panfrost_context *ctx = batch->ctx;
1279 struct panfrost_shader_variants *all = ctx->shader[stage];
1280
1281 if (!all)
1282 return;
1283
1284 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1285
1286 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1287
1288 /* Uniforms are implicitly UBO #0 */
1289 bool has_uniforms = buf->enabled_mask & (1 << 0);
1290
1291 /* Allocate room for the sysval and the uniforms */
1292 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1293 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1294 size_t size = sys_size + uniform_size;
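/* Sysvals are laid out at the start of the allocation, followed by the
 * application's uniforms; the combined buffer becomes UBO #0 below */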
1295 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1296 size);
1297
1298 /* Upload sysvals requested by the shader */
1299 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1300
1301 /* Upload uniforms */
1302 if (has_uniforms && uniform_size) {
1303 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1304 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1305 }
1306
1307 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1308 * uploaded */
1309
1310 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1311 assert(ubo_count >= 1);
1312
1313 size_t sz = sizeof(uint64_t) * ubo_count;
1314 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1315 int uniform_count = ss->uniform_count;
1316
1317 /* Upload uniforms as a UBO */
1318 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1319
1320 /* The rest are honest-to-goodness UBOs */
1321
1322 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1323 size_t usz = buf->cb[ubo].buffer_size;
1324 bool enabled = buf->enabled_mask & (1 << ubo);
1325 bool empty = usz == 0;
1326
1327 if (!enabled || empty) {
1328 /* Stub out disabled UBOs to catch accesses */
1329 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1330 continue;
1331 }
1332
1333 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1334 buf, ubo);
1335
1336 unsigned bytes_per_field = 16;
1337 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1338 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1339 }
1340
1341 mali_ptr ubufs = panfrost_pool_upload(&batch->pool, ubos, sz);
1342 postfix->uniforms = transfer.gpu;
1343 postfix->uniform_buffers = ubufs;
1344
1345 buf->dirty_mask = 0;
1346 }
1347
1348 void
1349 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1350 const struct pipe_grid_info *info,
1351 struct midgard_payload_vertex_tiler *vtp)
1352 {
1353 struct panfrost_context *ctx = batch->ctx;
1354 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1355 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1356 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1357 128));
1358 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1359 info->grid[2] * 4;
1360 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1361 shared_size,
1362 1);
1363
1364 struct mali_shared_memory shared = {
1365 .shared_memory = bo->gpu,
1366 .shared_workgroup_count =
1367 util_logbase2_ceil(info->grid[0]) +
1368 util_logbase2_ceil(info->grid[1]) +
1369 util_logbase2_ceil(info->grid[2]),
1370 .shared_unk1 = 0x2,
1371 .shared_shift = util_logbase2(single_size) - 1
1372 };
1373
1374 vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared,
1375 sizeof(shared));
1376 }
1377
1378 static mali_ptr
1379 panfrost_get_tex_desc(struct panfrost_batch *batch,
1380 enum pipe_shader_type st,
1381 struct panfrost_sampler_view *view)
1382 {
1383 if (!view)
1384 return (mali_ptr) 0;
1385
1386 struct pipe_sampler_view *pview = &view->base;
1387 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1388
1389 /* Add the BO to the job so it's retained until the job is done. */
1390
1391 panfrost_batch_add_bo(batch, rsrc->bo,
1392 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1393 panfrost_bo_access_for_stage(st));
1394
1395 panfrost_batch_add_bo(batch, view->bo,
1396 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1397 panfrost_bo_access_for_stage(st));
1398
1399 return view->bo->gpu;
1400 }
1401
1402 static void
1403 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1404 struct pipe_context *pctx)
1405 {
1406 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1407 if (view->texture_bo != rsrc->bo->gpu ||
1408 view->layout != rsrc->layout) {
1409 panfrost_bo_unreference(view->bo);
1410 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1411 }
1412 }
1413
1414 void
1415 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1416 enum pipe_shader_type stage,
1417 struct mali_vertex_tiler_postfix *postfix)
1418 {
1419 struct panfrost_context *ctx = batch->ctx;
1420 struct panfrost_device *device = pan_device(ctx->base.screen);
1421
1422 if (!ctx->sampler_view_count[stage])
1423 return;
1424
1425 if (device->quirks & IS_BIFROST) {
1426 struct bifrost_texture_descriptor *descriptors;
1427
1428 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1429 ctx->sampler_view_count[stage]);
1430
1431 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1432 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1433 struct pipe_sampler_view *pview = &view->base;
1434 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1435 panfrost_update_sampler_view(view, &ctx->base);
1436
1437 /* Add the BOs to the job so they are retained until the job is done. */
1438
1439 panfrost_batch_add_bo(batch, rsrc->bo,
1440 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1441 panfrost_bo_access_for_stage(stage));
1442
1443 panfrost_batch_add_bo(batch, view->bo,
1444 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1445 panfrost_bo_access_for_stage(stage));
1446
1447 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1448 }
1449
1450 postfix->textures = panfrost_pool_upload(&batch->pool,
1451 descriptors,
1452 sizeof(struct bifrost_texture_descriptor) *
1453 ctx->sampler_view_count[stage]);
1454
1455 free(descriptors);
1456 } else {
1457 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1458
1459 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1460 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1461
1462 panfrost_update_sampler_view(view, &ctx->base);
1463
1464 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1465 }
1466
1467 postfix->textures = panfrost_pool_upload(&batch->pool,
1468 trampolines,
1469 sizeof(uint64_t) *
1470 ctx->sampler_view_count[stage]);
1471 }
1472 }
1473
1474 void
1475 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1476 enum pipe_shader_type stage,
1477 struct mali_vertex_tiler_postfix *postfix)
1478 {
1479 struct panfrost_context *ctx = batch->ctx;
1480 struct panfrost_device *device = pan_device(ctx->base.screen);
1481
1482 if (!ctx->sampler_count[stage])
1483 return;
1484
1485 if (device->quirks & IS_BIFROST) {
1486 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1487 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1488 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1489 transfer_size);
1490 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1491
1492 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1493 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1494
1495 postfix->sampler_descriptor = transfer.gpu;
1496 } else {
1497 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1498 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1499 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1500 transfer_size);
1501 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1502
1503 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1504 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1505
1506 postfix->sampler_descriptor = transfer.gpu;
1507 }
1508 }
1509
1510 void
1511 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1512 struct mali_vertex_tiler_postfix *vertex_postfix)
1513 {
1514 struct panfrost_context *ctx = batch->ctx;
1515
1516 if (!ctx->vertex)
1517 return;
1518
1519 struct panfrost_vertex_state *so = ctx->vertex;
1520
1521 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1522 vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
1523 sizeof(*so->hw) *
1524 PAN_MAX_ATTRIBUTE);
1525 }
1526
1527 void
1528 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1529 struct mali_vertex_tiler_postfix *vertex_postfix)
1530 {
1531 struct panfrost_context *ctx = batch->ctx;
1532 struct panfrost_vertex_state *so = ctx->vertex;
1533
1534 /* Staged mali_attr, and index into them. i =/= k, depending on the
1535 * vertex buffer mask and instancing. Twice as much room is allocated,
1536 * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1537 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1538 unsigned k = 0;
1539
1540 for (unsigned i = 0; i < so->num_elements; ++i) {
1541 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1542 * means duplicating some vertex buffers (who cares? aside from
1543 * maybe some caching implications but I somehow doubt that
1544 * matters) */
1545
1546 struct pipe_vertex_element *elem = &so->pipe[i];
1547 unsigned vbi = elem->vertex_buffer_index;
1548
1549 /* The exception to 1:1 mapping is that we can have multiple
1550 * entries (NPOT divisors), so we fix up anyway */
1551
1552 so->hw[i].index = k;
1553
1554 if (!(ctx->vb_mask & (1 << vbi)))
1555 continue;
1556
1557 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1558 struct panfrost_resource *rsrc;
1559
1560 rsrc = pan_resource(buf->buffer.resource);
1561 if (!rsrc)
1562 continue;
1563
1564 /* Align to 64 bytes by masking off the lower bits. This
1565 * will be adjusted back when we fixup the src_offset in
1566 * mali_attr_meta */
1567
1568 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1569 mali_ptr addr = raw_addr & ~63;
1570 unsigned chopped_addr = raw_addr - addr;
1571
1572 /* Add a dependency of the batch on the vertex buffer */
1573 panfrost_batch_add_bo(batch, rsrc->bo,
1574 PAN_BO_ACCESS_SHARED |
1575 PAN_BO_ACCESS_READ |
1576 PAN_BO_ACCESS_VERTEX_TILER);
1577
1578 /* Set common fields */
1579 attrs[k].elements = addr;
1580 attrs[k].stride = buf->stride;
1581
1582 /* Since we advanced the base pointer, we shrink the buffer
1583 * size */
1584 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1585
1586 /* We need to add the extra size we masked off (for
1587 * correctness) so the data doesn't get clamped away */
1588 attrs[k].size += chopped_addr;
1589
1590 /* For non-instancing make sure we initialize */
1591 attrs[k].shift = attrs[k].extra_flags = 0;
1592
1593 /* Instancing uses a dramatically different code path than
1594 * linear, so dispatch for the actual emission now that the
1595 * common code is finished */
1596
1597 unsigned divisor = elem->instance_divisor;
1598
1599 if (divisor && ctx->instance_count == 1) {
1600 /* Silly corner case where there's a divisor(=1) but
1601 * there's no legitimate instancing. So we want *every*
1602 * attribute to be the same. So set stride to zero so
1603 * we don't go anywhere. */
1604
1605 attrs[k].size = attrs[k].stride + chopped_addr;
1606 attrs[k].stride = 0;
1607 attrs[k++].elements |= MALI_ATTR_LINEAR;
1608 } else if (ctx->instance_count <= 1) {
1609 /* Normal, non-instanced attributes */
1610 attrs[k++].elements |= MALI_ATTR_LINEAR;
1611 } else {
1612 unsigned instance_shift = vertex_postfix->instance_shift;
1613 unsigned instance_odd = vertex_postfix->instance_odd;
1614
1615 k += panfrost_vertex_instanced(ctx->padded_count,
1616 instance_shift,
1617 instance_odd,
1618 divisor, &attrs[k]);
1619 }
1620 }
1621
1622 /* Add special gl_VertexID/gl_InstanceID buffers */
1623
1624 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1625 so->hw[PAN_VERTEX_ID].index = k++;
1626 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1627 so->hw[PAN_INSTANCE_ID].index = k++;
1628
1629 /* Upload whatever we emitted and go */
1630
1631 vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
1632 k * sizeof(*attrs));
1633 }
1634
1635 static mali_ptr
1636 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1637 unsigned stride, unsigned count)
1638 {
1639 /* Fill out the descriptor */
1640 slot->stride = stride;
1641 slot->size = stride * count;
1642 slot->shift = slot->extra_flags = 0;
1643
1644 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1645 slot->size);
1646
1647 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1648
1649 return transfer.gpu;
1650 }
1651
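/* Streamout records are aligned down to 64 bytes (see the ~63 mask in
 * panfrost_emit_streamout), so compute the sub-64-byte remainder of the
 * target's start; it is applied back as the varying's src_offset */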
1652 static unsigned
1653 panfrost_streamout_offset(unsigned stride, unsigned offset,
1654 struct pipe_stream_output_target *target)
1655 {
1656 return (target->buffer_offset + (offset * stride * 4)) & 63;
1657 }
1658
1659 static void
1660 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1661 unsigned stride, unsigned offset, unsigned count,
1662 struct pipe_stream_output_target *target)
1663 {
1664 /* Fill out the descriptor */
1665 slot->stride = stride * 4;
1666 slot->shift = slot->extra_flags = 0;
1667
1668 unsigned max_size = target->buffer_size;
1669 unsigned expected_size = slot->stride * count;
1670
1671 /* Grab the BO and bind it to the batch */
1672 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1673
1674 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1675 * the perspective of the TILER and FRAGMENT.
1676 */
1677 panfrost_batch_add_bo(batch, bo,
1678 PAN_BO_ACCESS_SHARED |
1679 PAN_BO_ACCESS_RW |
1680 PAN_BO_ACCESS_VERTEX_TILER |
1681 PAN_BO_ACCESS_FRAGMENT);
1682
1683 /* We will have an offset applied to get alignment */
1684 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1685 slot->elements = (addr & ~63) | MALI_ATTR_LINEAR;
1686 slot->size = MIN2(max_size, expected_size) + (addr & 63);
1687 }
1688
1689 static bool
1690 has_point_coord(unsigned mask, gl_varying_slot loc)
1691 {
1692 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1693 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1694 else if (loc == VARYING_SLOT_PNTC)
1695 return (mask & (1 << 8));
1696 else
1697 return false;
1698 }
1699
1700 /* Helpers for manipulating stream out information so we can pack varyings
1701 * accordingly. Compute the src_offset for a given captured varying */
1702
1703 static struct pipe_stream_output *
1704 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1705 {
1706 for (unsigned i = 0; i < info->num_outputs; ++i) {
1707 if (info->output[i].register_index == loc)
1708 return &info->output[i];
1709 }
1710
1711 unreachable("Varying not captured");
1712 }
1713
1714 static unsigned
1715 pan_varying_size(enum mali_format fmt)
1716 {
1717 unsigned type = MALI_EXTRACT_TYPE(fmt);
1718 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1719 unsigned bits = MALI_EXTRACT_BITS(fmt);
1720 unsigned bpc = 0;
1721
1722 if (bits == MALI_CHANNEL_FLOAT) {
1723 /* No doubles */
1724 bool fp16 = (type == MALI_FORMAT_SINT);
1725 assert(fp16 || (type == MALI_FORMAT_UNORM));
1726
1727 bpc = fp16 ? 2 : 4;
1728 } else {
1729 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1730
1731 /* See the enums */
1732 bits = 1 << bits;
1733 assert(bits >= 8);
1734 bpc = bits / 8;
1735 }
1736
1737 return bpc * chan;
1738 }
1739
1740 /* Indices for named (non-XFB) varyings that are present. These are packed
1741 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1742 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
1743 * of a given special field given a shift S by:
1744 *
1745 * idx = popcount(P & ((1 << S) - 1))
1746 *
1747 * That is... look at all of the varyings that come earlier and count them; that
1748 * count is the new index. Likewise, the total number of special
1749 * buffers required is simply popcount(P)
1750 */
1751
1752 enum pan_special_varying {
1753 PAN_VARY_GENERAL = 0,
1754 PAN_VARY_POSITION = 1,
1755 PAN_VARY_PSIZ = 2,
1756 PAN_VARY_PNTCOORD = 3,
1757 PAN_VARY_FACE = 4,
1758 PAN_VARY_FRAGCOORD = 5,
1759
1760 /* Keep last */
1761 PAN_VARY_MAX,
1762 };
1763
1764 /* Given a varying, figure out which index it corresponds to */
1765
1766 static inline unsigned
1767 pan_varying_index(unsigned present, enum pan_special_varying v)
1768 {
1769 unsigned mask = (1 << v) - 1;
1770 return util_bitcount(present & mask);
1771 }
1772
1773 /* Get the base offset for XFB buffers, which by convention come after
1774 * everything else. Wrapper function for semantic reasons; by construction this
1775 * is just popcount. */
1776
1777 static inline unsigned
1778 pan_xfb_base(unsigned present)
1779 {
1780 return util_bitcount(present);
1781 }
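
/* Worked example: with present = (1 << PAN_VARY_GENERAL) |
 * (1 << PAN_VARY_POSITION) | (1 << PAN_VARY_FACE) = 0b10011,
 * pan_varying_index(present, PAN_VARY_FACE) = popcount(0b00011) = 2, and the
 * first XFB buffer sits at pan_xfb_base(present) = popcount(0b10011) = 3. */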
1782
1783 /* Computes the present mask for varyings so we can start emitting varying records */
1784
1785 static inline unsigned
1786 pan_varying_present(
1787 struct panfrost_shader_state *vs,
1788 struct panfrost_shader_state *fs,
1789 unsigned quirks)
1790 {
1791 /* At the moment we always emit general and position buffers. Not
1792 * strictly necessary but usually harmless */
1793
1794 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1795
1796 /* Enable special buffers based on the shader info */
1797
1798 if (vs->writes_point_size)
1799 present |= (1 << PAN_VARY_PSIZ);
1800
1801 if (fs->reads_point_coord)
1802 present |= (1 << PAN_VARY_PNTCOORD);
1803
1804 if (fs->reads_face)
1805 present |= (1 << PAN_VARY_FACE);
1806
1807 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1808 present |= (1 << PAN_VARY_FRAGCOORD);
1809
1810 /* Also, if we have a point sprite, we need a point coord buffer */
1811
1812 for (unsigned i = 0; i < fs->varying_count; i++) {
1813 gl_varying_slot loc = fs->varyings_loc[i];
1814
1815 if (has_point_coord(fs->point_sprite_mask, loc))
1816 present |= (1 << PAN_VARY_PNTCOORD);
1817 }
1818
1819 return present;
1820 }
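
/* e.g. if the VS writes gl_PointSize and the FS reads gl_PointCoord (and no
 * other special inputs), this returns GENERAL | POSITION | PSIZ | PNTCOORD,
 * i.e. four varying buffers before any XFB targets. */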
1821
1822 /* Emitters for varying records */
1823
1824 static struct mali_attr_meta
1825 pan_emit_vary(unsigned present, enum pan_special_varying buf,
1826 unsigned quirks, enum mali_format format,
1827 unsigned offset)
1828 {
1829 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1830
1831 struct mali_attr_meta meta = {
1832 .index = pan_varying_index(present, buf),
1833 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1834 .swizzle = quirks & HAS_SWIZZLES ?
1835 panfrost_get_default_swizzle(nr_channels) :
1836 panfrost_bifrost_swizzle(nr_channels),
1837 .format = format,
1838 .src_offset = offset
1839 };
1840
1841 return meta;
1842 }
1843
1844 /* General varying that is unused */
1845
1846 static struct mali_attr_meta
1847 pan_emit_vary_only(unsigned present, unsigned quirks)
1848 {
1849 return pan_emit_vary(present, 0, quirks, MALI_VARYING_DISCARD, 0);
1850 }
1851
1852 /* Special records */
1853
1854 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1855 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1856 [PAN_VARY_PSIZ] = MALI_R16F,
1857 [PAN_VARY_PNTCOORD] = MALI_R16F,
1858 [PAN_VARY_FACE] = MALI_R32I,
1859 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1860 };
1861
1862 static struct mali_attr_meta
1863 pan_emit_vary_special(unsigned present, enum pan_special_varying buf,
1864 unsigned quirks)
1865 {
1866 assert(buf < PAN_VARY_MAX);
1867 return pan_emit_vary(present, buf, quirks, pan_varying_formats[buf], 0);
1868 }
1869
1870 static enum mali_format
1871 pan_xfb_format(enum mali_format format, unsigned nr)
1872 {
1873 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1874 return MALI_R32F | MALI_NR_CHANNELS(nr);
1875 else
1876 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1877 }
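
/* e.g. an fp16 vec3 varying captured for XFB is widened to
 * MALI_R32F | MALI_NR_CHANNELS(3), i.e. three 32-bit floats per vertex in the
 * capture buffer, since transform feedback stores floats at full precision. */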
1878
1879 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1880 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1881 * value. */
1882
1883 static struct mali_attr_meta
1884 pan_emit_vary_xfb(unsigned present,
1885 unsigned max_xfb,
1886 unsigned *streamout_offsets,
1887 unsigned quirks,
1888 enum mali_format format,
1889 struct pipe_stream_output o)
1890 {
1891 /* Construct an XFB record for the captured varying */
1892 struct mali_attr_meta meta = {
1893 /* XFB buffers come after everything else */
1894 .index = pan_xfb_base(present) + o.output_buffer,
1895
1896 /* As usual unknown bit */
1897 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1898
1899 /* Override swizzle with number of channels */
1900 .swizzle = quirks & HAS_SWIZZLES ?
1901 panfrost_get_default_swizzle(o.num_components) :
1902 panfrost_bifrost_swizzle(o.num_components),
1903
1904 /* Override number of channels and precision to highp */
1905 .format = pan_xfb_format(format, o.num_components),
1906
1907 /* Apply given offsets together */
1908 .src_offset = (o.dst_offset * 4) /* dwords */
1909 + streamout_offsets[o.output_buffer]
1910 };
1911
1912 return meta;
1913 }
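
/* e.g. o.dst_offset = 2 (dwords into the capture record) on a buffer whose
 * 64-byte misalignment was computed as 16 gives src_offset = 2 * 4 + 16 = 24
 * bytes. */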
1914
1915 /* Determine if we should capture a varying for XFB. This requires actually
1916 * having a buffer for it. If we don't capture it, we'll fall back to a general
1917 * varying path (linked or unlinked, possibly discarding the write) */
1918
1919 static bool
1920 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1921 unsigned loc, unsigned max_xfb)
1922 {
1923 if (!(xfb->so_mask & (1ll << loc)))
1924 return false;
1925
1926 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1927 return o->output_buffer < max_xfb;
1928 }
1929
1930 /* Higher-level wrapper around all of the above, classifying a varying into one
1931 * of the above types */
1932
1933 static struct mali_attr_meta
1934 panfrost_emit_varying(
1935 struct panfrost_shader_state *stage,
1936 struct panfrost_shader_state *other,
1937 struct panfrost_shader_state *xfb,
1938 unsigned present,
1939 unsigned max_xfb,
1940 unsigned *streamout_offsets,
1941 unsigned quirks,
1942 unsigned *gen_offsets,
1943 enum mali_format *gen_formats,
1944 unsigned *gen_stride,
1945 unsigned idx,
1946 bool should_alloc,
1947 bool is_fragment)
1948 {
1949 gl_varying_slot loc = stage->varyings_loc[idx];
1950 enum mali_format format = stage->varyings[idx];
1951
1952 /* Override format to match linkage */
1953 if (!should_alloc && gen_formats[idx])
1954 format = gen_formats[idx];
1955
1956 if (has_point_coord(stage->point_sprite_mask, loc)) {
1957 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1958 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1959 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1960 return pan_emit_vary_xfb(present, max_xfb, streamout_offsets, quirks, format, *o);
1961 } else if (loc == VARYING_SLOT_POS) {
1962 if (is_fragment)
1963 return pan_emit_vary_special(present, PAN_VARY_FRAGCOORD, quirks);
1964 else
1965 return pan_emit_vary_special(present, PAN_VARY_POSITION, quirks);
1966 } else if (loc == VARYING_SLOT_PSIZ) {
1967 return pan_emit_vary_special(present, PAN_VARY_PSIZ, quirks);
1968 } else if (loc == VARYING_SLOT_PNTC) {
1969 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1970 } else if (loc == VARYING_SLOT_FACE) {
1971 return pan_emit_vary_special(present, PAN_VARY_FACE, quirks);
1972 }
1973
1974 /* We've exhausted special cases, so it's otherwise a general varying. Check if we're linked */
1975 signed other_idx = -1;
1976
1977 for (unsigned j = 0; j < other->varying_count; ++j) {
1978 if (other->varyings_loc[j] == loc) {
1979 other_idx = j;
1980 break;
1981 }
1982 }
1983
1984 if (other_idx < 0)
1985 return pan_emit_vary_only(present, quirks);
1986
1987 unsigned offset = gen_offsets[other_idx];
1988
1989 if (should_alloc) {
1990 /* We're linked, so allocate a space via a watermark allocation */
1991 enum mali_format alt = other->varyings[other_idx];
1992
1993 /* Do interpolation at minimum precision */
1994 unsigned size_main = pan_varying_size(format);
1995 unsigned size_alt = pan_varying_size(alt);
1996 unsigned size = MIN2(size_main, size_alt);
1997
1998 /* If a varying is marked for XFB but not actually captured, we
1999 * should match the format to the format that would otherwise
2000 * be used for XFB, since dEQP checks for invariance here. It's
2001 * unclear if this is required by the spec. */
2002
2003 if (xfb->so_mask & (1ull << loc)) {
2004 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
2005 format = pan_xfb_format(format, o->num_components);
2006 size = pan_varying_size(format);
2007 } else if (size == size_alt) {
2008 format = alt;
2009 }
2010
2011 gen_offsets[idx] = *gen_stride;
2012 gen_formats[other_idx] = format;
2013 offset = *gen_stride;
2014 *gen_stride += size;
2015 }
2016
2017 return pan_emit_vary(present, PAN_VARY_GENERAL,
2018 quirks, format, offset);
2019 }
2020
2021 static void
2022 pan_emit_special_input(union mali_attr *varyings,
2023 unsigned present,
2024 enum pan_special_varying v,
2025 mali_ptr addr)
2026 {
2027 if (present & (1 << v)) {
2028 /* Ensure we write exactly once for performance and with fields
2029 * zeroed appropriately to avoid flakes */
2030
2031 union mali_attr s = {
2032 .elements = addr
2033 };
2034
2035 varyings[pan_varying_index(present, v)] = s;
2036 }
2037 }
2038
2039 void
2040 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2041 unsigned vertex_count,
2042 struct mali_vertex_tiler_postfix *vertex_postfix,
2043 struct mali_vertex_tiler_postfix *tiler_postfix,
2044 union midgard_primitive_size *primitive_size)
2045 {
2046 /* Load the shaders */
2047 struct panfrost_context *ctx = batch->ctx;
2048 struct panfrost_device *dev = pan_device(ctx->base.screen);
2049 struct panfrost_shader_state *vs, *fs;
2050 size_t vs_size, fs_size;
2051
2052 /* Allocate the varying descriptor */
2053
2054 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2055 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2056 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
2057 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
2058
2059 struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
2060 vs_size +
2061 fs_size);
2062
2063 struct pipe_stream_output_info *so = &vs->stream_output;
2064 unsigned present = pan_varying_present(vs, fs, dev->quirks);
2065
2066 /* Check if this varying is linked by us. This is the case for
2067 * general-purpose, non-captured varyings. If it is, link it. If it's
2068 * not, use the provided stream out information to determine the
2069 * offset, since it was already linked for us. */
2070
2071 unsigned gen_offsets[32];
2072 enum mali_format gen_formats[32];
2073 memset(gen_offsets, 0, sizeof(gen_offsets));
2074 memset(gen_formats, 0, sizeof(gen_formats));
2075
2076 unsigned gen_stride = 0;
2077 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
2078 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
2079
2080 unsigned streamout_offsets[32];
2081
2082 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2083 streamout_offsets[i] = panfrost_streamout_offset(
2084 so->stride[i],
2085 ctx->streamout.offsets[i],
2086 ctx->streamout.targets[i]);
2087 }
2088
2089 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
2090 struct mali_attr_meta *ofs = ovs + vs->varying_count;
2091
2092 for (unsigned i = 0; i < vs->varying_count; i++) {
2093 ovs[i] = panfrost_emit_varying(vs, fs, vs, present,
2094 ctx->streamout.num_targets, streamout_offsets,
2095 dev->quirks,
2096 gen_offsets, gen_formats, &gen_stride, i, true, false);
2097 }
2098
2099 for (unsigned i = 0; i < fs->varying_count; i++) {
2100 ofs[i] = panfrost_emit_varying(fs, vs, vs, present,
2101 ctx->streamout.num_targets, streamout_offsets,
2102 dev->quirks,
2103 gen_offsets, gen_formats, &gen_stride, i, false, true);
2104 }
2105
2106 unsigned xfb_base = pan_xfb_base(present);
2107 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
2108 sizeof(union mali_attr) * (xfb_base + ctx->streamout.num_targets));
2109 union mali_attr *varyings = (union mali_attr *) T.cpu;
2110
2111 /* Emit the stream out buffers */
2112
2113 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
2114 ctx->vertex_count);
2115
2116 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2117 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2118 so->stride[i],
2119 ctx->streamout.offsets[i],
2120 out_count,
2121 ctx->streamout.targets[i]);
2122 }
2123
2124 panfrost_emit_varyings(batch,
2125 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2126 gen_stride, vertex_count);
2127
2128 /* fp32 vec4 gl_Position */
2129 tiler_postfix->position_varying = panfrost_emit_varyings(batch,
2130 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2131 sizeof(float) * 4, vertex_count);
2132
2133 if (present & (1 << PAN_VARY_PSIZ)) {
2134 primitive_size->pointer = panfrost_emit_varyings(batch,
2135 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2136 2, vertex_count);
2137 }
2138
2139 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_VARYING_POINT_COORD);
2140 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_VARYING_FRONT_FACING);
2141 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_VARYING_FRAG_COORD);
2142
2143 vertex_postfix->varyings = T.gpu;
2144 tiler_postfix->varyings = T.gpu;
2145
2146 vertex_postfix->varying_meta = trans.gpu;
2147 tiler_postfix->varying_meta = trans.gpu + vs_size;
2148 }
2149
2150 void
2151 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2152 struct mali_vertex_tiler_prefix *vertex_prefix,
2153 struct mali_vertex_tiler_postfix *vertex_postfix,
2154 struct mali_vertex_tiler_prefix *tiler_prefix,
2155 struct mali_vertex_tiler_postfix *tiler_postfix,
2156 union midgard_primitive_size *primitive_size)
2157 {
2158 struct panfrost_context *ctx = batch->ctx;
2159 struct panfrost_device *device = pan_device(ctx->base.screen);
2160 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
2161 struct bifrost_payload_vertex bifrost_vertex = {0,};
2162 struct bifrost_payload_tiler bifrost_tiler = {0,};
2163 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
2164 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
2165 void *vp, *tp;
2166 size_t vp_size, tp_size;
2167
2168 if (device->quirks & IS_BIFROST) {
2169 bifrost_vertex.prefix = *vertex_prefix;
2170 bifrost_vertex.postfix = *vertex_postfix;
2171 vp = &bifrost_vertex;
2172 vp_size = sizeof(bifrost_vertex);
2173
2174 bifrost_tiler.prefix = *tiler_prefix;
2175 bifrost_tiler.tiler.primitive_size = *primitive_size;
2176 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
2177 bifrost_tiler.postfix = *tiler_postfix;
2178 tp = &bifrost_tiler;
2179 tp_size = sizeof(bifrost_tiler);
2180 } else {
2181 midgard_vertex.prefix = *vertex_prefix;
2182 midgard_vertex.postfix = *vertex_postfix;
2183 vp = &midgard_vertex;
2184 vp_size = sizeof(midgard_vertex);
2185
2186 midgard_tiler.prefix = *tiler_prefix;
2187 midgard_tiler.postfix = *tiler_postfix;
2188 midgard_tiler.primitive_size = *primitive_size;
2189 tp = &midgard_tiler;
2190 tp_size = sizeof(midgard_tiler);
2191 }
2192
2193 if (wallpapering) {
2194 /* Inject in reverse order, with "predicted" job indices.
2195 * THIS IS A HACK XXX */
2196 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false,
2197 batch->scoreboard.job_index + 2, tp, tp_size, true);
2198 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2199 vp, vp_size, true);
2200 return;
2201 }
2202
2203 /* If rasterizer discard is enabled, only submit the vertex job */
2204
2205 bool rasterizer_discard = ctx->rasterizer &&
2206 ctx->rasterizer->base.rasterizer_discard;
2207
2208 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2209 vp, vp_size, false);
2210
2211 if (rasterizer_discard)
2212 return;
2213
2214 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2215 false);
2216 }
2217
2218 /* TODO: stop hardcoding this */
2219 mali_ptr
2220 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2221 {
2222 uint16_t locations[] = {
2223 128, 128,
2224 0, 256,
2225 0, 256,
2226 0, 256,
2227 0, 256,
2228 0, 256,
2229 0, 256,
2230 0, 256,
2231 0, 256,
2232 0, 256,
2233 0, 256,
2234 0, 256,
2235 0, 256,
2236 0, 256,
2237 0, 256,
2238 0, 256,
2239 0, 256,
2240 0, 256,
2241 0, 256,
2242 0, 256,
2243 0, 256,
2244 0, 256,
2245 0, 256,
2246 0, 256,
2247 0, 256,
2248 0, 256,
2249 0, 256,
2250 0, 256,
2251 0, 256,
2252 0, 256,
2253 0, 256,
2254 0, 256,
2255 128, 128,
2256 0, 0,
2257 0, 0,
2258 0, 0,
2259 0, 0,
2260 0, 0,
2261 0, 0,
2262 0, 0,
2263 0, 0,
2264 0, 0,
2265 0, 0,
2266 0, 0,
2267 0, 0,
2268 0, 0,
2269 0, 0,
2270 0, 0,
2271 };
2272
2273 return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t));
2274 }