panfrost: Don't leak temporary descriptors array
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_allocate.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_upload_transient(batch, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77 /* If we haven't, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_allocate_transient(batch, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query)
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 else
140 postfix->occlusion_counter = 0;
141 }
142
143 void
144 panfrost_vt_init(struct panfrost_context *ctx,
145 enum pipe_shader_type stage,
146 struct mali_vertex_tiler_prefix *prefix,
147 struct mali_vertex_tiler_postfix *postfix)
148 {
149 struct panfrost_device *device = pan_device(ctx->base.screen);
150
151 if (!ctx->shader[stage])
152 return;
153
154 memset(prefix, 0, sizeof(*prefix));
155 memset(postfix, 0, sizeof(*postfix));
156
157 if (device->quirks & IS_BIFROST) {
158 postfix->gl_enables = 0x2;
159 panfrost_vt_emit_shared_memory(ctx, postfix);
160 } else {
161 postfix->gl_enables = 0x6;
162 panfrost_vt_attach_framebuffer(ctx, postfix);
163 }
164
165 if (stage == PIPE_SHADER_FRAGMENT) {
166 panfrost_vt_update_occlusion_query(ctx, postfix);
167 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
168 }
169 }
170
171 static unsigned
172 panfrost_translate_index_size(unsigned size)
173 {
174 switch (size) {
175 case 1:
176 return MALI_DRAW_INDEXED_UINT8;
177
178 case 2:
179 return MALI_DRAW_INDEXED_UINT16;
180
181 case 4:
182 return MALI_DRAW_INDEXED_UINT32;
183
184 default:
185 unreachable("Invalid index size");
186 }
187 }
188
189 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
190 * good for the duration of the draw (transient), could last longer. Also get
191 * the bounds on the index buffer for the range accessed by the draw. We do
192 * these operations together because there are natural optimizations which
193 * require them to be together. */
194
195 static mali_ptr
196 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
197 const struct pipe_draw_info *info,
198 unsigned *min_index, unsigned *max_index)
199 {
200 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
201 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
202 off_t offset = info->start * info->index_size;
203 bool needs_indices = true;
204 mali_ptr out = 0;
205
206 if (info->max_index != ~0u) {
207 *min_index = info->min_index;
208 *max_index = info->max_index;
209 needs_indices = false;
210 }
211
212 if (!info->has_user_indices) {
213 /* Only resources can be directly mapped */
214 panfrost_batch_add_bo(batch, rsrc->bo,
215 PAN_BO_ACCESS_SHARED |
216 PAN_BO_ACCESS_READ |
217 PAN_BO_ACCESS_VERTEX_TILER);
218 out = rsrc->bo->gpu + offset;
219
220 /* Check the cache */
221 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
222 info->start,
223 info->count,
224 min_index,
225 max_index);
226 } else {
227 /* Otherwise, we need to upload to transient memory */
228 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
229 out = panfrost_upload_transient(batch, ibuf8 + offset,
230 info->count *
231 info->index_size);
232 }
233
234 if (needs_indices) {
235 /* Fallback */
236 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
237
238 if (!info->has_user_indices)
239 panfrost_minmax_cache_add(rsrc->index_cache,
240 info->start, info->count,
241 *min_index, *max_index);
242 }
243
244 return out;
245 }
246
247 void
248 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
249 const struct pipe_draw_info *info,
250 enum mali_draw_mode draw_mode,
251 struct mali_vertex_tiler_postfix *vertex_postfix,
252 struct mali_vertex_tiler_prefix *tiler_prefix,
253 struct mali_vertex_tiler_postfix *tiler_postfix,
254 unsigned *vertex_count,
255 unsigned *padded_count)
256 {
257 tiler_prefix->draw_mode = draw_mode;
258
259 unsigned draw_flags = 0;
260
261 if (panfrost_writes_point_size(ctx))
262 draw_flags |= MALI_DRAW_VARYING_SIZE;
263
264 if (info->primitive_restart)
265 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
266
267 /* These don't make much sense */
268
269 draw_flags |= 0x3000;
270
271 if (info->index_size) {
272 unsigned min_index = 0, max_index = 0;
273
274 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
275 info,
276 &min_index,
277 &max_index);
278
279 /* Use the corresponding values */
280 *vertex_count = max_index - min_index + 1;
281 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
282 tiler_prefix->offset_bias_correction = -min_index;
283 tiler_prefix->index_count = MALI_POSITIVE(info->count);
284 draw_flags |= panfrost_translate_index_size(info->index_size);
285 } else {
286 tiler_prefix->indices = 0;
287 *vertex_count = ctx->vertex_count;
288 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
289 tiler_prefix->offset_bias_correction = 0;
290 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
291 }
292
293 tiler_prefix->unknown_draw = draw_flags;
294
295 /* Encode the padded vertex count */
296
297 if (info->instance_count > 1) {
298 *padded_count = panfrost_padded_vertex_count(*vertex_count);
299
300 unsigned shift = __builtin_ctz(ctx->padded_count);
301 unsigned k = ctx->padded_count >> (shift + 1);
302
303 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
304 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
305 } else {
306 *padded_count = *vertex_count;
307
308 /* Reset instancing state */
309 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
310 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
311 }
312 }
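/* Illustrative note (not from the original source): the instancing fields
 * above encode padded_count as (2k + 1) << shift, with instance_shift = shift
 * and instance_odd = k. For example, a padded_count of 24 gives
 * shift = ctz(24) = 3 and k = 24 >> (3 + 1) = 1, since (2*1 + 1) << 3 = 24.
 * Likewise, for indexed draws the vertex count comes from the index bounds:
 * indices spanning [min_index, max_index] give
 * *vertex_count = max_index - min_index + 1, with offset_bias_correction set
 * to -min_index so the hardware effectively indexes from zero. */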
313
314 static void
315 panfrost_shader_meta_init(struct panfrost_context *ctx,
316 enum pipe_shader_type st,
317 struct mali_shader_meta *meta)
318 {
319 const struct panfrost_device *dev = pan_device(ctx->base.screen);
320 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
321
322 memset(meta, 0, sizeof(*meta));
323 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
324 meta->attribute_count = ss->attribute_count;
325 meta->varying_count = ss->varying_count;
326 meta->texture_count = ctx->sampler_view_count[st];
327 meta->sampler_count = ctx->sampler_count[st];
328
329 if (dev->quirks & IS_BIFROST) {
330 meta->bifrost1.unk1 = 0x800200;
331 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
332 meta->bifrost2.preload_regs = 0xC0;
333 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
334 ss->uniform_cutoff);
335 } else {
336 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
337 ss->uniform_cutoff);
338 meta->midgard1.work_count = ss->work_reg_count;
339 meta->midgard1.flags_hi = 0x8; /* XXX */
340 meta->midgard1.flags_lo = 0x220;
341 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
342 }
343
344 }
345
346 static unsigned
347 panfrost_translate_compare_func(enum pipe_compare_func in)
348 {
349 switch (in) {
350 case PIPE_FUNC_NEVER:
351 return MALI_FUNC_NEVER;
352
353 case PIPE_FUNC_LESS:
354 return MALI_FUNC_LESS;
355
356 case PIPE_FUNC_EQUAL:
357 return MALI_FUNC_EQUAL;
358
359 case PIPE_FUNC_LEQUAL:
360 return MALI_FUNC_LEQUAL;
361
362 case PIPE_FUNC_GREATER:
363 return MALI_FUNC_GREATER;
364
365 case PIPE_FUNC_NOTEQUAL:
366 return MALI_FUNC_NOTEQUAL;
367
368 case PIPE_FUNC_GEQUAL:
369 return MALI_FUNC_GEQUAL;
370
371 case PIPE_FUNC_ALWAYS:
372 return MALI_FUNC_ALWAYS;
373
374 default:
375 unreachable("Invalid func");
376 }
377 }
378
379 static unsigned
380 panfrost_translate_stencil_op(enum pipe_stencil_op in)
381 {
382 switch (in) {
383 case PIPE_STENCIL_OP_KEEP:
384 return MALI_STENCIL_KEEP;
385
386 case PIPE_STENCIL_OP_ZERO:
387 return MALI_STENCIL_ZERO;
388
389 case PIPE_STENCIL_OP_REPLACE:
390 return MALI_STENCIL_REPLACE;
391
392 case PIPE_STENCIL_OP_INCR:
393 return MALI_STENCIL_INCR;
394
395 case PIPE_STENCIL_OP_DECR:
396 return MALI_STENCIL_DECR;
397
398 case PIPE_STENCIL_OP_INCR_WRAP:
399 return MALI_STENCIL_INCR_WRAP;
400
401 case PIPE_STENCIL_OP_DECR_WRAP:
402 return MALI_STENCIL_DECR_WRAP;
403
404 case PIPE_STENCIL_OP_INVERT:
405 return MALI_STENCIL_INVERT;
406
407 default:
408 unreachable("Invalid stencil op");
409 }
410 }
411
412 static unsigned
413 translate_tex_wrap(enum pipe_tex_wrap w)
414 {
415 switch (w) {
416 case PIPE_TEX_WRAP_REPEAT:
417 return MALI_WRAP_REPEAT;
418
419 case PIPE_TEX_WRAP_CLAMP:
420 return MALI_WRAP_CLAMP;
421
422 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
423 return MALI_WRAP_CLAMP_TO_EDGE;
424
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
426 return MALI_WRAP_CLAMP_TO_BORDER;
427
428 case PIPE_TEX_WRAP_MIRROR_REPEAT:
429 return MALI_WRAP_MIRRORED_REPEAT;
430
431 case PIPE_TEX_WRAP_MIRROR_CLAMP:
432 return MALI_WRAP_MIRRORED_CLAMP;
433
434 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
435 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
438 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
439
440 default:
441 unreachable("Invalid wrap");
442 }
443 }
444
445 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
446 struct mali_sampler_descriptor *hw)
447 {
448 unsigned func = panfrost_translate_compare_func(cso->compare_func);
449 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
450 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
451 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
452 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
453 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
454 unsigned mip_filter = mip_linear ?
455 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
456 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
457
458 *hw = (struct mali_sampler_descriptor) {
459 .filter_mode = min_filter | mag_filter | mip_filter |
460 normalized,
461 .wrap_s = translate_tex_wrap(cso->wrap_s),
462 .wrap_t = translate_tex_wrap(cso->wrap_t),
463 .wrap_r = translate_tex_wrap(cso->wrap_r),
464 .compare_func = panfrost_flip_compare_func(func),
465 .border_color = {
466 cso->border_color.f[0],
467 cso->border_color.f[1],
468 cso->border_color.f[2],
469 cso->border_color.f[3]
470 },
471 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
472 .max_lod = FIXED_16(cso->max_lod, false),
473 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
474 .seamless_cube_map = cso->seamless_cube_map,
475 };
476
477 /* If necessary, we disable mipmapping in the sampler descriptor by
478 * clamping the LOD as tight as possible (from 0 to epsilon,
479 * essentially -- remember these are fixed point numbers, so
480 * epsilon=1/256) */
481
482 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
483 hw->max_lod = hw->min_lod + 1;
484 }
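/* Worked example (illustrative): FIXED_16 carries 8 fractional bits, so one
 * fixed-point step is 1/256 of a LOD. With min_mip_filter = NONE and
 * min_lod = 0.0, min_lod encodes to 0 and max_lod is forced to 1, i.e. a
 * [0, 1/256] LOD range that effectively pins sampling to the base level. */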
485
486 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
487 struct bifrost_sampler_descriptor *hw)
488 {
489 *hw = (struct bifrost_sampler_descriptor) {
490 .unk1 = 0x1,
491 .wrap_s = translate_tex_wrap(cso->wrap_s),
492 .wrap_t = translate_tex_wrap(cso->wrap_t),
493 .wrap_r = translate_tex_wrap(cso->wrap_r),
494 .unk8 = 0x8,
495 .unk2 = 0x2,
496 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
497 .norm_coords = cso->normalized_coords,
498 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
499 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
500 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
501 .max_lod = FIXED_16(cso->max_lod, false),
502 };
503
504 /* If necessary, we disable mipmapping in the sampler descriptor by
505 * clamping the LOD as tight as possible (from 0 to epsilon,
506 * essentially -- remember these are fixed point numbers, so
507 * epsilon=1/256) */
508
509 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
510 hw->max_lod = hw->min_lod + 1;
511 }
512
513 static void
514 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
515 struct mali_stencil_test *out)
516 {
517 out->ref = 0; /* Gallium gets it from elsewhere */
518
519 out->mask = in->valuemask;
520 out->func = panfrost_translate_compare_func(in->func);
521 out->sfail = panfrost_translate_stencil_op(in->fail_op);
522 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
523 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
524 }
525
526 static void
527 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
528 struct mali_shader_meta *fragmeta)
529 {
530 if (!ctx->rasterizer) {
531 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
532 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
533 fragmeta->depth_units = 0.0f;
534 fragmeta->depth_factor = 0.0f;
535 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
536 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
537 return;
538 }
539
540 bool msaa = ctx->rasterizer->base.multisample;
541
542 /* TODO: Sample size */
543 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
544 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
545 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
546 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
547
548 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
549
550 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
551 ctx->rasterizer->base.offset_tri);
552 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
553 ctx->rasterizer->base.offset_tri);
554 }
555
556 static void
557 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
558 struct mali_shader_meta *fragmeta)
559 {
560 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
561 int zfunc = PIPE_FUNC_ALWAYS;
562
563 if (!zsa) {
564 struct pipe_stencil_state default_stencil = {
565 .enabled = 0,
566 .func = PIPE_FUNC_ALWAYS,
567 .fail_op = MALI_STENCIL_KEEP,
568 .zfail_op = MALI_STENCIL_KEEP,
569 .zpass_op = MALI_STENCIL_KEEP,
570 .writemask = 0xFF,
571 .valuemask = 0xFF
572 };
573
574 panfrost_make_stencil_state(&default_stencil,
575 &fragmeta->stencil_front);
576 fragmeta->stencil_mask_front = default_stencil.writemask;
577 fragmeta->stencil_back = fragmeta->stencil_front;
578 fragmeta->stencil_mask_back = default_stencil.writemask;
579 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
580 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
581 } else {
582 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
583 zsa->stencil[0].enabled);
584 panfrost_make_stencil_state(&zsa->stencil[0],
585 &fragmeta->stencil_front);
586 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
587 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
588
589 /* If back-stencil is not enabled, use the front values */
590
591 if (zsa->stencil[1].enabled) {
592 panfrost_make_stencil_state(&zsa->stencil[1],
593 &fragmeta->stencil_back);
594 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
595 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
596 } else {
597 fragmeta->stencil_back = fragmeta->stencil_front;
598 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
599 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
600 }
601
602 if (zsa->depth.enabled)
603 zfunc = zsa->depth.func;
604
605 /* Depth state (TODO: Refactor) */
606
607 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
608 zsa->depth.writemask);
609 }
610
611 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
612 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
613 }
614
615 static void
616 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
617 struct mali_shader_meta *fragmeta,
618 void *rts)
619 {
620 const struct panfrost_device *dev = pan_device(ctx->base.screen);
621
622 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
623 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
624 !ctx->blend->base.dither);
625
626 /* Get blending setup */
627 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
628
629 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
630 unsigned shader_offset = 0;
631 struct panfrost_bo *shader_bo = NULL;
632
633 for (unsigned c = 0; c < rt_count; ++c)
634 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
635 &shader_offset);
636
637 /* If there is a blend shader, work registers are shared. XXX: opt */
638
639 for (unsigned c = 0; c < rt_count; ++c) {
640 if (blend[c].is_shader)
641 fragmeta->midgard1.work_count = 16;
642 }
643
644 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
645 * copied to the blend_meta appended (by convention), but this is the
646 * field actually read by the hardware. (Or maybe both are read...?).
647 * Specify the last RTi with a blend shader. */
648
649 fragmeta->blend.shader = 0;
650
651 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
652 if (!blend[rt].is_shader)
653 continue;
654
655 fragmeta->blend.shader = blend[rt].shader.gpu |
656 blend[rt].shader.first_tag;
657 break;
658 }
659
660 if (dev->quirks & MIDGARD_SFBD) {
661 /* On single render target (SFBD) platforms, the blend
662 * information is inside the shader meta itself. We additionally
663 * need to signal CAN_DISCARD for nontrivial blend modes (so
664 * we're able to read back the destination buffer) */
665
666 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
667 blend[0].is_shader);
668
669 if (!blend[0].is_shader) {
670 fragmeta->blend.equation = *blend[0].equation.equation;
671 fragmeta->blend.constant = blend[0].equation.constant;
672 }
673
674 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
675 !blend[0].no_blending);
676 return;
677 }
678
679 /* Additional blend descriptor tacked on for jobs using MFBD */
680
681 for (unsigned i = 0; i < rt_count; ++i) {
682 if (dev->quirks & IS_BIFROST) {
683 struct bifrost_blend_rt *brts = rts;
684 struct panfrost_shader_state *fs;
685 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
686
687 brts[i].flags = 0x200;
688 if (blend[i].is_shader) {
689 /* The blend shader's address needs to be at
690 * the same top 32 bits as the fragment shader's.
691 * TODO: Ensure that's always the case.
692 */
693 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
694 (fs->bo->gpu & (0xffffffffull << 32)));
695 brts[i].shader = blend[i].shader.gpu;
696 brts[i].unk2 = 0x0;
697 } else {
698 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
699 const struct util_format_description *format_desc;
700 format_desc = util_format_description(format);
701
702 brts[i].equation = *blend[i].equation.equation;
703
704 /* TODO: this is a bit more complicated */
705 brts[i].constant = blend[i].equation.constant;
706
707 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
708 brts[i].unk2 = 0x19;
709
710 brts[i].shader_type = fs->blend_types[i];
711 }
712 } else {
713 struct midgard_blend_rt *mrts = rts;
714
715 mrts[i].flags = 0x200;
716
717 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
718 (ctx->pipe_framebuffer.cbufs[i]) &&
719 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
720
721 SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
722 SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
723 SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb);
724 SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
725
726 if (blend[i].is_shader) {
727 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
728 } else {
729 mrts[i].blend.equation = *blend[i].equation.equation;
730 mrts[i].blend.constant = blend[i].equation.constant;
731 }
732 }
733 }
734 }
735
736 static void
737 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
738 struct mali_shader_meta *fragmeta,
739 void *rts)
740 {
741 const struct panfrost_device *dev = pan_device(ctx->base.screen);
742 struct panfrost_shader_state *fs;
743
744 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
745
746 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
747 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
748 fragmeta->unknown2_4 = 0x4e0;
749
750 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
751 * is required (independent of 32-bit/64-bit descriptors), or why it's
752 * not used on later GPU revisions. Otherwise, all shader jobs fault on
753 * these earlier chips (perhaps this is a chicken bit of some kind).
754 * More investigation is needed. */
755
756 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
757
758 /* Depending on whether it's legal to do so in the given shader, we try to
759 * enable early-z testing (or forward-pixel kill?) */
760
761 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
762 !fs->can_discard && !fs->writes_depth);
763
764 /* Add the writes Z/S flags if needed. */
765 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
766 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
767
768 /* Any time texturing is used, derivatives are implicitly calculated,
769 * so we need to enable helper invocations */
770
771 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
772 fs->helper_invocations);
773
774 /* CAN_DISCARD should be set if the fragment shader possibly contains a
775 * 'discard' instruction. This is likely related to optimizations
776 * around forward-pixel kill, as per "Mali Performance 3: Is
777 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
778
779 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
780 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
781
782 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
783 panfrost_frag_meta_zsa_update(ctx, fragmeta);
784 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
785 }
786
787 void
788 panfrost_emit_shader_meta(struct panfrost_batch *batch,
789 enum pipe_shader_type st,
790 struct mali_vertex_tiler_postfix *postfix)
791 {
792 struct panfrost_context *ctx = batch->ctx;
793 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
794
795 if (!ss) {
796 postfix->shader = 0;
797 return;
798 }
799
800 struct mali_shader_meta meta;
801
802 panfrost_shader_meta_init(ctx, st, &meta);
803
804 /* Add the shader BO to the batch. */
805 panfrost_batch_add_bo(batch, ss->bo,
806 PAN_BO_ACCESS_PRIVATE |
807 PAN_BO_ACCESS_READ |
808 panfrost_bo_access_for_stage(st));
809
810 mali_ptr shader_ptr;
811
812 if (st == PIPE_SHADER_FRAGMENT) {
813 struct panfrost_device *dev = pan_device(ctx->base.screen);
814 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
815 size_t desc_size = sizeof(meta);
816 void *rts = NULL;
817 struct panfrost_transfer xfer;
818 unsigned rt_size;
819
820 if (dev->quirks & MIDGARD_SFBD)
821 rt_size = 0;
822 else if (dev->quirks & IS_BIFROST)
823 rt_size = sizeof(struct bifrost_blend_rt);
824 else
825 rt_size = sizeof(struct midgard_blend_rt);
826
827 desc_size += rt_size * rt_count;
828
829 if (rt_size)
830 rts = rzalloc_size(ctx, rt_size * rt_count);
831
832 panfrost_frag_shader_meta_init(ctx, &meta, rts);
833
834 xfer = panfrost_allocate_transient(batch, desc_size);
835
836 memcpy(xfer.cpu, &meta, sizeof(meta));
837 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
838
839 if (rt_size)
840 ralloc_free(rts);
841
842 shader_ptr = xfer.gpu;
843 } else {
844 shader_ptr = panfrost_upload_transient(batch, &meta,
845 sizeof(meta));
846 }
847
848 postfix->shader = shader_ptr;
849 }
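/* Layout sketch (illustrative): for fragment shaders the transient upload is
 * the shader meta immediately followed by one blend RT descriptor per render
 * target, e.g. a Midgard MFBD job with two colour buffers uses
 * desc_size = sizeof(struct mali_shader_meta) + 2 * sizeof(struct midgard_blend_rt).
 * The temporary rts array exists only to build that tail on the CPU and is
 * freed once the memcpy into transient memory is done. */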
850
851 static void
852 panfrost_mali_viewport_init(struct panfrost_context *ctx,
853 struct mali_viewport *mvp)
854 {
855 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
856
857 /* Clip bounds are encoded as floats. The viewport itself is encoded as
858 * (somewhat) asymmetric ints. */
859
860 const struct pipe_scissor_state *ss = &ctx->scissor;
861
862 memset(mvp, 0, sizeof(*mvp));
863
864 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
865 * each direction. Clipping to the viewport in theory should work, but
866 * in practice causes issues when we're not explicitly trying to
867 * scissor */
868
869 *mvp = (struct mali_viewport) {
870 .clip_minx = -INFINITY,
871 .clip_miny = -INFINITY,
872 .clip_maxx = INFINITY,
873 .clip_maxy = INFINITY,
874 };
875
876 /* Always scissor to the viewport by default. */
877 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
878 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
879
880 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
881 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
882
883 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
884 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
885
886 /* Apply the scissor test */
887
888 unsigned minx, miny, maxx, maxy;
889
890 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
891 minx = MAX2(ss->minx, vp_minx);
892 miny = MAX2(ss->miny, vp_miny);
893 maxx = MIN2(ss->maxx, vp_maxx);
894 maxy = MIN2(ss->maxy, vp_maxy);
895 } else {
896 minx = vp_minx;
897 miny = vp_miny;
898 maxx = vp_maxx;
899 maxy = vp_maxy;
900 }
901
902 /* Hardware needs the min/max to be strictly ordered, so flip if we
903 * need to. The viewport transformation in the vertex shader will
904 * handle the negatives if we don't */
905
906 if (miny > maxy) {
907 unsigned temp = miny;
908 miny = maxy;
909 maxy = temp;
910 }
911
912 if (minx > maxx) {
913 unsigned temp = minx;
914 minx = maxx;
915 maxx = temp;
916 }
917
918 if (minz > maxz) {
919 float temp = minz;
920 minz = maxz;
921 maxz = temp;
922 }
923
924 /* Clamp to the framebuffer size as a last check */
925
926 minx = MIN2(ctx->pipe_framebuffer.width, minx);
927 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
928
929 miny = MIN2(ctx->pipe_framebuffer.height, miny);
930 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
931
932 /* Upload */
933
934 mvp->viewport0[0] = minx;
935 mvp->viewport1[0] = MALI_POSITIVE(maxx);
936
937 mvp->viewport0[1] = miny;
938 mvp->viewport1[1] = MALI_POSITIVE(maxy);
939
940 mvp->clip_minz = minz;
941 mvp->clip_maxz = maxz;
942 }
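/* Illustrative example: MALI_POSITIVE(n) stores n - 1, so viewport1 holds
 * inclusive maxima. A 1920x1080 framebuffer with no scissor ends up with
 * viewport0 = (0, 0) and viewport1 = (1919, 1079); panfrost_emit_viewport()
 * below adds the 1 back when unioning the batch scissor. */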
943
944 void
945 panfrost_emit_viewport(struct panfrost_batch *batch,
946 struct mali_vertex_tiler_postfix *tiler_postfix)
947 {
948 struct panfrost_context *ctx = batch->ctx;
949 struct mali_viewport mvp;
950
951 panfrost_mali_viewport_init(batch->ctx, &mvp);
952
953 /* Update the job, unless we're doing wallpapering (whose lack of
954 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
955 * just... be faster :) */
956
957 if (!ctx->wallpaper_batch)
958 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
959 mvp.viewport0[1],
960 mvp.viewport1[0] + 1,
961 mvp.viewport1[1] + 1);
962
963 tiler_postfix->viewport = panfrost_upload_transient(batch, &mvp,
964 sizeof(mvp));
965 }
966
967 static mali_ptr
968 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
969 enum pipe_shader_type st,
970 struct panfrost_constant_buffer *buf,
971 unsigned index)
972 {
973 struct pipe_constant_buffer *cb = &buf->cb[index];
974 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
975
976 if (rsrc) {
977 panfrost_batch_add_bo(batch, rsrc->bo,
978 PAN_BO_ACCESS_SHARED |
979 PAN_BO_ACCESS_READ |
980 panfrost_bo_access_for_stage(st));
981
982 /* Alignment guaranteed by
983 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
984 return rsrc->bo->gpu + cb->buffer_offset;
985 } else if (cb->user_buffer) {
986 return panfrost_upload_transient(batch,
987 cb->user_buffer +
988 cb->buffer_offset,
989 cb->buffer_size);
990 } else {
991 unreachable("No constant buffer");
992 }
993 }
994
995 struct sysval_uniform {
996 union {
997 float f[4];
998 int32_t i[4];
999 uint32_t u[4];
1000 uint64_t du[2];
1001 };
1002 };
1003
1004 static void
1005 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1006 struct sysval_uniform *uniform)
1007 {
1008 struct panfrost_context *ctx = batch->ctx;
1009 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1010
1011 uniform->f[0] = vp->scale[0];
1012 uniform->f[1] = vp->scale[1];
1013 uniform->f[2] = vp->scale[2];
1014 }
1015
1016 static void
1017 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1018 struct sysval_uniform *uniform)
1019 {
1020 struct panfrost_context *ctx = batch->ctx;
1021 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1022
1023 uniform->f[0] = vp->translate[0];
1024 uniform->f[1] = vp->translate[1];
1025 uniform->f[2] = vp->translate[2];
1026 }
1027
1028 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1029 enum pipe_shader_type st,
1030 unsigned int sysvalid,
1031 struct sysval_uniform *uniform)
1032 {
1033 struct panfrost_context *ctx = batch->ctx;
1034 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1035 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1036 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1037 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1038
1039 assert(dim);
1040 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1041
1042 if (dim > 1)
1043 uniform->i[1] = u_minify(tex->texture->height0,
1044 tex->u.tex.first_level);
1045
1046 if (dim > 2)
1047 uniform->i[2] = u_minify(tex->texture->depth0,
1048 tex->u.tex.first_level);
1049
1050 if (is_array)
1051 uniform->i[dim] = tex->texture->array_size;
1052 }
1053
1054 static void
1055 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1056 enum pipe_shader_type st,
1057 unsigned ssbo_id,
1058 struct sysval_uniform *uniform)
1059 {
1060 struct panfrost_context *ctx = batch->ctx;
1061
1062 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1063 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1064
1065 /* Compute address */
1066 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1067
1068 panfrost_batch_add_bo(batch, bo,
1069 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1070 panfrost_bo_access_for_stage(st));
1071
1072 /* Upload address and size as sysval */
1073 uniform->du[0] = bo->gpu + sb.buffer_offset;
1074 uniform->u[2] = sb.buffer_size;
1075 }
1076
1077 static void
1078 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1079 enum pipe_shader_type st,
1080 unsigned samp_idx,
1081 struct sysval_uniform *uniform)
1082 {
1083 struct panfrost_context *ctx = batch->ctx;
1084 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1085
1086 uniform->f[0] = sampl->min_lod;
1087 uniform->f[1] = sampl->max_lod;
1088 uniform->f[2] = sampl->lod_bias;
1089
1090 /* Even without any errata, Midgard represents "no mipmapping" as
1091 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1092 * panfrost_create_sampler_state which also explains our choice of
1093 * epsilon value (again to keep behaviour consistent) */
1094
1095 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1096 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1097 }
1098
1099 static void
1100 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1101 struct sysval_uniform *uniform)
1102 {
1103 struct panfrost_context *ctx = batch->ctx;
1104
1105 uniform->u[0] = ctx->compute_grid->grid[0];
1106 uniform->u[1] = ctx->compute_grid->grid[1];
1107 uniform->u[2] = ctx->compute_grid->grid[2];
1108 }
1109
1110 static void
1111 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1112 struct panfrost_shader_state *ss,
1113 enum pipe_shader_type st)
1114 {
1115 struct sysval_uniform *uniforms = (void *)buf;
1116
1117 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1118 int sysval = ss->sysval[i];
1119
1120 switch (PAN_SYSVAL_TYPE(sysval)) {
1121 case PAN_SYSVAL_VIEWPORT_SCALE:
1122 panfrost_upload_viewport_scale_sysval(batch,
1123 &uniforms[i]);
1124 break;
1125 case PAN_SYSVAL_VIEWPORT_OFFSET:
1126 panfrost_upload_viewport_offset_sysval(batch,
1127 &uniforms[i]);
1128 break;
1129 case PAN_SYSVAL_TEXTURE_SIZE:
1130 panfrost_upload_txs_sysval(batch, st,
1131 PAN_SYSVAL_ID(sysval),
1132 &uniforms[i]);
1133 break;
1134 case PAN_SYSVAL_SSBO:
1135 panfrost_upload_ssbo_sysval(batch, st,
1136 PAN_SYSVAL_ID(sysval),
1137 &uniforms[i]);
1138 break;
1139 case PAN_SYSVAL_NUM_WORK_GROUPS:
1140 panfrost_upload_num_work_groups_sysval(batch,
1141 &uniforms[i]);
1142 break;
1143 case PAN_SYSVAL_SAMPLER:
1144 panfrost_upload_sampler_sysval(batch, st,
1145 PAN_SYSVAL_ID(sysval),
1146 &uniforms[i]);
1147 break;
1148 default:
1149 assert(0);
1150 }
1151 }
1152 }
1153
1154 static const void *
1155 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1156 unsigned index)
1157 {
1158 struct pipe_constant_buffer *cb = &buf->cb[index];
1159 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1160
1161 if (rsrc)
1162 return rsrc->bo->cpu;
1163 else if (cb->user_buffer)
1164 return cb->user_buffer;
1165 else
1166 unreachable("No constant buffer");
1167 }
1168
1169 void
1170 panfrost_emit_const_buf(struct panfrost_batch *batch,
1171 enum pipe_shader_type stage,
1172 struct mali_vertex_tiler_postfix *postfix)
1173 {
1174 struct panfrost_context *ctx = batch->ctx;
1175 struct panfrost_shader_variants *all = ctx->shader[stage];
1176
1177 if (!all)
1178 return;
1179
1180 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1181
1182 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1183
1184 /* Uniforms are implicitly UBO #0 */
1185 bool has_uniforms = buf->enabled_mask & (1 << 0);
1186
1187 /* Allocate room for the sysval and the uniforms */
1188 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1189 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1190 size_t size = sys_size + uniform_size;
1191 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1192 size);
1193
1194 /* Upload sysvals requested by the shader */
1195 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1196
1197 /* Upload uniforms */
1198 if (has_uniforms && uniform_size) {
1199 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1200 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1201 }
1202
1203 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1204 * uploaded */
1205
1206 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1207 assert(ubo_count >= 1);
1208
1209 size_t sz = sizeof(uint64_t) * ubo_count;
1210 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1211 int uniform_count = ss->uniform_count;
1212
1213 /* Upload uniforms as a UBO */
1214 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1215
1216 /* The rest are honest-to-goodness UBOs */
1217
1218 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1219 size_t usz = buf->cb[ubo].buffer_size;
1220 bool enabled = buf->enabled_mask & (1 << ubo);
1221 bool empty = usz == 0;
1222
1223 if (!enabled || empty) {
1224 /* Stub out disabled UBOs to catch accesses */
1225 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1226 continue;
1227 }
1228
1229 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1230 buf, ubo);
1231
1232 unsigned bytes_per_field = 16;
1233 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1234 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1235 }
1236
1237 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1238 postfix->uniforms = transfer.gpu;
1239 postfix->uniform_buffers = ubufs;
1240
1241 buf->dirty_mask = 0;
1242 }
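/* Worked example (illustrative): MALI_MAKE_UBO takes a size in 16-byte
 * fields plus a GPU pointer. A 100-byte UBO is padded to
 * ALIGN_POT(100, 16) = 112 bytes, i.e. 7 fields, so its entry becomes
 * MALI_MAKE_UBO(7, gpu). UBO #0 is the sysval + uniform blob uploaded above,
 * and disabled slots are stubbed with MALI_MAKE_UBO(0, 0xDEAD0000) so stray
 * accesses fault visibly rather than reading stale data. */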
1243
1244 void
1245 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1246 const struct pipe_grid_info *info,
1247 struct midgard_payload_vertex_tiler *vtp)
1248 {
1249 struct panfrost_context *ctx = batch->ctx;
1250 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1251 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1252 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1253 128));
1254 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1255 info->grid[2] * 4;
1256 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1257 shared_size,
1258 1);
1259
1260 struct mali_shared_memory shared = {
1261 .shared_memory = bo->gpu,
1262 .shared_workgroup_count =
1263 util_logbase2_ceil(info->grid[0]) +
1264 util_logbase2_ceil(info->grid[1]) +
1265 util_logbase2_ceil(info->grid[2]),
1266 .shared_unk1 = 0x2,
1267 .shared_shift = util_logbase2(single_size) - 1
1268 };
1269
1270 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1271 sizeof(shared));
1272 }
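/* Worked example (illustrative, just tracing the arithmetic above): with
 * ss->shared_size = 200 and a (4, 4, 1) grid, single_size =
 * util_next_power_of_two(MAX2(200, 128)) = 256, so the BO covers
 * 256 * 4 * 4 * 1 * 4 = 16384 bytes. shared_workgroup_count =
 * ceil(log2(4)) + ceil(log2(4)) + ceil(log2(1)) = 2 + 2 + 0 = 4, and
 * shared_shift = log2(256) - 1 = 7. */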
1273
1274 static mali_ptr
1275 panfrost_get_tex_desc(struct panfrost_batch *batch,
1276 enum pipe_shader_type st,
1277 struct panfrost_sampler_view *view)
1278 {
1279 if (!view)
1280 return (mali_ptr) 0;
1281
1282 struct pipe_sampler_view *pview = &view->base;
1283 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1284
1285 /* Add the BO to the job so it's retained until the job is done. */
1286
1287 panfrost_batch_add_bo(batch, rsrc->bo,
1288 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1289 panfrost_bo_access_for_stage(st));
1290
1291 panfrost_batch_add_bo(batch, view->midgard_bo,
1292 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1293 panfrost_bo_access_for_stage(st));
1294
1295 return view->midgard_bo->gpu;
1296 }
1297
1298 void
1299 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1300 enum pipe_shader_type stage,
1301 struct mali_vertex_tiler_postfix *postfix)
1302 {
1303 struct panfrost_context *ctx = batch->ctx;
1304 struct panfrost_device *device = pan_device(ctx->base.screen);
1305
1306 if (!ctx->sampler_view_count[stage])
1307 return;
1308
1309 if (device->quirks & IS_BIFROST) {
1310 struct bifrost_texture_descriptor *descriptors;
1311
1312 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1313 ctx->sampler_view_count[stage]);
1314
1315 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1316 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1317 struct pipe_sampler_view *pview = &view->base;
1318 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1319
1320 panfrost_batch_add_bo(batch, rsrc->bo,
1321 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1322 panfrost_bo_access_for_stage(stage));
1323
1324 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1325 }
1326
1327 postfix->textures = panfrost_upload_transient(batch,
1328 descriptors,
1329 sizeof(struct bifrost_texture_descriptor) *
1330 ctx->sampler_view_count[stage]);
1331
1332 free(descriptors);
1333 } else {
1334 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1335
1336 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1337 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1338 ctx->sampler_views[stage][i]);
1339
1340 postfix->textures = panfrost_upload_transient(batch,
1341 trampolines,
1342 sizeof(uint64_t) *
1343 ctx->sampler_view_count[stage]);
1344 }
1345 }
1346
1347 void
1348 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1349 enum pipe_shader_type stage,
1350 struct mali_vertex_tiler_postfix *postfix)
1351 {
1352 struct panfrost_context *ctx = batch->ctx;
1353 struct panfrost_device *device = pan_device(ctx->base.screen);
1354
1355 if (!ctx->sampler_count[stage])
1356 return;
1357
1358 if (device->quirks & IS_BIFROST) {
1359 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1360 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1361 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1362 transfer_size);
1363 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1364
1365 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1366 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1367
1368 postfix->sampler_descriptor = transfer.gpu;
1369 } else {
1370 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1371 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1372 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1373 transfer_size);
1374 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1375
1376 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1377 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1378
1379 postfix->sampler_descriptor = transfer.gpu;
1380 }
1381 }
1382
1383 void
1384 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1385 struct mali_vertex_tiler_postfix *vertex_postfix)
1386 {
1387 struct panfrost_context *ctx = batch->ctx;
1388
1389 if (!ctx->vertex)
1390 return;
1391
1392 struct panfrost_vertex_state *so = ctx->vertex;
1393
1394 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1395 vertex_postfix->attribute_meta = panfrost_upload_transient(batch, so->hw,
1396 sizeof(*so->hw) *
1397 PAN_MAX_ATTRIBUTE);
1398 }
1399
1400 void
1401 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1402 struct mali_vertex_tiler_postfix *vertex_postfix)
1403 {
1404 struct panfrost_context *ctx = batch->ctx;
1405 struct panfrost_vertex_state *so = ctx->vertex;
1406
1407 /* Staged mali_attr, and index into them. i =/= k, depending on the
1408 * vertex buffer mask and instancing. Twice as much room is allocated,
1409 * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1410 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1411 unsigned k = 0;
1412
1413 for (unsigned i = 0; i < so->num_elements; ++i) {
1414 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1415 * means duplicating some vertex buffers (who cares? aside from
1416 * maybe some caching implications but I somehow doubt that
1417 * matters) */
1418
1419 struct pipe_vertex_element *elem = &so->pipe[i];
1420 unsigned vbi = elem->vertex_buffer_index;
1421
1422 /* The exception to 1:1 mapping is that we can have multiple
1423 * entries (NPOT divisors), so we fixup anyways */
1424
1425 so->hw[i].index = k;
1426
1427 if (!(ctx->vb_mask & (1 << vbi)))
1428 continue;
1429
1430 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1431 struct panfrost_resource *rsrc;
1432
1433 rsrc = pan_resource(buf->buffer.resource);
1434 if (!rsrc)
1435 continue;
1436
1437 /* Align to 64 bytes by masking off the lower bits. This
1438 * will be adjusted back when we fixup the src_offset in
1439 * mali_attr_meta */
1440
1441 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1442 mali_ptr addr = raw_addr & ~63;
1443 unsigned chopped_addr = raw_addr - addr;
1444
1445 /* Add a dependency of the batch on the vertex buffer */
1446 panfrost_batch_add_bo(batch, rsrc->bo,
1447 PAN_BO_ACCESS_SHARED |
1448 PAN_BO_ACCESS_READ |
1449 PAN_BO_ACCESS_VERTEX_TILER);
1450
1451 /* Set common fields */
1452 attrs[k].elements = addr;
1453 attrs[k].stride = buf->stride;
1454
1455 /* Since we advanced the base pointer, we shrink the buffer
1456 * size */
1457 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1458
1459 /* We need to add the extra size we masked off (for
1460 * correctness) so the data doesn't get clamped away */
1461 attrs[k].size += chopped_addr;
1462
1463 /* For non-instancing make sure we initialize */
1464 attrs[k].shift = attrs[k].extra_flags = 0;
1465
1466 /* Instancing uses a dramatically different code path than
1467 * linear, so dispatch for the actual emission now that the
1468 * common code is finished */
1469
1470 unsigned divisor = elem->instance_divisor;
1471
1472 if (divisor && ctx->instance_count == 1) {
1473 /* Silly corner case where there's a divisor(=1) but
1474 * there's no legitimate instancing. So we want *every*
1475 * attribute to be the same. So set stride to zero so
1476 * we don't go anywhere. */
1477
1478 attrs[k].size = attrs[k].stride + chopped_addr;
1479 attrs[k].stride = 0;
1480 attrs[k++].elements |= MALI_ATTR_LINEAR;
1481 } else if (ctx->instance_count <= 1) {
1482 /* Normal, non-instanced attributes */
1483 attrs[k++].elements |= MALI_ATTR_LINEAR;
1484 } else {
1485 unsigned instance_shift = vertex_postfix->instance_shift;
1486 unsigned instance_odd = vertex_postfix->instance_odd;
1487
1488 k += panfrost_vertex_instanced(ctx->padded_count,
1489 instance_shift,
1490 instance_odd,
1491 divisor, &attrs[k]);
1492 }
1493 }
1494
1495 /* Add special gl_VertexID/gl_InstanceID buffers */
1496
1497 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1498 so->hw[PAN_VERTEX_ID].index = k++;
1499 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1500 so->hw[PAN_INSTANCE_ID].index = k++;
1501
1502 /* Upload whatever we emitted and go */
1503
1504 vertex_postfix->attributes = panfrost_upload_transient(batch, attrs,
1505 k * sizeof(*attrs));
1506 }
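/* Illustrative example of the 64-byte alignment dance above: if
 * rsrc->bo->gpu + buf->buffer_offset ends in 0x28, addr = raw_addr & ~63
 * drops those low 0x28 bytes and chopped_addr = 0x28. The 0x28 is added back
 * to attrs[k].size so the tail of the buffer isn't clamped away, and the
 * attribute's src_offset is bumped by the same amount when the mali_attr_meta
 * is fixed up (panfrost_vertex_state_upd_attr_offs(), used above). */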
1507
1508 static mali_ptr
1509 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1510 unsigned stride, unsigned count)
1511 {
1512 /* Fill out the descriptor */
1513 slot->stride = stride;
1514 slot->size = stride * count;
1515 slot->shift = slot->extra_flags = 0;
1516
1517 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1518 slot->size);
1519
1520 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1521
1522 return transfer.gpu;
1523 }
1524
1525 static void
1526 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1527 unsigned stride, unsigned offset, unsigned count,
1528 struct pipe_stream_output_target *target)
1529 {
1530 /* Fill out the descriptor */
1531 slot->stride = stride * 4;
1532 slot->shift = slot->extra_flags = 0;
1533
1534 unsigned max_size = target->buffer_size;
1535 unsigned expected_size = slot->stride * count;
1536
1537 slot->size = MIN2(max_size, expected_size);
1538
1539 /* Grab the BO and bind it to the batch */
1540 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1541
1542 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1543 * the perspective of the TILER and FRAGMENT.
1544 */
1545 panfrost_batch_add_bo(batch, bo,
1546 PAN_BO_ACCESS_SHARED |
1547 PAN_BO_ACCESS_RW |
1548 PAN_BO_ACCESS_VERTEX_TILER |
1549 PAN_BO_ACCESS_FRAGMENT);
1550
1551 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1552 slot->elements = addr;
1553 }
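/* Illustrative example: Gallium expresses stream-out strides in dwords, so a
 * stride of 4 dwords becomes slot->stride = 16 bytes. With count = 100
 * vertices the expected size is 1600 bytes, clamped against
 * target->buffer_size so the write never runs past the bound buffer. */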
1554
1555 /* Given a shader and buffer indices, link varying metadata together */
1556
1557 static bool
1558 is_special_varying(gl_varying_slot loc)
1559 {
1560 switch (loc) {
1561 case VARYING_SLOT_POS:
1562 case VARYING_SLOT_PSIZ:
1563 case VARYING_SLOT_PNTC:
1564 case VARYING_SLOT_FACE:
1565 return true;
1566 default:
1567 return false;
1568 }
1569 }
1570
1571 static void
1572 panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss,
1573 signed general, signed gl_Position,
1574 signed gl_PointSize, signed gl_PointCoord,
1575 signed gl_FrontFacing)
1576 {
1577 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
1578
1579 for (unsigned i = 0; i < ss->varying_count; ++i) {
1580 gl_varying_slot location = ss->varyings_loc[i];
1581 int index = -1;
1582
1583 switch (location) {
1584 case VARYING_SLOT_POS:
1585 index = gl_Position;
1586 break;
1587 case VARYING_SLOT_PSIZ:
1588 index = gl_PointSize;
1589 break;
1590 case VARYING_SLOT_PNTC:
1591 index = gl_PointCoord;
1592 break;
1593 case VARYING_SLOT_FACE:
1594 index = gl_FrontFacing;
1595 break;
1596 default:
1597 index = general;
1598 break;
1599 }
1600
1601 assert(index >= 0);
1602 out[i].index = index;
1603 }
1604 }
1605
1606 static bool
1607 has_point_coord(unsigned mask, gl_varying_slot loc)
1608 {
1609 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1610 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1611 else if (loc == VARYING_SLOT_PNTC)
1612 return (mask & (1 << 8));
1613 else
1614 return false;
1615 }
1616
1617 /* Helpers for manipulating stream out information so we can pack varyings
1618 * accordingly. Compute the src_offset for a given captured varying */
1619
1620 static struct pipe_stream_output *
1621 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1622 {
1623 for (unsigned i = 0; i < info->num_outputs; ++i) {
1624 if (info->output[i].register_index == loc)
1625 return &info->output[i];
1626 }
1627
1628 unreachable("Varying not captured");
1629 }
1630
1631 /* TODO: Integers */
1632 static enum mali_format
1633 pan_xfb_format(unsigned nr_components)
1634 {
1635 switch (nr_components) {
1636 case 1: return MALI_R32F;
1637 case 2: return MALI_RG32F;
1638 case 3: return MALI_RGB32F;
1639 case 4: return MALI_RGBA32F;
1640 default: unreachable("Invalid format");
1641 }
1642 }
1643
1644 void
1645 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1646 unsigned vertex_count,
1647 struct mali_vertex_tiler_postfix *vertex_postfix,
1648 struct mali_vertex_tiler_postfix *tiler_postfix,
1649 union midgard_primitive_size *primitive_size)
1650 {
1651 /* Load the shaders */
1652 struct panfrost_context *ctx = batch->ctx;
1653 struct panfrost_shader_state *vs, *fs;
1654 unsigned int num_gen_varyings = 0;
1655 size_t vs_size, fs_size;
1656
1657 /* Allocate the varying descriptor */
1658
1659 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1660 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1661 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
1662 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
1663
1664 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
1665 vs_size +
1666 fs_size);
1667
1668 struct pipe_stream_output_info *so = &vs->stream_output;
1669
1670 /* Check if this varying is linked by us. This is the case for
1671 * general-purpose, non-captured varyings. If it is, link it. If it's
1672 * not, use the provided stream out information to determine the
1673 * offset, since it was already linked for us. */
1674
1675 for (unsigned i = 0; i < vs->varying_count; i++) {
1676 gl_varying_slot loc = vs->varyings_loc[i];
1677
1678 bool special = is_special_varying(loc);
1679 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1680
1681 if (captured) {
1682 struct pipe_stream_output *o = pan_get_so(so, loc);
1683
1684 unsigned dst_offset = o->dst_offset * 4; /* dwords */
1685 vs->varyings[i].src_offset = dst_offset;
1686 } else if (!special) {
1687 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
1688 }
1689 }
1690
1691 /* Conversely, we need to set src_offset for the captured varyings.
1692 * Here, the layout is defined by the stream out info, not us */
1693
1694 /* Link up with fragment varyings */
1695 bool reads_point_coord = fs->reads_point_coord;
1696
1697 for (unsigned i = 0; i < fs->varying_count; i++) {
1698 gl_varying_slot loc = fs->varyings_loc[i];
1699 unsigned src_offset;
1700 signed vs_idx = -1;
1701
1702 /* Link up */
1703 for (unsigned j = 0; j < vs->varying_count; ++j) {
1704 if (vs->varyings_loc[j] == loc) {
1705 vs_idx = j;
1706 break;
1707 }
1708 }
1709
1710 /* Either assign or reuse */
1711 if (vs_idx >= 0)
1712 src_offset = vs->varyings[vs_idx].src_offset;
1713 else
1714 src_offset = 16 * (num_gen_varyings++);
1715
1716 fs->varyings[i].src_offset = src_offset;
1717
1718 if (has_point_coord(fs->point_sprite_mask, loc))
1719 reads_point_coord = true;
1720 }
1721
1722 memcpy(trans.cpu, vs->varyings, vs_size);
1723 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
1724
1725 union mali_attr varyings[PIPE_MAX_ATTRIBS] = {0};
1726
1727 /* Figure out how many streamout buffers could be bound */
1728 unsigned so_count = ctx->streamout.num_targets;
1729 for (unsigned i = 0; i < vs->varying_count; i++) {
1730 gl_varying_slot loc = vs->varyings_loc[i];
1731
1732 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1733 if (!captured) continue;
1734
1735 struct pipe_stream_output *o = pan_get_so(so, loc);
1736 so_count = MAX2(so_count, o->output_buffer + 1);
1737 }
1738
1739 signed idx = so_count;
1740 signed general = idx++;
1741 signed gl_Position = idx++;
1742 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
1743 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
1744 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
1745 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
1746
1747 /* Emit the stream out buffers */
1748
1749 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1750 ctx->vertex_count);
1751
1752 for (unsigned i = 0; i < so_count; ++i) {
1753 if (i < ctx->streamout.num_targets) {
1754 panfrost_emit_streamout(batch, &varyings[i],
1755 so->stride[i],
1756 ctx->streamout.offsets[i],
1757 out_count,
1758 ctx->streamout.targets[i]);
1759 } else {
1760 /* Emit a dummy buffer */
1761 panfrost_emit_varyings(batch, &varyings[i],
1762 so->stride[i] * 4,
1763 out_count);
1764
1765 /* Clear the attribute type */
1766 varyings[i].elements &= ~0xF;
1767 }
1768 }
1769
1770 panfrost_emit_varyings(batch, &varyings[general],
1771 num_gen_varyings * 16,
1772 vertex_count);
1773
1774 mali_ptr varyings_p;
1775
1776 /* fp32 vec4 gl_Position */
1777 varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position],
1778 sizeof(float) * 4, vertex_count);
1779 tiler_postfix->position_varying = varyings_p;
1780
1781
1782 if (panfrost_writes_point_size(ctx)) {
1783 varyings_p = panfrost_emit_varyings(batch,
1784 &varyings[gl_PointSize],
1785 2, vertex_count);
1786 primitive_size->pointer = varyings_p;
1787 }
1788
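/* gl_PointCoord, gl_FrontFacing and gl_FragCoord are not backed by memory;
 * their records use the special MALI_VARYING_* encodings instead. */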
1789 if (reads_point_coord)
1790 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
1791
1792 if (fs->reads_face)
1793 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
1794
1795 if (fs->reads_frag_coord)
1796 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
1797
1798 struct panfrost_device *device = pan_device(ctx->base.screen);
1799 assert(!(device->quirks & IS_BIFROST) || !(reads_point_coord || fs->reads_face || fs->reads_frag_coord));
1800
1801 /* Now that the buffer assignments are known, link the varying meta
1802 * to the buffer in question. VARYING_SLOT_POS is mapped to
1803 * gl_FragCoord for fragment shaders but to gl_Position for vertex
1804 * shaders */
1805
1806 panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position,
1807 gl_PointSize, gl_PointCoord,
1808 gl_FrontFacing);
1809
1810 panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general,
1811 gl_FragCoord, gl_PointSize,
1812 gl_PointCoord, gl_FrontFacing);
1813
1814 /* Replace streamout */
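/* For captured varyings, rewrite the descriptors so they read from the
 * transform feedback buffer (index, format and swizzle come from the
 * stream-out info), and mirror the change into any fragment varying linked
 * to the same location. */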
1815
1816 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
1817 struct mali_attr_meta *ofs = ovs + vs->varying_count;
1818
1819 for (unsigned i = 0; i < vs->varying_count; i++) {
1820 gl_varying_slot loc = vs->varyings_loc[i];
1821
1822 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1823 if (!captured)
1824 continue;
1825
1826 struct pipe_stream_output *o = pan_get_so(so, loc);
1827 ovs[i].index = o->output_buffer;
1828
1829 /* Set the type appropriately. TODO: Integer varyings XXX */
1830 assert(o->stream == 0);
1831 ovs[i].format = pan_xfb_format(o->num_components);
1832 ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components);
1833
1834 /* Link to the fragment */
1835 signed fs_idx = -1;
1836
1837 /* Link up */
1838 for (unsigned j = 0; j < fs->varying_count; ++j) {
1839 if (fs->varyings_loc[j] == loc) {
1840 fs_idx = j;
1841 break;
1842 }
1843 }
1844
1845 if (fs_idx >= 0) {
1846 ofs[fs_idx].index = ovs[i].index;
1847 ofs[fs_idx].format = ovs[i].format;
1848 ofs[fs_idx].swizzle = ovs[i].swizzle;
1849 }
1850 }
1851
1852 /* Replace point sprite */
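/* Varyings covered by the point sprite mask are redirected to the
 * gl_PointCoord record, read back as RG16F with z/w filled in by the
 * default two-component swizzle. */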
1853 for (unsigned i = 0; i < fs->varying_count; i++) {
1854 /* If we have a point sprite replacement, handle that here. We
1855 * have to translate the location first. TODO: flip Y in the
1856 * shader; we already key on this state, it is just a time crunch */
1857
1858 if (has_point_coord(fs->point_sprite_mask,
1859 fs->varyings_loc[i])) {
1860 ofs[i].index = gl_PointCoord;
1861
1862 /* Swizzle out the z/w to 0/1 */
1863 ofs[i].format = MALI_RG16F;
1864 ofs[i].swizzle = panfrost_get_default_swizzle(2);
1865 }
1866 }
1867
1868 /* Fix up unaligned addresses */
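/* Stream-out offsets are only dword-aligned, but the low six bits of the
 * record address appear to be reserved for flags. Fold any misalignment
 * into the descriptors' src_offset instead and grow the record size to
 * compensate. */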
1869 for (unsigned i = 0; i < so_count; ++i) {
1870 if (varyings[i].elements < MALI_RECORD_SPECIAL)
1871 continue;
1872
1873 unsigned align = (varyings[i].elements & 63);
1874
1875 /* While we're at it, mark the SO buffers as linear */
1876
1877 if (!align) {
1878 varyings[i].elements |= MALI_ATTR_LINEAR;
1879 continue;
1880 }
1881
1882 /* We need to adjust alignment */
1883 varyings[i].elements &= ~63;
1884 varyings[i].elements |= MALI_ATTR_LINEAR;
1885 varyings[i].size += align;
1886
1887 for (unsigned v = 0; v < vs->varying_count; ++v) {
1888 if (ovs[v].index != i)
1889 continue;
1890
1891 ovs[v].src_offset = vs->varyings[v].src_offset + align;
1892 }
1893
1894 for (unsigned f = 0; f < fs->varying_count; ++f) {
1895 if (ofs[f].index != i)
1896 continue;
1897
1898 ofs[f].src_offset = fs->varyings[f].src_offset + align;
1899 }
1900 }
1901
1902 varyings_p = panfrost_upload_transient(batch, varyings,
1903 idx * sizeof(*varyings));
1904 vertex_postfix->varyings = varyings_p;
1905 tiler_postfix->varyings = varyings_p;
1906
1907 vertex_postfix->varying_meta = trans.gpu;
1908 tiler_postfix->varying_meta = trans.gpu + vs_size;
1909 }
1910
1911 void
1912 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1913 struct mali_vertex_tiler_prefix *vertex_prefix,
1914 struct mali_vertex_tiler_postfix *vertex_postfix,
1915 struct mali_vertex_tiler_prefix *tiler_prefix,
1916 struct mali_vertex_tiler_postfix *tiler_postfix,
1917 union midgard_primitive_size *primitive_size)
1918 {
1919 struct panfrost_context *ctx = batch->ctx;
1920 struct panfrost_device *device = pan_device(ctx->base.screen);
1921 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1922 struct bifrost_payload_vertex bifrost_vertex = {0,};
1923 struct bifrost_payload_tiler bifrost_tiler = {0,};
1924 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1925 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1926 void *vp, *tp;
1927 size_t vp_size, tp_size;
1928
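/* Bifrost and Midgard use different payload layouts, so copy the shared
 * prefix/postfix into the appropriate struct and upload that as the job
 * payload. */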
1929 if (device->quirks & IS_BIFROST) {
1930 bifrost_vertex.prefix = *vertex_prefix;
1931 bifrost_vertex.postfix = *vertex_postfix;
1932 vp = &bifrost_vertex;
1933 vp_size = sizeof(bifrost_vertex);
1934
1935 bifrost_tiler.prefix = *tiler_prefix;
1936 bifrost_tiler.tiler.primitive_size = *primitive_size;
1937 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1938 bifrost_tiler.postfix = *tiler_postfix;
1939 tp = &bifrost_tiler;
1940 tp_size = sizeof(bifrost_tiler);
1941 } else {
1942 midgard_vertex.prefix = *vertex_prefix;
1943 midgard_vertex.postfix = *vertex_postfix;
1944 vp = &midgard_vertex;
1945 vp_size = sizeof(midgard_vertex);
1946
1947 midgard_tiler.prefix = *tiler_prefix;
1948 midgard_tiler.postfix = *tiler_postfix;
1949 midgard_tiler.primitive_size = *primitive_size;
1950 tp = &midgard_tiler;
1951 tp_size = sizeof(midgard_tiler);
1952 }
1953
1954 if (wallpapering) {
1955 /* Inject in reverse order, with "predicted" job indices.
1956 * THIS IS A HACK XXX */
1957 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1958 batch->job_index + 2, tp, tp_size, true);
1959 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1960 vp, vp_size, true);
1961 return;
1962 }
1963
1964 /* If rasterizer discard is enabled, only submit the vertex job */
1965
1966 bool rasterizer_discard = ctx->rasterizer &&
1967 ctx->rasterizer->base.rasterizer_discard;
1968
1969 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1970 vp, vp_size, false);
1971
1972 if (rasterizer_discard)
1973 return;
1974
1975 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, tp_size,
1976 false);
1977 }
1978
1979 /* TODO: stop hardcoding this */
1980 mali_ptr
1981 panfrost_emit_sample_locations(struct panfrost_batch *batch)
1982 {
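/* 48 (x, y) pairs uploaded verbatim (96 half-words). The exact encoding is
 * an assumption: presumably fixed-point sample positions with 256 units per
 * pixel, so (128, 128) marks the pixel centre. See the TODO above. */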
1983 uint16_t locations[] = {
1984 128, 128,
1985 0, 256,
1986 0, 256,
1987 0, 256,
1988 0, 256,
1989 0, 256,
1990 0, 256,
1991 0, 256,
1992 0, 256,
1993 0, 256,
1994 0, 256,
1995 0, 256,
1996 0, 256,
1997 0, 256,
1998 0, 256,
1999 0, 256,
2000 0, 256,
2001 0, 256,
2002 0, 256,
2003 0, 256,
2004 0, 256,
2005 0, 256,
2006 0, 256,
2007 0, 256,
2008 0, 256,
2009 0, 256,
2010 0, 256,
2011 0, 256,
2012 0, 256,
2013 0, 256,
2014 0, 256,
2015 0, 256,
2016 128, 128,
2017 0, 0,
2018 0, 0,
2019 0, 0,
2020 0, 0,
2021 0, 0,
2022 0, 0,
2023 0, 0,
2024 0, 0,
2025 0, 0,
2026 0, 0,
2027 0, 0,
2028 0, 0,
2029 0, 0,
2030 0, 0,
2031 0, 0,
2032 };
2033
2034 return panfrost_upload_transient(batch, locations, 96 * sizeof(uint16_t));
2035 }