panfrost: Fix background showing when using discard
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_allocate.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
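/* On Bifrost, vertex/tiler jobs reference a shared memory descriptor (backed
 * by a batch scratchpad sized from the batch's stack requirements) rather
 * than a framebuffer descriptor; see panfrost_vt_init below. */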
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_upload_transient(batch, &shared, sizeof(shared));
68 }
69
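/* On Midgard, the same postfix slot instead points at the framebuffer
 * descriptor (tagged MALI_MFBD on MFBD hardware), which we reserve here if
 * the batch doesn't have one yet. */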
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77 /* If we haven't already, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_allocate_transient(batch, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query)
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 else
140 postfix->occlusion_counter = 0;
141 }
142
143 void
144 panfrost_vt_init(struct panfrost_context *ctx,
145 enum pipe_shader_type stage,
146 struct mali_vertex_tiler_prefix *prefix,
147 struct mali_vertex_tiler_postfix *postfix)
148 {
149 struct panfrost_device *device = pan_device(ctx->base.screen);
150
151 if (!ctx->shader[stage])
152 return;
153
154 memset(prefix, 0, sizeof(*prefix));
155 memset(postfix, 0, sizeof(*postfix));
156
157 if (device->quirks & IS_BIFROST) {
158 postfix->gl_enables = 0x2;
159 panfrost_vt_emit_shared_memory(ctx, postfix);
160 } else {
161 postfix->gl_enables = 0x6;
162 panfrost_vt_attach_framebuffer(ctx, postfix);
163 }
164
165 if (stage == PIPE_SHADER_FRAGMENT) {
166 panfrost_vt_update_occlusion_query(ctx, postfix);
167 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
168 }
169 }
170
171 static unsigned
172 panfrost_translate_index_size(unsigned size)
173 {
174 switch (size) {
175 case 1:
176 return MALI_DRAW_INDEXED_UINT8;
177
178 case 2:
179 return MALI_DRAW_INDEXED_UINT16;
180
181 case 4:
182 return MALI_DRAW_INDEXED_UINT32;
183
184 default:
185 unreachable("Invalid index size");
186 }
187 }
188
189 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
190 * good for the duration of the draw (transient), though it could last longer. Also get
191 * the bounds on the index buffer for the range accessed by the draw. We do
192 * these operations together because there are natural optimizations which
193 * require them to be together. */
194
195 static mali_ptr
196 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
197 const struct pipe_draw_info *info,
198 unsigned *min_index, unsigned *max_index)
199 {
200 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
201 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
202 off_t offset = info->start * info->index_size;
203 bool needs_indices = true;
204 mali_ptr out = 0;
205
206 if (info->max_index != ~0u) {
207 *min_index = info->min_index;
208 *max_index = info->max_index;
209 needs_indices = false;
210 }
211
212 if (!info->has_user_indices) {
213 /* Only resources can be directly mapped */
214 panfrost_batch_add_bo(batch, rsrc->bo,
215 PAN_BO_ACCESS_SHARED |
216 PAN_BO_ACCESS_READ |
217 PAN_BO_ACCESS_VERTEX_TILER);
218 out = rsrc->bo->gpu + offset;
219
220 /* Check the cache */
221 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
222 info->start,
223 info->count,
224 min_index,
225 max_index);
226 } else {
227 /* Otherwise, we need to upload to transient memory */
228 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
229 out = panfrost_upload_transient(batch, ibuf8 + offset,
230 info->count *
231 info->index_size);
232 }
233
234 if (needs_indices) {
235 /* Fallback */
236 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
237
238 if (!info->has_user_indices)
239 panfrost_minmax_cache_add(rsrc->index_cache,
240 info->start, info->count,
241 *min_index, *max_index);
242 }
243
244 return out;
245 }
246
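/* Fills out the draw-related fields of the vertex/tiler payloads: draw mode
 * and flags, the index buffer (with min/max bounds), offset/index counts and
 * the instancing encoding, where padded_count is factored as
 * (2k + 1) << shift and stored as instance_shift/instance_odd. */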
247 void
248 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
249 const struct pipe_draw_info *info,
250 enum mali_draw_mode draw_mode,
251 struct mali_vertex_tiler_postfix *vertex_postfix,
252 struct mali_vertex_tiler_prefix *tiler_prefix,
253 struct mali_vertex_tiler_postfix *tiler_postfix,
254 unsigned *vertex_count,
255 unsigned *padded_count)
256 {
257 tiler_prefix->draw_mode = draw_mode;
258
259 unsigned draw_flags = 0;
260
261 if (panfrost_writes_point_size(ctx))
262 draw_flags |= MALI_DRAW_VARYING_SIZE;
263
264 if (info->primitive_restart)
265 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
266
267 /* These don't make much sense */
268
269 draw_flags |= 0x3000;
270
271 if (info->index_size) {
272 unsigned min_index = 0, max_index = 0;
273
274 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
275 info,
276 &min_index,
277 &max_index);
278
279 /* Use the corresponding values */
280 *vertex_count = max_index - min_index + 1;
281 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
282 tiler_prefix->offset_bias_correction = -min_index;
283 tiler_prefix->index_count = MALI_POSITIVE(info->count);
284 draw_flags |= panfrost_translate_index_size(info->index_size);
285 } else {
286 tiler_prefix->indices = 0;
287 *vertex_count = ctx->vertex_count;
288 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
289 tiler_prefix->offset_bias_correction = 0;
290 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
291 }
292
293 tiler_prefix->unknown_draw = draw_flags;
294
295 /* Encode the padded vertex count */
296
297 if (info->instance_count > 1) {
298 *padded_count = panfrost_padded_vertex_count(*vertex_count);
299
300 unsigned shift = __builtin_ctz(ctx->padded_count);
301 unsigned k = ctx->padded_count >> (shift + 1);
302
303 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
304 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
305 } else {
306 *padded_count = *vertex_count;
307
308 /* Reset instancing state */
309 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
310 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
311 }
312 }
313
314 static void
315 panfrost_shader_meta_init(struct panfrost_context *ctx,
316 enum pipe_shader_type st,
317 struct mali_shader_meta *meta)
318 {
319 const struct panfrost_device *dev = pan_device(ctx->base.screen);
320 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
321
322 memset(meta, 0, sizeof(*meta));
323 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
324 meta->attribute_count = ss->attribute_count;
325 meta->varying_count = ss->varying_count;
326 meta->texture_count = ctx->sampler_view_count[st];
327 meta->sampler_count = ctx->sampler_count[st];
328
329 if (dev->quirks & IS_BIFROST) {
330 if (st == PIPE_SHADER_VERTEX)
331 meta->bifrost1.unk1 = 0x800000;
332 else {
333 /* First clause ATEST |= 0x4000000.
334 * Less than 32 regs |= 0x200 */
335 meta->bifrost1.unk1 = 0x958020;
336 }
337
338 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
339 if (st == PIPE_SHADER_VERTEX)
340 meta->bifrost2.preload_regs = 0xC0;
341 else
342 meta->bifrost2.preload_regs = 0x1;
343 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
344 ss->uniform_cutoff);
345 } else {
346 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
347 ss->uniform_cutoff);
348 meta->midgard1.work_count = ss->work_reg_count;
349 meta->midgard1.flags_hi = 0x8; /* XXX */
350 meta->midgard1.flags_lo = 0x220;
351 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
352 }
353 }
354
355 static unsigned
356 panfrost_translate_compare_func(enum pipe_compare_func in)
357 {
358 switch (in) {
359 case PIPE_FUNC_NEVER:
360 return MALI_FUNC_NEVER;
361
362 case PIPE_FUNC_LESS:
363 return MALI_FUNC_LESS;
364
365 case PIPE_FUNC_EQUAL:
366 return MALI_FUNC_EQUAL;
367
368 case PIPE_FUNC_LEQUAL:
369 return MALI_FUNC_LEQUAL;
370
371 case PIPE_FUNC_GREATER:
372 return MALI_FUNC_GREATER;
373
374 case PIPE_FUNC_NOTEQUAL:
375 return MALI_FUNC_NOTEQUAL;
376
377 case PIPE_FUNC_GEQUAL:
378 return MALI_FUNC_GEQUAL;
379
380 case PIPE_FUNC_ALWAYS:
381 return MALI_FUNC_ALWAYS;
382
383 default:
384 unreachable("Invalid func");
385 }
386 }
387
388 static unsigned
389 panfrost_translate_stencil_op(enum pipe_stencil_op in)
390 {
391 switch (in) {
392 case PIPE_STENCIL_OP_KEEP:
393 return MALI_STENCIL_KEEP;
394
395 case PIPE_STENCIL_OP_ZERO:
396 return MALI_STENCIL_ZERO;
397
398 case PIPE_STENCIL_OP_REPLACE:
399 return MALI_STENCIL_REPLACE;
400
401 case PIPE_STENCIL_OP_INCR:
402 return MALI_STENCIL_INCR;
403
404 case PIPE_STENCIL_OP_DECR:
405 return MALI_STENCIL_DECR;
406
407 case PIPE_STENCIL_OP_INCR_WRAP:
408 return MALI_STENCIL_INCR_WRAP;
409
410 case PIPE_STENCIL_OP_DECR_WRAP:
411 return MALI_STENCIL_DECR_WRAP;
412
413 case PIPE_STENCIL_OP_INVERT:
414 return MALI_STENCIL_INVERT;
415
416 default:
417 unreachable("Invalid stencil op");
418 }
419 }
420
421 static unsigned
422 translate_tex_wrap(enum pipe_tex_wrap w)
423 {
424 switch (w) {
425 case PIPE_TEX_WRAP_REPEAT:
426 return MALI_WRAP_REPEAT;
427
428 case PIPE_TEX_WRAP_CLAMP:
429 return MALI_WRAP_CLAMP;
430
431 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
432 return MALI_WRAP_CLAMP_TO_EDGE;
433
434 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
435 return MALI_WRAP_CLAMP_TO_BORDER;
436
437 case PIPE_TEX_WRAP_MIRROR_REPEAT:
438 return MALI_WRAP_MIRRORED_REPEAT;
439
440 case PIPE_TEX_WRAP_MIRROR_CLAMP:
441 return MALI_WRAP_MIRRORED_CLAMP;
442
443 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
444 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
445
446 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
447 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
448
449 default:
450 unreachable("Invalid wrap");
451 }
452 }
453
454 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
455 struct mali_sampler_descriptor *hw)
456 {
457 unsigned func = panfrost_translate_compare_func(cso->compare_func);
458 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
459 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
460 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
461 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
462 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
463 unsigned mip_filter = mip_linear ?
464 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
465 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
466
467 *hw = (struct mali_sampler_descriptor) {
468 .filter_mode = min_filter | mag_filter | mip_filter |
469 normalized,
470 .wrap_s = translate_tex_wrap(cso->wrap_s),
471 .wrap_t = translate_tex_wrap(cso->wrap_t),
472 .wrap_r = translate_tex_wrap(cso->wrap_r),
473 .compare_func = panfrost_flip_compare_func(func),
474 .border_color = {
475 cso->border_color.f[0],
476 cso->border_color.f[1],
477 cso->border_color.f[2],
478 cso->border_color.f[3]
479 },
480 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
481 .max_lod = FIXED_16(cso->max_lod, false),
482 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
483 .seamless_cube_map = cso->seamless_cube_map,
484 };
485
486 /* If necessary, we disable mipmapping in the sampler descriptor by
487 * clamping the LOD as tight as possible (from 0 to epsilon,
488 * essentially -- remember these are fixed point numbers, so
489 * epsilon=1/256) */
490
491 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
492 hw->max_lod = hw->min_lod + 1;
493 }
494
495 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
496 struct bifrost_sampler_descriptor *hw)
497 {
498 *hw = (struct bifrost_sampler_descriptor) {
499 .unk1 = 0x1,
500 .wrap_s = translate_tex_wrap(cso->wrap_s),
501 .wrap_t = translate_tex_wrap(cso->wrap_t),
502 .wrap_r = translate_tex_wrap(cso->wrap_r),
503 .unk8 = 0x8,
504 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
505 .norm_coords = cso->normalized_coords,
506 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
507 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
508 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
509 .max_lod = FIXED_16(cso->max_lod, false),
510 };
511
512 /* If necessary, we disable mipmapping in the sampler descriptor by
513 * clamping the LOD as tight as possible (from 0 to epsilon,
514 * essentially -- remember these are fixed point numbers, so
515 * epsilon=1/256) */
516
517 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
518 hw->max_lod = hw->min_lod + 1;
519 }
520
521 static void
522 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
523 struct mali_stencil_test *out)
524 {
525 out->ref = 0; /* Gallium gets it from elsewhere */
526
527 out->mask = in->valuemask;
528 out->func = panfrost_translate_compare_func(in->func);
529 out->sfail = panfrost_translate_stencil_op(in->fail_op);
530 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
531 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
532 }
533
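/* Folds the rasterizer CSO into the fragment shader descriptor:
 * multisampling enables plus polygon offset (depth bias) units and factor.
 * With no rasterizer bound, MSAA is off and the depth bias is zeroed. */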
534 static void
535 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
536 struct mali_shader_meta *fragmeta)
537 {
538 if (!ctx->rasterizer) {
539 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
540 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
541 fragmeta->depth_units = 0.0f;
542 fragmeta->depth_factor = 0.0f;
543 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
544 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
545 return;
546 }
547
548 bool msaa = ctx->rasterizer->base.multisample;
549
550 /* TODO: Sample size */
551 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
552 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
553 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
554 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
555
556 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
557
558 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
559 ctx->rasterizer->base.offset_tri);
560 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
561 ctx->rasterizer->base.offset_tri);
562 }
563
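/* Folds the depth/stencil/alpha CSO into the fragment shader descriptor,
 * falling back to stencil-test-off / depth-write-off defaults when no ZSA
 * state is bound. */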
564 static void
565 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
566 struct mali_shader_meta *fragmeta)
567 {
568 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
569 int zfunc = PIPE_FUNC_ALWAYS;
570
571 if (!zsa) {
572 struct pipe_stencil_state default_stencil = {
573 .enabled = 0,
574 .func = PIPE_FUNC_ALWAYS,
575 .fail_op = MALI_STENCIL_KEEP,
576 .zfail_op = MALI_STENCIL_KEEP,
577 .zpass_op = MALI_STENCIL_KEEP,
578 .writemask = 0xFF,
579 .valuemask = 0xFF
580 };
581
582 panfrost_make_stencil_state(&default_stencil,
583 &fragmeta->stencil_front);
584 fragmeta->stencil_mask_front = default_stencil.writemask;
585 fragmeta->stencil_back = fragmeta->stencil_front;
586 fragmeta->stencil_mask_back = default_stencil.writemask;
587 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
588 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
589 } else {
590 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
591 zsa->stencil[0].enabled);
592 panfrost_make_stencil_state(&zsa->stencil[0],
593 &fragmeta->stencil_front);
594 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
595 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
596
597 /* If back-stencil is not enabled, use the front values */
598
599 if (zsa->stencil[1].enabled) {
600 panfrost_make_stencil_state(&zsa->stencil[1],
601 &fragmeta->stencil_back);
602 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
603 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
604 } else {
605 fragmeta->stencil_back = fragmeta->stencil_front;
606 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
607 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
608 }
609
610 if (zsa->depth.enabled)
611 zfunc = zsa->depth.func;
612
613 /* Depth state (TODO: Refactor) */
614
615 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
616 zsa->depth.writemask);
617 }
618
619 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
620 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
621 }
622
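/* Folds the blend state into the fragment shader descriptor and, for MFBD
 * and Bifrost, into the per-render-target blend descriptors in 'rts'. Each
 * RT gets either a fixed-function equation/constant or a blend shader
 * pointer. */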
623 static void
624 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
625 struct mali_shader_meta *fragmeta,
626 void *rts)
627 {
628 const struct panfrost_device *dev = pan_device(ctx->base.screen);
629
630 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
631 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
632 !ctx->blend->base.dither);
633
634 /* Get blending setup */
635 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
636
637 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
638 unsigned shader_offset = 0;
639 struct panfrost_bo *shader_bo = NULL;
640
641 for (unsigned c = 0; c < rt_count; ++c)
642 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
643 &shader_offset);
644
645 /* If there is a blend shader, work registers are shared. XXX: opt */
646
647 if (!(dev->quirks & IS_BIFROST)) {
648 for (unsigned c = 0; c < rt_count; ++c) {
649 if (blend[c].is_shader)
650 fragmeta->midgard1.work_count = 16;
651 }
652 }
653
654 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
655 * copied to the blend_meta appended (by convention), but this is the
656 * field actually read by the hardware. (Or maybe both are read...?).
657 * Specify the last RTi with a blend shader. */
658
659 fragmeta->blend.shader = 0;
660
661 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
662 if (!blend[rt].is_shader)
663 continue;
664
665 fragmeta->blend.shader = blend[rt].shader.gpu |
666 blend[rt].shader.first_tag;
667 break;
668 }
669
670 if (dev->quirks & MIDGARD_SFBD) {
671 /* On single render target (SFBD) platforms, the blend information is
672 * inside the shader meta itself. We additionally need to signal
673 * CAN_DISCARD for nontrivial blend modes (so we're able to read back
674 * the destination buffer) */
675
676 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
677 blend[0].is_shader);
678
679 if (!blend[0].is_shader) {
680 fragmeta->blend.equation = *blend[0].equation.equation;
681 fragmeta->blend.constant = blend[0].equation.constant;
682 }
683
684 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
685 !blend[0].no_blending);
686 return;
687 }
688
689 /* Additional blend descriptor tacked on for jobs using MFBD */
690
691 for (unsigned i = 0; i < rt_count; ++i) {
692 if (dev->quirks & IS_BIFROST) {
693 struct bifrost_blend_rt *brts = rts;
694 struct panfrost_shader_state *fs;
695 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
696
697 brts[i].flags = 0x200;
698 if (blend[i].is_shader) {
699 /* The blend shader's address needs to be at
700 * the same top 32 bit as the fragment shader.
701 * TODO: Ensure that's always the case.
702 */
703 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
704 (fs->bo->gpu & (0xffffffffull << 32)));
705 brts[i].shader = blend[i].shader.gpu;
706 brts[i].unk2 = 0x0;
707 } else {
708 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
709 const struct util_format_description *format_desc;
710 format_desc = util_format_description(format);
711
712 brts[i].equation = *blend[i].equation.equation;
713
714 /* TODO: this is a bit more complicated */
715 brts[i].constant = blend[i].equation.constant;
716
717 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
718 brts[i].unk2 = 0x19;
719
720 brts[i].shader_type = fs->blend_types[i];
721 }
722 } else {
723 struct midgard_blend_rt *mrts = rts;
724
725 mrts[i].flags = 0x200;
726
727 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
728 (ctx->pipe_framebuffer.cbufs[i]) &&
729 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
730
731 SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
732 SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
733 SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb);
734 SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
735
736 if (blend[i].is_shader) {
737 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
738 } else {
739 mrts[i].blend.equation = *blend[i].equation.equation;
740 mrts[i].blend.constant = blend[i].equation.constant;
741 }
742 }
743 }
744 }
745
746 static void
747 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
748 struct mali_shader_meta *fragmeta,
749 void *rts)
750 {
751 const struct panfrost_device *dev = pan_device(ctx->base.screen);
752 struct panfrost_shader_state *fs;
753
754 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
755
756 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
757 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
758 fragmeta->unknown2_4 = 0x4e0;
759
760 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
761 * is required (independent of 32-bit/64-bit descriptors), or why it's
762 * not used on later GPU revisions. Otherwise, all shader jobs fault on
763 * these earlier chips (perhaps this is a chicken bit of some kind).
764 * More investigation is needed. */
765
766 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
767
768 if (dev->quirks & IS_BIFROST) {
769 /* TODO */
770 } else {
771 /* Depending on whether it's legal to do so in the given shader, we try
772 * to enable early-z testing (or forward-pixel kill?) */
773
774 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
775 !fs->can_discard && !fs->writes_depth);
776
777 /* Add the writes Z/S flags if needed. */
778 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
779 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
780
781 /* Any time texturing is used, derivatives are implicitly calculated,
782 * so we need to enable helper invocations */
783
784 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
785 fs->helper_invocations);
786
787 /* CAN_DISCARD should be set if the fragment shader possibly contains a
788 * 'discard' instruction. It is likely this is related to optimizations
789 * related to forward-pixel kill, as per "Mali Performance 3: Is
790 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
791
792 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
793
794 bool depth_enabled = fs->writes_depth ||
795 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
796
797 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
798 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, !depth_enabled && fs->can_discard);
799 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
800 }
801
802 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
803 panfrost_frag_meta_zsa_update(ctx, fragmeta);
804 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
805 }
806
807 void
808 panfrost_emit_shader_meta(struct panfrost_batch *batch,
809 enum pipe_shader_type st,
810 struct mali_vertex_tiler_postfix *postfix)
811 {
812 struct panfrost_context *ctx = batch->ctx;
813 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
814
815 if (!ss) {
816 postfix->shader = 0;
817 return;
818 }
819
820 struct mali_shader_meta meta;
821
822 panfrost_shader_meta_init(ctx, st, &meta);
823
824 /* Add the shader BO to the batch. */
825 panfrost_batch_add_bo(batch, ss->bo,
826 PAN_BO_ACCESS_PRIVATE |
827 PAN_BO_ACCESS_READ |
828 panfrost_bo_access_for_stage(st));
829
830 mali_ptr shader_ptr;
831
832 if (st == PIPE_SHADER_FRAGMENT) {
833 struct panfrost_device *dev = pan_device(ctx->base.screen);
834 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
835 size_t desc_size = sizeof(meta);
836 void *rts = NULL;
837 struct panfrost_transfer xfer;
838 unsigned rt_size;
839
840 if (dev->quirks & MIDGARD_SFBD)
841 rt_size = 0;
842 else if (dev->quirks & IS_BIFROST)
843 rt_size = sizeof(struct bifrost_blend_rt);
844 else
845 rt_size = sizeof(struct midgard_blend_rt);
846
847 desc_size += rt_size * rt_count;
848
849 if (rt_size)
850 rts = rzalloc_size(ctx, rt_size * rt_count);
851
852 panfrost_frag_shader_meta_init(ctx, &meta, rts);
853
854 xfer = panfrost_allocate_transient(batch, desc_size);
855
856 memcpy(xfer.cpu, &meta, sizeof(meta));
857 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
858
859 if (rt_size)
860 ralloc_free(rts);
861
862 shader_ptr = xfer.gpu;
863 } else {
864 shader_ptr = panfrost_upload_transient(batch, &meta,
865 sizeof(meta));
866 }
867
868 postfix->shader = shader_ptr;
869 }
870
871 static void
872 panfrost_mali_viewport_init(struct panfrost_context *ctx,
873 struct mali_viewport *mvp)
874 {
875 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
876
877 /* Clip bounds are encoded as floats. The viewport itself is encoded as
878 * (somewhat) asymmetric ints. */
879
880 const struct pipe_scissor_state *ss = &ctx->scissor;
881
882 memset(mvp, 0, sizeof(*mvp));
883
884 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
885 * each direction. Clipping to the viewport in theory should work, but
886 * in practice causes issues when we're not explicitly trying to
887 * scissor */
888
889 *mvp = (struct mali_viewport) {
890 .clip_minx = -INFINITY,
891 .clip_miny = -INFINITY,
892 .clip_maxx = INFINITY,
893 .clip_maxy = INFINITY,
894 };
895
896 /* Always scissor to the viewport by default. */
897 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
898 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
899
900 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
901 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
902
903 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
904 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
905
906 /* Apply the scissor test */
907
908 unsigned minx, miny, maxx, maxy;
909
910 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
911 minx = MAX2(ss->minx, vp_minx);
912 miny = MAX2(ss->miny, vp_miny);
913 maxx = MIN2(ss->maxx, vp_maxx);
914 maxy = MIN2(ss->maxy, vp_maxy);
915 } else {
916 minx = vp_minx;
917 miny = vp_miny;
918 maxx = vp_maxx;
919 maxy = vp_maxy;
920 }
921
922 /* Hardware needs the min/max to be strictly ordered, so flip if we
923 * need to. The viewport transformation in the vertex shader will
924 * handle the negatives if we don't */
925
926 if (miny > maxy) {
927 unsigned temp = miny;
928 miny = maxy;
929 maxy = temp;
930 }
931
932 if (minx > maxx) {
933 unsigned temp = minx;
934 minx = maxx;
935 maxx = temp;
936 }
937
938 if (minz > maxz) {
939 float temp = minz;
940 minz = maxz;
941 maxz = temp;
942 }
943
944 /* Clamp to the framebuffer size as a last check */
945
946 minx = MIN2(ctx->pipe_framebuffer.width, minx);
947 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
948
949 miny = MIN2(ctx->pipe_framebuffer.height, miny);
950 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
951
952 /* Upload */
953
954 mvp->viewport0[0] = minx;
955 mvp->viewport1[0] = MALI_POSITIVE(maxx);
956
957 mvp->viewport0[1] = miny;
958 mvp->viewport1[1] = MALI_POSITIVE(maxy);
959
960 mvp->clip_minz = minz;
961 mvp->clip_maxz = maxz;
962 }
963
964 void
965 panfrost_emit_viewport(struct panfrost_batch *batch,
966 struct mali_vertex_tiler_postfix *tiler_postfix)
967 {
968 struct panfrost_context *ctx = batch->ctx;
969 struct mali_viewport mvp;
970
971 panfrost_mali_viewport_init(batch->ctx, &mvp);
972
973 /* Update the job, unless we're doing wallpapering (whose lack of
974 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
975 * just... be faster :) */
976
977 if (!ctx->wallpaper_batch)
978 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
979 mvp.viewport0[1],
980 mvp.viewport1[0] + 1,
981 mvp.viewport1[1] + 1);
982
983 tiler_postfix->viewport = panfrost_upload_transient(batch, &mvp,
984 sizeof(mvp));
985 }
986
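/* Returns a GPU address for a constant buffer: resource-backed UBOs are
 * mapped directly (adding the BO to the batch), while user buffers are
 * uploaded to transient memory for the draw. */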
987 static mali_ptr
988 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
989 enum pipe_shader_type st,
990 struct panfrost_constant_buffer *buf,
991 unsigned index)
992 {
993 struct pipe_constant_buffer *cb = &buf->cb[index];
994 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
995
996 if (rsrc) {
997 panfrost_batch_add_bo(batch, rsrc->bo,
998 PAN_BO_ACCESS_SHARED |
999 PAN_BO_ACCESS_READ |
1000 panfrost_bo_access_for_stage(st));
1001
1002 /* Alignment guaranteed by
1003 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1004 return rsrc->bo->gpu + cb->buffer_offset;
1005 } else if (cb->user_buffer) {
1006 return panfrost_upload_transient(batch,
1007 cb->user_buffer +
1008 cb->buffer_offset,
1009 cb->buffer_size);
1010 } else {
1011 unreachable("No constant buffer");
1012 }
1013 }
1014
1015 struct sysval_uniform {
1016 union {
1017 float f[4];
1018 int32_t i[4];
1019 uint32_t u[4];
1020 uint64_t du[2];
1021 };
1022 };
1023
1024 static void
1025 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1026 struct sysval_uniform *uniform)
1027 {
1028 struct panfrost_context *ctx = batch->ctx;
1029 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1030
1031 uniform->f[0] = vp->scale[0];
1032 uniform->f[1] = vp->scale[1];
1033 uniform->f[2] = vp->scale[2];
1034 }
1035
1036 static void
1037 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1038 struct sysval_uniform *uniform)
1039 {
1040 struct panfrost_context *ctx = batch->ctx;
1041 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1042
1043 uniform->f[0] = vp->translate[0];
1044 uniform->f[1] = vp->translate[1];
1045 uniform->f[2] = vp->translate[2];
1046 }
1047
1048 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1049 enum pipe_shader_type st,
1050 unsigned int sysvalid,
1051 struct sysval_uniform *uniform)
1052 {
1053 struct panfrost_context *ctx = batch->ctx;
1054 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1055 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1056 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1057 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1058
1059 assert(dim);
1060 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1061
1062 if (dim > 1)
1063 uniform->i[1] = u_minify(tex->texture->height0,
1064 tex->u.tex.first_level);
1065
1066 if (dim > 2)
1067 uniform->i[2] = u_minify(tex->texture->depth0,
1068 tex->u.tex.first_level);
1069
1070 if (is_array)
1071 uniform->i[dim] = tex->texture->array_size;
1072 }
1073
1074 static void
1075 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1076 enum pipe_shader_type st,
1077 unsigned ssbo_id,
1078 struct sysval_uniform *uniform)
1079 {
1080 struct panfrost_context *ctx = batch->ctx;
1081
1082 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1083 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1084
1085 /* Compute address */
1086 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1087
1088 panfrost_batch_add_bo(batch, bo,
1089 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1090 panfrost_bo_access_for_stage(st));
1091
1092 /* Upload address and size as sysval */
1093 uniform->du[0] = bo->gpu + sb.buffer_offset;
1094 uniform->u[2] = sb.buffer_size;
1095 }
1096
1097 static void
1098 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1099 enum pipe_shader_type st,
1100 unsigned samp_idx,
1101 struct sysval_uniform *uniform)
1102 {
1103 struct panfrost_context *ctx = batch->ctx;
1104 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1105
1106 uniform->f[0] = sampl->min_lod;
1107 uniform->f[1] = sampl->max_lod;
1108 uniform->f[2] = sampl->lod_bias;
1109
1110 /* Even without any errata, Midgard represents "no mipmapping" as
1111 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1112 * panfrost_create_sampler_state which also explains our choice of
1113 * epsilon value (again to keep behaviour consistent) */
1114
1115 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1116 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1117 }
1118
1119 static void
1120 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1121 struct sysval_uniform *uniform)
1122 {
1123 struct panfrost_context *ctx = batch->ctx;
1124
1125 uniform->u[0] = ctx->compute_grid->grid[0];
1126 uniform->u[1] = ctx->compute_grid->grid[1];
1127 uniform->u[2] = ctx->compute_grid->grid[2];
1128 }
1129
1130 static void
1131 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1132 struct panfrost_shader_state *ss,
1133 enum pipe_shader_type st)
1134 {
1135 struct sysval_uniform *uniforms = (void *)buf;
1136
1137 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1138 int sysval = ss->sysval[i];
1139
1140 switch (PAN_SYSVAL_TYPE(sysval)) {
1141 case PAN_SYSVAL_VIEWPORT_SCALE:
1142 panfrost_upload_viewport_scale_sysval(batch,
1143 &uniforms[i]);
1144 break;
1145 case PAN_SYSVAL_VIEWPORT_OFFSET:
1146 panfrost_upload_viewport_offset_sysval(batch,
1147 &uniforms[i]);
1148 break;
1149 case PAN_SYSVAL_TEXTURE_SIZE:
1150 panfrost_upload_txs_sysval(batch, st,
1151 PAN_SYSVAL_ID(sysval),
1152 &uniforms[i]);
1153 break;
1154 case PAN_SYSVAL_SSBO:
1155 panfrost_upload_ssbo_sysval(batch, st,
1156 PAN_SYSVAL_ID(sysval),
1157 &uniforms[i]);
1158 break;
1159 case PAN_SYSVAL_NUM_WORK_GROUPS:
1160 panfrost_upload_num_work_groups_sysval(batch,
1161 &uniforms[i]);
1162 break;
1163 case PAN_SYSVAL_SAMPLER:
1164 panfrost_upload_sampler_sysval(batch, st,
1165 PAN_SYSVAL_ID(sysval),
1166 &uniforms[i]);
1167 break;
1168 default:
1169 assert(0);
1170 }
1171 }
1172 }
1173
1174 static const void *
1175 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1176 unsigned index)
1177 {
1178 struct pipe_constant_buffer *cb = &buf->cb[index];
1179 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1180
1181 if (rsrc)
1182 return rsrc->bo->cpu;
1183 else if (cb->user_buffer)
1184 return cb->user_buffer;
1185 else
1186 unreachable("No constant buffer");
1187 }
1188
1189 void
1190 panfrost_emit_const_buf(struct panfrost_batch *batch,
1191 enum pipe_shader_type stage,
1192 struct mali_vertex_tiler_postfix *postfix)
1193 {
1194 struct panfrost_context *ctx = batch->ctx;
1195 struct panfrost_shader_variants *all = ctx->shader[stage];
1196
1197 if (!all)
1198 return;
1199
1200 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1201
1202 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1203
1204 /* Uniforms are implicitly UBO #0 */
1205 bool has_uniforms = buf->enabled_mask & (1 << 0);
1206
1207 /* Allocate room for the sysval and the uniforms */
1208 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1209 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1210 size_t size = sys_size + uniform_size;
1211 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1212 size);
1213
1214 /* Upload sysvals requested by the shader */
1215 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1216
1217 /* Upload uniforms */
1218 if (has_uniforms && uniform_size) {
1219 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1220 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1221 }
1222
1223 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1224 * uploaded */
1225
1226 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1227 assert(ubo_count >= 1);
1228
1229 size_t sz = sizeof(uint64_t) * ubo_count;
1230 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1231 int uniform_count = ss->uniform_count;
1232
1233 /* Upload uniforms as a UBO */
1234 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1235
1236 /* The rest are honest-to-goodness UBOs */
1237
1238 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1239 size_t usz = buf->cb[ubo].buffer_size;
1240 bool enabled = buf->enabled_mask & (1 << ubo);
1241 bool empty = usz == 0;
1242
1243 if (!enabled || empty) {
1244 /* Stub out disabled UBOs to catch accesses */
1245 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1246 continue;
1247 }
1248
1249 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1250 buf, ubo);
1251
1252 unsigned bytes_per_field = 16;
1253 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1254 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1255 }
1256
1257 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1258 postfix->uniforms = transfer.gpu;
1259 postfix->uniform_buffers = ubufs;
1260
1261 buf->dirty_mask = 0;
1262 }
1263
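/* Emits the shared memory descriptor for compute jobs: per-workgroup storage
 * is the shader's declared shared size rounded up to a power of two (at
 * least 128 bytes), backed by a BO sized for the whole dispatch grid. */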
1264 void
1265 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1266 const struct pipe_grid_info *info,
1267 struct midgard_payload_vertex_tiler *vtp)
1268 {
1269 struct panfrost_context *ctx = batch->ctx;
1270 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1271 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1272 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1273 128));
1274 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1275 info->grid[2] * 4;
1276 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1277 shared_size,
1278 1);
1279
1280 struct mali_shared_memory shared = {
1281 .shared_memory = bo->gpu,
1282 .shared_workgroup_count =
1283 util_logbase2_ceil(info->grid[0]) +
1284 util_logbase2_ceil(info->grid[1]) +
1285 util_logbase2_ceil(info->grid[2]),
1286 .shared_unk1 = 0x2,
1287 .shared_shift = util_logbase2(single_size) - 1
1288 };
1289
1290 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1291 sizeof(shared));
1292 }
1293
1294 static mali_ptr
1295 panfrost_get_tex_desc(struct panfrost_batch *batch,
1296 enum pipe_shader_type st,
1297 struct panfrost_sampler_view *view)
1298 {
1299 if (!view)
1300 return (mali_ptr) 0;
1301
1302 struct pipe_sampler_view *pview = &view->base;
1303 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1304
1305 /* Add the BO to the job so it's retained until the job is done. */
1306
1307 panfrost_batch_add_bo(batch, rsrc->bo,
1308 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1309 panfrost_bo_access_for_stage(st));
1310
1311 panfrost_batch_add_bo(batch, view->midgard_bo,
1312 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1313 panfrost_bo_access_for_stage(st));
1314
1315 return view->midgard_bo->gpu;
1316 }
1317
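/* Uploads texture descriptors for a stage. Bifrost packs the descriptors
 * themselves into one contiguous transient allocation, while Midgard uploads
 * an array of pointers ("trampolines") to the per-texture descriptors. */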
1318 void
1319 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1320 enum pipe_shader_type stage,
1321 struct mali_vertex_tiler_postfix *postfix)
1322 {
1323 struct panfrost_context *ctx = batch->ctx;
1324 struct panfrost_device *device = pan_device(ctx->base.screen);
1325
1326 if (!ctx->sampler_view_count[stage])
1327 return;
1328
1329 if (device->quirks & IS_BIFROST) {
1330 struct bifrost_texture_descriptor *descriptors;
1331
1332 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1333 ctx->sampler_view_count[stage]);
1334
1335 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1336 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1337 struct pipe_sampler_view *pview = &view->base;
1338 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1339
1340 /* Add the BOs to the job so they are retained until the job is done. */
1341
1342 panfrost_batch_add_bo(batch, rsrc->bo,
1343 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1344 panfrost_bo_access_for_stage(stage));
1345
1346 panfrost_batch_add_bo(batch, view->bifrost_bo,
1347 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1348 panfrost_bo_access_for_stage(stage));
1349
1350 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1351 }
1352
1353 postfix->textures = panfrost_upload_transient(batch,
1354 descriptors,
1355 sizeof(struct bifrost_texture_descriptor) *
1356 ctx->sampler_view_count[stage]);
1357
1358 free(descriptors);
1359 } else {
1360 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1361
1362 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1363 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1364 ctx->sampler_views[stage][i]);
1365
1366 postfix->textures = panfrost_upload_transient(batch,
1367 trampolines,
1368 sizeof(uint64_t) *
1369 ctx->sampler_view_count[stage]);
1370 }
1371 }
1372
1373 void
1374 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1375 enum pipe_shader_type stage,
1376 struct mali_vertex_tiler_postfix *postfix)
1377 {
1378 struct panfrost_context *ctx = batch->ctx;
1379 struct panfrost_device *device = pan_device(ctx->base.screen);
1380
1381 if (!ctx->sampler_count[stage])
1382 return;
1383
1384 if (device->quirks & IS_BIFROST) {
1385 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1386 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1387 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1388 transfer_size);
1389 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1390
1391 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1392 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1393
1394 postfix->sampler_descriptor = transfer.gpu;
1395 } else {
1396 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1397 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1398 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1399 transfer_size);
1400 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1401
1402 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1403 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1404
1405 postfix->sampler_descriptor = transfer.gpu;
1406 }
1407 }
1408
1409 void
1410 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1411 struct mali_vertex_tiler_postfix *vertex_postfix)
1412 {
1413 struct panfrost_context *ctx = batch->ctx;
1414
1415 if (!ctx->vertex)
1416 return;
1417
1418 struct panfrost_vertex_state *so = ctx->vertex;
1419
1420 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1421 vertex_postfix->attribute_meta = panfrost_upload_transient(batch, so->hw,
1422 sizeof(*so->hw) *
1423 PAN_MAX_ATTRIBUTE);
1424 }
1425
1426 void
1427 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1428 struct mali_vertex_tiler_postfix *vertex_postfix)
1429 {
1430 struct panfrost_context *ctx = batch->ctx;
1431 struct panfrost_vertex_state *so = ctx->vertex;
1432
1433 /* Staged mali_attr, and index into them. i =/= k, depending on the
1434 * vertex buffer mask and instancing. Twice as much room is allocated,
1435 * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1436 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1437 unsigned k = 0;
1438
1439 for (unsigned i = 0; i < so->num_elements; ++i) {
1440 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1441 * means duplicating some vertex buffers (who cares? aside from
1442 * maybe some caching implications but I somehow doubt that
1443 * matters) */
1444
1445 struct pipe_vertex_element *elem = &so->pipe[i];
1446 unsigned vbi = elem->vertex_buffer_index;
1447
1448 /* The exception to 1:1 mapping is that we can have multiple
1449 * entries (NPOT divisors), so we fix up anyway */
1450
1451 so->hw[i].index = k;
1452
1453 if (!(ctx->vb_mask & (1 << vbi)))
1454 continue;
1455
1456 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1457 struct panfrost_resource *rsrc;
1458
1459 rsrc = pan_resource(buf->buffer.resource);
1460 if (!rsrc)
1461 continue;
1462
1463 /* Align to 64 bytes by masking off the lower bits. This
1464 * will be adjusted back when we fixup the src_offset in
1465 * mali_attr_meta */
1466
1467 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1468 mali_ptr addr = raw_addr & ~63;
1469 unsigned chopped_addr = raw_addr - addr;
1470
1471 /* Add a dependency of the batch on the vertex buffer */
1472 panfrost_batch_add_bo(batch, rsrc->bo,
1473 PAN_BO_ACCESS_SHARED |
1474 PAN_BO_ACCESS_READ |
1475 PAN_BO_ACCESS_VERTEX_TILER);
1476
1477 /* Set common fields */
1478 attrs[k].elements = addr;
1479 attrs[k].stride = buf->stride;
1480
1481 /* Since we advanced the base pointer, we shrink the buffer
1482 * size */
1483 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1484
1485 /* We need to add the extra size we masked off (for
1486 * correctness) so the data doesn't get clamped away */
1487 attrs[k].size += chopped_addr;
1488
1489 /* For non-instancing make sure we initialize */
1490 attrs[k].shift = attrs[k].extra_flags = 0;
1491
1492 /* Instancing uses a dramatically different code path than
1493 * linear, so dispatch for the actual emission now that the
1494 * common code is finished */
1495
1496 unsigned divisor = elem->instance_divisor;
1497
1498 if (divisor && ctx->instance_count == 1) {
1499 /* Silly corner case where there's a divisor(=1) but
1500 * there's no legitimate instancing. So we want *every*
1501 * attribute to be the same. So set stride to zero so
1502 * we don't go anywhere. */
1503
1504 attrs[k].size = attrs[k].stride + chopped_addr;
1505 attrs[k].stride = 0;
1506 attrs[k++].elements |= MALI_ATTR_LINEAR;
1507 } else if (ctx->instance_count <= 1) {
1508 /* Normal, non-instanced attributes */
1509 attrs[k++].elements |= MALI_ATTR_LINEAR;
1510 } else {
1511 unsigned instance_shift = vertex_postfix->instance_shift;
1512 unsigned instance_odd = vertex_postfix->instance_odd;
1513
1514 k += panfrost_vertex_instanced(ctx->padded_count,
1515 instance_shift,
1516 instance_odd,
1517 divisor, &attrs[k]);
1518 }
1519 }
1520
1521 /* Add special gl_VertexID/gl_InstanceID buffers */
1522
1523 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1524 so->hw[PAN_VERTEX_ID].index = k++;
1525 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1526 so->hw[PAN_INSTANCE_ID].index = k++;
1527
1528 /* Upload whatever we emitted and go */
1529
1530 vertex_postfix->attributes = panfrost_upload_transient(batch, attrs,
1531 k * sizeof(*attrs));
1532 }
1533
1534 static mali_ptr
1535 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1536 unsigned stride, unsigned count)
1537 {
1538 /* Fill out the descriptor */
1539 slot->stride = stride;
1540 slot->size = stride * count;
1541 slot->shift = slot->extra_flags = 0;
1542
1543 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1544 slot->size);
1545
1546 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1547
1548 return transfer.gpu;
1549 }
1550
1551 static void
1552 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1553 unsigned stride, unsigned offset, unsigned count,
1554 struct pipe_stream_output_target *target)
1555 {
1556 /* Fill out the descriptor */
1557 slot->stride = stride * 4;
1558 slot->shift = slot->extra_flags = 0;
1559
1560 unsigned max_size = target->buffer_size;
1561 unsigned expected_size = slot->stride * count;
1562
1563 slot->size = MIN2(max_size, expected_size);
1564
1565 /* Grab the BO and bind it to the batch */
1566 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1567
1568 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1569 * the perspective of the TILER and FRAGMENT.
1570 */
1571 panfrost_batch_add_bo(batch, bo,
1572 PAN_BO_ACCESS_SHARED |
1573 PAN_BO_ACCESS_RW |
1574 PAN_BO_ACCESS_VERTEX_TILER |
1575 PAN_BO_ACCESS_FRAGMENT);
1576
1577 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1578 slot->elements = addr;
1579 }
1580
1581 /* Given a shader and buffer indices, link varying metadata together */
1582
1583 static bool
1584 is_special_varying(gl_varying_slot loc)
1585 {
1586 switch (loc) {
1587 case VARYING_SLOT_POS:
1588 case VARYING_SLOT_PSIZ:
1589 case VARYING_SLOT_PNTC:
1590 case VARYING_SLOT_FACE:
1591 return true;
1592 default:
1593 return false;
1594 }
1595 }
1596
1597 static void
1598 panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss,
1599 signed general, signed gl_Position,
1600 signed gl_PointSize, signed gl_PointCoord,
1601 signed gl_FrontFacing)
1602 {
1603 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
1604
1605 for (unsigned i = 0; i < ss->varying_count; ++i) {
1606 gl_varying_slot location = ss->varyings_loc[i];
1607 int index = -1;
1608
1609 switch (location) {
1610 case VARYING_SLOT_POS:
1611 index = gl_Position;
1612 break;
1613 case VARYING_SLOT_PSIZ:
1614 index = gl_PointSize;
1615 break;
1616 case VARYING_SLOT_PNTC:
1617 index = gl_PointCoord;
1618 break;
1619 case VARYING_SLOT_FACE:
1620 index = gl_FrontFacing;
1621 break;
1622 default:
1623 index = general;
1624 break;
1625 }
1626
1627 assert(index >= 0);
1628 out[i].index = index;
1629 }
1630 }
1631
1632 static bool
1633 has_point_coord(unsigned mask, gl_varying_slot loc)
1634 {
1635 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1636 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1637 else if (loc == VARYING_SLOT_PNTC)
1638 return (mask & (1 << 8));
1639 else
1640 return false;
1641 }
1642
1643 /* Helpers for manipulating stream out information so we can pack varyings
1644 * accordingly. Compute the src_offset for a given captured varying */
1645
1646 static struct pipe_stream_output *
1647 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1648 {
1649 for (unsigned i = 0; i < info->num_outputs; ++i) {
1650 if (info->output[i].register_index == loc)
1651 return &info->output[i];
1652 }
1653
1654 unreachable("Varying not captured");
1655 }
1656
1657 /* TODO: Integers */
1658 static enum mali_format
1659 pan_xfb_format(unsigned nr_components)
1660 {
1661 switch (nr_components) {
1662 case 1: return MALI_R32F;
1663 case 2: return MALI_RG32F;
1664 case 3: return MALI_RGB32F;
1665 case 4: return MALI_RGBA32F;
1666 default: unreachable("Invalid format");
1667 }
1668 }
1669
1670 void
1671 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1672 unsigned vertex_count,
1673 struct mali_vertex_tiler_postfix *vertex_postfix,
1674 struct mali_vertex_tiler_postfix *tiler_postfix,
1675 union midgard_primitive_size *primitive_size)
1676 {
1677 /* Load the shaders */
1678 struct panfrost_context *ctx = batch->ctx;
1679 struct panfrost_shader_state *vs, *fs;
1680 unsigned int num_gen_varyings = 0;
1681 size_t vs_size, fs_size;
1682
1683 /* Allocate the varying descriptor */
1684
1685 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1686 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1687 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
1688 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
1689
1690 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
1691 vs_size +
1692 fs_size);
1693
1694 struct pipe_stream_output_info *so = &vs->stream_output;
1695
1696 /* Check if this varying is linked by us. This is the case for
1697 * general-purpose, non-captured varyings. If it is, link it. If it's
1698 * not, use the provided stream out information to determine the
1699 * offset, since it was already linked for us. */
1700
1701 for (unsigned i = 0; i < vs->varying_count; i++) {
1702 gl_varying_slot loc = vs->varyings_loc[i];
1703
1704 bool special = is_special_varying(loc);
1705 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1706
1707 if (captured) {
1708 struct pipe_stream_output *o = pan_get_so(so, loc);
1709
1710 unsigned dst_offset = o->dst_offset * 4; /* dwords */
1711 vs->varyings[i].src_offset = dst_offset;
1712 } else if (!special) {
1713 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
1714 }
1715 }
1716
1717 /* Conversely, we need to set src_offset for the captured varyings.
1718 * Here, the layout is defined by the stream out info, not us */
1719
1720 /* Link up with fragment varyings */
1721 bool reads_point_coord = fs->reads_point_coord;
1722
1723 for (unsigned i = 0; i < fs->varying_count; i++) {
1724 gl_varying_slot loc = fs->varyings_loc[i];
1725 unsigned src_offset;
1726 signed vs_idx = -1;
1727
1728 /* Link up */
1729 for (unsigned j = 0; j < vs->varying_count; ++j) {
1730 if (vs->varyings_loc[j] == loc) {
1731 vs_idx = j;
1732 break;
1733 }
1734 }
1735
1736 /* Either assign or reuse */
1737 if (vs_idx >= 0)
1738 src_offset = vs->varyings[vs_idx].src_offset;
1739 else
1740 src_offset = 16 * (num_gen_varyings++);
1741
1742 fs->varyings[i].src_offset = src_offset;
1743
1744 if (has_point_coord(fs->point_sprite_mask, loc))
1745 reads_point_coord = true;
1746 }
1747
1748 memcpy(trans.cpu, vs->varyings, vs_size);
1749 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
1750
1751 union mali_attr varyings[PIPE_MAX_ATTRIBS] = {0};
1752
1753 /* Figure out how many streamout buffers could be bound */
1754 unsigned so_count = ctx->streamout.num_targets;
1755 for (unsigned i = 0; i < vs->varying_count; i++) {
1756 gl_varying_slot loc = vs->varyings_loc[i];
1757
1758 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1759 if (!captured) continue;
1760
1761 struct pipe_stream_output *o = pan_get_so(so, loc);
1762 so_count = MAX2(so_count, o->output_buffer + 1);
1763 }
1764
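/* Lay out the varying buffer table: one record per streamout buffer, then the
 * general varying buffer, then a dedicated record for gl_Position, followed by
 * the optional point size / point coord / front facing / frag coord specials */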
1765 signed idx = so_count;
1766 signed general = idx++;
1767 signed gl_Position = idx++;
1768 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
1769 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
1770 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
1771 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
1772
1773 /* Emit the stream out buffers */
1774
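/* out_count is the number of vertices the draw emits into the streamout
 * buffers, accounting for the primitive type (e.g. a 5-vertex triangle strip
 * stores 3 primitives * 3 vertices = 9 outputs) */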
1775 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1776 ctx->vertex_count);
1777
1778 for (unsigned i = 0; i < so_count; ++i) {
1779 if (i < ctx->streamout.num_targets) {
1780 panfrost_emit_streamout(batch, &varyings[i],
1781 so->stride[i],
1782 ctx->streamout.offsets[i],
1783 out_count,
1784 ctx->streamout.targets[i]);
1785 } else {
1786 /* Emit a dummy buffer */
1787 panfrost_emit_varyings(batch, &varyings[i],
1788 so->stride[i] * 4,
1789 out_count);
1790
1791 /* Clear the attribute type */
1792 varyings[i].elements &= ~0xF;
1793 }
1794 }
1795
1796 panfrost_emit_varyings(batch, &varyings[general],
1797 num_gen_varyings * 16,
1798 vertex_count);
1799
1800 mali_ptr varyings_p;
1801
1802 /* fp32 vec4 gl_Position */
1803 varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position],
1804 sizeof(float) * 4, vertex_count);
1805 tiler_postfix->position_varying = varyings_p;
1806
1807
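/* gl_PointSize is presumably one 16-bit float per vertex (hence the 2-byte
 * stride below), consumed by the tiler through primitive_size->pointer */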
1808 if (panfrost_writes_point_size(ctx)) {
1809 varyings_p = panfrost_emit_varyings(batch,
1810 &varyings[gl_PointSize],
1811 2, vertex_count);
1812 primitive_size->pointer = varyings_p;
1813 }
1814
1815 if (reads_point_coord)
1816 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
1817
1818 if (fs->reads_face)
1819 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
1820
1821 if (fs->reads_frag_coord)
1822 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
1823
1824 struct panfrost_device *device = pan_device(ctx->base.screen);
1825 assert(!(device->quirks & IS_BIFROST) || !(reads_point_coord));
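/* Point sprites are not wired up for Bifrost at this point, hence the assert
 * above: reads_point_coord must not be set on Bifrost */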
1826
1827 /* Let's go ahead and link varying meta to the buffer in question, now
1828 * that that information is available. VARYING_SLOT_POS is mapped to
1829 * gl_FragCoord for fragment shaders but to gl_Position for vertex
1830 * shaders */
1831
1832 panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position,
1833 gl_PointSize, gl_PointCoord,
1834 gl_FrontFacing);
1835
1836 panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general,
1837 gl_FragCoord, gl_PointSize,
1838 gl_PointCoord, gl_FrontFacing);
1839
1840 /* Patch the records of captured varyings to point at their streamout buffers */
1841
1842 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
1843 struct mali_attr_meta *ofs = ovs + vs->varying_count;
1844
1845 for (unsigned i = 0; i < vs->varying_count; i++) {
1846 gl_varying_slot loc = vs->varyings_loc[i];
1847
1848 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1849 if (!captured)
1850 continue;
1851
1852 struct pipe_stream_output *o = pan_get_so(so, loc);
1853 ovs[i].index = o->output_buffer;
1854
1855 /* Set the type appropriately. TODO: Integer varyings XXX */
1856 assert(o->stream == 0);
1857 ovs[i].format = pan_xfb_format(o->num_components);
1858
1859 if (device->quirks & HAS_SWIZZLES)
1860 ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components);
1861 else
1862 ovs[i].swizzle = panfrost_bifrost_swizzle(o->num_components);
1863
1864 /* Link to the fragment */
1865 signed fs_idx = -1;
1866
1867 /* Link up */
1868 for (unsigned j = 0; j < fs->varying_count; ++j) {
1869 if (fs->varyings_loc[j] == loc) {
1870 fs_idx = j;
1871 break;
1872 }
1873 }
1874
1875 if (fs_idx >= 0) {
1876 ofs[fs_idx].index = ovs[i].index;
1877 ofs[fs_idx].format = ovs[i].format;
1878 ofs[fs_idx].swizzle = ovs[i].swizzle;
1879 }
1880 }
1881
1882 /* Replace point sprite */
1883 for (unsigned i = 0; i < fs->varying_count; i++) {
1884 /* If we have a point sprite replacement, handle that here. We
1885 * have to translate the location first. TODO: flip y in the shader
1886 * instead; we're already keying the shader, this is just a time crunch */
1887
1888 if (has_point_coord(fs->point_sprite_mask,
1889 fs->varyings_loc[i])) {
1890 ofs[i].index = gl_PointCoord;
1891
1892 /* Swizzle out the z/w to 0/1 */
1893 ofs[i].format = MALI_RG16F;
1894 ofs[i].swizzle = panfrost_get_default_swizzle(2);
1895 }
1896 }
1897
1898 /* Fix up unaligned addresses */
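/* The low bits of each record encode the attribute type, so buffer addresses
 * must be 64-byte aligned, while streamout offsets are only dword-aligned.
 * When a captured buffer lands off-alignment, round its base down, mark it
 * linear, and push the residue into the src_offset of every varying reading
 * from it (e.g. a base ending in 0x30 becomes 0x00 with src_offset += 0x30) */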
1899 for (unsigned i = 0; i < so_count; ++i) {
1900 if (varyings[i].elements < MALI_RECORD_SPECIAL)
1901 continue;
1902
1903 unsigned align = (varyings[i].elements & 63);
1904
1905 /* While we're at it, the SO buffers are linear */
1906
1907 if (!align) {
1908 varyings[i].elements |= MALI_ATTR_LINEAR;
1909 continue;
1910 }
1911
1912 /* We need to adjust alignment */
1913 varyings[i].elements &= ~63;
1914 varyings[i].elements |= MALI_ATTR_LINEAR;
1915 varyings[i].size += align;
1916
1917 for (unsigned v = 0; v < vs->varying_count; ++v) {
1918 if (ovs[v].index != i)
1919 continue;
1920
1921 ovs[v].src_offset = vs->varyings[v].src_offset + align;
1922 }
1923
1924 for (unsigned f = 0; f < fs->varying_count; ++f) {
1925 if (ofs[f].index != i)
1926 continue;
1927
1928 ofs[f].src_offset = fs->varyings[f].src_offset + align;
1929 }
1930 }
1931
1932 varyings_p = panfrost_upload_transient(batch, varyings,
1933 idx * sizeof(*varyings));
1934 vertex_postfix->varyings = varyings_p;
1935 tiler_postfix->varyings = varyings_p;
1936
1937 vertex_postfix->varying_meta = trans.gpu;
1938 tiler_postfix->varying_meta = trans.gpu + vs_size;
1939 }
1940
1941 void
1942 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1943 struct mali_vertex_tiler_prefix *vertex_prefix,
1944 struct mali_vertex_tiler_postfix *vertex_postfix,
1945 struct mali_vertex_tiler_prefix *tiler_prefix,
1946 struct mali_vertex_tiler_postfix *tiler_postfix,
1947 union midgard_primitive_size *primitive_size)
1948 {
1949 struct panfrost_context *ctx = batch->ctx;
1950 struct panfrost_device *device = pan_device(ctx->base.screen);
1951 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1952 struct bifrost_payload_vertex bifrost_vertex = {0,};
1953 struct bifrost_payload_tiler bifrost_tiler = {0,};
1954 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1955 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1956 void *vp, *tp;
1957 size_t vp_size, tp_size;
1958
1959 if (device->quirks & IS_BIFROST) {
1960 bifrost_vertex.prefix = *vertex_prefix;
1961 bifrost_vertex.postfix = *vertex_postfix;
1962 vp = &bifrost_vertex;
1963 vp_size = sizeof(bifrost_vertex);
1964
1965 bifrost_tiler.prefix = *tiler_prefix;
1966 bifrost_tiler.tiler.primitive_size = *primitive_size;
1967 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1968 bifrost_tiler.postfix = *tiler_postfix;
1969 tp = &bifrost_tiler;
1970 tp_size = sizeof(bifrost_tiler);
1971 } else {
1972 midgard_vertex.prefix = *vertex_prefix;
1973 midgard_vertex.postfix = *vertex_postfix;
1974 vp = &midgard_vertex;
1975 vp_size = sizeof(midgard_vertex);
1976
1977 midgard_tiler.prefix = *tiler_prefix;
1978 midgard_tiler.postfix = *tiler_postfix;
1979 midgard_tiler.primitive_size = *primitive_size;
1980 tp = &midgard_tiler;
1981 tp_size = sizeof(midgard_tiler);
1982 }
1983
1984 if (wallpapering) {
1985 /* Inject in reverse order, with "predicted" job indices.
1986 * THIS IS A HACK XXX */
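/* The tiler job is pushed first but must depend on the vertex job pushed
 * after it; batch->job_index + 2 is presumably the index that vertex job
 * will end up with, hence "predicted" */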
1987 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1988 batch->job_index + 2, tp, tp_size, true);
1989 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1990 vp, vp_size, true);
1991 return;
1992 }
1993
1994 /* If rasterizer discard is enabled, only submit the vertex job */
1995
1996 bool rasterizer_discard = ctx->rasterizer &&
1997 ctx->rasterizer->base.rasterizer_discard;
1998
1999 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
2000 vp, vp_size, false);
2001
2002 if (rasterizer_discard)
2003 return;
2004
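/* Otherwise, the tiler job consumes the vertex job's output, so the vertex
 * job's index is passed as its dependency */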
2005 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2006 false);
2007 }
2008
2009 /* TODO: stop hardcoding this */
2010 mali_ptr
2011 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2012 {
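/* Presumably fixed-point positions in 1/256ths of a pixel, so (128, 128) is
 * the pixel centre; only the single-sample entry looks meaningful here, hence
 * the TODO above */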
2013 uint16_t locations[] = {
2014 128, 128,
2015 0, 256,
2016 0, 256,
2017 0, 256,
2018 0, 256,
2019 0, 256,
2020 0, 256,
2021 0, 256,
2022 0, 256,
2023 0, 256,
2024 0, 256,
2025 0, 256,
2026 0, 256,
2027 0, 256,
2028 0, 256,
2029 0, 256,
2030 0, 256,
2031 0, 256,
2032 0, 256,
2033 0, 256,
2034 0, 256,
2035 0, 256,
2036 0, 256,
2037 0, 256,
2038 0, 256,
2039 0, 256,
2040 0, 256,
2041 0, 256,
2042 0, 256,
2043 0, 256,
2044 0, 256,
2045 0, 256,
2046 128, 128,
2047 0, 0,
2048 0, 0,
2049 0, 0,
2050 0, 0,
2051 0, 0,
2052 0, 0,
2053 0, 0,
2054 0, 0,
2055 0, 0,
2056 0, 0,
2057 0, 0,
2058 0, 0,
2059 0, 0,
2060 0, 0,
2061 0, 0,
2062 };
2063
2064 return panfrost_upload_transient(batch, locations, 96 * sizeof(uint16_t));
2065 }