panfrost: Assert on unimplemented fragcoord etc
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_allocate.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
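/* On Bifrost, vertex/tiler jobs reference a shared memory / thread-local
 * storage descriptor rather than a framebuffer pointer. A minimal summary of
 * what the helper below does: build that descriptor, sized from the batch's
 * stack requirements, and point the postfix at the uploaded copy. */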
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_upload_transient(batch, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77 /* If we haven't already, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_allocate_transient(batch, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
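/* When the vertex shader does not write gl_PointSize, the primitive size is
 * a constant taken from the rasterizer state: the point size for point
 * draws, the line width otherwise. */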
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query)
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 else
140 postfix->occlusion_counter = 0;
141 }
142
143 void
144 panfrost_vt_init(struct panfrost_context *ctx,
145 enum pipe_shader_type stage,
146 struct mali_vertex_tiler_prefix *prefix,
147 struct mali_vertex_tiler_postfix *postfix)
148 {
149 struct panfrost_device *device = pan_device(ctx->base.screen);
150
151 if (!ctx->shader[stage])
152 return;
153
154 memset(prefix, 0, sizeof(*prefix));
155 memset(postfix, 0, sizeof(*postfix));
156
157 if (device->quirks & IS_BIFROST) {
158 postfix->gl_enables = 0x2;
159 panfrost_vt_emit_shared_memory(ctx, postfix);
160 } else {
161 postfix->gl_enables = 0x6;
162 panfrost_vt_attach_framebuffer(ctx, postfix);
163 }
164
165 if (stage == PIPE_SHADER_FRAGMENT) {
166 panfrost_vt_update_occlusion_query(ctx, postfix);
167 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
168 }
169 }
170
171 static unsigned
172 panfrost_translate_index_size(unsigned size)
173 {
174 switch (size) {
175 case 1:
176 return MALI_DRAW_INDEXED_UINT8;
177
178 case 2:
179 return MALI_DRAW_INDEXED_UINT16;
180
181 case 4:
182 return MALI_DRAW_INDEXED_UINT32;
183
184 default:
185 unreachable("Invalid index size");
186 }
187 }
188
189 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
190 * good for the duration of the draw (transient), though it may last longer. Also get
191 * the bounds on the index buffer for the range accessed by the draw. We do
192 * these operations together because there are natural optimizations which
193 * require them to be together. */
194
195 static mali_ptr
196 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
197 const struct pipe_draw_info *info,
198 unsigned *min_index, unsigned *max_index)
199 {
200 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
201 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
202 off_t offset = info->start * info->index_size;
203 bool needs_indices = true;
204 mali_ptr out = 0;
205
206 if (info->max_index != ~0u) {
207 *min_index = info->min_index;
208 *max_index = info->max_index;
209 needs_indices = false;
210 }
211
212 if (!info->has_user_indices) {
213 /* Only resources can be directly mapped */
214 panfrost_batch_add_bo(batch, rsrc->bo,
215 PAN_BO_ACCESS_SHARED |
216 PAN_BO_ACCESS_READ |
217 PAN_BO_ACCESS_VERTEX_TILER);
218 out = rsrc->bo->gpu + offset;
219
220 /* Check the cache */
221 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
222 info->start,
223 info->count,
224 min_index,
225 max_index);
226 } else {
227 /* Otherwise, we need to upload to transient memory */
228 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
229 out = panfrost_upload_transient(batch, ibuf8 + offset,
230 info->count *
231 info->index_size);
232 }
233
234 if (needs_indices) {
235 /* Fallback */
236 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
237
238 if (!info->has_user_indices)
239 panfrost_minmax_cache_add(rsrc->index_cache,
240 info->start, info->count,
241 *min_index, *max_index);
242 }
243
244 return out;
245 }
246
247 void
248 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
249 const struct pipe_draw_info *info,
250 enum mali_draw_mode draw_mode,
251 struct mali_vertex_tiler_postfix *vertex_postfix,
252 struct mali_vertex_tiler_prefix *tiler_prefix,
253 struct mali_vertex_tiler_postfix *tiler_postfix,
254 unsigned *vertex_count,
255 unsigned *padded_count)
256 {
257 tiler_prefix->draw_mode = draw_mode;
258
259 unsigned draw_flags = 0;
260
261 if (panfrost_writes_point_size(ctx))
262 draw_flags |= MALI_DRAW_VARYING_SIZE;
263
264 if (info->primitive_restart)
265 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
266
267 /* These don't make much sense */
268
269 draw_flags |= 0x3000;
270
271 if (info->index_size) {
272 unsigned min_index = 0, max_index = 0;
273
274 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
275 info,
276 &min_index,
277 &max_index);
278
279 /* Use the corresponding values */
280 *vertex_count = max_index - min_index + 1;
281 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
282 tiler_prefix->offset_bias_correction = -min_index;
283 tiler_prefix->index_count = MALI_POSITIVE(info->count);
284 draw_flags |= panfrost_translate_index_size(info->index_size);
285 } else {
286 tiler_prefix->indices = 0;
287 *vertex_count = ctx->vertex_count;
288 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
289 tiler_prefix->offset_bias_correction = 0;
290 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
291 }
292
293 tiler_prefix->unknown_draw = draw_flags;
294
295 /* Encode the padded vertex count */
296
297 if (info->instance_count > 1) {
298 *padded_count = panfrost_padded_vertex_count(*vertex_count);
299
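/* The padded vertex count is handed to the hardware as a shift/odd pair,
 * i.e. padded == (2 * odd + 1) << shift; the ctz/shift arithmetic below
 * computes exactly that decomposition. */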
300 unsigned shift = __builtin_ctz(ctx->padded_count);
301 unsigned k = ctx->padded_count >> (shift + 1);
302
303 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
304 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
305 } else {
306 *padded_count = *vertex_count;
307
308 /* Reset instancing state */
309 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
310 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
311 }
312 }
313
314 static void
315 panfrost_shader_meta_init(struct panfrost_context *ctx,
316 enum pipe_shader_type st,
317 struct mali_shader_meta *meta)
318 {
319 const struct panfrost_device *dev = pan_device(ctx->base.screen);
320 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
321
322 memset(meta, 0, sizeof(*meta));
323 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
324 meta->attribute_count = ss->attribute_count;
325 meta->varying_count = ss->varying_count;
326 meta->texture_count = ctx->sampler_view_count[st];
327 meta->sampler_count = ctx->sampler_count[st];
328
329 if (dev->quirks & IS_BIFROST) {
330 meta->bifrost1.unk1 = 0x800200;
331 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
332 meta->bifrost2.preload_regs = 0xC0;
333 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
334 ss->uniform_cutoff);
335 } else {
336 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
337 ss->uniform_cutoff);
338 meta->midgard1.work_count = ss->work_reg_count;
339 meta->midgard1.flags_hi = 0x8; /* XXX */
340 meta->midgard1.flags_lo = 0x220;
341 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
342 }
343
344 }
345
346 static unsigned
347 panfrost_translate_compare_func(enum pipe_compare_func in)
348 {
349 switch (in) {
350 case PIPE_FUNC_NEVER:
351 return MALI_FUNC_NEVER;
352
353 case PIPE_FUNC_LESS:
354 return MALI_FUNC_LESS;
355
356 case PIPE_FUNC_EQUAL:
357 return MALI_FUNC_EQUAL;
358
359 case PIPE_FUNC_LEQUAL:
360 return MALI_FUNC_LEQUAL;
361
362 case PIPE_FUNC_GREATER:
363 return MALI_FUNC_GREATER;
364
365 case PIPE_FUNC_NOTEQUAL:
366 return MALI_FUNC_NOTEQUAL;
367
368 case PIPE_FUNC_GEQUAL:
369 return MALI_FUNC_GEQUAL;
370
371 case PIPE_FUNC_ALWAYS:
372 return MALI_FUNC_ALWAYS;
373
374 default:
375 unreachable("Invalid func");
376 }
377 }
378
379 static unsigned
380 panfrost_translate_stencil_op(enum pipe_stencil_op in)
381 {
382 switch (in) {
383 case PIPE_STENCIL_OP_KEEP:
384 return MALI_STENCIL_KEEP;
385
386 case PIPE_STENCIL_OP_ZERO:
387 return MALI_STENCIL_ZERO;
388
389 case PIPE_STENCIL_OP_REPLACE:
390 return MALI_STENCIL_REPLACE;
391
392 case PIPE_STENCIL_OP_INCR:
393 return MALI_STENCIL_INCR;
394
395 case PIPE_STENCIL_OP_DECR:
396 return MALI_STENCIL_DECR;
397
398 case PIPE_STENCIL_OP_INCR_WRAP:
399 return MALI_STENCIL_INCR_WRAP;
400
401 case PIPE_STENCIL_OP_DECR_WRAP:
402 return MALI_STENCIL_DECR_WRAP;
403
404 case PIPE_STENCIL_OP_INVERT:
405 return MALI_STENCIL_INVERT;
406
407 default:
408 unreachable("Invalid stencil op");
409 }
410 }
411
412 static unsigned
413 translate_tex_wrap(enum pipe_tex_wrap w)
414 {
415 switch (w) {
416 case PIPE_TEX_WRAP_REPEAT:
417 return MALI_WRAP_REPEAT;
418
419 case PIPE_TEX_WRAP_CLAMP:
420 return MALI_WRAP_CLAMP;
421
422 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
423 return MALI_WRAP_CLAMP_TO_EDGE;
424
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
426 return MALI_WRAP_CLAMP_TO_BORDER;
427
428 case PIPE_TEX_WRAP_MIRROR_REPEAT:
429 return MALI_WRAP_MIRRORED_REPEAT;
430
431 case PIPE_TEX_WRAP_MIRROR_CLAMP:
432 return MALI_WRAP_MIRRORED_CLAMP;
433
434 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
435 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
438 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
439
440 default:
441 unreachable("Invalid wrap");
442 }
443 }
444
445 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
446 struct mali_sampler_descriptor *hw)
447 {
448 unsigned func = panfrost_translate_compare_func(cso->compare_func);
449 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
450 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
451 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
452 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
453 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
454 unsigned mip_filter = mip_linear ?
455 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
456 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
457
458 *hw = (struct mali_sampler_descriptor) {
459 .filter_mode = min_filter | mag_filter | mip_filter |
460 normalized,
461 .wrap_s = translate_tex_wrap(cso->wrap_s),
462 .wrap_t = translate_tex_wrap(cso->wrap_t),
463 .wrap_r = translate_tex_wrap(cso->wrap_r),
464 .compare_func = panfrost_flip_compare_func(func),
465 .border_color = {
466 cso->border_color.f[0],
467 cso->border_color.f[1],
468 cso->border_color.f[2],
469 cso->border_color.f[3]
470 },
471 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
472 .max_lod = FIXED_16(cso->max_lod, false),
473 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
474 .seamless_cube_map = cso->seamless_cube_map,
475 };
476
477 /* If necessary, we disable mipmapping in the sampler descriptor by
478 * clamping the LOD as tight as possible (from 0 to epsilon,
479 * essentially -- remember these are fixed point numbers, so
480 * epsilon=1/256) */
481
482 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
483 hw->max_lod = hw->min_lod + 1;
484 }
485
486 static void
487 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
488 struct mali_stencil_test *out)
489 {
490 out->ref = 0; /* Gallium gets it from elsewhere */
491
492 out->mask = in->valuemask;
493 out->func = panfrost_translate_compare_func(in->func);
494 out->sfail = panfrost_translate_stencil_op(in->fail_op);
495 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
496 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
497 }
498
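/* Fill in the rasterizer-dependent parts of the fragment shader descriptor:
 * the MSAA enables and the polygon offset (depth bias) units and factor.
 * Without a bound rasterizer, fall back to MSAA off and zero bias. */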
499 static void
500 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
501 struct mali_shader_meta *fragmeta)
502 {
503 if (!ctx->rasterizer) {
504 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
505 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
506 fragmeta->depth_units = 0.0f;
507 fragmeta->depth_factor = 0.0f;
508 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
509 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
510 return;
511 }
512
513 bool msaa = ctx->rasterizer->base.multisample;
514
515 /* TODO: Sample size */
516 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
517 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
518 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
519 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
520
521 /* XXX: Which bit is which? Does this maybe allow offsetting non-triangles? */
522
523 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
524 ctx->rasterizer->base.offset_tri);
525 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
526 ctx->rasterizer->base.offset_tri);
527 }
528
529 static void
530 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
531 struct mali_shader_meta *fragmeta)
532 {
533 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
534 int zfunc = PIPE_FUNC_ALWAYS;
535
536 if (!zsa) {
537 struct pipe_stencil_state default_stencil = {
538 .enabled = 0,
539 .func = PIPE_FUNC_ALWAYS,
540 .fail_op = MALI_STENCIL_KEEP,
541 .zfail_op = MALI_STENCIL_KEEP,
542 .zpass_op = MALI_STENCIL_KEEP,
543 .writemask = 0xFF,
544 .valuemask = 0xFF
545 };
546
547 panfrost_make_stencil_state(&default_stencil,
548 &fragmeta->stencil_front);
549 fragmeta->stencil_mask_front = default_stencil.writemask;
550 fragmeta->stencil_back = fragmeta->stencil_front;
551 fragmeta->stencil_mask_back = default_stencil.writemask;
552 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
553 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
554 } else {
555 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
556 zsa->stencil[0].enabled);
557 panfrost_make_stencil_state(&zsa->stencil[0],
558 &fragmeta->stencil_front);
559 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
560 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
561
562 /* If back-stencil is not enabled, use the front values */
563
564 if (zsa->stencil[1].enabled) {
565 panfrost_make_stencil_state(&zsa->stencil[1],
566 &fragmeta->stencil_back);
567 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
568 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
569 } else {
570 fragmeta->stencil_back = fragmeta->stencil_front;
571 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
572 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
573 }
574
575 if (zsa->depth.enabled)
576 zfunc = zsa->depth.func;
577
578 /* Depth state (TODO: Refactor) */
579
580 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
581 zsa->depth.writemask);
582 }
583
584 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
585 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
586 }
587
588 static void
589 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
590 struct mali_shader_meta *fragmeta,
591 struct midgard_blend_rt *rts)
592 {
593 const struct panfrost_device *dev = pan_device(ctx->base.screen);
594
595 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
596 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
597 !ctx->blend->base.dither);
598
599 /* Get blending setup */
600 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
601
602 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
603 unsigned shader_offset = 0;
604 struct panfrost_bo *shader_bo = NULL;
605
606 for (unsigned c = 0; c < rt_count; ++c)
607 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
608 &shader_offset);
609
610 /* If there is a blend shader, work registers are shared. XXX: opt */
611
612 for (unsigned c = 0; c < rt_count; ++c) {
613 if (blend[c].is_shader)
614 fragmeta->midgard1.work_count = 16;
615 }
616
617 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
618 * copied to the appended blend_meta (by convention), but this is the
619 * field actually read by the hardware. (Or maybe both are read...?).
620 * Specify the last RTi with a blend shader. */
621
622 fragmeta->blend.shader = 0;
623
624 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
625 if (!blend[rt].is_shader)
626 continue;
627
628 fragmeta->blend.shader = blend[rt].shader.gpu |
629 blend[rt].shader.first_tag;
630 break;
631 }
632
633 if (dev->quirks & MIDGARD_SFBD) {
634 /* On platforms with only a single render target (SFBD), the blend
635 * information is inside the shader meta itself. We additionally
636 * need to signal CAN_DISCARD for nontrivial blend modes (so
637 * we're able to read back the destination buffer) */
638
639 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
640 blend[0].is_shader);
641
642 if (!blend[0].is_shader) {
643 fragmeta->blend.equation = *blend[0].equation.equation;
644 fragmeta->blend.constant = blend[0].equation.constant;
645 }
646
647 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
648 !blend[0].no_blending);
649 return;
650 }
651
652 /* Additional blend descriptor tacked on for jobs using MFBD */
653
654 for (unsigned i = 0; i < rt_count; ++i) {
655 rts[i].flags = 0x200;
656
657 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
658 (ctx->pipe_framebuffer.cbufs[i]) &&
659 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
660
661 SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
662 SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
663 SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
664 SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
665
666 if (blend[i].is_shader) {
667 rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
668 } else {
669 rts[i].blend.equation = *blend[i].equation.equation;
670 rts[i].blend.constant = blend[i].equation.constant;
671 }
672 }
673 }
674
675 static void
676 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
677 struct mali_shader_meta *fragmeta,
678 struct midgard_blend_rt *rts)
679 {
680 const struct panfrost_device *dev = pan_device(ctx->base.screen);
681 struct panfrost_shader_state *fs;
682
683 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
684
685 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
686 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
687 fragmeta->unknown2_4 = 0x4e0;
688
689 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
690 * is required (independent of 32-bit/64-bit descriptors), or why it's
691 * not used on later GPU revisions. Otherwise, all shader jobs fault on
692 * these earlier chips (perhaps this is a chicken bit of some kind).
693 * More investigation is needed. */
694
695 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
696
697 /* Depending on whether it's legal to do so in the given shader, we try to
698 * enable early-z testing (or forward-pixel kill?) */
699
700 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
701 !fs->can_discard && !fs->writes_depth);
702
703 /* Add the writes Z/S flags if needed. */
704 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
705 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
706
707 /* Any time texturing is used, derivatives are implicitly calculated,
708 * so we need to enable helper invocations */
709
710 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
711 fs->helper_invocations);
712
713 /* CAN_DISCARD should be set if the fragment shader possibly contains a
714 * 'discard' instruction. It is likely related to optimizations around
715 * forward-pixel kill, as per "Mali Performance 3: Is
716 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
717
718 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
719 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
720
721 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
722 panfrost_frag_meta_zsa_update(ctx, fragmeta);
723 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
724 }
725
726 void
727 panfrost_emit_shader_meta(struct panfrost_batch *batch,
728 enum pipe_shader_type st,
729 struct mali_vertex_tiler_postfix *postfix)
730 {
731 struct panfrost_context *ctx = batch->ctx;
732 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
733
734 if (!ss) {
735 postfix->shader = 0;
736 return;
737 }
738
739 struct mali_shader_meta meta;
740
741 panfrost_shader_meta_init(ctx, st, &meta);
742
743 /* Add the shader BO to the batch. */
744 panfrost_batch_add_bo(batch, ss->bo,
745 PAN_BO_ACCESS_PRIVATE |
746 PAN_BO_ACCESS_READ |
747 panfrost_bo_access_for_stage(st));
748
749 mali_ptr shader_ptr;
750
751 if (st == PIPE_SHADER_FRAGMENT) {
752 struct panfrost_device *dev = pan_device(ctx->base.screen);
753 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
754 size_t desc_size = sizeof(meta);
755 struct midgard_blend_rt rts[4];
756 struct panfrost_transfer xfer;
757
758 assert(rt_count <= ARRAY_SIZE(rts));
759
760 panfrost_frag_shader_meta_init(ctx, &meta, rts);
761
762 if (!(dev->quirks & MIDGARD_SFBD))
763 desc_size += sizeof(*rts) * rt_count;
764
765 xfer = panfrost_allocate_transient(batch, desc_size);
766
767 memcpy(xfer.cpu, &meta, sizeof(meta));
768 memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
769
770 shader_ptr = xfer.gpu;
771 } else {
772 shader_ptr = panfrost_upload_transient(batch, &meta,
773 sizeof(meta));
774 }
775
776 postfix->shader = shader_ptr;
777 }
778
779 static void
780 panfrost_mali_viewport_init(struct panfrost_context *ctx,
781 struct mali_viewport *mvp)
782 {
783 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
784
785 /* Clip bounds are encoded as floats. The viewport itself is encoded as
786 * (somewhat) asymmetric ints. */
787
788 const struct pipe_scissor_state *ss = &ctx->scissor;
789
790 memset(mvp, 0, sizeof(*mvp));
791
792 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
793 * each direction. Clipping to the viewport in theory should work, but
794 * in practice causes issues when we're not explicitly trying to
795 * scissor */
796
797 *mvp = (struct mali_viewport) {
798 .clip_minx = -INFINITY,
799 .clip_miny = -INFINITY,
800 .clip_maxx = INFINITY,
801 .clip_maxy = INFINITY,
802 };
803
804 /* Always scissor to the viewport by default. */
805 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
806 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
807
808 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
809 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
810
811 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
812 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
813
814 /* Apply the scissor test */
815
816 unsigned minx, miny, maxx, maxy;
817
818 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
819 minx = MAX2(ss->minx, vp_minx);
820 miny = MAX2(ss->miny, vp_miny);
821 maxx = MIN2(ss->maxx, vp_maxx);
822 maxy = MIN2(ss->maxy, vp_maxy);
823 } else {
824 minx = vp_minx;
825 miny = vp_miny;
826 maxx = vp_maxx;
827 maxy = vp_maxy;
828 }
829
830 /* Hardware needs the min/max to be strictly ordered, so flip if we
831 * need to. The viewport transformation in the vertex shader will
832 * handle the negatives if we don't */
833
834 if (miny > maxy) {
835 unsigned temp = miny;
836 miny = maxy;
837 maxy = temp;
838 }
839
840 if (minx > maxx) {
841 unsigned temp = minx;
842 minx = maxx;
843 maxx = temp;
844 }
845
846 if (minz > maxz) {
847 float temp = minz;
848 minz = maxz;
849 maxz = temp;
850 }
851
852 /* Clamp to the framebuffer size as a last check */
853
854 minx = MIN2(ctx->pipe_framebuffer.width, minx);
855 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
856
857 miny = MIN2(ctx->pipe_framebuffer.height, miny);
858 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
859
860 /* Upload */
861
862 mvp->viewport0[0] = minx;
863 mvp->viewport1[0] = MALI_POSITIVE(maxx);
864
865 mvp->viewport0[1] = miny;
866 mvp->viewport1[1] = MALI_POSITIVE(maxy);
867
868 mvp->clip_minz = minz;
869 mvp->clip_maxz = maxz;
870 }
871
872 void
873 panfrost_emit_viewport(struct panfrost_batch *batch,
874 struct mali_vertex_tiler_postfix *tiler_postfix)
875 {
876 struct panfrost_context *ctx = batch->ctx;
877 struct mali_viewport mvp;
878
879 panfrost_mali_viewport_init(batch->ctx, &mvp);
880
881 /* Update the job, unless we're doing wallpapering (whose lack of
882 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
883 * just... be faster :) */
884
885 if (!ctx->wallpaper_batch)
886 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
887 mvp.viewport0[1],
888 mvp.viewport1[0] + 1,
889 mvp.viewport1[1] + 1);
890
891 tiler_postfix->viewport = panfrost_upload_transient(batch, &mvp,
892 sizeof(mvp));
893 }
894
895 static mali_ptr
896 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
897 enum pipe_shader_type st,
898 struct panfrost_constant_buffer *buf,
899 unsigned index)
900 {
901 struct pipe_constant_buffer *cb = &buf->cb[index];
902 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
903
904 if (rsrc) {
905 panfrost_batch_add_bo(batch, rsrc->bo,
906 PAN_BO_ACCESS_SHARED |
907 PAN_BO_ACCESS_READ |
908 panfrost_bo_access_for_stage(st));
909
910 /* Alignment guaranteed by
911 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
912 return rsrc->bo->gpu + cb->buffer_offset;
913 } else if (cb->user_buffer) {
914 return panfrost_upload_transient(batch,
915 cb->user_buffer +
916 cb->buffer_offset,
917 cb->buffer_size);
918 } else {
919 unreachable("No constant buffer");
920 }
921 }
922
923 struct sysval_uniform {
924 union {
925 float f[4];
926 int32_t i[4];
927 uint32_t u[4];
928 uint64_t du[2];
929 };
930 };
931
932 static void
933 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
934 struct sysval_uniform *uniform)
935 {
936 struct panfrost_context *ctx = batch->ctx;
937 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
938
939 uniform->f[0] = vp->scale[0];
940 uniform->f[1] = vp->scale[1];
941 uniform->f[2] = vp->scale[2];
942 }
943
944 static void
945 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
946 struct sysval_uniform *uniform)
947 {
948 struct panfrost_context *ctx = batch->ctx;
949 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
950
951 uniform->f[0] = vp->translate[0];
952 uniform->f[1] = vp->translate[1];
953 uniform->f[2] = vp->translate[2];
954 }
955
956 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
957 enum pipe_shader_type st,
958 unsigned int sysvalid,
959 struct sysval_uniform *uniform)
960 {
961 struct panfrost_context *ctx = batch->ctx;
962 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
963 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
964 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
965 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
966
967 assert(dim);
968 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
969
970 if (dim > 1)
971 uniform->i[1] = u_minify(tex->texture->height0,
972 tex->u.tex.first_level);
973
974 if (dim > 2)
975 uniform->i[2] = u_minify(tex->texture->depth0,
976 tex->u.tex.first_level);
977
978 if (is_array)
979 uniform->i[dim] = tex->texture->array_size;
980 }
981
982 static void
983 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
984 enum pipe_shader_type st,
985 unsigned ssbo_id,
986 struct sysval_uniform *uniform)
987 {
988 struct panfrost_context *ctx = batch->ctx;
989
990 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
991 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
992
993 /* Compute address */
994 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
995
996 panfrost_batch_add_bo(batch, bo,
997 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
998 panfrost_bo_access_for_stage(st));
999
1000 /* Upload address and size as sysval */
1001 uniform->du[0] = bo->gpu + sb.buffer_offset;
1002 uniform->u[2] = sb.buffer_size;
1003 }
1004
1005 static void
1006 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1007 enum pipe_shader_type st,
1008 unsigned samp_idx,
1009 struct sysval_uniform *uniform)
1010 {
1011 struct panfrost_context *ctx = batch->ctx;
1012 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1013
1014 uniform->f[0] = sampl->min_lod;
1015 uniform->f[1] = sampl->max_lod;
1016 uniform->f[2] = sampl->lod_bias;
1017
1018 /* Even without any errata, Midgard represents "no mipmapping" as
1019 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1020 * panfrost_create_sampler_state which also explains our choice of
1021 * epsilon value (again to keep behaviour consistent) */
1022
1023 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1024 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1025 }
1026
1027 static void
1028 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1029 struct sysval_uniform *uniform)
1030 {
1031 struct panfrost_context *ctx = batch->ctx;
1032
1033 uniform->u[0] = ctx->compute_grid->grid[0];
1034 uniform->u[1] = ctx->compute_grid->grid[1];
1035 uniform->u[2] = ctx->compute_grid->grid[2];
1036 }
1037
1038 static void
1039 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1040 struct panfrost_shader_state *ss,
1041 enum pipe_shader_type st)
1042 {
1043 struct sysval_uniform *uniforms = (void *)buf;
1044
1045 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1046 int sysval = ss->sysval[i];
1047
1048 switch (PAN_SYSVAL_TYPE(sysval)) {
1049 case PAN_SYSVAL_VIEWPORT_SCALE:
1050 panfrost_upload_viewport_scale_sysval(batch,
1051 &uniforms[i]);
1052 break;
1053 case PAN_SYSVAL_VIEWPORT_OFFSET:
1054 panfrost_upload_viewport_offset_sysval(batch,
1055 &uniforms[i]);
1056 break;
1057 case PAN_SYSVAL_TEXTURE_SIZE:
1058 panfrost_upload_txs_sysval(batch, st,
1059 PAN_SYSVAL_ID(sysval),
1060 &uniforms[i]);
1061 break;
1062 case PAN_SYSVAL_SSBO:
1063 panfrost_upload_ssbo_sysval(batch, st,
1064 PAN_SYSVAL_ID(sysval),
1065 &uniforms[i]);
1066 break;
1067 case PAN_SYSVAL_NUM_WORK_GROUPS:
1068 panfrost_upload_num_work_groups_sysval(batch,
1069 &uniforms[i]);
1070 break;
1071 case PAN_SYSVAL_SAMPLER:
1072 panfrost_upload_sampler_sysval(batch, st,
1073 PAN_SYSVAL_ID(sysval),
1074 &uniforms[i]);
1075 break;
1076 default:
1077 assert(0);
1078 }
1079 }
1080 }
1081
1082 static const void *
1083 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1084 unsigned index)
1085 {
1086 struct pipe_constant_buffer *cb = &buf->cb[index];
1087 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1088
1089 if (rsrc)
1090 return rsrc->bo->cpu;
1091 else if (cb->user_buffer)
1092 return cb->user_buffer;
1093 else
1094 unreachable("No constant buffer");
1095 }
1096
1097 void
1098 panfrost_emit_const_buf(struct panfrost_batch *batch,
1099 enum pipe_shader_type stage,
1100 struct mali_vertex_tiler_postfix *postfix)
1101 {
1102 struct panfrost_context *ctx = batch->ctx;
1103 struct panfrost_shader_variants *all = ctx->shader[stage];
1104
1105 if (!all)
1106 return;
1107
1108 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1109
1110 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1111
1112 /* Uniforms are implicitly UBO #0 */
1113 bool has_uniforms = buf->enabled_mask & (1 << 0);
1114
1115 /* Allocate room for the sysval and the uniforms */
1116 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1117 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1118 size_t size = sys_size + uniform_size;
1119 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1120 size);
1121
1122 /* Upload sysvals requested by the shader */
1123 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1124
1125 /* Upload uniforms */
1126 if (has_uniforms && uniform_size) {
1127 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1128 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1129 }
1130
1131 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1132 * uploaded */
1133
1134 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1135 assert(ubo_count >= 1);
1136
1137 size_t sz = sizeof(uint64_t) * ubo_count;
1138 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1139 int uniform_count = ss->uniform_count;
1140
1141 /* Upload uniforms as a UBO */
1142 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1143
1144 /* The rest are honest-to-goodness UBOs */
1145
1146 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1147 size_t usz = buf->cb[ubo].buffer_size;
1148 bool enabled = buf->enabled_mask & (1 << ubo);
1149 bool empty = usz == 0;
1150
1151 if (!enabled || empty) {
1152 /* Stub out disabled UBOs to catch accesses */
1153 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1154 continue;
1155 }
1156
1157 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1158 buf, ubo);
1159
1160 unsigned bytes_per_field = 16;
1161 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1162 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1163 }
1164
1165 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1166 postfix->uniforms = transfer.gpu;
1167 postfix->uniform_buffers = ubufs;
1168
1169 buf->dirty_mask = 0;
1170 }
1171
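/* For compute jobs, size the shared memory for the whole dispatch: a
 * power-of-two allocation per workgroup, multiplied across the grid (the
 * purpose of the extra factor of 4 is unclear). */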
1172 void
1173 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1174 const struct pipe_grid_info *info,
1175 struct midgard_payload_vertex_tiler *vtp)
1176 {
1177 struct panfrost_context *ctx = batch->ctx;
1178 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1179 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1180 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1181 128));
1182 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1183 info->grid[2] * 4;
1184 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1185 shared_size,
1186 1);
1187
1188 struct mali_shared_memory shared = {
1189 .shared_memory = bo->gpu,
1190 .shared_workgroup_count =
1191 util_logbase2_ceil(info->grid[0]) +
1192 util_logbase2_ceil(info->grid[1]) +
1193 util_logbase2_ceil(info->grid[2]),
1194 .shared_unk1 = 0x2,
1195 .shared_shift = util_logbase2(single_size) - 1
1196 };
1197
1198 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1199 sizeof(shared));
1200 }
1201
1202 static mali_ptr
1203 panfrost_get_tex_desc(struct panfrost_batch *batch,
1204 enum pipe_shader_type st,
1205 struct panfrost_sampler_view *view)
1206 {
1207 if (!view)
1208 return (mali_ptr) 0;
1209
1210 struct pipe_sampler_view *pview = &view->base;
1211 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1212
1213 /* Add the BO to the job so it's retained until the job is done. */
1214
1215 panfrost_batch_add_bo(batch, rsrc->bo,
1216 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1217 panfrost_bo_access_for_stage(st));
1218
1219 panfrost_batch_add_bo(batch, view->bo,
1220 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1221 panfrost_bo_access_for_stage(st));
1222
1223 return view->bo->gpu;
1224 }
1225
1226 void
1227 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1228 enum pipe_shader_type stage,
1229 struct mali_vertex_tiler_postfix *postfix)
1230 {
1231 struct panfrost_context *ctx = batch->ctx;
1232
1233 if (!ctx->sampler_view_count[stage])
1234 return;
1235
1236 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1237
1238 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1239 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1240 ctx->sampler_views[stage][i]);
1241
1242 postfix->texture_trampoline = panfrost_upload_transient(batch,
1243 trampolines,
1244 sizeof(uint64_t) *
1245 ctx->sampler_view_count[stage]);
1246 }
1247
1248 void
1249 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1250 enum pipe_shader_type stage,
1251 struct mali_vertex_tiler_postfix *postfix)
1252 {
1253 struct panfrost_context *ctx = batch->ctx;
1254
1255 if (!ctx->sampler_count[stage])
1256 return;
1257
1258 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1259 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1260 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1261 transfer_size);
1262 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1263
1264 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1265 desc[i] = ctx->samplers[stage][i]->hw;
1266
1267 postfix->sampler_descriptor = transfer.gpu;
1268 }
1269
1270 void
1271 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1272 struct mali_vertex_tiler_postfix *vertex_postfix)
1273 {
1274 struct panfrost_context *ctx = batch->ctx;
1275
1276 if (!ctx->vertex)
1277 return;
1278
1279 struct panfrost_vertex_state *so = ctx->vertex;
1280
1281 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1282 vertex_postfix->attribute_meta = panfrost_upload_transient(batch, so->hw,
1283 sizeof(*so->hw) *
1284 PAN_MAX_ATTRIBUTE);
1285 }
1286
1287 void
1288 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1289 struct mali_vertex_tiler_postfix *vertex_postfix)
1290 {
1291 struct panfrost_context *ctx = batch->ctx;
1292 struct panfrost_vertex_state *so = ctx->vertex;
1293
1294 /* Staged mali_attr, and index into them. i =/= k, depending on the
1295 * vertex buffer mask and instancing. Twice as much room is allocated,
1296 * for a worst case of NPOT_DIVIDEs which take up an extra slot */
1297 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1298 unsigned k = 0;
1299
1300 for (unsigned i = 0; i < so->num_elements; ++i) {
1301 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1302 * means duplicating some vertex buffers (who cares? aside from
1303 * maybe some caching implications but I somehow doubt that
1304 * matters) */
1305
1306 struct pipe_vertex_element *elem = &so->pipe[i];
1307 unsigned vbi = elem->vertex_buffer_index;
1308
1309 /* The exception to 1:1 mapping is that we can have multiple
1310 * entries (NPOT divisors), so we fix up anyway */
1311
1312 so->hw[i].index = k;
1313
1314 if (!(ctx->vb_mask & (1 << vbi)))
1315 continue;
1316
1317 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1318 struct panfrost_resource *rsrc;
1319
1320 rsrc = pan_resource(buf->buffer.resource);
1321 if (!rsrc)
1322 continue;
1323
1324 /* Align to 64 bytes by masking off the lower bits. This
1325 * will be adjusted back when we fix up the src_offset in
1326 * mali_attr_meta */
1327
1328 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1329 mali_ptr addr = raw_addr & ~63;
1330 unsigned chopped_addr = raw_addr - addr;
1331
1332 /* Add a dependency of the batch on the vertex buffer */
1333 panfrost_batch_add_bo(batch, rsrc->bo,
1334 PAN_BO_ACCESS_SHARED |
1335 PAN_BO_ACCESS_READ |
1336 PAN_BO_ACCESS_VERTEX_TILER);
1337
1338 /* Set common fields */
1339 attrs[k].elements = addr;
1340 attrs[k].stride = buf->stride;
1341
1342 /* Since we advanced the base pointer, we shrink the buffer
1343 * size */
1344 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1345
1346 /* We need to add the extra size we masked off (for
1347 * correctness) so the data doesn't get clamped away */
1348 attrs[k].size += chopped_addr;
1349
1350 /* For non-instancing make sure we initialize */
1351 attrs[k].shift = attrs[k].extra_flags = 0;
1352
1353 /* Instancing uses a dramatically different code path than
1354 * linear, so dispatch for the actual emission now that the
1355 * common code is finished */
1356
1357 unsigned divisor = elem->instance_divisor;
1358
1359 if (divisor && ctx->instance_count == 1) {
1360 /* Silly corner case where there's a divisor(=1) but
1361 * there's no legitimate instancing. So we want *every*
1362 * attribute to be the same. So set stride to zero so
1363 * we don't go anywhere. */
1364
1365 attrs[k].size = attrs[k].stride + chopped_addr;
1366 attrs[k].stride = 0;
1367 attrs[k++].elements |= MALI_ATTR_LINEAR;
1368 } else if (ctx->instance_count <= 1) {
1369 /* Normal, non-instanced attributes */
1370 attrs[k++].elements |= MALI_ATTR_LINEAR;
1371 } else {
1372 unsigned instance_shift = vertex_postfix->instance_shift;
1373 unsigned instance_odd = vertex_postfix->instance_odd;
1374
1375 k += panfrost_vertex_instanced(ctx->padded_count,
1376 instance_shift,
1377 instance_odd,
1378 divisor, &attrs[k]);
1379 }
1380 }
1381
1382 /* Add special gl_VertexID/gl_InstanceID buffers */
1383
1384 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1385 so->hw[PAN_VERTEX_ID].index = k++;
1386 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1387 so->hw[PAN_INSTANCE_ID].index = k++;
1388
1389 /* Upload whatever we emitted and go */
1390
1391 vertex_postfix->attributes = panfrost_upload_transient(batch, attrs,
1392 k * sizeof(*attrs));
1393 }
1394
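/* Allocate a transient linear buffer for a varying of the given stride and
 * vertex count, fill in the attribute record, and return the GPU address so
 * the corresponding attribute metadata can point at it. */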
1395 static mali_ptr
1396 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1397 unsigned stride, unsigned count)
1398 {
1399 /* Fill out the descriptor */
1400 slot->stride = stride;
1401 slot->size = stride * count;
1402 slot->shift = slot->extra_flags = 0;
1403
1404 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1405 slot->size);
1406
1407 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1408
1409 return transfer.gpu;
1410 }
1411
1412 static void
1413 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1414 unsigned stride, unsigned offset, unsigned count,
1415 struct pipe_stream_output_target *target)
1416 {
1417 /* Fill out the descriptor */
1418 slot->stride = stride * 4;
1419 slot->shift = slot->extra_flags = 0;
1420
1421 unsigned max_size = target->buffer_size;
1422 unsigned expected_size = slot->stride * count;
1423
1424 slot->size = MIN2(max_size, expected_size);
1425
1426 /* Grab the BO and bind it to the batch */
1427 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1428
1429 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1430 * the perspective of the TILER and FRAGMENT.
1431 */
1432 panfrost_batch_add_bo(batch, bo,
1433 PAN_BO_ACCESS_SHARED |
1434 PAN_BO_ACCESS_RW |
1435 PAN_BO_ACCESS_VERTEX_TILER |
1436 PAN_BO_ACCESS_FRAGMENT);
1437
1438 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1439 slot->elements = addr;
1440 }
1441
1442 /* Given a shader and buffer indices, link varying metadata together */
1443
1444 static bool
1445 is_special_varying(gl_varying_slot loc)
1446 {
1447 switch (loc) {
1448 case VARYING_SLOT_POS:
1449 case VARYING_SLOT_PSIZ:
1450 case VARYING_SLOT_PNTC:
1451 case VARYING_SLOT_FACE:
1452 return true;
1453 default:
1454 return false;
1455 }
1456 }
1457
1458 static void
1459 panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss,
1460 signed general, signed gl_Position,
1461 signed gl_PointSize, signed gl_PointCoord,
1462 signed gl_FrontFacing)
1463 {
1464 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
1465
1466 for (unsigned i = 0; i < ss->varying_count; ++i) {
1467 gl_varying_slot location = ss->varyings_loc[i];
1468 int index = -1;
1469
1470 switch (location) {
1471 case VARYING_SLOT_POS:
1472 index = gl_Position;
1473 break;
1474 case VARYING_SLOT_PSIZ:
1475 index = gl_PointSize;
1476 break;
1477 case VARYING_SLOT_PNTC:
1478 index = gl_PointCoord;
1479 break;
1480 case VARYING_SLOT_FACE:
1481 index = gl_FrontFacing;
1482 break;
1483 default:
1484 index = general;
1485 break;
1486 }
1487
1488 assert(index >= 0);
1489 out[i].index = index;
1490 }
1491 }
1492
1493 static bool
1494 has_point_coord(unsigned mask, gl_varying_slot loc)
1495 {
1496 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1497 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1498 else if (loc == VARYING_SLOT_PNTC)
1499 return (mask & (1 << 8));
1500 else
1501 return false;
1502 }
1503
1504 /* Helpers for manipulating stream out information so we can pack varyings
1505 * accordingly. Compute the src_offset for a given captured varying */
1506
1507 static struct pipe_stream_output *
1508 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1509 {
1510 for (unsigned i = 0; i < info->num_outputs; ++i) {
1511 if (info->output[i].register_index == loc)
1512 return &info->output[i];
1513 }
1514
1515 unreachable("Varying not captured");
1516 }
1517
1518 /* TODO: Integers */
1519 static enum mali_format
1520 pan_xfb_format(unsigned nr_components)
1521 {
1522 switch (nr_components) {
1523 case 1: return MALI_R32F;
1524 case 2: return MALI_RG32F;
1525 case 3: return MALI_RGB32F;
1526 case 4: return MALI_RGBA32F;
1527 default: unreachable("Invalid format");
1528 }
1529 }
1530
1531 void
1532 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1533 unsigned vertex_count,
1534 struct mali_vertex_tiler_postfix *vertex_postfix,
1535 struct mali_vertex_tiler_postfix *tiler_postfix,
1536 union midgard_primitive_size *primitive_size)
1537 {
1538 /* Load the shaders */
1539 struct panfrost_context *ctx = batch->ctx;
1540 struct panfrost_shader_state *vs, *fs;
1541 unsigned int num_gen_varyings = 0;
1542 size_t vs_size, fs_size;
1543
1544 /* Allocate the varying descriptor */
1545
1546 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1547 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1548 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
1549 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
1550
1551 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
1552 vs_size +
1553 fs_size);
1554
1555 struct pipe_stream_output_info *so = &vs->stream_output;
1556
1557 /* Check if this varying is linked by us. This is the case for
1558 * general-purpose, non-captured varyings. If it is, link it. If it's
1559 * not, use the provided stream out information to determine the
1560 * offset, since it was already linked for us. */
1561
1562 for (unsigned i = 0; i < vs->varying_count; i++) {
1563 gl_varying_slot loc = vs->varyings_loc[i];
1564
1565 bool special = is_special_varying(loc);
1566 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1567
1568 if (captured) {
1569 struct pipe_stream_output *o = pan_get_so(so, loc);
1570
1571 unsigned dst_offset = o->dst_offset * 4; /* dwords */
1572 vs->varyings[i].src_offset = dst_offset;
1573 } else if (!special) {
1574 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
1575 }
1576 }
1577
1578 /* Conversely, we need to set src_offset for the captured varyings.
1579 * Here, the layout is defined by the stream out info, not us */
1580
1581 /* Link up with fragment varyings */
1582 bool reads_point_coord = fs->reads_point_coord;
1583
1584 for (unsigned i = 0; i < fs->varying_count; i++) {
1585 gl_varying_slot loc = fs->varyings_loc[i];
1586 unsigned src_offset;
1587 signed vs_idx = -1;
1588
1589 /* Link up */
1590 for (unsigned j = 0; j < vs->varying_count; ++j) {
1591 if (vs->varyings_loc[j] == loc) {
1592 vs_idx = j;
1593 break;
1594 }
1595 }
1596
1597 /* Either assign or reuse */
1598 if (vs_idx >= 0)
1599 src_offset = vs->varyings[vs_idx].src_offset;
1600 else
1601 src_offset = 16 * (num_gen_varyings++);
1602
1603 fs->varyings[i].src_offset = src_offset;
1604
1605 if (has_point_coord(fs->point_sprite_mask, loc))
1606 reads_point_coord = true;
1607 }
1608
1609 memcpy(trans.cpu, vs->varyings, vs_size);
1610 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
1611
1612 union mali_attr varyings[PIPE_MAX_ATTRIBS] = {0};
1613
1614 /* Figure out how many streamout buffers could be bound */
1615 unsigned so_count = ctx->streamout.num_targets;
1616 for (unsigned i = 0; i < vs->varying_count; i++) {
1617 gl_varying_slot loc = vs->varyings_loc[i];
1618
1619 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1620 if (!captured) continue;
1621
1622 struct pipe_stream_output *o = pan_get_so(so, loc);
1623 so_count = MAX2(so_count, o->output_buffer + 1);
1624 }
1625
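/* Assign buffer indices: streamout buffers come first, then the general
 * varying buffer, then one buffer per special varying that is actually used
 * (-1 marks unused specials). */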
1626 signed idx = so_count;
1627 signed general = idx++;
1628 signed gl_Position = idx++;
1629 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
1630 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
1631 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
1632 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
1633
1634 /* Emit the stream out buffers */
1635
1636 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1637 ctx->vertex_count);
1638
1639 for (unsigned i = 0; i < so_count; ++i) {
1640 if (i < ctx->streamout.num_targets) {
1641 panfrost_emit_streamout(batch, &varyings[i],
1642 so->stride[i],
1643 ctx->streamout.offsets[i],
1644 out_count,
1645 ctx->streamout.targets[i]);
1646 } else {
1647 /* Emit a dummy buffer */
1648 panfrost_emit_varyings(batch, &varyings[i],
1649 so->stride[i] * 4,
1650 out_count);
1651
1652 /* Clear the attribute type */
1653 varyings[i].elements &= ~0xF;
1654 }
1655 }
1656
1657 panfrost_emit_varyings(batch, &varyings[general],
1658 num_gen_varyings * 16,
1659 vertex_count);
1660
1661 mali_ptr varyings_p;
1662
1663 /* fp32 vec4 gl_Position */
1664 varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position],
1665 sizeof(float) * 4, vertex_count);
1666 tiler_postfix->position_varying = varyings_p;
1667
1668
1669 if (panfrost_writes_point_size(ctx)) {
1670 varyings_p = panfrost_emit_varyings(batch,
1671 &varyings[gl_PointSize],
1672 2, vertex_count);
1673 primitive_size->pointer = varyings_p;
1674 }
1675
1676 if (reads_point_coord)
1677 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
1678
1679 if (fs->reads_face)
1680 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
1681
1682 if (fs->reads_frag_coord)
1683 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
1684
1685 struct panfrost_device *device = pan_device(ctx->base.screen);
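/* Point coord, front facing and frag coord varyings aren't wired up on
 * Bifrost yet; assert so the unimplemented paths fail loudly instead of
 * emitting bogus records. */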
1686 assert(!(device->quirks & IS_BIFROST) || !(reads_point_coord || fs->reads_face || fs->reads_frag_coord));
1687
1688 /* Let's go ahead and link varying meta to the buffer in question, now
1689 * that that information is available. VARYING_SLOT_POS is mapped to
1690 * gl_FragCoord for fragment shaders but gl_Position for vertex shaders
1691 * */
1692
1693 panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position,
1694 gl_PointSize, gl_PointCoord,
1695 gl_FrontFacing);
1696
1697 panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general,
1698 gl_FragCoord, gl_PointSize,
1699 gl_PointCoord, gl_FrontFacing);
1700
1701 /* Replace streamout */
1702
1703 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
1704 struct mali_attr_meta *ofs = ovs + vs->varying_count;
1705
1706 for (unsigned i = 0; i < vs->varying_count; i++) {
1707 gl_varying_slot loc = vs->varyings_loc[i];
1708
1709 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1710 if (!captured)
1711 continue;
1712
1713 struct pipe_stream_output *o = pan_get_so(so, loc);
1714 ovs[i].index = o->output_buffer;
1715
1716 /* Set the type appropriately. TODO: Integer varyings XXX */
1717 assert(o->stream == 0);
1718 ovs[i].format = pan_xfb_format(o->num_components);
1719 ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components);
1720
1721 /* Link to the fragment */
1722 signed fs_idx = -1;
1723
1724 /* Link up */
1725 for (unsigned j = 0; j < fs->varying_count; ++j) {
1726 if (fs->varyings_loc[j] == loc) {
1727 fs_idx = j;
1728 break;
1729 }
1730 }
1731
1732 if (fs_idx >= 0) {
1733 ofs[fs_idx].index = ovs[i].index;
1734 ofs[fs_idx].format = ovs[i].format;
1735 ofs[fs_idx].swizzle = ovs[i].swizzle;
1736 }
1737 }
1738
1739 /* Replace point sprite */
1740 for (unsigned i = 0; i < fs->varying_count; i++) {
1741 /* If we have a point sprite replacement, handle that here. We
1742 * have to translate location first. TODO: Flip y in shader.
1743 * We're already keying ... just time crunch .. */
1744
1745 if (has_point_coord(fs->point_sprite_mask,
1746 fs->varyings_loc[i])) {
1747 ofs[i].index = gl_PointCoord;
1748
1749 /* Swizzle out the z/w to 0/1 */
1750 ofs[i].format = MALI_RG16F;
1751 ofs[i].swizzle = panfrost_get_default_swizzle(2);
1752 }
1753 }
1754
1755 /* Fix up unaligned addresses */
1756 for (unsigned i = 0; i < so_count; ++i) {
1757 if (varyings[i].elements < MALI_RECORD_SPECIAL)
1758 continue;
1759
1760 unsigned align = (varyings[i].elements & 63);
1761
1762 /* While we're at it, the SO buffers are linear */
1763
1764 if (!align) {
1765 varyings[i].elements |= MALI_ATTR_LINEAR;
1766 continue;
1767 }
1768
1769 /* We need to adjust alignment */
1770 varyings[i].elements &= ~63;
1771 varyings[i].elements |= MALI_ATTR_LINEAR;
1772 varyings[i].size += align;
1773
1774 for (unsigned v = 0; v < vs->varying_count; ++v) {
1775 if (ovs[v].index != i)
1776 continue;
1777
1778 ovs[v].src_offset = vs->varyings[v].src_offset + align;
1779 }
1780
1781 for (unsigned f = 0; f < fs->varying_count; ++f) {
1782 if (ofs[f].index != i)
1783 continue;
1784
1785 ofs[f].src_offset = fs->varyings[f].src_offset + align;
1786 }
1787 }
1788
1789 varyings_p = panfrost_upload_transient(batch, varyings,
1790 idx * sizeof(*varyings));
1791 vertex_postfix->varyings = varyings_p;
1792 tiler_postfix->varyings = varyings_p;
1793
1794 vertex_postfix->varying_meta = trans.gpu;
1795 tiler_postfix->varying_meta = trans.gpu + vs_size;
1796 }
1797
1798 void
1799 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1800 struct mali_vertex_tiler_prefix *vertex_prefix,
1801 struct mali_vertex_tiler_postfix *vertex_postfix,
1802 struct mali_vertex_tiler_prefix *tiler_prefix,
1803 struct mali_vertex_tiler_postfix *tiler_postfix,
1804 union midgard_primitive_size *primitive_size)
1805 {
1806 struct panfrost_context *ctx = batch->ctx;
1807 struct panfrost_device *device = pan_device(ctx->base.screen);
1808 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1809 struct bifrost_payload_vertex bifrost_vertex = {0,};
1810 struct bifrost_payload_tiler bifrost_tiler = {0,};
1811 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1812 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1813 void *vp, *tp;
1814 size_t vp_size, tp_size;
1815
1816 if (device->quirks & IS_BIFROST) {
1817 bifrost_vertex.prefix = *vertex_prefix;
1818 bifrost_vertex.postfix = *vertex_postfix;
1819 vp = &bifrost_vertex;
1820 vp_size = sizeof(bifrost_vertex);
1821
1822 bifrost_tiler.prefix = *tiler_prefix;
1823 bifrost_tiler.tiler.primitive_size = *primitive_size;
1824 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1825 bifrost_tiler.postfix = *tiler_postfix;
1826 tp = &bifrost_tiler;
1827 tp_size = sizeof(bifrost_tiler);
1828 } else {
1829 midgard_vertex.prefix = *vertex_prefix;
1830 midgard_vertex.postfix = *vertex_postfix;
1831 vp = &midgard_vertex;
1832 vp_size = sizeof(midgard_vertex);
1833
1834 midgard_tiler.prefix = *tiler_prefix;
1835 midgard_tiler.postfix = *tiler_postfix;
1836 midgard_tiler.primitive_size = *primitive_size;
1837 tp = &midgard_tiler;
1838 tp_size = sizeof(midgard_tiler);
1839 }
1840
1841 if (wallpapering) {
1842 /* Inject in reverse order, with "predicted" job indices.
1843 * THIS IS A HACK XXX */
1844 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1845 batch->job_index + 2, tp, tp_size, true);
1846 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1847 vp, vp_size, true);
1848 return;
1849 }
1850
1851 /* If rasterizer discard is enabled, only submit the vertex job */
1852
1853 bool rasterizer_discard = ctx->rasterizer &&
1854 ctx->rasterizer->base.rasterizer_discard;
1855
1856 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1857 vp, vp_size, false);
1858
1859 if (rasterizer_discard)
1860 return;
1861
1862 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, tp_size,
1863 false);
1864 }
1865
1866 /* TODO: stop hardcoding this */
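/* Sample position table, uploaded as pairs of 16-bit coordinates; (128, 128)
 * presumably encodes the pixel centre. */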
1867 mali_ptr
1868 panfrost_emit_sample_locations(struct panfrost_batch *batch)
1869 {
1870 uint16_t locations[] = {
1871 128, 128,
1872 0, 256,
1873 0, 256,
1874 0, 256,
1875 0, 256,
1876 0, 256,
1877 0, 256,
1878 0, 256,
1879 0, 256,
1880 0, 256,
1881 0, 256,
1882 0, 256,
1883 0, 256,
1884 0, 256,
1885 0, 256,
1886 0, 256,
1887 0, 256,
1888 0, 256,
1889 0, 256,
1890 0, 256,
1891 0, 256,
1892 0, 256,
1893 0, 256,
1894 0, 256,
1895 0, 256,
1896 0, 256,
1897 0, 256,
1898 0, 256,
1899 0, 256,
1900 0, 256,
1901 0, 256,
1902 0, 256,
1903 128, 128,
1904 0, 0,
1905 0, 0,
1906 0, 0,
1907 0, 0,
1908 0, 0,
1909 0, 0,
1910 0, 0,
1911 0, 0,
1912 0, 0,
1913 0, 0,
1914 0, 0,
1915 0, 0,
1916 0, 0,
1917 0, 0,
1918 0, 0,
1919 };
1920
1921 return panfrost_upload_transient(batch, locations, 96 * sizeof(uint16_t));
1922 }