panfrost: Move panfrost_emit_vertex_data() to pan_cmdstream.c
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_vbuf.h"
27
28 #include "panfrost-quirks.h"
29
30 #include "pan_allocate.h"
31 #include "pan_bo.h"
32 #include "pan_cmdstream.h"
33 #include "pan_context.h"
34 #include "pan_job.h"
35
36 /* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
37 * framebuffer */
38
39 void
40 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
41 struct midgard_payload_vertex_tiler *vt)
42 {
43 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
44 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
45
46 /* If we haven't already, reserve space for the framebuffer */
47
48 if (!batch->framebuffer.gpu) {
49 unsigned size = (screen->quirks & MIDGARD_SFBD) ?
50 sizeof(struct mali_single_framebuffer) :
51 sizeof(struct mali_framebuffer);
52
53 batch->framebuffer = panfrost_allocate_transient(batch, size);
54
55 /* Tag the pointer */
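/* (The tag in the pointer's low bits presumably lets the hardware tell the
 * multi-target MFBD layout apart from the single-target SFBD one) */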
56 if (!(screen->quirks & MIDGARD_SFBD))
57 batch->framebuffer.gpu |= MALI_MFBD;
58 }
59
60 vt->postfix.shared_memory = batch->framebuffer.gpu;
61 }
62
63 void
64 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
65 struct midgard_payload_vertex_tiler *tp)
66 {
67 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
68
69 tp->gl_enables |= 0x7;
70 SET_BIT(tp->gl_enables, MALI_FRONT_CCW_TOP,
71 rasterizer && rasterizer->base.front_ccw);
72 SET_BIT(tp->gl_enables, MALI_CULL_FACE_FRONT,
73 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
74 SET_BIT(tp->gl_enables, MALI_CULL_FACE_BACK,
75 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
76 SET_BIT(tp->prefix.unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
77 rasterizer && rasterizer->base.flatshade_first);
78
79 if (!panfrost_writes_point_size(ctx)) {
80 bool points = tp->prefix.draw_mode == MALI_POINTS;
81 float val = 0.0f;
82
83 if (rasterizer)
84 val = points ?
85 rasterizer->base.point_size :
86 rasterizer->base.line_width;
87
88 tp->primitive_size.constant = val;
89 }
90 }
91
92 void
93 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
94 struct midgard_payload_vertex_tiler *tp)
95 {
96 SET_BIT(tp->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
97 if (ctx->occlusion_query)
98 tp->postfix.occlusion_counter = ctx->occlusion_query->bo->gpu;
99 else
100 tp->postfix.occlusion_counter = 0;
101 }
102
103 static unsigned
104 panfrost_translate_index_size(unsigned size)
105 {
106 switch (size) {
107 case 1:
108 return MALI_DRAW_INDEXED_UINT8;
109
110 case 2:
111 return MALI_DRAW_INDEXED_UINT16;
112
113 case 4:
114 return MALI_DRAW_INDEXED_UINT32;
115
116 default:
117 unreachable("Invalid index size");
118 }
119 }
120
121 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
122 * good for the duration of the draw (transient), though it may last longer.
123 * Also gets the bounds on the index buffer for the range accessed by the
124 * draw. We do these operations together because there are natural
125 * optimizations which require them to be together. */
126
127 static mali_ptr
128 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
129 const struct pipe_draw_info *info,
130 unsigned *min_index, unsigned *max_index)
131 {
132 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
133 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
134 off_t offset = info->start * info->index_size;
135 bool needs_indices = true;
136 mali_ptr out = 0;
137
138 if (info->max_index != ~0u) {
139 *min_index = info->min_index;
140 *max_index = info->max_index;
141 needs_indices = false;
142 }
143
144 if (!info->has_user_indices) {
145 /* Only resources can be directly mapped */
146 panfrost_batch_add_bo(batch, rsrc->bo,
147 PAN_BO_ACCESS_SHARED |
148 PAN_BO_ACCESS_READ |
149 PAN_BO_ACCESS_VERTEX_TILER);
150 out = rsrc->bo->gpu + offset;
151
152 /* Check the cache */
153 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
154 info->start,
155 info->count,
156 min_index,
157 max_index);
158 } else {
159 /* Otherwise, we need to upload to transient memory */
160 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
161 out = panfrost_upload_transient(batch, ibuf8 + offset,
162 info->count *
163 info->index_size);
164 }
165
166 if (needs_indices) {
167 /* Fallback */
168 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
169
170 if (!info->has_user_indices)
171 panfrost_minmax_cache_add(rsrc->index_cache,
172 info->start, info->count,
173 *min_index, *max_index);
174 }
175
176 return out;
177 }
178
179 void
180 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
181 const struct pipe_draw_info *info,
182 enum mali_draw_mode draw_mode,
183 struct midgard_payload_vertex_tiler *vp,
184 struct midgard_payload_vertex_tiler *tp,
185 unsigned *vertex_count,
186 unsigned *padded_count)
187 {
188 tp->prefix.draw_mode = draw_mode;
189
190 unsigned draw_flags = 0;
191
192 if (panfrost_writes_point_size(ctx))
193 draw_flags |= MALI_DRAW_VARYING_SIZE;
194
195 if (info->primitive_restart)
196 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
197
198 /* These don't make much sense */
199
200 draw_flags |= 0x3000;
201
202 if (info->index_size) {
203 unsigned min_index = 0, max_index = 0;
204
205 tp->prefix.indices = panfrost_get_index_buffer_bounded(ctx,
206 info,
207 &min_index,
208 &max_index);
209
210 /* Use the corresponding values */
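/* (Only vertices in [min_index, max_index] are processed: vertex_count covers
 * that window, offset_start points at its first vertex plus any index bias,
 * and offset_bias_correction shifts the fetched index values back down by
 * min_index) */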
211 *vertex_count = max_index - min_index + 1;
212 tp->offset_start = vp->offset_start = min_index + info->index_bias;
213 tp->prefix.offset_bias_correction = -min_index;
214 tp->prefix.index_count = MALI_POSITIVE(info->count);
215 draw_flags |= panfrost_translate_index_size(info->index_size);
216 } else {
217 tp->prefix.indices = 0;
218 *vertex_count = ctx->vertex_count;
219 tp->offset_start = vp->offset_start = info->start;
220 tp->prefix.offset_bias_correction = 0;
221 tp->prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
222 }
223
224 tp->prefix.unknown_draw = draw_flags;
225
226 /* Encode the padded vertex count */
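/* The hardware wants the padded count split into an odd factor and a
 * power-of-two shift, padded = (2k + 1) << shift; e.g. 28 = 7 << 2 is stored
 * as shift = 2, k = 3. Presumably this lets instance divisions be done with
 * shifts and a small multiply. */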
227
228 if (info->instance_count > 1) {
229 *padded_count = panfrost_padded_vertex_count(*vertex_count);
230
231 unsigned shift = __builtin_ctz(ctx->padded_count);
232 unsigned k = ctx->padded_count >> (shift + 1);
233
234 tp->instance_shift = vp->instance_shift = shift;
235 tp->instance_odd = vp->instance_odd = k;
236 } else {
237 *padded_count = *vertex_count;
238
239 /* Reset instancing state */
240 tp->instance_shift = vp->instance_shift = 0;
241 tp->instance_odd = vp->instance_odd = 0;
242 }
243 }
244
245 static void
246 panfrost_shader_meta_init(struct panfrost_context *ctx,
247 enum pipe_shader_type st,
248 struct mali_shader_meta *meta)
249 {
250 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
251
252 memset(meta, 0, sizeof(*meta));
253 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
254 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
255 ss->uniform_cutoff);
256 meta->midgard1.work_count = ss->work_reg_count;
257 meta->attribute_count = ss->attribute_count;
258 meta->varying_count = ss->varying_count;
259 meta->midgard1.flags_hi = 0x8; /* XXX */
260 meta->midgard1.flags_lo = 0x220;
261 meta->texture_count = ctx->sampler_view_count[st];
262 meta->sampler_count = ctx->sampler_count[st];
263 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
264 }
265
266 static unsigned
267 panfrost_translate_compare_func(enum pipe_compare_func in)
268 {
269 switch (in) {
270 case PIPE_FUNC_NEVER:
271 return MALI_FUNC_NEVER;
272
273 case PIPE_FUNC_LESS:
274 return MALI_FUNC_LESS;
275
276 case PIPE_FUNC_EQUAL:
277 return MALI_FUNC_EQUAL;
278
279 case PIPE_FUNC_LEQUAL:
280 return MALI_FUNC_LEQUAL;
281
282 case PIPE_FUNC_GREATER:
283 return MALI_FUNC_GREATER;
284
285 case PIPE_FUNC_NOTEQUAL:
286 return MALI_FUNC_NOTEQUAL;
287
288 case PIPE_FUNC_GEQUAL:
289 return MALI_FUNC_GEQUAL;
290
291 case PIPE_FUNC_ALWAYS:
292 return MALI_FUNC_ALWAYS;
293
294 default:
295 unreachable("Invalid func");
296 }
297 }
298
299 static unsigned
300 panfrost_translate_stencil_op(enum pipe_stencil_op in)
301 {
302 switch (in) {
303 case PIPE_STENCIL_OP_KEEP:
304 return MALI_STENCIL_KEEP;
305
306 case PIPE_STENCIL_OP_ZERO:
307 return MALI_STENCIL_ZERO;
308
309 case PIPE_STENCIL_OP_REPLACE:
310 return MALI_STENCIL_REPLACE;
311
312 case PIPE_STENCIL_OP_INCR:
313 return MALI_STENCIL_INCR;
314
315 case PIPE_STENCIL_OP_DECR:
316 return MALI_STENCIL_DECR;
317
318 case PIPE_STENCIL_OP_INCR_WRAP:
319 return MALI_STENCIL_INCR_WRAP;
320
321 case PIPE_STENCIL_OP_DECR_WRAP:
322 return MALI_STENCIL_DECR_WRAP;
323
324 case PIPE_STENCIL_OP_INVERT:
325 return MALI_STENCIL_INVERT;
326
327 default:
328 unreachable("Invalid stencil op");
329 }
330 }
331
332 static unsigned
333 translate_tex_wrap(enum pipe_tex_wrap w)
334 {
335 switch (w) {
336 case PIPE_TEX_WRAP_REPEAT:
337 return MALI_WRAP_REPEAT;
338
339 case PIPE_TEX_WRAP_CLAMP:
340 return MALI_WRAP_CLAMP;
341
342 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
343 return MALI_WRAP_CLAMP_TO_EDGE;
344
345 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
346 return MALI_WRAP_CLAMP_TO_BORDER;
347
348 case PIPE_TEX_WRAP_MIRROR_REPEAT:
349 return MALI_WRAP_MIRRORED_REPEAT;
350
351 case PIPE_TEX_WRAP_MIRROR_CLAMP:
352 return MALI_WRAP_MIRRORED_CLAMP;
353
354 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
356
357 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
359
360 default:
361 unreachable("Invalid wrap");
362 }
363 }
364
365 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
366 struct mali_sampler_descriptor *hw)
367 {
368 unsigned func = panfrost_translate_compare_func(cso->compare_func);
369 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
370 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
371 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
372 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
373 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
374 unsigned mip_filter = mip_linear ?
375 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
376 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
377
378 *hw = (struct mali_sampler_descriptor) {
379 .filter_mode = min_filter | mag_filter | mip_filter |
380 normalized,
381 .wrap_s = translate_tex_wrap(cso->wrap_s),
382 .wrap_t = translate_tex_wrap(cso->wrap_t),
383 .wrap_r = translate_tex_wrap(cso->wrap_r),
384 .compare_func = panfrost_flip_compare_func(func),
385 .border_color = {
386 cso->border_color.f[0],
387 cso->border_color.f[1],
388 cso->border_color.f[2],
389 cso->border_color.f[3]
390 },
391 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
392 .max_lod = FIXED_16(cso->max_lod, false),
393 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
394 .seamless_cube_map = cso->seamless_cube_map,
395 };
396
397 /* If necessary, we disable mipmapping in the sampler descriptor by
398 * clamping the LOD as tight as possible (from 0 to epsilon,
399 * essentially -- remember these are fixed point numbers, so
400 * epsilon=1/256) */
401
402 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
403 hw->max_lod = hw->min_lod + 1;
404 }
405
406 static void
407 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
408 struct mali_stencil_test *out)
409 {
410 out->ref = 0; /* Gallium gets it from elsewhere */
411
412 out->mask = in->valuemask;
413 out->func = panfrost_translate_compare_func(in->func);
414 out->sfail = panfrost_translate_stencil_op(in->fail_op);
415 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
416 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
417 }
418
419 static void
420 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
421 struct mali_shader_meta *fragmeta)
422 {
423 if (!ctx->rasterizer) {
424 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
425 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
426 fragmeta->depth_units = 0.0f;
427 fragmeta->depth_factor = 0.0f;
428 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
429 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
430 return;
431 }
432
433 bool msaa = ctx->rasterizer->base.multisample;
434
435 /* TODO: Sample size */
436 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
437 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
438 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
439 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
440
441 /* XXX: Which bit is which? Does this maybe allow offsetting non-tris? */
442
443 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
444 ctx->rasterizer->base.offset_tri);
445 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
446 ctx->rasterizer->base.offset_tri);
447 }
448
449 static void
450 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
451 struct mali_shader_meta *fragmeta)
452 {
453 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
454 int zfunc = PIPE_FUNC_ALWAYS;
455
456 if (!zsa) {
457 struct pipe_stencil_state default_stencil = {
458 .enabled = 0,
459 .func = PIPE_FUNC_ALWAYS,
460 .fail_op = MALI_STENCIL_KEEP,
461 .zfail_op = MALI_STENCIL_KEEP,
462 .zpass_op = MALI_STENCIL_KEEP,
463 .writemask = 0xFF,
464 .valuemask = 0xFF
465 };
466
467 panfrost_make_stencil_state(&default_stencil,
468 &fragmeta->stencil_front);
469 fragmeta->stencil_mask_front = default_stencil.writemask;
470 fragmeta->stencil_back = fragmeta->stencil_front;
471 fragmeta->stencil_mask_back = default_stencil.writemask;
472 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
473 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
474 } else {
475 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
476 zsa->stencil[0].enabled);
477 panfrost_make_stencil_state(&zsa->stencil[0],
478 &fragmeta->stencil_front);
479 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
480 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
481
482 /* If back-stencil is not enabled, use the front values */
483
484 if (zsa->stencil[1].enabled) {
485 panfrost_make_stencil_state(&zsa->stencil[1],
486 &fragmeta->stencil_back);
487 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
488 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
489 } else {
490 fragmeta->stencil_back = fragmeta->stencil_front;
491 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
492 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
493 }
494
495 if (zsa->depth.enabled)
496 zfunc = zsa->depth.func;
497
498 /* Depth state (TODO: Refactor) */
499
500 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
501 zsa->depth.writemask);
502 }
503
504 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
505 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
506 }
507
508 static void
509 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
510 struct mali_shader_meta *fragmeta,
511 struct midgard_blend_rt *rts)
512 {
513 const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
514
515 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
516 (screen->quirks & MIDGARD_SFBD) && ctx->blend &&
517 !ctx->blend->base.dither);
518
519 /* Get blending setup */
520 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
521
522 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
523 unsigned shader_offset = 0;
524 struct panfrost_bo *shader_bo = NULL;
525
526 for (unsigned c = 0; c < rt_count; ++c)
527 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
528 &shader_offset);
529
530 /* If there is a blend shader, work registers are shared. XXX: opt */
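/* (Midgard has 16 work registers total, so assume the blend shader may need
 * all of them) */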
531
532 for (unsigned c = 0; c < rt_count; ++c) {
533 if (blend[c].is_shader)
534 fragmeta->midgard1.work_count = 16;
535 }
536
537 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
538 * copied to the blend_meta appended (by convention), but this is the
539 * field actually read by the hardware. (Or maybe both are read...?).
540 * Specify the last RTi with a blend shader. */
541
542 fragmeta->blend.shader = 0;
543
544 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
545 if (!blend[rt].is_shader)
546 continue;
547
548 fragmeta->blend.shader = blend[rt].shader.gpu |
549 blend[rt].shader.first_tag;
550 break;
551 }
552
553 if (screen->quirks & MIDGARD_SFBD) {
554 /* On single render target (SFBD) platforms, the blend
555 * information lives inside the shader meta itself. We additionally
556 * need to signal CAN_DISCARD for nontrivial blend modes (so
557 * we're able to read back the destination buffer) */
558
559 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
560 blend[0].is_shader);
561
562 if (!blend[0].is_shader) {
563 fragmeta->blend.equation = *blend[0].equation.equation;
564 fragmeta->blend.constant = blend[0].equation.constant;
565 }
566
567 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
568 !blend[0].no_blending);
569 return;
570 }
571
572 /* Additional blend descriptor tacked on for jobs using MFBD */
573
574 for (unsigned i = 0; i < rt_count; ++i) {
575 rts[i].flags = 0x200;
576
577 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
578 (ctx->pipe_framebuffer.cbufs[i]) &&
579 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
580
581 SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
582 SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
583 SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
584 SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
585
586 if (blend[i].is_shader) {
587 rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
588 } else {
589 rts[i].blend.equation = *blend[i].equation.equation;
590 rts[i].blend.constant = blend[i].equation.constant;
591 }
592 }
593 }
594
595 static void
596 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
597 struct mali_shader_meta *fragmeta,
598 struct midgard_blend_rt *rts)
599 {
600 const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
601 struct panfrost_shader_state *fs;
602
603 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
604
605 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
606 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
607 fragmeta->unknown2_4 = 0x4e0;
608
609 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
610 * is required (independent of 32-bit/64-bit descriptors), or why it's
611 * not used on later GPU revisions. Otherwise, all shader jobs fault on
612 * these earlier chips (perhaps this is a chicken bit of some kind).
613 * More investigation is needed. */
614
615 SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD);
616
617 /* Depending on whether it's legal to do so in the given shader, we
618 * try to enable early-z testing (or forward-pixel kill?) */
619
620 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
621 !fs->can_discard && !fs->writes_depth);
622
623 /* Add the writes Z/S flags if needed. */
624 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
625 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
626
627 /* Any time texturing is used, derivatives are implicitly calculated,
628 * so we need to enable helper invocations */
629
630 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
631 fs->helper_invocations);
632
633 /* CAN_DISCARD should be set if the fragment shader possibly contains a
634 * 'discard' instruction. It is likely this is related to optimizations
635 * related to forward-pixel kill, as per "Mali Performance 3: Is
636 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
637
638 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
639 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
640
641 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
642 panfrost_frag_meta_zsa_update(ctx, fragmeta);
643 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
644 }
645
646 void
647 panfrost_emit_shader_meta(struct panfrost_batch *batch,
648 enum pipe_shader_type st,
649 struct midgard_payload_vertex_tiler *vtp)
650 {
651 struct panfrost_context *ctx = batch->ctx;
652 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
653
654 if (!ss) {
655 vtp->postfix.shader = 0;
656 return;
657 }
658
659 struct mali_shader_meta meta;
660
661 panfrost_shader_meta_init(ctx, st, &meta);
662
663 /* Add the shader BO to the batch. */
664 panfrost_batch_add_bo(batch, ss->bo,
665 PAN_BO_ACCESS_PRIVATE |
666 PAN_BO_ACCESS_READ |
667 panfrost_bo_access_for_stage(st));
668
669 mali_ptr shader_ptr;
670
671 if (st == PIPE_SHADER_FRAGMENT) {
672 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
673 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
674 size_t desc_size = sizeof(meta);
675 struct midgard_blend_rt rts[4];
676 struct panfrost_transfer xfer;
677
678 assert(rt_count <= ARRAY_SIZE(rts));
679
680 panfrost_frag_shader_meta_init(ctx, &meta, rts);
681
682 if (!(screen->quirks & MIDGARD_SFBD))
683 desc_size += sizeof(*rts) * rt_count;
684
685 xfer = panfrost_allocate_transient(batch, desc_size);
686
687 memcpy(xfer.cpu, &meta, sizeof(meta));
688 if (!(screen->quirks & MIDGARD_SFBD))
        memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
689
690 shader_ptr = xfer.gpu;
691 } else {
692 shader_ptr = panfrost_upload_transient(batch, &meta,
693 sizeof(meta));
694 }
695
696 vtp->postfix.shader = shader_ptr;
697 }
698
699 static void
700 panfrost_mali_viewport_init(struct panfrost_context *ctx,
701 struct mali_viewport *mvp)
702 {
703 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
704
705 /* Clip bounds are encoded as floats. The viewport itself is encoded as
706 * (somewhat) asymmetric ints. */
707
708 const struct pipe_scissor_state *ss = &ctx->scissor;
709
710 memset(mvp, 0, sizeof(*mvp));
711
712 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
713 * each direction. Clipping to the viewport in theory should work, but
714 * in practice causes issues when we're not explicitly trying to
715 * scissor */
716
717 *mvp = (struct mali_viewport) {
718 .clip_minx = -INFINITY,
719 .clip_miny = -INFINITY,
720 .clip_maxx = INFINITY,
721 .clip_maxy = INFINITY,
722 };
723
724 /* Always scissor to the viewport by default. */
725 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
726 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
727
728 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
729 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
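/* Gallium's viewport transform maps NDC [-1, 1] through scale/translate, so
 * the screen-space bounds are translate +/- |scale|; e.g. a full-window
 * viewport of width w has translate[0] = scale[0] = w/2, recovering [0, w]
 * here */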
730
731 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
732 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
733
734 /* Apply the scissor test */
735
736 unsigned minx, miny, maxx, maxy;
737
738 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
739 minx = MAX2(ss->minx, vp_minx);
740 miny = MAX2(ss->miny, vp_miny);
741 maxx = MIN2(ss->maxx, vp_maxx);
742 maxy = MIN2(ss->maxy, vp_maxy);
743 } else {
744 minx = vp_minx;
745 miny = vp_miny;
746 maxx = vp_maxx;
747 maxy = vp_maxy;
748 }
749
750 /* Hardware needs the min/max to be strictly ordered, so flip if we
751 * need to. The viewport transformation in the vertex shader will
752 * handle the negatives if we don't */
753
754 if (miny > maxy) {
755 unsigned temp = miny;
756 miny = maxy;
757 maxy = temp;
758 }
759
760 if (minx > maxx) {
761 unsigned temp = minx;
762 minx = maxx;
763 maxx = temp;
764 }
765
766 if (minz > maxz) {
767 float temp = minz;
768 minz = maxz;
769 maxz = temp;
770 }
771
772 /* Clamp to the framebuffer size as a last check */
773
774 minx = MIN2(ctx->pipe_framebuffer.width, minx);
775 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
776
777 miny = MIN2(ctx->pipe_framebuffer.height, miny);
778 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
779
780 /* Upload */
781
782 mvp->viewport0[0] = minx;
783 mvp->viewport1[0] = MALI_POSITIVE(maxx);
784
785 mvp->viewport0[1] = miny;
786 mvp->viewport1[1] = MALI_POSITIVE(maxy);
787
788 mvp->clip_minz = minz;
789 mvp->clip_maxz = maxz;
790 }
791
792 void
793 panfrost_emit_viewport(struct panfrost_batch *batch,
794 struct midgard_payload_vertex_tiler *tp)
795 {
796 struct panfrost_context *ctx = batch->ctx;
797 struct mali_viewport mvp;
798
799 panfrost_mali_viewport_init(batch->ctx, &mvp);
800
801 /* Update the job, unless we're doing wallpapering (whose lack of
802 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
803 * just... be faster :) */
804
805 if (!ctx->wallpaper_batch)
806 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
807 mvp.viewport0[1],
808 mvp.viewport1[0] + 1,
809 mvp.viewport1[1] + 1);
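/* (viewport1 stores the maximum as MALI_POSITIVE, i.e. max - 1, so the + 1
 * recovers the original bound for the union) */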
810
811 tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
812 sizeof(mvp));
813 }
814
815 static mali_ptr
816 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
817 enum pipe_shader_type st,
818 struct panfrost_constant_buffer *buf,
819 unsigned index)
820 {
821 struct pipe_constant_buffer *cb = &buf->cb[index];
822 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
823
824 if (rsrc) {
825 panfrost_batch_add_bo(batch, rsrc->bo,
826 PAN_BO_ACCESS_SHARED |
827 PAN_BO_ACCESS_READ |
828 panfrost_bo_access_for_stage(st));
829
830 /* Alignment guaranteed by
831 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
832 return rsrc->bo->gpu + cb->buffer_offset;
833 } else if (cb->user_buffer) {
834 return panfrost_upload_transient(batch,
835 cb->user_buffer +
836 cb->buffer_offset,
837 cb->buffer_size);
838 } else {
839 unreachable("No constant buffer");
840 }
841 }
842
843 struct sysval_uniform {
844 union {
845 float f[4];
846 int32_t i[4];
847 uint32_t u[4];
848 uint64_t du[2];
849 };
850 };
851
852 static void
853 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
854 struct sysval_uniform *uniform)
855 {
856 struct panfrost_context *ctx = batch->ctx;
857 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
858
859 uniform->f[0] = vp->scale[0];
860 uniform->f[1] = vp->scale[1];
861 uniform->f[2] = vp->scale[2];
862 }
863
864 static void
865 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
866 struct sysval_uniform *uniform)
867 {
868 struct panfrost_context *ctx = batch->ctx;
869 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
870
871 uniform->f[0] = vp->translate[0];
872 uniform->f[1] = vp->translate[1];
873 uniform->f[2] = vp->translate[2];
874 }
875
876 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
877 enum pipe_shader_type st,
878 unsigned int sysvalid,
879 struct sysval_uniform *uniform)
880 {
881 struct panfrost_context *ctx = batch->ctx;
882 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
883 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
884 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
885 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
886
887 assert(dim);
888 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
889
890 if (dim > 1)
891 uniform->i[1] = u_minify(tex->texture->height0,
892 tex->u.tex.first_level);
893
894 if (dim > 2)
895 uniform->i[2] = u_minify(tex->texture->depth0,
896 tex->u.tex.first_level);
897
898 if (is_array)
899 uniform->i[dim] = tex->texture->array_size;
900 }
901
902 static void
903 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
904 enum pipe_shader_type st,
905 unsigned ssbo_id,
906 struct sysval_uniform *uniform)
907 {
908 struct panfrost_context *ctx = batch->ctx;
909
910 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
911 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
912
913 /* Compute address */
914 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
915
916 panfrost_batch_add_bo(batch, bo,
917 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
918 panfrost_bo_access_for_stage(st));
919
920 /* Upload address and size as sysval */
921 uniform->du[0] = bo->gpu + sb.buffer_offset;
922 uniform->u[2] = sb.buffer_size;
923 }
924
925 static void
926 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
927 enum pipe_shader_type st,
928 unsigned samp_idx,
929 struct sysval_uniform *uniform)
930 {
931 struct panfrost_context *ctx = batch->ctx;
932 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
933
934 uniform->f[0] = sampl->min_lod;
935 uniform->f[1] = sampl->max_lod;
936 uniform->f[2] = sampl->lod_bias;
937
938 /* Even without any errata, Midgard represents "no mipmapping" as
939 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
940 * panfrost_create_sampler_state which also explains our choice of
941 * epsilon value (again to keep behaviour consistent) */
942
943 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
944 uniform->f[1] = uniform->f[0] + (1.0/256.0);
945 }
946
947 static void
948 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
949 struct sysval_uniform *uniform)
950 {
951 struct panfrost_context *ctx = batch->ctx;
952
953 uniform->u[0] = ctx->compute_grid->grid[0];
954 uniform->u[1] = ctx->compute_grid->grid[1];
955 uniform->u[2] = ctx->compute_grid->grid[2];
956 }
957
958 static void
959 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
960 struct panfrost_shader_state *ss,
961 enum pipe_shader_type st)
962 {
963 struct sysval_uniform *uniforms = (void *)buf;
964
965 for (unsigned i = 0; i < ss->sysval_count; ++i) {
966 int sysval = ss->sysval[i];
967
968 switch (PAN_SYSVAL_TYPE(sysval)) {
969 case PAN_SYSVAL_VIEWPORT_SCALE:
970 panfrost_upload_viewport_scale_sysval(batch,
971 &uniforms[i]);
972 break;
973 case PAN_SYSVAL_VIEWPORT_OFFSET:
974 panfrost_upload_viewport_offset_sysval(batch,
975 &uniforms[i]);
976 break;
977 case PAN_SYSVAL_TEXTURE_SIZE:
978 panfrost_upload_txs_sysval(batch, st,
979 PAN_SYSVAL_ID(sysval),
980 &uniforms[i]);
981 break;
982 case PAN_SYSVAL_SSBO:
983 panfrost_upload_ssbo_sysval(batch, st,
984 PAN_SYSVAL_ID(sysval),
985 &uniforms[i]);
986 break;
987 case PAN_SYSVAL_NUM_WORK_GROUPS:
988 panfrost_upload_num_work_groups_sysval(batch,
989 &uniforms[i]);
990 break;
991 case PAN_SYSVAL_SAMPLER:
992 panfrost_upload_sampler_sysval(batch, st,
993 PAN_SYSVAL_ID(sysval),
994 &uniforms[i]);
995 break;
996 default:
997 assert(0);
998 }
999 }
1000 }
1001
1002 static const void *
1003 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1004 unsigned index)
1005 {
1006 struct pipe_constant_buffer *cb = &buf->cb[index];
1007 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1008
1009 if (rsrc)
1010 return rsrc->bo->cpu;
1011 else if (cb->user_buffer)
1012 return cb->user_buffer;
1013 else
1014 unreachable("No constant buffer");
1015 }
1016
1017 void
1018 panfrost_emit_const_buf(struct panfrost_batch *batch,
1019 enum pipe_shader_type stage,
1020 struct midgard_payload_vertex_tiler *vtp)
1021 {
1022 struct panfrost_context *ctx = batch->ctx;
1023 struct panfrost_shader_variants *all = ctx->shader[stage];
1024
1025 if (!all)
1026 return;
1027
1028 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1029
1030 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1031
1032 /* Uniforms are implicitly UBO #0 */
1033 bool has_uniforms = buf->enabled_mask & (1 << 0);
1034
1035 /* Allocate room for the sysval and the uniforms */
1036 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1037 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1038 size_t size = sys_size + uniform_size;
1039 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1040 size);
1041
1042 /* Upload sysvals requested by the shader */
1043 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1044
1045 /* Upload uniforms */
1046 if (has_uniforms && uniform_size) {
1047 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1048 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1049 }
1050
1051 struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;
1052
1053 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1054 * uploaded */
1055
1056 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1057 assert(ubo_count >= 1);
1058
1059 size_t sz = sizeof(uint64_t) * ubo_count;
1060 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1061 int uniform_count = ss->uniform_count;
1062
1063 /* Upload uniforms as a UBO */
1064 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
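/* (MALI_MAKE_UBO pairs a size in 16-byte fields with the GPU address,
 * matching the aligned / bytes_per_field entries built for the real UBOs
 * below) */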
1065
1066 /* The rest are honest-to-goodness UBOs */
1067
1068 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1069 size_t usz = buf->cb[ubo].buffer_size;
1070 bool enabled = buf->enabled_mask & (1 << ubo);
1071 bool empty = usz == 0;
1072
1073 if (!enabled || empty) {
1074 /* Stub out disabled UBOs to catch accesses */
1075 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1076 continue;
1077 }
1078
1079 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1080 buf, ubo);
1081
1082 unsigned bytes_per_field = 16;
1083 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1084 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1085 }
1086
1087 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1088 postfix->uniforms = transfer.gpu;
1089 postfix->uniform_buffers = ubufs;
1090
1091 buf->dirty_mask = 0;
1092 }
1093
1094 void
1095 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1096 const struct pipe_grid_info *info,
1097 struct midgard_payload_vertex_tiler *vtp)
1098 {
1099 struct panfrost_context *ctx = batch->ctx;
1100 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1101 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1102 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1103 128));
1104 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1105 info->grid[2] * 4;
1106 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1107 shared_size,
1108 1);
1109
1110 struct mali_shared_memory shared = {
1111 .shared_memory = bo->gpu,
1112 .shared_workgroup_count =
1113 util_logbase2_ceil(info->grid[0]) +
1114 util_logbase2_ceil(info->grid[1]) +
1115 util_logbase2_ceil(info->grid[2]),
1116 .shared_unk1 = 0x2,
1117 .shared_shift = util_logbase2(single_size) - 1
1118 };
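/* (shared_shift appears to encode log2 of the per-workgroup slice;
 * single_size was rounded up to a power of two of at least 128 bytes above) */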
1119
1120 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1121 sizeof(shared));
1122 }
1123
1124 static mali_ptr
1125 panfrost_get_tex_desc(struct panfrost_batch *batch,
1126 enum pipe_shader_type st,
1127 struct panfrost_sampler_view *view)
1128 {
1129 if (!view)
1130 return (mali_ptr) 0;
1131
1132 struct pipe_sampler_view *pview = &view->base;
1133 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1134
1135 /* Add the BO to the job so it's retained until the job is done. */
1136
1137 panfrost_batch_add_bo(batch, rsrc->bo,
1138 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1139 panfrost_bo_access_for_stage(st));
1140
1141 panfrost_batch_add_bo(batch, view->bo,
1142 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1143 panfrost_bo_access_for_stage(st));
1144
1145 return view->bo->gpu;
1146 }
1147
1148 void
1149 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1150 enum pipe_shader_type stage,
1151 struct midgard_payload_vertex_tiler *vtp)
1152 {
1153 struct panfrost_context *ctx = batch->ctx;
1154
1155 if (!ctx->sampler_view_count[stage])
1156 return;
1157
1158 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
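/* Each "trampoline" is just the GPU address of a texture descriptor; the
 * hardware indexes this table of pointers by texture unit */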
1159
1160 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1161 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1162 ctx->sampler_views[stage][i]);
1163
1164 vtp->postfix.texture_trampoline = panfrost_upload_transient(batch,
1165 trampolines,
1166 sizeof(uint64_t) *
1167 ctx->sampler_view_count[stage]);
1168 }
1169
1170 void
1171 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1172 enum pipe_shader_type stage,
1173 struct midgard_payload_vertex_tiler *vtp)
1174 {
1175 struct panfrost_context *ctx = batch->ctx;
1176
1177 if (!ctx->sampler_count[stage])
1178 return;
1179
1180 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1181 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1182 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1183 transfer_size);
1184 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1185
1186 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1187 desc[i] = ctx->samplers[stage][i]->hw;
1188
1189 vtp->postfix.sampler_descriptor = transfer.gpu;
1190 }
1191
1192 void
1193 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1194 struct midgard_payload_vertex_tiler *vp)
1195 {
1196 struct panfrost_context *ctx = batch->ctx;
1197
1198 if (!ctx->vertex)
1199 return;
1200
1201 struct panfrost_vertex_state *so = ctx->vertex;
1202
1203 panfrost_vertex_state_upd_attr_offs(ctx, vp);
1204 vp->postfix.attribute_meta = panfrost_upload_transient(batch, so->hw,
1205 sizeof(*so->hw) *
1206 PAN_MAX_ATTRIBUTE);
1207 }
1208
1209 void
1210 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1211 struct midgard_payload_vertex_tiler *vp)
1212 {
1213 struct panfrost_context *ctx = batch->ctx;
1214 struct panfrost_vertex_state *so = ctx->vertex;
1215
1216 /* Staged mali_attr, and index into them. i =/= k, depending on the
1217 * vertex buffer mask and instancing. Twice as much room is allocated,
1218 * for a worst case of NPOT_DIVIDEs, which take up an extra slot each */
1219 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1220 unsigned k = 0;
1221
1222 for (unsigned i = 0; i < so->num_elements; ++i) {
1223 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1224 * means duplicating some vertex buffers (who cares? aside from
1225 * maybe some caching implications but I somehow doubt that
1226 * matters) */
1227
1228 struct pipe_vertex_element *elem = &so->pipe[i];
1229 unsigned vbi = elem->vertex_buffer_index;
1230
1231 /* The exception to 1:1 mapping is that we can have multiple
1232 * entries (NPOT divisors), so we fix up anyway */
1233
1234 so->hw[i].index = k;
1235
1236 if (!(ctx->vb_mask & (1 << vbi)))
1237 continue;
1238
1239 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1240 struct panfrost_resource *rsrc;
1241
1242 rsrc = pan_resource(buf->buffer.resource);
1243 if (!rsrc)
1244 continue;
1245
1246 /* Align to 64 bytes by masking off the lower bits. This
1247 * will be adjusted back when we fixup the src_offset in
1248 * mali_attr_meta */
1249
1250 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1251 mali_ptr addr = raw_addr & ~63;
1252 unsigned chopped_addr = raw_addr - addr;
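/* e.g. for a 64-byte-aligned BO with buffer_offset = 100, addr keeps
 * base + 64 and chopped_addr = 36; the difference is added back to the size
 * below and folded into src_offset in the attribute meta */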
1253
1254 /* Add a dependency of the batch on the vertex buffer */
1255 panfrost_batch_add_bo(batch, rsrc->bo,
1256 PAN_BO_ACCESS_SHARED |
1257 PAN_BO_ACCESS_READ |
1258 PAN_BO_ACCESS_VERTEX_TILER);
1259
1260 /* Set common fields */
1261 attrs[k].elements = addr;
1262 attrs[k].stride = buf->stride;
1263
1264 /* Since we advanced the base pointer, we shrink the buffer
1265 * size */
1266 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1267
1268 /* We need to add the extra size we masked off (for
1269 * correctness) so the data doesn't get clamped away */
1270 attrs[k].size += chopped_addr;
1271
1272 /* For non-instancing make sure we initialize */
1273 attrs[k].shift = attrs[k].extra_flags = 0;
1274
1275 /* Instancing uses a dramatically different code path than
1276 * linear, so dispatch for the actual emission now that the
1277 * common code is finished */
1278
1279 unsigned divisor = elem->instance_divisor;
1280
1281 if (divisor && ctx->instance_count == 1) {
1282 /* Silly corner case where there's a divisor(=1) but
1283 * there's no legitimate instancing. So we want *every*
1284 * attribute to be the same. So set stride to zero so
1285 * we don't go anywhere. */
1286
1287 attrs[k].size = attrs[k].stride + chopped_addr;
1288 attrs[k].stride = 0;
1289 attrs[k++].elements |= MALI_ATTR_LINEAR;
1290 } else if (ctx->instance_count <= 1) {
1291 /* Normal, non-instanced attributes */
1292 attrs[k++].elements |= MALI_ATTR_LINEAR;
1293 } else {
1294 unsigned instance_shift = vp->instance_shift;
1295 unsigned instance_odd = vp->instance_odd;
1296
1297 k += panfrost_vertex_instanced(ctx->padded_count,
1298 instance_shift,
1299 instance_odd,
1300 divisor, &attrs[k]);
1301 }
1302 }
1303
1304 /* Add special gl_VertexID/gl_InstanceID buffers */
1305
1306 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1307 so->hw[PAN_VERTEX_ID].index = k++;
1308 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1309 so->hw[PAN_INSTANCE_ID].index = k++;
1310
1311 /* Upload whatever we emitted and go */
1312
1313 vp->postfix.attributes = panfrost_upload_transient(batch, attrs,
1314 k * sizeof(*attrs));
1315 }
1316
1317 void
1318 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1319 struct midgard_payload_vertex_tiler *vp,
1320 struct midgard_payload_vertex_tiler *tp)
1321 {
1322 struct panfrost_context *ctx = batch->ctx;
1323 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1324
1325 if (wallpapering) {
1326 /* Inject in reverse order, with "predicted" job indices.
1327 * THIS IS A HACK XXX */
1328 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1329 batch->job_index + 2, tp, sizeof(*tp), true);
1330 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1331 vp, sizeof(*vp), true);
1332 return;
1333 }
1334
1335 /* If rasterizer discard is enabled, only submit the vertex job */
1336
1337 bool rasterizer_discard = ctx->rasterizer &&
1338 ctx->rasterizer->base.rasterizer_discard;
1339
1340 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1341 vp, sizeof(*vp), false);
1342
1343 if (rasterizer_discard)
1344 return;
1345
1346 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, sizeof(*tp),
1347 false);
1348 }