panfrost: Emit texture descriptor on bifrost
[mesa.git] / src/gallium/drivers/panfrost/pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_allocate.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
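/* On Bifrost, the vertex/tiler postfix points at a shared memory
 * descriptor (holding the per-thread scratchpad sized from the batch's
 * stack usage) rather than a framebuffer pointer, so build one and
 * upload it transiently */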
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_upload_transient(batch, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77 /* If we haven't already, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_allocate_transient(batch, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query)
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 else
140 postfix->occlusion_counter = 0;
141 }
142
143 void
144 panfrost_vt_init(struct panfrost_context *ctx,
145 enum pipe_shader_type stage,
146 struct mali_vertex_tiler_prefix *prefix,
147 struct mali_vertex_tiler_postfix *postfix)
148 {
149 struct panfrost_device *device = pan_device(ctx->base.screen);
150
151 if (!ctx->shader[stage])
152 return;
153
154 memset(prefix, 0, sizeof(*prefix));
155 memset(postfix, 0, sizeof(*postfix));
156
157 if (device->quirks & IS_BIFROST) {
158 postfix->gl_enables = 0x2;
159 panfrost_vt_emit_shared_memory(ctx, postfix);
160 } else {
161 postfix->gl_enables = 0x6;
162 panfrost_vt_attach_framebuffer(ctx, postfix);
163 }
164
165 if (stage == PIPE_SHADER_FRAGMENT) {
166 panfrost_vt_update_occlusion_query(ctx, postfix);
167 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
168 }
169 }
170
171 static unsigned
172 panfrost_translate_index_size(unsigned size)
173 {
174 switch (size) {
175 case 1:
176 return MALI_DRAW_INDEXED_UINT8;
177
178 case 2:
179 return MALI_DRAW_INDEXED_UINT16;
180
181 case 4:
182 return MALI_DRAW_INDEXED_UINT32;
183
184 default:
185 unreachable("Invalid index size");
186 }
187 }
188
189 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
190 * good for the duration of the draw (transient), could last longer. Also get
191 * the bounds on the index buffer for the range accessed by the draw. We do
192 * these operations together because there are natural optimizations which
193 * require them to be together. */
194
195 static mali_ptr
196 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
197 const struct pipe_draw_info *info,
198 unsigned *min_index, unsigned *max_index)
199 {
200 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
201 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
202 off_t offset = info->start * info->index_size;
203 bool needs_indices = true;
204 mali_ptr out = 0;
205
206 if (info->max_index != ~0u) {
207 *min_index = info->min_index;
208 *max_index = info->max_index;
209 needs_indices = false;
210 }
211
212 if (!info->has_user_indices) {
213 /* Only resources can be directly mapped */
214 panfrost_batch_add_bo(batch, rsrc->bo,
215 PAN_BO_ACCESS_SHARED |
216 PAN_BO_ACCESS_READ |
217 PAN_BO_ACCESS_VERTEX_TILER);
218 out = rsrc->bo->gpu + offset;
219
220 /* Check the cache */
221 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
222 info->start,
223 info->count,
224 min_index,
225 max_index);
226 } else {
227 /* Otherwise, we need to upload to transient memory */
228 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
229 out = panfrost_upload_transient(batch, ibuf8 + offset,
230 info->count *
231 info->index_size);
232 }
233
234 if (needs_indices) {
235 /* Fallback */
236 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
237
238 if (!info->has_user_indices)
239 panfrost_minmax_cache_add(rsrc->index_cache,
240 info->start, info->count,
241 *min_index, *max_index);
242 }
243
244 return out;
245 }
246
247 void
248 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
249 const struct pipe_draw_info *info,
250 enum mali_draw_mode draw_mode,
251 struct mali_vertex_tiler_postfix *vertex_postfix,
252 struct mali_vertex_tiler_prefix *tiler_prefix,
253 struct mali_vertex_tiler_postfix *tiler_postfix,
254 unsigned *vertex_count,
255 unsigned *padded_count)
256 {
257 tiler_prefix->draw_mode = draw_mode;
258
259 unsigned draw_flags = 0;
260
261 if (panfrost_writes_point_size(ctx))
262 draw_flags |= MALI_DRAW_VARYING_SIZE;
263
264 if (info->primitive_restart)
265 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
266
267 /* These don't make much sense */
268
269 draw_flags |= 0x3000;
270
271 if (info->index_size) {
272 unsigned min_index = 0, max_index = 0;
273
274 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
275 info,
276 &min_index,
277 &max_index);
278
279 /* Use the corresponding values */
280 *vertex_count = max_index - min_index + 1;
281 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
282 tiler_prefix->offset_bias_correction = -min_index;
283 tiler_prefix->index_count = MALI_POSITIVE(info->count);
284 draw_flags |= panfrost_translate_index_size(info->index_size);
285 } else {
286 tiler_prefix->indices = 0;
287 *vertex_count = ctx->vertex_count;
288 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
289 tiler_prefix->offset_bias_correction = 0;
290 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
291 }
292
293 tiler_prefix->unknown_draw = draw_flags;
294
295 /* Encode the padded vertex count */
296
297 if (info->instance_count > 1) {
298 *padded_count = panfrost_padded_vertex_count(*vertex_count);
299
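/* instance_shift/instance_odd together encode the padded count as
 * (2 * instance_odd + 1) << instance_shift, which is how the instancing
 * hardware appears to consume it; the shift comes from the trailing zero
 * bits and the odd factor from what remains (a reading of the arithmetic
 * below; see also panfrost_padded_vertex_count) */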
300 unsigned shift = __builtin_ctz(ctx->padded_count);
301 unsigned k = ctx->padded_count >> (shift + 1);
302
303 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
304 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
305 } else {
306 *padded_count = *vertex_count;
307
308 /* Reset instancing state */
309 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
310 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
311 }
312 }
313
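/* Fill in the shader descriptor fields common to every stage: the shader
 * pointer and first tag, the attribute/varying/texture/sampler counts,
 * and the Bifrost- or Midgard-specific uniform and flag words */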
314 static void
315 panfrost_shader_meta_init(struct panfrost_context *ctx,
316 enum pipe_shader_type st,
317 struct mali_shader_meta *meta)
318 {
319 const struct panfrost_device *dev = pan_device(ctx->base.screen);
320 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
321
322 memset(meta, 0, sizeof(*meta));
323 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
324 meta->attribute_count = ss->attribute_count;
325 meta->varying_count = ss->varying_count;
326 meta->texture_count = ctx->sampler_view_count[st];
327 meta->sampler_count = ctx->sampler_count[st];
328
329 if (dev->quirks & IS_BIFROST) {
330 meta->bifrost1.unk1 = 0x800200;
331 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
332 meta->bifrost2.preload_regs = 0xC0;
333 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
334 ss->uniform_cutoff);
335 } else {
336 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
337 ss->uniform_cutoff);
338 meta->midgard1.work_count = ss->work_reg_count;
339 meta->midgard1.flags_hi = 0x8; /* XXX */
340 meta->midgard1.flags_lo = 0x220;
341 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
342 }
343
344 }
345
346 static unsigned
347 panfrost_translate_compare_func(enum pipe_compare_func in)
348 {
349 switch (in) {
350 case PIPE_FUNC_NEVER:
351 return MALI_FUNC_NEVER;
352
353 case PIPE_FUNC_LESS:
354 return MALI_FUNC_LESS;
355
356 case PIPE_FUNC_EQUAL:
357 return MALI_FUNC_EQUAL;
358
359 case PIPE_FUNC_LEQUAL:
360 return MALI_FUNC_LEQUAL;
361
362 case PIPE_FUNC_GREATER:
363 return MALI_FUNC_GREATER;
364
365 case PIPE_FUNC_NOTEQUAL:
366 return MALI_FUNC_NOTEQUAL;
367
368 case PIPE_FUNC_GEQUAL:
369 return MALI_FUNC_GEQUAL;
370
371 case PIPE_FUNC_ALWAYS:
372 return MALI_FUNC_ALWAYS;
373
374 default:
375 unreachable("Invalid func");
376 }
377 }
378
379 static unsigned
380 panfrost_translate_stencil_op(enum pipe_stencil_op in)
381 {
382 switch (in) {
383 case PIPE_STENCIL_OP_KEEP:
384 return MALI_STENCIL_KEEP;
385
386 case PIPE_STENCIL_OP_ZERO:
387 return MALI_STENCIL_ZERO;
388
389 case PIPE_STENCIL_OP_REPLACE:
390 return MALI_STENCIL_REPLACE;
391
392 case PIPE_STENCIL_OP_INCR:
393 return MALI_STENCIL_INCR;
394
395 case PIPE_STENCIL_OP_DECR:
396 return MALI_STENCIL_DECR;
397
398 case PIPE_STENCIL_OP_INCR_WRAP:
399 return MALI_STENCIL_INCR_WRAP;
400
401 case PIPE_STENCIL_OP_DECR_WRAP:
402 return MALI_STENCIL_DECR_WRAP;
403
404 case PIPE_STENCIL_OP_INVERT:
405 return MALI_STENCIL_INVERT;
406
407 default:
408 unreachable("Invalid stencil op");
409 }
410 }
411
412 static unsigned
413 translate_tex_wrap(enum pipe_tex_wrap w)
414 {
415 switch (w) {
416 case PIPE_TEX_WRAP_REPEAT:
417 return MALI_WRAP_REPEAT;
418
419 case PIPE_TEX_WRAP_CLAMP:
420 return MALI_WRAP_CLAMP;
421
422 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
423 return MALI_WRAP_CLAMP_TO_EDGE;
424
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
426 return MALI_WRAP_CLAMP_TO_BORDER;
427
428 case PIPE_TEX_WRAP_MIRROR_REPEAT:
429 return MALI_WRAP_MIRRORED_REPEAT;
430
431 case PIPE_TEX_WRAP_MIRROR_CLAMP:
432 return MALI_WRAP_MIRRORED_CLAMP;
433
434 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
435 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
438 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
439
440 default:
441 unreachable("Invalid wrap");
442 }
443 }
444
445 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
446 struct mali_sampler_descriptor *hw)
447 {
448 unsigned func = panfrost_translate_compare_func(cso->compare_func);
449 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
450 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
451 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
452 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
453 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
454 unsigned mip_filter = mip_linear ?
455 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
456 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
457
458 *hw = (struct mali_sampler_descriptor) {
459 .filter_mode = min_filter | mag_filter | mip_filter |
460 normalized,
461 .wrap_s = translate_tex_wrap(cso->wrap_s),
462 .wrap_t = translate_tex_wrap(cso->wrap_t),
463 .wrap_r = translate_tex_wrap(cso->wrap_r),
464 .compare_func = panfrost_flip_compare_func(func),
465 .border_color = {
466 cso->border_color.f[0],
467 cso->border_color.f[1],
468 cso->border_color.f[2],
469 cso->border_color.f[3]
470 },
471 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
472 .max_lod = FIXED_16(cso->max_lod, false),
473 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
474 .seamless_cube_map = cso->seamless_cube_map,
475 };
476
477 /* If necessary, we disable mipmapping in the sampler descriptor by
478 * clamping the LOD as tight as possible (from 0 to epsilon,
479 * essentially -- remember these are fixed point numbers, so
480 * epsilon=1/256) */
481
482 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
483 hw->max_lod = hw->min_lod + 1;
484 }
485
486 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
487 struct bifrost_sampler_descriptor *hw)
488 {
489 *hw = (struct bifrost_sampler_descriptor) {
490 .unk1 = 0x1,
491 .wrap_s = translate_tex_wrap(cso->wrap_s),
492 .wrap_t = translate_tex_wrap(cso->wrap_t),
493 .wrap_r = translate_tex_wrap(cso->wrap_r),
494 .unk8 = 0x8,
495 .unk2 = 0x2,
496 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
497 .norm_coords = cso->normalized_coords,
498 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
499 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
500 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
501 .max_lod = FIXED_16(cso->max_lod, false),
502 };
503
504 /* If necessary, we disable mipmapping in the sampler descriptor by
505 * clamping the LOD as tight as possible (from 0 to epsilon,
506 * essentially -- remember these are fixed point numbers, so
507 * epsilon=1/256) */
508
509 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
510 hw->max_lod = hw->min_lod + 1;
511 }
512
513 static void
514 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
515 struct mali_stencil_test *out)
516 {
517 out->ref = 0; /* Gallium gets it from elsewhere */
518
519 out->mask = in->valuemask;
520 out->func = panfrost_translate_compare_func(in->func);
521 out->sfail = panfrost_translate_stencil_op(in->fail_op);
522 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
523 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
524 }
525
526 static void
527 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
528 struct mali_shader_meta *fragmeta)
529 {
530 if (!ctx->rasterizer) {
531 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
532 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
533 fragmeta->depth_units = 0.0f;
534 fragmeta->depth_factor = 0.0f;
535 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
536 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
537 return;
538 }
539
540 bool msaa = ctx->rasterizer->base.multisample;
541
542 /* TODO: Sample size */
543 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
544 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
545 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
546 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
547
548 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
549
550 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
551 ctx->rasterizer->base.offset_tri);
552 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
553 ctx->rasterizer->base.offset_tri);
554 }
555
556 static void
557 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
558 struct mali_shader_meta *fragmeta)
559 {
560 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
561 int zfunc = PIPE_FUNC_ALWAYS;
562
563 if (!zsa) {
564 struct pipe_stencil_state default_stencil = {
565 .enabled = 0,
566 .func = PIPE_FUNC_ALWAYS,
567 .fail_op = MALI_STENCIL_KEEP,
568 .zfail_op = MALI_STENCIL_KEEP,
569 .zpass_op = MALI_STENCIL_KEEP,
570 .writemask = 0xFF,
571 .valuemask = 0xFF
572 };
573
574 panfrost_make_stencil_state(&default_stencil,
575 &fragmeta->stencil_front);
576 fragmeta->stencil_mask_front = default_stencil.writemask;
577 fragmeta->stencil_back = fragmeta->stencil_front;
578 fragmeta->stencil_mask_back = default_stencil.writemask;
579 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
580 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
581 } else {
582 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
583 zsa->stencil[0].enabled);
584 panfrost_make_stencil_state(&zsa->stencil[0],
585 &fragmeta->stencil_front);
586 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
587 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
588
589 /* If back-stencil is not enabled, use the front values */
590
591 if (zsa->stencil[1].enabled) {
592 panfrost_make_stencil_state(&zsa->stencil[1],
593 &fragmeta->stencil_back);
594 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
595 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
596 } else {
597 fragmeta->stencil_back = fragmeta->stencil_front;
598 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
599 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
600 }
601
602 if (zsa->depth.enabled)
603 zfunc = zsa->depth.func;
604
605 /* Depth state (TODO: Refactor) */
606
607 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
608 zsa->depth.writemask);
609 }
610
611 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
612 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
613 }
614
615 static void
616 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
617 struct mali_shader_meta *fragmeta,
618 struct midgard_blend_rt *rts)
619 {
620 const struct panfrost_device *dev = pan_device(ctx->base.screen);
621
622 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
623 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
624 !ctx->blend->base.dither);
625
626 /* Get blending setup */
627 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
628
629 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
630 unsigned shader_offset = 0;
631 struct panfrost_bo *shader_bo = NULL;
632
633 for (unsigned c = 0; c < rt_count; ++c)
634 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
635 &shader_offset);
636
637 /* If there is a blend shader, work registers are shared. XXX: opt */
638
639 for (unsigned c = 0; c < rt_count; ++c) {
640 if (blend[c].is_shader)
641 fragmeta->midgard1.work_count = 16;
642 }
643
644 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
645 * copied to the blend_meta appended (by convention), but this is the
646 * field actually read by the hardware. (Or maybe both are read...?).
647 * Specify the last RTi with a blend shader. */
648
649 fragmeta->blend.shader = 0;
650
651 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
652 if (!blend[rt].is_shader)
653 continue;
654
655 fragmeta->blend.shader = blend[rt].shader.gpu |
656 blend[rt].shader.first_tag;
657 break;
658 }
659
660 if (dev->quirks & MIDGARD_SFBD) {
661 /* On platforms with only a single render target (SFBD), the blend
662 * information is stored inside the shader meta itself. We additionally
663 * need to signal CAN_DISCARD for nontrivial blend modes (so
664 * we're able to read back the destination buffer) */
665
666 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
667 blend[0].is_shader);
668
669 if (!blend[0].is_shader) {
670 fragmeta->blend.equation = *blend[0].equation.equation;
671 fragmeta->blend.constant = blend[0].equation.constant;
672 }
673
674 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
675 !blend[0].no_blending);
676 return;
677 }
678
679 /* Additional blend descriptor tacked on for jobs using MFBD */
680
681 for (unsigned i = 0; i < rt_count; ++i) {
682 rts[i].flags = 0x200;
683
684 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
685 (ctx->pipe_framebuffer.cbufs[i]) &&
686 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
687
688 SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
689 SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
690 SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
691 SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
692
693 if (blend[i].is_shader) {
694 rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
695 } else {
696 rts[i].blend.equation = *blend[i].equation.equation;
697 rts[i].blend.constant = blend[i].equation.constant;
698 }
699 }
700 }
701
702 static void
703 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
704 struct mali_shader_meta *fragmeta,
705 struct midgard_blend_rt *rts)
706 {
707 const struct panfrost_device *dev = pan_device(ctx->base.screen);
708 struct panfrost_shader_state *fs;
709
710 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
711
712 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
713 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
714 fragmeta->unknown2_4 = 0x4e0;
715
716 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
717 * is required (independent of 32-bit/64-bit descriptors), or why it's
718 * not used on later GPU revisions. Otherwise, all shader jobs fault on
719 * these earlier chips (perhaps this is a chicken bit of some kind).
720 * More investigation is needed. */
721
722 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
723
724 /* Depending on whether it's legal to do so in the given shader, we try to
725 * enable early-z testing (or forward-pixel kill?) */
726
727 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
728 !fs->can_discard && !fs->writes_depth);
729
730 /* Add the writes Z/S flags if needed. */
731 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
732 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
733
734 /* Any time texturing is used, derivatives are implicitly calculated,
735 * so we need to enable helper invocations */
736
737 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
738 fs->helper_invocations);
739
740 /* CAN_DISCARD should be set if the fragment shader possibly contains a
741 * 'discard' instruction. It is likely this is related to optimizations
742 * related to forward-pixel kill, as per "Mali Performance 3: Is
743 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
744
745 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
746 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
747
748 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
749 panfrost_frag_meta_zsa_update(ctx, fragmeta);
750 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
751 }
752
753 void
754 panfrost_emit_shader_meta(struct panfrost_batch *batch,
755 enum pipe_shader_type st,
756 struct mali_vertex_tiler_postfix *postfix)
757 {
758 struct panfrost_context *ctx = batch->ctx;
759 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
760
761 if (!ss) {
762 postfix->shader = 0;
763 return;
764 }
765
766 struct mali_shader_meta meta;
767
768 panfrost_shader_meta_init(ctx, st, &meta);
769
770 /* Add the shader BO to the batch. */
771 panfrost_batch_add_bo(batch, ss->bo,
772 PAN_BO_ACCESS_PRIVATE |
773 PAN_BO_ACCESS_READ |
774 panfrost_bo_access_for_stage(st));
775
776 mali_ptr shader_ptr;
777
778 if (st == PIPE_SHADER_FRAGMENT) {
779 struct panfrost_device *dev = pan_device(ctx->base.screen);
780 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
781 size_t desc_size = sizeof(meta);
782 struct midgard_blend_rt rts[4];
783 struct panfrost_transfer xfer;
784
785 assert(rt_count <= ARRAY_SIZE(rts));
786
787 panfrost_frag_shader_meta_init(ctx, &meta, rts);
788
789 if (!(dev->quirks & MIDGARD_SFBD))
790 desc_size += sizeof(*rts) * rt_count;
791
792 xfer = panfrost_allocate_transient(batch, desc_size);
793
794 memcpy(xfer.cpu, &meta, sizeof(meta));
795 memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
796
797 shader_ptr = xfer.gpu;
798 } else {
799 shader_ptr = panfrost_upload_transient(batch, &meta,
800 sizeof(meta));
801 }
802
803 postfix->shader = shader_ptr;
804 }
805
806 static void
807 panfrost_mali_viewport_init(struct panfrost_context *ctx,
808 struct mali_viewport *mvp)
809 {
810 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
811
812 /* Clip bounds are encoded as floats. The viewport itself is encoded as
813 * (somewhat) asymmetric ints. */
814
815 const struct pipe_scissor_state *ss = &ctx->scissor;
816
817 memset(mvp, 0, sizeof(*mvp));
818
819 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
820 * each direction. Clipping to the viewport in theory should work, but
821 * in practice causes issues when we're not explicitly trying to
822 * scissor */
823
824 *mvp = (struct mali_viewport) {
825 .clip_minx = -INFINITY,
826 .clip_miny = -INFINITY,
827 .clip_maxx = INFINITY,
828 .clip_maxy = INFINITY,
829 };
830
831 /* Always scissor to the viewport by default. */
832 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
833 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
834
835 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
836 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
837
838 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
839 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
840
841 /* Apply the scissor test */
842
843 unsigned minx, miny, maxx, maxy;
844
845 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
846 minx = MAX2(ss->minx, vp_minx);
847 miny = MAX2(ss->miny, vp_miny);
848 maxx = MIN2(ss->maxx, vp_maxx);
849 maxy = MIN2(ss->maxy, vp_maxy);
850 } else {
851 minx = vp_minx;
852 miny = vp_miny;
853 maxx = vp_maxx;
854 maxy = vp_maxy;
855 }
856
857 /* Hardware needs the min/max to be strictly ordered, so flip if we
858 * need to. The viewport transformation in the vertex shader will
859 * handle the negatives if we don't */
860
861 if (miny > maxy) {
862 unsigned temp = miny;
863 miny = maxy;
864 maxy = temp;
865 }
866
867 if (minx > maxx) {
868 unsigned temp = minx;
869 minx = maxx;
870 maxx = temp;
871 }
872
873 if (minz > maxz) {
874 float temp = minz;
875 minz = maxz;
876 maxz = temp;
877 }
878
879 /* Clamp to the framebuffer size as a last check */
880
881 minx = MIN2(ctx->pipe_framebuffer.width, minx);
882 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
883
884 miny = MIN2(ctx->pipe_framebuffer.height, miny);
885 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
886
887 /* Upload */
888
889 mvp->viewport0[0] = minx;
890 mvp->viewport1[0] = MALI_POSITIVE(maxx);
891
892 mvp->viewport0[1] = miny;
893 mvp->viewport1[1] = MALI_POSITIVE(maxy);
894
895 mvp->clip_minz = minz;
896 mvp->clip_maxz = maxz;
897 }
898
899 void
900 panfrost_emit_viewport(struct panfrost_batch *batch,
901 struct mali_vertex_tiler_postfix *tiler_postfix)
902 {
903 struct panfrost_context *ctx = batch->ctx;
904 struct mali_viewport mvp;
905
906 panfrost_mali_viewport_init(batch->ctx, &mvp);
907
908 /* Update the job, unless we're doing wallpapering (whose lack of
909 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
910 * just... be faster :) */
911
912 if (!ctx->wallpaper_batch)
913 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
914 mvp.viewport0[1],
915 mvp.viewport1[0] + 1,
916 mvp.viewport1[1] + 1);
917
918 tiler_postfix->viewport = panfrost_upload_transient(batch, &mvp,
919 sizeof(mvp));
920 }
921
922 static mali_ptr
923 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
924 enum pipe_shader_type st,
925 struct panfrost_constant_buffer *buf,
926 unsigned index)
927 {
928 struct pipe_constant_buffer *cb = &buf->cb[index];
929 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
930
931 if (rsrc) {
932 panfrost_batch_add_bo(batch, rsrc->bo,
933 PAN_BO_ACCESS_SHARED |
934 PAN_BO_ACCESS_READ |
935 panfrost_bo_access_for_stage(st));
936
937 /* Alignment guaranteed by
938 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
939 return rsrc->bo->gpu + cb->buffer_offset;
940 } else if (cb->user_buffer) {
941 return panfrost_upload_transient(batch,
942 cb->user_buffer +
943 cb->buffer_offset,
944 cb->buffer_size);
945 } else {
946 unreachable("No constant buffer");
947 }
948 }
949
950 struct sysval_uniform {
951 union {
952 float f[4];
953 int32_t i[4];
954 uint32_t u[4];
955 uint64_t du[2];
956 };
957 };
958
959 static void
960 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
961 struct sysval_uniform *uniform)
962 {
963 struct panfrost_context *ctx = batch->ctx;
964 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
965
966 uniform->f[0] = vp->scale[0];
967 uniform->f[1] = vp->scale[1];
968 uniform->f[2] = vp->scale[2];
969 }
970
971 static void
972 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
973 struct sysval_uniform *uniform)
974 {
975 struct panfrost_context *ctx = batch->ctx;
976 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
977
978 uniform->f[0] = vp->translate[0];
979 uniform->f[1] = vp->translate[1];
980 uniform->f[2] = vp->translate[2];
981 }
982
983 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
984 enum pipe_shader_type st,
985 unsigned int sysvalid,
986 struct sysval_uniform *uniform)
987 {
988 struct panfrost_context *ctx = batch->ctx;
989 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
990 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
991 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
992 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
993
994 assert(dim);
995 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
996
997 if (dim > 1)
998 uniform->i[1] = u_minify(tex->texture->height0,
999 tex->u.tex.first_level);
1000
1001 if (dim > 2)
1002 uniform->i[2] = u_minify(tex->texture->depth0,
1003 tex->u.tex.first_level);
1004
1005 if (is_array)
1006 uniform->i[dim] = tex->texture->array_size;
1007 }
1008
1009 static void
1010 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1011 enum pipe_shader_type st,
1012 unsigned ssbo_id,
1013 struct sysval_uniform *uniform)
1014 {
1015 struct panfrost_context *ctx = batch->ctx;
1016
1017 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1018 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1019
1020 /* Compute address */
1021 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1022
1023 panfrost_batch_add_bo(batch, bo,
1024 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1025 panfrost_bo_access_for_stage(st));
1026
1027 /* Upload address and size as sysval */
1028 uniform->du[0] = bo->gpu + sb.buffer_offset;
1029 uniform->u[2] = sb.buffer_size;
1030 }
1031
1032 static void
1033 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1034 enum pipe_shader_type st,
1035 unsigned samp_idx,
1036 struct sysval_uniform *uniform)
1037 {
1038 struct panfrost_context *ctx = batch->ctx;
1039 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1040
1041 uniform->f[0] = sampl->min_lod;
1042 uniform->f[1] = sampl->max_lod;
1043 uniform->f[2] = sampl->lod_bias;
1044
1045 /* Even without any errata, Midgard represents "no mipmapping" as
1046 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1047 * panfrost_create_sampler_state which also explains our choice of
1048 * epsilon value (again to keep behaviour consistent) */
1049
1050 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1051 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1052 }
1053
1054 static void
1055 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1056 struct sysval_uniform *uniform)
1057 {
1058 struct panfrost_context *ctx = batch->ctx;
1059
1060 uniform->u[0] = ctx->compute_grid->grid[0];
1061 uniform->u[1] = ctx->compute_grid->grid[1];
1062 uniform->u[2] = ctx->compute_grid->grid[2];
1063 }
1064
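/* Walk the shader's sysval table and write one 16-byte (vec4-sized) slot
 * per sysval at the start of the uniform buffer, dispatching on the
 * sysval type */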
1065 static void
1066 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1067 struct panfrost_shader_state *ss,
1068 enum pipe_shader_type st)
1069 {
1070 struct sysval_uniform *uniforms = (void *)buf;
1071
1072 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1073 int sysval = ss->sysval[i];
1074
1075 switch (PAN_SYSVAL_TYPE(sysval)) {
1076 case PAN_SYSVAL_VIEWPORT_SCALE:
1077 panfrost_upload_viewport_scale_sysval(batch,
1078 &uniforms[i]);
1079 break;
1080 case PAN_SYSVAL_VIEWPORT_OFFSET:
1081 panfrost_upload_viewport_offset_sysval(batch,
1082 &uniforms[i]);
1083 break;
1084 case PAN_SYSVAL_TEXTURE_SIZE:
1085 panfrost_upload_txs_sysval(batch, st,
1086 PAN_SYSVAL_ID(sysval),
1087 &uniforms[i]);
1088 break;
1089 case PAN_SYSVAL_SSBO:
1090 panfrost_upload_ssbo_sysval(batch, st,
1091 PAN_SYSVAL_ID(sysval),
1092 &uniforms[i]);
1093 break;
1094 case PAN_SYSVAL_NUM_WORK_GROUPS:
1095 panfrost_upload_num_work_groups_sysval(batch,
1096 &uniforms[i]);
1097 break;
1098 case PAN_SYSVAL_SAMPLER:
1099 panfrost_upload_sampler_sysval(batch, st,
1100 PAN_SYSVAL_ID(sysval),
1101 &uniforms[i]);
1102 break;
1103 default:
1104 assert(0);
1105 }
1106 }
1107 }
1108
1109 static const void *
1110 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1111 unsigned index)
1112 {
1113 struct pipe_constant_buffer *cb = &buf->cb[index];
1114 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1115
1116 if (rsrc)
1117 return rsrc->bo->cpu;
1118 else if (cb->user_buffer)
1119 return cb->user_buffer;
1120 else
1121 unreachable("No constant buffer");
1122 }
1123
1124 void
1125 panfrost_emit_const_buf(struct panfrost_batch *batch,
1126 enum pipe_shader_type stage,
1127 struct mali_vertex_tiler_postfix *postfix)
1128 {
1129 struct panfrost_context *ctx = batch->ctx;
1130 struct panfrost_shader_variants *all = ctx->shader[stage];
1131
1132 if (!all)
1133 return;
1134
1135 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1136
1137 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1138
1139 /* Uniforms are implicitly UBO #0 */
1140 bool has_uniforms = buf->enabled_mask & (1 << 0);
1141
1142 /* Allocate room for the sysval and the uniforms */
1143 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1144 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1145 size_t size = sys_size + uniform_size;
1146 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1147 size);
1148
1149 /* Upload sysvals requested by the shader */
1150 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1151
1152 /* Upload uniforms */
1153 if (has_uniforms && uniform_size) {
1154 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1155 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1156 }
1157
1158 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1159 * uploaded */
1160
1161 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1162 assert(ubo_count >= 1);
1163
1164 size_t sz = sizeof(uint64_t) * ubo_count;
1165 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1166 int uniform_count = ss->uniform_count;
1167
1168 /* Upload uniforms as a UBO */
1169 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1170
1171 /* The rest are honest-to-goodness UBOs */
1172
1173 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1174 size_t usz = buf->cb[ubo].buffer_size;
1175 bool enabled = buf->enabled_mask & (1 << ubo);
1176 bool empty = usz == 0;
1177
1178 if (!enabled || empty) {
1179 /* Stub out disabled UBOs to catch accesses */
1180 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1181 continue;
1182 }
1183
1184 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1185 buf, ubo);
1186
1187 unsigned bytes_per_field = 16;
1188 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1189 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1190 }
1191
1192 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1193 postfix->uniforms = transfer.gpu;
1194 postfix->uniform_buffers = ubufs;
1195
1196 buf->dirty_mask = 0;
1197 }
1198
1199 void
1200 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1201 const struct pipe_grid_info *info,
1202 struct midgard_payload_vertex_tiler *vtp)
1203 {
1204 struct panfrost_context *ctx = batch->ctx;
1205 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1206 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1207 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1208 128));
1209 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1210 info->grid[2] * 4;
1211 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1212 shared_size,
1213 1);
1214
1215 struct mali_shared_memory shared = {
1216 .shared_memory = bo->gpu,
1217 .shared_workgroup_count =
1218 util_logbase2_ceil(info->grid[0]) +
1219 util_logbase2_ceil(info->grid[1]) +
1220 util_logbase2_ceil(info->grid[2]),
1221 .shared_unk1 = 0x2,
1222 .shared_shift = util_logbase2(single_size) - 1
1223 };
1224
1225 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1226 sizeof(shared));
1227 }
1228
1229 static mali_ptr
1230 panfrost_get_tex_desc(struct panfrost_batch *batch,
1231 enum pipe_shader_type st,
1232 struct panfrost_sampler_view *view)
1233 {
1234 if (!view)
1235 return (mali_ptr) 0;
1236
1237 struct pipe_sampler_view *pview = &view->base;
1238 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1239
1240 /* Add the BO to the job so it's retained until the job is done. */
1241
1242 panfrost_batch_add_bo(batch, rsrc->bo,
1243 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1244 panfrost_bo_access_for_stage(st));
1245
1246 panfrost_batch_add_bo(batch, view->midgard_bo,
1247 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1248 panfrost_bo_access_for_stage(st));
1249
1250 return view->midgard_bo->gpu;
1251 }
1252
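/* Upload texture descriptors for a stage. On Bifrost the descriptors are
 * gathered into one contiguous transient allocation; on Midgard we upload
 * an array of pointers (trampolines) to the per-view descriptors instead */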
1253 void
1254 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1255 enum pipe_shader_type stage,
1256 struct mali_vertex_tiler_postfix *postfix)
1257 {
1258 struct panfrost_context *ctx = batch->ctx;
1259 struct panfrost_device *device = pan_device(ctx->base.screen);
1260
1261 if (!ctx->sampler_view_count[stage])
1262 return;
1263
1264 if (device->quirks & IS_BIFROST) {
1265 struct bifrost_texture_descriptor *descriptors;
1266
1267 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1268 ctx->sampler_view_count[stage]);
1269
1270 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1271 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1272 struct pipe_sampler_view *pview = &view->base;
1273 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1274
1275 panfrost_batch_add_bo(batch, rsrc->bo,
1276 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1277 panfrost_bo_access_for_stage(stage));
1278
1279 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1280 }
1281
1282 postfix->textures = panfrost_upload_transient(batch,
1283 descriptors,
1284 sizeof(struct bifrost_texture_descriptor) *
1285 ctx->sampler_view_count[stage]);
1286 } else {
1287 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1288
1289 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1290 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1291 ctx->sampler_views[stage][i]);
1292
1293 postfix->textures = panfrost_upload_transient(batch,
1294 trampolines,
1295 sizeof(uint64_t) *
1296 ctx->sampler_view_count[stage]);
1297 }
1298 }
1299
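/* Samplers follow the same pattern on both architectures: copy each bound
 * sampler's prebuilt hardware descriptor into a single transient array and
 * point the postfix at it */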
1300 void
1301 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1302 enum pipe_shader_type stage,
1303 struct mali_vertex_tiler_postfix *postfix)
1304 {
1305 struct panfrost_context *ctx = batch->ctx;
1306 struct panfrost_device *device = pan_device(ctx->base.screen);
1307
1308 if (!ctx->sampler_count[stage])
1309 return;
1310
1311 if (device->quirks & IS_BIFROST) {
1312 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1313 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1314 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1315 transfer_size);
1316 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1317
1318 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1319 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1320
1321 postfix->sampler_descriptor = transfer.gpu;
1322 } else {
1323 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1324 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1325 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1326 transfer_size);
1327 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1328
1329 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1330 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1331
1332 postfix->sampler_descriptor = transfer.gpu;
1333 }
1334 }
1335
1336 void
1337 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1338 struct mali_vertex_tiler_postfix *vertex_postfix)
1339 {
1340 struct panfrost_context *ctx = batch->ctx;
1341
1342 if (!ctx->vertex)
1343 return;
1344
1345 struct panfrost_vertex_state *so = ctx->vertex;
1346
1347 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1348 vertex_postfix->attribute_meta = panfrost_upload_transient(batch, so->hw,
1349 sizeof(*so->hw) *
1350 PAN_MAX_ATTRIBUTE);
1351 }
1352
1353 void
1354 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1355 struct mali_vertex_tiler_postfix *vertex_postfix)
1356 {
1357 struct panfrost_context *ctx = batch->ctx;
1358 struct panfrost_vertex_state *so = ctx->vertex;
1359
1360 /* Staged mali_attr, and index into them. i =/= k, depending on the
1361 * vertex buffer mask and instancing. Twice as much room is allocated,
1362 * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1363 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1364 unsigned k = 0;
1365
1366 for (unsigned i = 0; i < so->num_elements; ++i) {
1367 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1368 * means duplicating some vertex buffers (who cares? aside from
1369 * maybe some caching implications but I somehow doubt that
1370 * matters) */
1371
1372 struct pipe_vertex_element *elem = &so->pipe[i];
1373 unsigned vbi = elem->vertex_buffer_index;
1374
1375 /* The exception to 1:1 mapping is that we can have multiple
1376 * entries (NPOT divisors), so we fix up anyway */
1377
1378 so->hw[i].index = k;
1379
1380 if (!(ctx->vb_mask & (1 << vbi)))
1381 continue;
1382
1383 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1384 struct panfrost_resource *rsrc;
1385
1386 rsrc = pan_resource(buf->buffer.resource);
1387 if (!rsrc)
1388 continue;
1389
1390 /* Align to 64 bytes by masking off the lower bits. This
1391 * will be adjusted back when we fixup the src_offset in
1392 * mali_attr_meta */
1393
1394 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1395 mali_ptr addr = raw_addr & ~63;
1396 unsigned chopped_addr = raw_addr - addr;
1397
1398 /* Add a dependency of the batch on the vertex buffer */
1399 panfrost_batch_add_bo(batch, rsrc->bo,
1400 PAN_BO_ACCESS_SHARED |
1401 PAN_BO_ACCESS_READ |
1402 PAN_BO_ACCESS_VERTEX_TILER);
1403
1404 /* Set common fields */
1405 attrs[k].elements = addr;
1406 attrs[k].stride = buf->stride;
1407
1408 /* Since we advanced the base pointer, we shrink the buffer
1409 * size */
1410 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1411
1412 /* We need to add the extra size we masked off (for
1413 * correctness) so the data doesn't get clamped away */
1414 attrs[k].size += chopped_addr;
1415
1416 /* For non-instancing make sure we initialize */
1417 attrs[k].shift = attrs[k].extra_flags = 0;
1418
1419 /* Instancing uses a dramatically different code path than
1420 * linear, so dispatch for the actual emission now that the
1421 * common code is finished */
1422
1423 unsigned divisor = elem->instance_divisor;
1424
1425 if (divisor && ctx->instance_count == 1) {
1426 /* Silly corner case where there's a divisor(=1) but
1427 * there's no legitimate instancing. So we want *every*
1428 * attribute to be the same. So set stride to zero so
1429 * we don't go anywhere. */
1430
1431 attrs[k].size = attrs[k].stride + chopped_addr;
1432 attrs[k].stride = 0;
1433 attrs[k++].elements |= MALI_ATTR_LINEAR;
1434 } else if (ctx->instance_count <= 1) {
1435 /* Normal, non-instanced attributes */
1436 attrs[k++].elements |= MALI_ATTR_LINEAR;
1437 } else {
1438 unsigned instance_shift = vertex_postfix->instance_shift;
1439 unsigned instance_odd = vertex_postfix->instance_odd;
1440
1441 k += panfrost_vertex_instanced(ctx->padded_count,
1442 instance_shift,
1443 instance_odd,
1444 divisor, &attrs[k]);
1445 }
1446 }
1447
1448 /* Add special gl_VertexID/gl_InstanceID buffers */
1449
1450 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1451 so->hw[PAN_VERTEX_ID].index = k++;
1452 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1453 so->hw[PAN_INSTANCE_ID].index = k++;
1454
1455 /* Upload whatever we emitted and go */
1456
1457 vertex_postfix->attributes = panfrost_upload_transient(batch, attrs,
1458 k * sizeof(*attrs));
1459 }
1460
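/* Allocate transient storage for a linearly-addressed varying buffer with
 * the given per-vertex stride and count, fill out the attribute record,
 * and return the GPU address of the storage */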
1461 static mali_ptr
1462 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1463 unsigned stride, unsigned count)
1464 {
1465 /* Fill out the descriptor */
1466 slot->stride = stride;
1467 slot->size = stride * count;
1468 slot->shift = slot->extra_flags = 0;
1469
1470 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1471 slot->size);
1472
1473 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1474
1475 return transfer.gpu;
1476 }
1477
1478 static void
1479 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1480 unsigned stride, unsigned offset, unsigned count,
1481 struct pipe_stream_output_target *target)
1482 {
1483 /* Fill out the descriptor */
1484 slot->stride = stride * 4;
1485 slot->shift = slot->extra_flags = 0;
1486
1487 unsigned max_size = target->buffer_size;
1488 unsigned expected_size = slot->stride * count;
1489
1490 slot->size = MIN2(max_size, expected_size);
1491
1492 /* Grab the BO and bind it to the batch */
1493 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1494
1495 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1496 * the perspective of the TILER and FRAGMENT.
1497 */
1498 panfrost_batch_add_bo(batch, bo,
1499 PAN_BO_ACCESS_SHARED |
1500 PAN_BO_ACCESS_RW |
1501 PAN_BO_ACCESS_VERTEX_TILER |
1502 PAN_BO_ACCESS_FRAGMENT);
1503
1504 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1505 slot->elements = addr;
1506 }
1507
1508 /* Given a shader and buffer indices, link varying metadata together */
1509
1510 static bool
1511 is_special_varying(gl_varying_slot loc)
1512 {
1513 switch (loc) {
1514 case VARYING_SLOT_POS:
1515 case VARYING_SLOT_PSIZ:
1516 case VARYING_SLOT_PNTC:
1517 case VARYING_SLOT_FACE:
1518 return true;
1519 default:
1520 return false;
1521 }
1522 }
1523
1524 static void
1525 panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss,
1526 signed general, signed gl_Position,
1527 signed gl_PointSize, signed gl_PointCoord,
1528 signed gl_FrontFacing)
1529 {
1530 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
1531
1532 for (unsigned i = 0; i < ss->varying_count; ++i) {
1533 gl_varying_slot location = ss->varyings_loc[i];
1534 int index = -1;
1535
1536 switch (location) {
1537 case VARYING_SLOT_POS:
1538 index = gl_Position;
1539 break;
1540 case VARYING_SLOT_PSIZ:
1541 index = gl_PointSize;
1542 break;
1543 case VARYING_SLOT_PNTC:
1544 index = gl_PointCoord;
1545 break;
1546 case VARYING_SLOT_FACE:
1547 index = gl_FrontFacing;
1548 break;
1549 default:
1550 index = general;
1551 break;
1552 }
1553
1554 assert(index >= 0);
1555 out[i].index = index;
1556 }
1557 }
1558
1559 static bool
1560 has_point_coord(unsigned mask, gl_varying_slot loc)
1561 {
1562 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1563 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1564 else if (loc == VARYING_SLOT_PNTC)
1565 return (mask & (1 << 8));
1566 else
1567 return false;
1568 }
1569
1570 /* Helpers for manipulating stream out information so we can pack varyings
1571 * accordingly. Compute the src_offset for a given captured varying */
1572
1573 static struct pipe_stream_output *
1574 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1575 {
1576 for (unsigned i = 0; i < info->num_outputs; ++i) {
1577 if (info->output[i].register_index == loc)
1578 return &info->output[i];
1579 }
1580
1581 unreachable("Varying not captured");
1582 }
1583
1584 /* TODO: Integers */
1585 static enum mali_format
1586 pan_xfb_format(unsigned nr_components)
1587 {
1588 switch (nr_components) {
1589 case 1: return MALI_R32F;
1590 case 2: return MALI_RG32F;
1591 case 3: return MALI_RGB32F;
1592 case 4: return MALI_RGBA32F;
1593 default: unreachable("Invalid format");
1594 }
1595 }
1596
1597 void
1598 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1599 unsigned vertex_count,
1600 struct mali_vertex_tiler_postfix *vertex_postfix,
1601 struct mali_vertex_tiler_postfix *tiler_postfix,
1602 union midgard_primitive_size *primitive_size)
1603 {
1604 /* Load the shaders */
1605 struct panfrost_context *ctx = batch->ctx;
1606 struct panfrost_shader_state *vs, *fs;
1607 unsigned int num_gen_varyings = 0;
1608 size_t vs_size, fs_size;
1609
1610 /* Allocate the varying descriptor */
1611
1612 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1613 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1614 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
1615 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
1616
1617 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
1618 vs_size +
1619 fs_size);
1620
1621 struct pipe_stream_output_info *so = &vs->stream_output;
1622
1623 /* Check if this varying is linked by us. This is the case for
1624 * general-purpose, non-captured varyings. If it is, link it. If it's
1625 * not, use the provided stream out information to determine the
1626 * offset, since it was already linked for us. */
1627
1628 for (unsigned i = 0; i < vs->varying_count; i++) {
1629 gl_varying_slot loc = vs->varyings_loc[i];
1630
1631 bool special = is_special_varying(loc);
1632 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1633
1634 if (captured) {
1635 struct pipe_stream_output *o = pan_get_so(so, loc);
1636
1637 unsigned dst_offset = o->dst_offset * 4; /* dwords */
1638 vs->varyings[i].src_offset = dst_offset;
1639 } else if (!special) {
1640 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
1641 }
1642 }
1643
1644 /* Conversely, we need to set src_offset for the captured varyings.
1645 * Here, the layout is defined by the stream out info, not us */
1646
1647 /* Link up with fragment varyings */
1648 bool reads_point_coord = fs->reads_point_coord;
1649
1650 for (unsigned i = 0; i < fs->varying_count; i++) {
1651 gl_varying_slot loc = fs->varyings_loc[i];
1652 unsigned src_offset;
1653 signed vs_idx = -1;
1654
1655 /* Link up */
1656 for (unsigned j = 0; j < vs->varying_count; ++j) {
1657 if (vs->varyings_loc[j] == loc) {
1658 vs_idx = j;
1659 break;
1660 }
1661 }
1662
1663 /* Either assign or reuse */
1664 if (vs_idx >= 0)
1665 src_offset = vs->varyings[vs_idx].src_offset;
1666 else
1667 src_offset = 16 * (num_gen_varyings++);
1668
1669 fs->varyings[i].src_offset = src_offset;
1670
1671 if (has_point_coord(fs->point_sprite_mask, loc))
1672 reads_point_coord = true;
1673 }
1674
1675 memcpy(trans.cpu, vs->varyings, vs_size);
1676 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
1677
1678 union mali_attr varyings[PIPE_MAX_ATTRIBS] = {0};
1679
1680 /* Figure out how many streamout buffers could be bound */
1681 unsigned so_count = ctx->streamout.num_targets;
1682 for (unsigned i = 0; i < vs->varying_count; i++) {
1683 gl_varying_slot loc = vs->varyings_loc[i];
1684
1685 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1686 if (!captured) continue;
1687
1688 struct pipe_stream_output *o = pan_get_so(so, loc);
1689 so_count = MAX2(so_count, o->output_buffer + 1);
1690 }
1691
1692 signed idx = so_count;
1693 signed general = idx++;
1694 signed gl_Position = idx++;
1695 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
1696 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
1697 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
1698 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
1699
1700 /* Emit the stream out buffers */
1701
1702 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1703 ctx->vertex_count);
1704
1705 for (unsigned i = 0; i < so_count; ++i) {
1706 if (i < ctx->streamout.num_targets) {
1707 panfrost_emit_streamout(batch, &varyings[i],
1708 so->stride[i],
1709 ctx->streamout.offsets[i],
1710 out_count,
1711 ctx->streamout.targets[i]);
1712 } else {
1713 /* Emit a dummy buffer */
1714 panfrost_emit_varyings(batch, &varyings[i],
1715 so->stride[i] * 4,
1716 out_count);
1717
1718 /* Clear the attribute type */
1719 varyings[i].elements &= ~0xF;
1720 }
1721 }
1722
1723 panfrost_emit_varyings(batch, &varyings[general],
1724 num_gen_varyings * 16,
1725 vertex_count);
1726
1727 mali_ptr varyings_p;
1728
1729 /* fp32 vec4 gl_Position */
1730 varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position],
1731 sizeof(float) * 4, vertex_count);
1732 tiler_postfix->position_varying = varyings_p;
1733
1734
1735 if (panfrost_writes_point_size(ctx)) {
1736 varyings_p = panfrost_emit_varyings(batch,
1737 &varyings[gl_PointSize],
1738 2, vertex_count);
1739 primitive_size->pointer = varyings_p;
1740 }
1741
1742 if (reads_point_coord)
1743 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
1744
1745 if (fs->reads_face)
1746 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
1747
1748 if (fs->reads_frag_coord)
1749 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
1750
1751 struct panfrost_device *device = pan_device(ctx->base.screen);
1752 assert(!(device->quirks & IS_BIFROST) || !(reads_point_coord || fs->reads_face || fs->reads_frag_coord));
1753
1754 /* Let's go ahead and link varying meta to the buffer in question, now
1755 * that that information is available. VARYING_SLOT_POS is mapped to
1756 * gl_FragCoord for fragment shaders but gl_Position for vertex shaders
1757 * */
1758
1759 panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position,
1760 gl_PointSize, gl_PointCoord,
1761 gl_FrontFacing);
1762
1763 panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general,
1764 gl_FragCoord, gl_PointSize,
1765 gl_PointCoord, gl_FrontFacing);
1766
1767 /* Replace streamout */
1768
1769 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
1770 struct mali_attr_meta *ofs = ovs + vs->varying_count;
1771
1772 for (unsigned i = 0; i < vs->varying_count; i++) {
1773 gl_varying_slot loc = vs->varyings_loc[i];
1774
1775 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1776 if (!captured)
1777 continue;
1778
1779 struct pipe_stream_output *o = pan_get_so(so, loc);
1780 ovs[i].index = o->output_buffer;
1781
1782 /* Set the type appropriately. TODO: Integer varyings XXX */
1783 assert(o->stream == 0);
1784 ovs[i].format = pan_xfb_format(o->num_components);
1785 ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components);
1786
1787 /* Link to the fragment */
1788 signed fs_idx = -1;
1789
1790 /* Link up */
1791 for (unsigned j = 0; j < fs->varying_count; ++j) {
1792 if (fs->varyings_loc[j] == loc) {
1793 fs_idx = j;
1794 break;
1795 }
1796 }
1797
1798 if (fs_idx >= 0) {
1799 ofs[fs_idx].index = ovs[i].index;
1800 ofs[fs_idx].format = ovs[i].format;
1801 ofs[fs_idx].swizzle = ovs[i].swizzle;
1802 }
1803 }
1804
1805 /* Replace point sprite */
1806 for (unsigned i = 0; i < fs->varying_count; i++) {
1807 /* If we have a point sprite replacement, handle that here. We
1808 * have to translate location first. TODO: Flip y in shader.
1809 * We're already keying ... just time crunch .. */
1810
1811 if (has_point_coord(fs->point_sprite_mask,
1812 fs->varyings_loc[i])) {
1813 ofs[i].index = gl_PointCoord;
1814
1815 /* Swizzle out the z/w to 0/1 */
1816 ofs[i].format = MALI_RG16F;
1817 ofs[i].swizzle = panfrost_get_default_swizzle(2);
1818 }
1819 }
1820
1821 /* Fix up unaligned addresses */
1822 for (unsigned i = 0; i < so_count; ++i) {
1823 if (varyings[i].elements < MALI_RECORD_SPECIAL)
1824 continue;
1825
1826 unsigned align = (varyings[i].elements & 63);
1827
1828 /* While we're at it, the SO buffers are linear */
1829
1830 if (!align) {
1831 varyings[i].elements |= MALI_ATTR_LINEAR;
1832 continue;
1833 }
1834
1835 /* We need to adjust alignment */
1836 varyings[i].elements &= ~63;
1837 varyings[i].elements |= MALI_ATTR_LINEAR;
1838 varyings[i].size += align;
1839
1840 for (unsigned v = 0; v < vs->varying_count; ++v) {
1841 if (ovs[v].index != i)
1842 continue;
1843
1844 ovs[v].src_offset = vs->varyings[v].src_offset + align;
1845 }
1846
1847 for (unsigned f = 0; f < fs->varying_count; ++f) {
1848 if (ofs[f].index != i)
1849 continue;
1850
1851 ofs[f].src_offset = fs->varyings[f].src_offset + align;
1852 }
1853 }
1854
1855 varyings_p = panfrost_upload_transient(batch, varyings,
1856 idx * sizeof(*varyings));
1857 vertex_postfix->varyings = varyings_p;
1858 tiler_postfix->varyings = varyings_p;
1859
1860 vertex_postfix->varying_meta = trans.gpu;
1861 tiler_postfix->varying_meta = trans.gpu + vs_size;
1862 }
1863
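/* Wrap the prefix/postfix pairs in the architecture-specific payload
 * layout and submit them: a vertex job, plus a tiler job depending on it
 * (skipped entirely under rasterizer discard; wallpapering instead injects
 * the pair in reverse order with predicted indices) */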
1864 void
1865 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1866 struct mali_vertex_tiler_prefix *vertex_prefix,
1867 struct mali_vertex_tiler_postfix *vertex_postfix,
1868 struct mali_vertex_tiler_prefix *tiler_prefix,
1869 struct mali_vertex_tiler_postfix *tiler_postfix,
1870 union midgard_primitive_size *primitive_size)
1871 {
1872 struct panfrost_context *ctx = batch->ctx;
1873 struct panfrost_device *device = pan_device(ctx->base.screen);
1874 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1875 struct bifrost_payload_vertex bifrost_vertex = {0,};
1876 struct bifrost_payload_tiler bifrost_tiler = {0,};
1877 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1878 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1879 void *vp, *tp;
1880 size_t vp_size, tp_size;
1881
1882 if (device->quirks & IS_BIFROST) {
1883 bifrost_vertex.prefix = *vertex_prefix;
1884 bifrost_vertex.postfix = *vertex_postfix;
1885 vp = &bifrost_vertex;
1886 vp_size = sizeof(bifrost_vertex);
1887
1888 bifrost_tiler.prefix = *tiler_prefix;
1889 bifrost_tiler.tiler.primitive_size = *primitive_size;
1890 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1891 bifrost_tiler.postfix = *tiler_postfix;
1892 tp = &bifrost_tiler;
1893 tp_size = sizeof(bifrost_tiler);
1894 } else {
1895 midgard_vertex.prefix = *vertex_prefix;
1896 midgard_vertex.postfix = *vertex_postfix;
1897 vp = &midgard_vertex;
1898 vp_size = sizeof(midgard_vertex);
1899
1900 midgard_tiler.prefix = *tiler_prefix;
1901 midgard_tiler.postfix = *tiler_postfix;
1902 midgard_tiler.primitive_size = *primitive_size;
1903 tp = &midgard_tiler;
1904 tp_size = sizeof(midgard_tiler);
1905 }
1906
1907 if (wallpapering) {
1908 /* Inject in reverse order, with "predicted" job indices.
1909 * THIS IS A HACK XXX */
1910 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1911 batch->job_index + 2, tp, tp_size, true);
1912 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1913 vp, vp_size, true);
1914 return;
1915 }
1916
1917 /* If rasterizer discard is enabled, only submit the vertex job */
1918
1919 bool rasterizer_discard = ctx->rasterizer &&
1920 ctx->rasterizer->base.rasterizer_discard;
1921
1922 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1923 vp, vp_size, false);
1924
1925 if (rasterizer_discard)
1926 return;
1927
1928 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, tp_size,
1929 false);
1930 }
1931
1932 /* TODO: stop hardcoding this */
1933 mali_ptr
1934 panfrost_emit_sample_locations(struct panfrost_batch *batch)
1935 {
1936 uint16_t locations[] = {
1937 128, 128,
1938 0, 256,
1939 0, 256,
1940 0, 256,
1941 0, 256,
1942 0, 256,
1943 0, 256,
1944 0, 256,
1945 0, 256,
1946 0, 256,
1947 0, 256,
1948 0, 256,
1949 0, 256,
1950 0, 256,
1951 0, 256,
1952 0, 256,
1953 0, 256,
1954 0, 256,
1955 0, 256,
1956 0, 256,
1957 0, 256,
1958 0, 256,
1959 0, 256,
1960 0, 256,
1961 0, 256,
1962 0, 256,
1963 0, 256,
1964 0, 256,
1965 0, 256,
1966 0, 256,
1967 0, 256,
1968 0, 256,
1969 128, 128,
1970 0, 0,
1971 0, 0,
1972 0, 0,
1973 0, 0,
1974 0, 0,
1975 0, 0,
1976 0, 0,
1977 0, 0,
1978 0, 0,
1979 0, 0,
1980 0, 0,
1981 0, 0,
1982 0, 0,
1983 0, 0,
1984 0, 0,
1985 };
1986
1987 return panfrost_upload_transient(batch, locations, 96 * sizeof(uint16_t));
1988 }