panfrost: Fix norm coords on bifrost sampler
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_allocate.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_upload_transient(batch, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77 /* If we haven't, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_allocate_transient(batch, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query)
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 else
140 postfix->occlusion_counter = 0;
141 }
142
143 void
144 panfrost_vt_init(struct panfrost_context *ctx,
145 enum pipe_shader_type stage,
146 struct mali_vertex_tiler_prefix *prefix,
147 struct mali_vertex_tiler_postfix *postfix)
148 {
149 struct panfrost_device *device = pan_device(ctx->base.screen);
150
151 if (!ctx->shader[stage])
152 return;
153
154 memset(prefix, 0, sizeof(*prefix));
155 memset(postfix, 0, sizeof(*postfix));
156
157 if (device->quirks & IS_BIFROST) {
158 postfix->gl_enables = 0x2;
159 panfrost_vt_emit_shared_memory(ctx, postfix);
160 } else {
161 postfix->gl_enables = 0x6;
162 panfrost_vt_attach_framebuffer(ctx, postfix);
163 }
164
165 if (stage == PIPE_SHADER_FRAGMENT) {
166 panfrost_vt_update_occlusion_query(ctx, postfix);
167 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
168 }
169 }
170
171 static unsigned
172 panfrost_translate_index_size(unsigned size)
173 {
174 switch (size) {
175 case 1:
176 return MALI_DRAW_INDEXED_UINT8;
177
178 case 2:
179 return MALI_DRAW_INDEXED_UINT16;
180
181 case 4:
182 return MALI_DRAW_INDEXED_UINT32;
183
184 default:
185 unreachable("Invalid index size");
186 }
187 }
188
189 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
190 * good for the duration of the draw (transient), could last longer. Also get
191 * the bounds on the index buffer for the range accessed by the draw. We do
192 * these operations together because there are natural optimizations which
193 * require them to be together. */
194
195 static mali_ptr
196 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
197 const struct pipe_draw_info *info,
198 unsigned *min_index, unsigned *max_index)
199 {
200 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
201 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
202 off_t offset = info->start * info->index_size;
203 bool needs_indices = true;
204 mali_ptr out = 0;
205
206 if (info->max_index != ~0u) {
207 *min_index = info->min_index;
208 *max_index = info->max_index;
209 needs_indices = false;
210 }
211
212 if (!info->has_user_indices) {
213 /* Only resources can be directly mapped */
214 panfrost_batch_add_bo(batch, rsrc->bo,
215 PAN_BO_ACCESS_SHARED |
216 PAN_BO_ACCESS_READ |
217 PAN_BO_ACCESS_VERTEX_TILER);
218 out = rsrc->bo->gpu + offset;
219
220 /* Check the cache */
221 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
222 info->start,
223 info->count,
224 min_index,
225 max_index);
226 } else {
227 /* Otherwise, we need to upload to transient memory */
228 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
229 out = panfrost_upload_transient(batch, ibuf8 + offset,
230 info->count *
231 info->index_size);
232 }
233
234 if (needs_indices) {
235 /* Fallback */
236 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
237
238 if (!info->has_user_indices)
239 panfrost_minmax_cache_add(rsrc->index_cache,
240 info->start, info->count,
241 *min_index, *max_index);
242 }
243
244 return out;
245 }
246
247 void
248 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
249 const struct pipe_draw_info *info,
250 enum mali_draw_mode draw_mode,
251 struct mali_vertex_tiler_postfix *vertex_postfix,
252 struct mali_vertex_tiler_prefix *tiler_prefix,
253 struct mali_vertex_tiler_postfix *tiler_postfix,
254 unsigned *vertex_count,
255 unsigned *padded_count)
256 {
257 tiler_prefix->draw_mode = draw_mode;
258
259 unsigned draw_flags = 0;
260
261 if (panfrost_writes_point_size(ctx))
262 draw_flags |= MALI_DRAW_VARYING_SIZE;
263
264 if (info->primitive_restart)
265 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
266
267 /* These don't make much sense */
268
269 draw_flags |= 0x3000;
270
271 if (info->index_size) {
272 unsigned min_index = 0, max_index = 0;
273
274 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
275 info,
276 &min_index,
277 &max_index);
278
279 /* Use the corresponding values */
280 *vertex_count = max_index - min_index + 1;
281 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
282 tiler_prefix->offset_bias_correction = -min_index;
283 tiler_prefix->index_count = MALI_POSITIVE(info->count);
284 draw_flags |= panfrost_translate_index_size(info->index_size);
285 } else {
286 tiler_prefix->indices = 0;
287 *vertex_count = ctx->vertex_count;
288 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
289 tiler_prefix->offset_bias_correction = 0;
290 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
291 }
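/* Rough illustration of the indexed path above: indices {5, 6, 9} give
 * min_index = 5 and max_index = 9, so vertex_count = 9 - 5 + 1 = 5,
 * offset_start = 5 + index_bias, and offset_bias_correction = -5. */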
292
293 tiler_prefix->unknown_draw = draw_flags;
294
295 /* Encode the padded vertex count */
296
297 if (info->instance_count > 1) {
298 *padded_count = panfrost_padded_vertex_count(*vertex_count);
299
300 unsigned shift = __builtin_ctz(ctx->padded_count);
301 unsigned k = ctx->padded_count >> (shift + 1);
302
303 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
304 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
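/* Illustration: a padded count of 96 = 3 << 5 yields shift = 5 and
 * k = 1; the hardware presumably recombines these as (2k + 1) << shift. */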
305 } else {
306 *padded_count = *vertex_count;
307
308 /* Reset instancing state */
309 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
310 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
311 }
312 }
313
314 static void
315 panfrost_shader_meta_init(struct panfrost_context *ctx,
316 enum pipe_shader_type st,
317 struct mali_shader_meta *meta)
318 {
319 const struct panfrost_device *dev = pan_device(ctx->base.screen);
320 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
321
322 memset(meta, 0, sizeof(*meta));
323 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
324 meta->attribute_count = ss->attribute_count;
325 meta->varying_count = ss->varying_count;
326 meta->texture_count = ctx->sampler_view_count[st];
327 meta->sampler_count = ctx->sampler_count[st];
328
329 if (dev->quirks & IS_BIFROST) {
330 meta->bifrost1.unk1 = 0x800200;
331 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
332 meta->bifrost2.preload_regs = 0xC0;
333 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
334 ss->uniform_cutoff);
335 } else {
336 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
337 ss->uniform_cutoff);
338 meta->midgard1.work_count = ss->work_reg_count;
339 meta->midgard1.flags_hi = 0x8; /* XXX */
340 meta->midgard1.flags_lo = 0x220;
341 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
342 }
343
344 }
345
346 static unsigned
347 panfrost_translate_compare_func(enum pipe_compare_func in)
348 {
349 switch (in) {
350 case PIPE_FUNC_NEVER:
351 return MALI_FUNC_NEVER;
352
353 case PIPE_FUNC_LESS:
354 return MALI_FUNC_LESS;
355
356 case PIPE_FUNC_EQUAL:
357 return MALI_FUNC_EQUAL;
358
359 case PIPE_FUNC_LEQUAL:
360 return MALI_FUNC_LEQUAL;
361
362 case PIPE_FUNC_GREATER:
363 return MALI_FUNC_GREATER;
364
365 case PIPE_FUNC_NOTEQUAL:
366 return MALI_FUNC_NOTEQUAL;
367
368 case PIPE_FUNC_GEQUAL:
369 return MALI_FUNC_GEQUAL;
370
371 case PIPE_FUNC_ALWAYS:
372 return MALI_FUNC_ALWAYS;
373
374 default:
375 unreachable("Invalid func");
376 }
377 }
378
379 static unsigned
380 panfrost_translate_stencil_op(enum pipe_stencil_op in)
381 {
382 switch (in) {
383 case PIPE_STENCIL_OP_KEEP:
384 return MALI_STENCIL_KEEP;
385
386 case PIPE_STENCIL_OP_ZERO:
387 return MALI_STENCIL_ZERO;
388
389 case PIPE_STENCIL_OP_REPLACE:
390 return MALI_STENCIL_REPLACE;
391
392 case PIPE_STENCIL_OP_INCR:
393 return MALI_STENCIL_INCR;
394
395 case PIPE_STENCIL_OP_DECR:
396 return MALI_STENCIL_DECR;
397
398 case PIPE_STENCIL_OP_INCR_WRAP:
399 return MALI_STENCIL_INCR_WRAP;
400
401 case PIPE_STENCIL_OP_DECR_WRAP:
402 return MALI_STENCIL_DECR_WRAP;
403
404 case PIPE_STENCIL_OP_INVERT:
405 return MALI_STENCIL_INVERT;
406
407 default:
408 unreachable("Invalid stencil op");
409 }
410 }
411
412 static unsigned
413 translate_tex_wrap(enum pipe_tex_wrap w)
414 {
415 switch (w) {
416 case PIPE_TEX_WRAP_REPEAT:
417 return MALI_WRAP_REPEAT;
418
419 case PIPE_TEX_WRAP_CLAMP:
420 return MALI_WRAP_CLAMP;
421
422 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
423 return MALI_WRAP_CLAMP_TO_EDGE;
424
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
426 return MALI_WRAP_CLAMP_TO_BORDER;
427
428 case PIPE_TEX_WRAP_MIRROR_REPEAT:
429 return MALI_WRAP_MIRRORED_REPEAT;
430
431 case PIPE_TEX_WRAP_MIRROR_CLAMP:
432 return MALI_WRAP_MIRRORED_CLAMP;
433
434 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
435 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
438 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
439
440 default:
441 unreachable("Invalid wrap");
442 }
443 }
444
445 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
446 struct mali_sampler_descriptor *hw)
447 {
448 unsigned func = panfrost_translate_compare_func(cso->compare_func);
449 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
450 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
451 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
452 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
453 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
454 unsigned mip_filter = mip_linear ?
455 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
456 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
457
458 *hw = (struct mali_sampler_descriptor) {
459 .filter_mode = min_filter | mag_filter | mip_filter |
460 normalized,
461 .wrap_s = translate_tex_wrap(cso->wrap_s),
462 .wrap_t = translate_tex_wrap(cso->wrap_t),
463 .wrap_r = translate_tex_wrap(cso->wrap_r),
464 .compare_func = panfrost_flip_compare_func(func),
465 .border_color = {
466 cso->border_color.f[0],
467 cso->border_color.f[1],
468 cso->border_color.f[2],
469 cso->border_color.f[3]
470 },
471 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
472 .max_lod = FIXED_16(cso->max_lod, false),
473 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
474 .seamless_cube_map = cso->seamless_cube_map,
475 };
476
477 /* If necessary, we disable mipmapping in the sampler descriptor by
478 * clamping the LOD as tight as possible (from 0 to epsilon,
479 * essentially -- remember these are fixed point numbers, so
480 * epsilon=1/256) */
481
482 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
483 hw->max_lod = hw->min_lod + 1;
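/* A minimal illustration: with min_lod = 0, max_lod becomes 1 in this
 * fixed-point encoding, i.e. 1/256 of a mip level, so effectively only
 * the base level is ever sampled. */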
484 }
485
486 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
487 struct bifrost_sampler_descriptor *hw)
488 {
489 *hw = (struct bifrost_sampler_descriptor) {
490 .unk1 = 0x1,
491 .wrap_s = translate_tex_wrap(cso->wrap_s),
492 .wrap_t = translate_tex_wrap(cso->wrap_t),
493 .wrap_r = translate_tex_wrap(cso->wrap_r),
494 .unk8 = 0x8,
495 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
496 .norm_coords = cso->normalized_coords,
497 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
498 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
499 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
500 .max_lod = FIXED_16(cso->max_lod, false),
501 };
502
503 /* If necessary, we disable mipmapping in the sampler descriptor by
504 * clamping the LOD as tight as possible (from 0 to epsilon,
505 * essentially -- remember these are fixed point numbers, so
506 * epsilon=1/256) */
507
508 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
509 hw->max_lod = hw->min_lod + 1;
510 }
511
512 static void
513 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
514 struct mali_stencil_test *out)
515 {
516 out->ref = 0; /* Gallium gets it from elsewhere */
517
518 out->mask = in->valuemask;
519 out->func = panfrost_translate_compare_func(in->func);
520 out->sfail = panfrost_translate_stencil_op(in->fail_op);
521 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
522 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
523 }
524
525 static void
526 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
527 struct mali_shader_meta *fragmeta)
528 {
529 if (!ctx->rasterizer) {
530 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
531 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
532 fragmeta->depth_units = 0.0f;
533 fragmeta->depth_factor = 0.0f;
534 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
535 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
536 return;
537 }
538
539 bool msaa = ctx->rasterizer->base.multisample;
540
541 /* TODO: Sample size */
542 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
543 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
544 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
545 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
546
547 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
548
549 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
550 ctx->rasterizer->base.offset_tri);
551 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
552 ctx->rasterizer->base.offset_tri);
553 }
554
555 static void
556 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
557 struct mali_shader_meta *fragmeta)
558 {
559 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
560 int zfunc = PIPE_FUNC_ALWAYS;
561
562 if (!zsa) {
563 struct pipe_stencil_state default_stencil = {
564 .enabled = 0,
565 .func = PIPE_FUNC_ALWAYS,
566 .fail_op = MALI_STENCIL_KEEP,
567 .zfail_op = MALI_STENCIL_KEEP,
568 .zpass_op = MALI_STENCIL_KEEP,
569 .writemask = 0xFF,
570 .valuemask = 0xFF
571 };
572
573 panfrost_make_stencil_state(&default_stencil,
574 &fragmeta->stencil_front);
575 fragmeta->stencil_mask_front = default_stencil.writemask;
576 fragmeta->stencil_back = fragmeta->stencil_front;
577 fragmeta->stencil_mask_back = default_stencil.writemask;
578 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
579 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
580 } else {
581 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
582 zsa->stencil[0].enabled);
583 panfrost_make_stencil_state(&zsa->stencil[0],
584 &fragmeta->stencil_front);
585 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
586 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
587
588 /* If back-stencil is not enabled, use the front values */
589
590 if (zsa->stencil[1].enabled) {
591 panfrost_make_stencil_state(&zsa->stencil[1],
592 &fragmeta->stencil_back);
593 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
594 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
595 } else {
596 fragmeta->stencil_back = fragmeta->stencil_front;
597 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
598 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
599 }
600
601 if (zsa->depth.enabled)
602 zfunc = zsa->depth.func;
603
604 /* Depth state (TODO: Refactor) */
605
606 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
607 zsa->depth.writemask);
608 }
609
610 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
611 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
612 }
613
614 static void
615 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
616 struct mali_shader_meta *fragmeta,
617 void *rts)
618 {
619 const struct panfrost_device *dev = pan_device(ctx->base.screen);
620
621 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
622 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
623 !ctx->blend->base.dither);
624
625 /* Get blending setup */
626 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
627
628 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
629 unsigned shader_offset = 0;
630 struct panfrost_bo *shader_bo = NULL;
631
632 for (unsigned c = 0; c < rt_count; ++c)
633 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
634 &shader_offset);
635
636 /* If there is a blend shader, work registers are shared. XXX: opt */
637
638 for (unsigned c = 0; c < rt_count; ++c) {
639 if (blend[c].is_shader)
640 fragmeta->midgard1.work_count = 16;
641 }
642
643 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
644 * copied to the blend_meta appended (by convention), but this is the
645 * field actually read by the hardware. (Or maybe both are read...?).
646 * Specify the last RTi with a blend shader. */
647
648 fragmeta->blend.shader = 0;
649
650 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
651 if (!blend[rt].is_shader)
652 continue;
653
654 fragmeta->blend.shader = blend[rt].shader.gpu |
655 blend[rt].shader.first_tag;
656 break;
657 }
658
659 if (dev->quirks & MIDGARD_SFBD) {
660 /* On platforms with only a single render target (SFBD), the blend
661 * information is inside the shader meta itself. We additionally
662 * need to signal CAN_DISCARD for nontrivial blend modes (so
663 * we're able to read back the destination buffer) */
664
665 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
666 blend[0].is_shader);
667
668 if (!blend[0].is_shader) {
669 fragmeta->blend.equation = *blend[0].equation.equation;
670 fragmeta->blend.constant = blend[0].equation.constant;
671 }
672
673 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
674 !blend[0].no_blending);
675 return;
676 }
677
678 /* Additional blend descriptor tacked on for jobs using MFBD */
679
680 for (unsigned i = 0; i < rt_count; ++i) {
681 if (dev->quirks & IS_BIFROST) {
682 struct bifrost_blend_rt *brts = rts;
683 struct panfrost_shader_state *fs;
684 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
685
686 brts[i].flags = 0x200;
687 if (blend[i].is_shader) {
688 /* The blend shader's address needs to be at
689 * the same top 32 bits as the fragment shader.
690 * TODO: Ensure that's always the case.
691 */
692 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
693 (fs->bo->gpu & (0xffffffffull << 32)));
694 brts[i].shader = blend[i].shader.gpu;
695 brts[i].unk2 = 0x0;
696 } else {
697 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
698 const struct util_format_description *format_desc;
699 format_desc = util_format_description(format);
700
701 brts[i].equation = *blend[i].equation.equation;
702
703 /* TODO: this is a bit more complicated */
704 brts[i].constant = blend[i].equation.constant;
705
706 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
707 brts[i].unk2 = 0x19;
708
709 brts[i].shader_type = fs->blend_types[i];
710 }
711 } else {
712 struct midgard_blend_rt *mrts = rts;
713
714 mrts[i].flags = 0x200;
715
716 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
717 (ctx->pipe_framebuffer.cbufs[i]) &&
718 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
719
720 SET_BIT(mrts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
721 SET_BIT(mrts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
722 SET_BIT(mrts[i].flags, MALI_BLEND_SRGB, is_srgb);
723 SET_BIT(mrts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
724
725 if (blend[i].is_shader) {
726 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
727 } else {
728 mrts[i].blend.equation = *blend[i].equation.equation;
729 mrts[i].blend.constant = blend[i].equation.constant;
730 }
731 }
732 }
733 }
734
735 static void
736 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
737 struct mali_shader_meta *fragmeta,
738 void *rts)
739 {
740 const struct panfrost_device *dev = pan_device(ctx->base.screen);
741 struct panfrost_shader_state *fs;
742
743 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
744
745 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
746 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
747 fragmeta->unknown2_4 = 0x4e0;
748
749 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
750 * is required (independent of 32-bit/64-bit descriptors), or why it's
751 * not used on later GPU revisions. Otherwise, all shader jobs fault on
752 * these earlier chips (perhaps this is a chicken bit of some kind).
753 * More investigation is needed. */
754
755 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
756
757 /* Depending on whether it's legal in the given shader, we try to
758 * enable early-z testing (or forward-pixel kill?) */
759
760 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
761 !fs->can_discard && !fs->writes_depth);
762
763 /* Add the writes Z/S flags if needed. */
764 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
765 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
766
767 /* Any time texturing is used, derivatives are implicitly calculated,
768 * so we need to enable helper invocations */
769
770 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
771 fs->helper_invocations);
772
773 /* CAN_DISCARD should be set if the fragment shader possibly contains a
774 * 'discard' instruction. This is likely related to forward-pixel kill
775 * optimizations, as per "Mali Performance 3: Is
776 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
777
778 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
779 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
780
781 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
782 panfrost_frag_meta_zsa_update(ctx, fragmeta);
783 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
784 }
785
786 void
787 panfrost_emit_shader_meta(struct panfrost_batch *batch,
788 enum pipe_shader_type st,
789 struct mali_vertex_tiler_postfix *postfix)
790 {
791 struct panfrost_context *ctx = batch->ctx;
792 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
793
794 if (!ss) {
795 postfix->shader = 0;
796 return;
797 }
798
799 struct mali_shader_meta meta;
800
801 panfrost_shader_meta_init(ctx, st, &meta);
802
803 /* Add the shader BO to the batch. */
804 panfrost_batch_add_bo(batch, ss->bo,
805 PAN_BO_ACCESS_PRIVATE |
806 PAN_BO_ACCESS_READ |
807 panfrost_bo_access_for_stage(st));
808
809 mali_ptr shader_ptr;
810
811 if (st == PIPE_SHADER_FRAGMENT) {
812 struct panfrost_device *dev = pan_device(ctx->base.screen);
813 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
814 size_t desc_size = sizeof(meta);
815 void *rts = NULL;
816 struct panfrost_transfer xfer;
817 unsigned rt_size;
818
819 if (dev->quirks & MIDGARD_SFBD)
820 rt_size = 0;
821 else if (dev->quirks & IS_BIFROST)
822 rt_size = sizeof(struct bifrost_blend_rt);
823 else
824 rt_size = sizeof(struct midgard_blend_rt);
825
826 desc_size += rt_size * rt_count;
827
828 if (rt_size)
829 rts = rzalloc_size(ctx, rt_size * rt_count);
830
831 panfrost_frag_shader_meta_init(ctx, &meta, rts);
832
833 xfer = panfrost_allocate_transient(batch, desc_size);
834
835 memcpy(xfer.cpu, &meta, sizeof(meta));
836 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
837
838 if (rt_size)
839 ralloc_free(rts);
840
841 shader_ptr = xfer.gpu;
842 } else {
843 shader_ptr = panfrost_upload_transient(batch, &meta,
844 sizeof(meta));
845 }
846
847 postfix->shader = shader_ptr;
848 }
849
850 static void
851 panfrost_mali_viewport_init(struct panfrost_context *ctx,
852 struct mali_viewport *mvp)
853 {
854 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
855
856 /* Clip bounds are encoded as floats. The viewport itself is encoded as
857 * (somewhat) asymmetric ints. */
858
859 const struct pipe_scissor_state *ss = &ctx->scissor;
860
861 memset(mvp, 0, sizeof(*mvp));
862
863 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
864 * each direction. Clipping to the viewport in theory should work, but
865 * in practice causes issues when we're not explicitly trying to
866 * scissor */
867
868 *mvp = (struct mali_viewport) {
869 .clip_minx = -INFINITY,
870 .clip_miny = -INFINITY,
871 .clip_maxx = INFINITY,
872 .clip_maxy = INFINITY,
873 };
874
875 /* Always scissor to the viewport by default. */
876 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
877 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
878
879 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
880 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
881
882 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
883 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
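/* For example, a viewport covering 0..800 x 0..600 has
 * translate = (400, 300) and scale = (400, 300), giving
 * vp_minx/vp_maxx = 0/800 and vp_miny/vp_maxy = 0/600. */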
884
885 /* Apply the scissor test */
886
887 unsigned minx, miny, maxx, maxy;
888
889 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
890 minx = MAX2(ss->minx, vp_minx);
891 miny = MAX2(ss->miny, vp_miny);
892 maxx = MIN2(ss->maxx, vp_maxx);
893 maxy = MIN2(ss->maxy, vp_maxy);
894 } else {
895 minx = vp_minx;
896 miny = vp_miny;
897 maxx = vp_maxx;
898 maxy = vp_maxy;
899 }
900
901 /* Hardware needs the min/max to be strictly ordered, so flip if we
902 * need to. The viewport transformation in the vertex shader will
903 * handle the negatives if we don't */
904
905 if (miny > maxy) {
906 unsigned temp = miny;
907 miny = maxy;
908 maxy = temp;
909 }
910
911 if (minx > maxx) {
912 unsigned temp = minx;
913 minx = maxx;
914 maxx = temp;
915 }
916
917 if (minz > maxz) {
918 float temp = minz;
919 minz = maxz;
920 maxz = temp;
921 }
922
923 /* Clamp to the framebuffer size as a last check */
924
925 minx = MIN2(ctx->pipe_framebuffer.width, minx);
926 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
927
928 miny = MIN2(ctx->pipe_framebuffer.height, miny);
929 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
930
931 /* Upload */
932
933 mvp->viewport0[0] = minx;
934 mvp->viewport1[0] = MALI_POSITIVE(maxx);
935
936 mvp->viewport0[1] = miny;
937 mvp->viewport1[1] = MALI_POSITIVE(maxy);
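/* viewport1 holds the inclusive maximum (MALI_POSITIVE stores n - 1),
 * hence the + 1 when the batch scissor is unioned in
 * panfrost_emit_viewport below. */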
938
939 mvp->clip_minz = minz;
940 mvp->clip_maxz = maxz;
941 }
942
943 void
944 panfrost_emit_viewport(struct panfrost_batch *batch,
945 struct mali_vertex_tiler_postfix *tiler_postfix)
946 {
947 struct panfrost_context *ctx = batch->ctx;
948 struct mali_viewport mvp;
949
950 panfrost_mali_viewport_init(batch->ctx, &mvp);
951
952 /* Update the job, unless we're doing wallpapering (whose lack of
953 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
954 * just... be faster :) */
955
956 if (!ctx->wallpaper_batch)
957 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
958 mvp.viewport0[1],
959 mvp.viewport1[0] + 1,
960 mvp.viewport1[1] + 1);
961
962 tiler_postfix->viewport = panfrost_upload_transient(batch, &mvp,
963 sizeof(mvp));
964 }
965
966 static mali_ptr
967 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
968 enum pipe_shader_type st,
969 struct panfrost_constant_buffer *buf,
970 unsigned index)
971 {
972 struct pipe_constant_buffer *cb = &buf->cb[index];
973 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
974
975 if (rsrc) {
976 panfrost_batch_add_bo(batch, rsrc->bo,
977 PAN_BO_ACCESS_SHARED |
978 PAN_BO_ACCESS_READ |
979 panfrost_bo_access_for_stage(st));
980
981 /* Alignment guaranteed by
982 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
983 return rsrc->bo->gpu + cb->buffer_offset;
984 } else if (cb->user_buffer) {
985 return panfrost_upload_transient(batch,
986 cb->user_buffer +
987 cb->buffer_offset,
988 cb->buffer_size);
989 } else {
990 unreachable("No constant buffer");
991 }
992 }
993
994 struct sysval_uniform {
995 union {
996 float f[4];
997 int32_t i[4];
998 uint32_t u[4];
999 uint64_t du[2];
1000 };
1001 };
1002
1003 static void
1004 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1005 struct sysval_uniform *uniform)
1006 {
1007 struct panfrost_context *ctx = batch->ctx;
1008 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1009
1010 uniform->f[0] = vp->scale[0];
1011 uniform->f[1] = vp->scale[1];
1012 uniform->f[2] = vp->scale[2];
1013 }
1014
1015 static void
1016 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1017 struct sysval_uniform *uniform)
1018 {
1019 struct panfrost_context *ctx = batch->ctx;
1020 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1021
1022 uniform->f[0] = vp->translate[0];
1023 uniform->f[1] = vp->translate[1];
1024 uniform->f[2] = vp->translate[2];
1025 }
1026
1027 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1028 enum pipe_shader_type st,
1029 unsigned int sysvalid,
1030 struct sysval_uniform *uniform)
1031 {
1032 struct panfrost_context *ctx = batch->ctx;
1033 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1034 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1035 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1036 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1037
1038 assert(dim);
1039 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1040
1041 if (dim > 1)
1042 uniform->i[1] = u_minify(tex->texture->height0,
1043 tex->u.tex.first_level);
1044
1045 if (dim > 2)
1046 uniform->i[2] = u_minify(tex->texture->depth0,
1047 tex->u.tex.first_level);
1048
1049 if (is_array)
1050 uniform->i[dim] = tex->texture->array_size;
1051 }
1052
1053 static void
1054 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1055 enum pipe_shader_type st,
1056 unsigned ssbo_id,
1057 struct sysval_uniform *uniform)
1058 {
1059 struct panfrost_context *ctx = batch->ctx;
1060
1061 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1062 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1063
1064 /* Compute address */
1065 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1066
1067 panfrost_batch_add_bo(batch, bo,
1068 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1069 panfrost_bo_access_for_stage(st));
1070
1071 /* Upload address and size as sysval */
1072 uniform->du[0] = bo->gpu + sb.buffer_offset;
1073 uniform->u[2] = sb.buffer_size;
1074 }
1075
1076 static void
1077 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1078 enum pipe_shader_type st,
1079 unsigned samp_idx,
1080 struct sysval_uniform *uniform)
1081 {
1082 struct panfrost_context *ctx = batch->ctx;
1083 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1084
1085 uniform->f[0] = sampl->min_lod;
1086 uniform->f[1] = sampl->max_lod;
1087 uniform->f[2] = sampl->lod_bias;
1088
1089 /* Even without any errata, Midgard represents "no mipmapping" as
1090 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1091 * panfrost_create_sampler_state which also explains our choice of
1092 * epsilon value (again to keep behaviour consistent) */
1093
1094 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1095 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1096 }
1097
1098 static void
1099 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1100 struct sysval_uniform *uniform)
1101 {
1102 struct panfrost_context *ctx = batch->ctx;
1103
1104 uniform->u[0] = ctx->compute_grid->grid[0];
1105 uniform->u[1] = ctx->compute_grid->grid[1];
1106 uniform->u[2] = ctx->compute_grid->grid[2];
1107 }
1108
1109 static void
1110 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1111 struct panfrost_shader_state *ss,
1112 enum pipe_shader_type st)
1113 {
1114 struct sysval_uniform *uniforms = (void *)buf;
1115
1116 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1117 int sysval = ss->sysval[i];
1118
1119 switch (PAN_SYSVAL_TYPE(sysval)) {
1120 case PAN_SYSVAL_VIEWPORT_SCALE:
1121 panfrost_upload_viewport_scale_sysval(batch,
1122 &uniforms[i]);
1123 break;
1124 case PAN_SYSVAL_VIEWPORT_OFFSET:
1125 panfrost_upload_viewport_offset_sysval(batch,
1126 &uniforms[i]);
1127 break;
1128 case PAN_SYSVAL_TEXTURE_SIZE:
1129 panfrost_upload_txs_sysval(batch, st,
1130 PAN_SYSVAL_ID(sysval),
1131 &uniforms[i]);
1132 break;
1133 case PAN_SYSVAL_SSBO:
1134 panfrost_upload_ssbo_sysval(batch, st,
1135 PAN_SYSVAL_ID(sysval),
1136 &uniforms[i]);
1137 break;
1138 case PAN_SYSVAL_NUM_WORK_GROUPS:
1139 panfrost_upload_num_work_groups_sysval(batch,
1140 &uniforms[i]);
1141 break;
1142 case PAN_SYSVAL_SAMPLER:
1143 panfrost_upload_sampler_sysval(batch, st,
1144 PAN_SYSVAL_ID(sysval),
1145 &uniforms[i]);
1146 break;
1147 default:
1148 assert(0);
1149 }
1150 }
1151 }
1152
1153 static const void *
1154 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1155 unsigned index)
1156 {
1157 struct pipe_constant_buffer *cb = &buf->cb[index];
1158 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1159
1160 if (rsrc)
1161 return rsrc->bo->cpu;
1162 else if (cb->user_buffer)
1163 return cb->user_buffer;
1164 else
1165 unreachable("No constant buffer");
1166 }
1167
1168 void
1169 panfrost_emit_const_buf(struct panfrost_batch *batch,
1170 enum pipe_shader_type stage,
1171 struct mali_vertex_tiler_postfix *postfix)
1172 {
1173 struct panfrost_context *ctx = batch->ctx;
1174 struct panfrost_shader_variants *all = ctx->shader[stage];
1175
1176 if (!all)
1177 return;
1178
1179 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1180
1181 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1182
1183 /* Uniforms are implicitly UBO #0 */
1184 bool has_uniforms = buf->enabled_mask & (1 << 0);
1185
1186 /* Allocate room for the sysvals and the uniforms */
1187 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1188 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1189 size_t size = sys_size + uniform_size;
1190 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1191 size);
1192
1193 /* Upload sysvals requested by the shader */
1194 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1195
1196 /* Upload uniforms */
1197 if (has_uniforms && uniform_size) {
1198 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1199 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1200 }
1201
1202 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1203 * uploaded */
1204
1205 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1206 assert(ubo_count >= 1);
1207
1208 size_t sz = sizeof(uint64_t) * ubo_count;
1209 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1210 int uniform_count = ss->uniform_count;
1211
1212 /* Upload uniforms as a UBO */
1213 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1214
1215 /* The rest are honest-to-goodness UBOs */
1216
1217 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1218 size_t usz = buf->cb[ubo].buffer_size;
1219 bool enabled = buf->enabled_mask & (1 << ubo);
1220 bool empty = usz == 0;
1221
1222 if (!enabled || empty) {
1223 /* Stub out disabled UBOs to catch accesses */
1224 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1225 continue;
1226 }
1227
1228 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1229 buf, ubo);
1230
1231 unsigned bytes_per_field = 16;
1232 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1233 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
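/* E.g. a 100-byte UBO is padded up to 112 bytes = 7 fields of 16 bytes,
 * so it is encoded as MALI_MAKE_UBO(7, gpu). */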
1234 }
1235
1236 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1237 postfix->uniforms = transfer.gpu;
1238 postfix->uniform_buffers = ubufs;
1239
1240 buf->dirty_mask = 0;
1241 }
1242
1243 void
1244 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1245 const struct pipe_grid_info *info,
1246 struct midgard_payload_vertex_tiler *vtp)
1247 {
1248 struct panfrost_context *ctx = batch->ctx;
1249 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1250 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1251 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1252 128));
1253 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1254 info->grid[2] * 4;
1255 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1256 shared_size,
1257 1);
1258
1259 struct mali_shared_memory shared = {
1260 .shared_memory = bo->gpu,
1261 .shared_workgroup_count =
1262 util_logbase2_ceil(info->grid[0]) +
1263 util_logbase2_ceil(info->grid[1]) +
1264 util_logbase2_ceil(info->grid[2]),
1265 .shared_unk1 = 0x2,
1266 .shared_shift = util_logbase2(single_size) - 1
1267 };
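/* Sizing illustration: ss->shared_size = 200 rounds up to
 * single_size = 256; a 4x4x1 grid then allocates 256 * 16 * 4 = 16384
 * bytes, with shared_workgroup_count = 2 + 2 + 0 = 4 and
 * shared_shift = 7. */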
1268
1269 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1270 sizeof(shared));
1271 }
1272
1273 static mali_ptr
1274 panfrost_get_tex_desc(struct panfrost_batch *batch,
1275 enum pipe_shader_type st,
1276 struct panfrost_sampler_view *view)
1277 {
1278 if (!view)
1279 return (mali_ptr) 0;
1280
1281 struct pipe_sampler_view *pview = &view->base;
1282 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1283
1284 /* Add the BO to the job so it's retained until the job is done. */
1285
1286 panfrost_batch_add_bo(batch, rsrc->bo,
1287 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1288 panfrost_bo_access_for_stage(st));
1289
1290 panfrost_batch_add_bo(batch, view->midgard_bo,
1291 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1292 panfrost_bo_access_for_stage(st));
1293
1294 return view->midgard_bo->gpu;
1295 }
1296
1297 void
1298 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1299 enum pipe_shader_type stage,
1300 struct mali_vertex_tiler_postfix *postfix)
1301 {
1302 struct panfrost_context *ctx = batch->ctx;
1303 struct panfrost_device *device = pan_device(ctx->base.screen);
1304
1305 if (!ctx->sampler_view_count[stage])
1306 return;
1307
1308 if (device->quirks & IS_BIFROST) {
1309 struct bifrost_texture_descriptor *descriptors;
1310
1311 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1312 ctx->sampler_view_count[stage]);
1313
1314 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1315 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1316 struct pipe_sampler_view *pview = &view->base;
1317 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1318
1319 panfrost_batch_add_bo(batch, rsrc->bo,
1320 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1321 panfrost_bo_access_for_stage(stage));
1322
1323 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1324 }
1325
1326 postfix->textures = panfrost_upload_transient(batch,
1327 descriptors,
1328 sizeof(struct bifrost_texture_descriptor) *
1329 ctx->sampler_view_count[stage]);
1330
1331 free(descriptors);
1332 } else {
1333 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1334
1335 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1336 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1337 ctx->sampler_views[stage][i]);
1338
1339 postfix->textures = panfrost_upload_transient(batch,
1340 trampolines,
1341 sizeof(uint64_t) *
1342 ctx->sampler_view_count[stage]);
1343 }
1344 }
1345
1346 void
1347 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1348 enum pipe_shader_type stage,
1349 struct mali_vertex_tiler_postfix *postfix)
1350 {
1351 struct panfrost_context *ctx = batch->ctx;
1352 struct panfrost_device *device = pan_device(ctx->base.screen);
1353
1354 if (!ctx->sampler_count[stage])
1355 return;
1356
1357 if (device->quirks & IS_BIFROST) {
1358 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1359 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1360 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1361 transfer_size);
1362 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1363
1364 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1365 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1366
1367 postfix->sampler_descriptor = transfer.gpu;
1368 } else {
1369 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1370 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1371 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1372 transfer_size);
1373 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1374
1375 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1376 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1377
1378 postfix->sampler_descriptor = transfer.gpu;
1379 }
1380 }
1381
1382 void
1383 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1384 struct mali_vertex_tiler_postfix *vertex_postfix)
1385 {
1386 struct panfrost_context *ctx = batch->ctx;
1387
1388 if (!ctx->vertex)
1389 return;
1390
1391 struct panfrost_vertex_state *so = ctx->vertex;
1392
1393 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1394 vertex_postfix->attribute_meta = panfrost_upload_transient(batch, so->hw,
1395 sizeof(*so->hw) *
1396 PAN_MAX_ATTRIBUTE);
1397 }
1398
1399 void
1400 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1401 struct mali_vertex_tiler_postfix *vertex_postfix)
1402 {
1403 struct panfrost_context *ctx = batch->ctx;
1404 struct panfrost_vertex_state *so = ctx->vertex;
1405
1406 /* Staged mali_attr, and index into them. i =/= k, depending on the
1407 * vertex buffer mask and instancing. Twice as much room is allocated,
1408 * for a worst case of NPOT_DIVIDEs which take up an extra slot */
1409 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1410 unsigned k = 0;
1411
1412 for (unsigned i = 0; i < so->num_elements; ++i) {
1413 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1414 * means duplicating some vertex buffers (who cares? aside from
1415 * maybe some caching implications but I somehow doubt that
1416 * matters) */
1417
1418 struct pipe_vertex_element *elem = &so->pipe[i];
1419 unsigned vbi = elem->vertex_buffer_index;
1420
1421 /* The exception to 1:1 mapping is that we can have multiple
1422 * entries (NPOT divisors), so we fixup anyways */
1423
1424 so->hw[i].index = k;
1425
1426 if (!(ctx->vb_mask & (1 << vbi)))
1427 continue;
1428
1429 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1430 struct panfrost_resource *rsrc;
1431
1432 rsrc = pan_resource(buf->buffer.resource);
1433 if (!rsrc)
1434 continue;
1435
1436 /* Align to 64 bytes by masking off the lower bits. This
1437 * will be adjusted back when we fixup the src_offset in
1438 * mali_attr_meta */
1439
1440 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1441 mali_ptr addr = raw_addr & ~63;
1442 unsigned chopped_addr = raw_addr - addr;
1443
1444 /* Add a dependency of the batch on the vertex buffer */
1445 panfrost_batch_add_bo(batch, rsrc->bo,
1446 PAN_BO_ACCESS_SHARED |
1447 PAN_BO_ACCESS_READ |
1448 PAN_BO_ACCESS_VERTEX_TILER);
1449
1450 /* Set common fields */
1451 attrs[k].elements = addr;
1452 attrs[k].stride = buf->stride;
1453
1454 /* Since we advanced the base pointer, we shrink the buffer
1455 * size */
1456 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1457
1458 /* We need to add the extra size we masked off (for
1459 * correctness) so the data doesn't get clamped away */
1460 attrs[k].size += chopped_addr;
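/* E.g. raw_addr = base + 0x1034 gives addr = base + 0x1000 and
 * chopped_addr = 0x34; those 0x34 bytes are added back to the size so
 * the unaligned tail is not clamped away. */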
1461
1462 /* For non-instancing make sure we initialize */
1463 attrs[k].shift = attrs[k].extra_flags = 0;
1464
1465 /* Instancing uses a dramatically different code path than
1466 * linear, so dispatch for the actual emission now that the
1467 * common code is finished */
1468
1469 unsigned divisor = elem->instance_divisor;
1470
1471 if (divisor && ctx->instance_count == 1) {
1472 /* Silly corner case where there's a divisor(=1) but
1473 * there's no legitimate instancing. So we want *every*
1474 * attribute to be the same. So set stride to zero so
1475 * we don't go anywhere. */
1476
1477 attrs[k].size = attrs[k].stride + chopped_addr;
1478 attrs[k].stride = 0;
1479 attrs[k++].elements |= MALI_ATTR_LINEAR;
1480 } else if (ctx->instance_count <= 1) {
1481 /* Normal, non-instanced attributes */
1482 attrs[k++].elements |= MALI_ATTR_LINEAR;
1483 } else {
1484 unsigned instance_shift = vertex_postfix->instance_shift;
1485 unsigned instance_odd = vertex_postfix->instance_odd;
1486
1487 k += panfrost_vertex_instanced(ctx->padded_count,
1488 instance_shift,
1489 instance_odd,
1490 divisor, &attrs[k]);
1491 }
1492 }
1493
1494 /* Add special gl_VertexID/gl_InstanceID buffers */
1495
1496 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1497 so->hw[PAN_VERTEX_ID].index = k++;
1498 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1499 so->hw[PAN_INSTANCE_ID].index = k++;
1500
1501 /* Upload whatever we emitted and go */
1502
1503 vertex_postfix->attributes = panfrost_upload_transient(batch, attrs,
1504 k * sizeof(*attrs));
1505 }
1506
1507 static mali_ptr
1508 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1509 unsigned stride, unsigned count)
1510 {
1511 /* Fill out the descriptor */
1512 slot->stride = stride;
1513 slot->size = stride * count;
1514 slot->shift = slot->extra_flags = 0;
1515
1516 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1517 slot->size);
1518
1519 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1520
1521 return transfer.gpu;
1522 }
1523
1524 static void
1525 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1526 unsigned stride, unsigned offset, unsigned count,
1527 struct pipe_stream_output_target *target)
1528 {
1529 /* Fill out the descriptor */
1530 slot->stride = stride * 4;
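/* Gallium expresses stream-output strides in dwords, hence the * 4 to
 * get bytes (matching the dst_offset * 4 conversion later in this file). */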
1531 slot->shift = slot->extra_flags = 0;
1532
1533 unsigned max_size = target->buffer_size;
1534 unsigned expected_size = slot->stride * count;
1535
1536 slot->size = MIN2(max_size, expected_size);
1537
1538 /* Grab the BO and bind it to the batch */
1539 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1540
1541 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1542 * the perspective of the TILER and FRAGMENT.
1543 */
1544 panfrost_batch_add_bo(batch, bo,
1545 PAN_BO_ACCESS_SHARED |
1546 PAN_BO_ACCESS_RW |
1547 PAN_BO_ACCESS_VERTEX_TILER |
1548 PAN_BO_ACCESS_FRAGMENT);
1549
1550 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1551 slot->elements = addr;
1552 }
1553
1554 /* Given a shader and buffer indices, link varying metadata together */
1555
1556 static bool
1557 is_special_varying(gl_varying_slot loc)
1558 {
1559 switch (loc) {
1560 case VARYING_SLOT_POS:
1561 case VARYING_SLOT_PSIZ:
1562 case VARYING_SLOT_PNTC:
1563 case VARYING_SLOT_FACE:
1564 return true;
1565 default:
1566 return false;
1567 }
1568 }
1569
1570 static void
1571 panfrost_emit_varying_meta(void *outptr, struct panfrost_shader_state *ss,
1572 signed general, signed gl_Position,
1573 signed gl_PointSize, signed gl_PointCoord,
1574 signed gl_FrontFacing)
1575 {
1576 struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
1577
1578 for (unsigned i = 0; i < ss->varying_count; ++i) {
1579 gl_varying_slot location = ss->varyings_loc[i];
1580 int index = -1;
1581
1582 switch (location) {
1583 case VARYING_SLOT_POS:
1584 index = gl_Position;
1585 break;
1586 case VARYING_SLOT_PSIZ:
1587 index = gl_PointSize;
1588 break;
1589 case VARYING_SLOT_PNTC:
1590 index = gl_PointCoord;
1591 break;
1592 case VARYING_SLOT_FACE:
1593 index = gl_FrontFacing;
1594 break;
1595 default:
1596 index = general;
1597 break;
1598 }
1599
1600 assert(index >= 0);
1601 out[i].index = index;
1602 }
1603 }
1604
1605 static bool
1606 has_point_coord(unsigned mask, gl_varying_slot loc)
1607 {
1608 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1609 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1610 else if (loc == VARYING_SLOT_PNTC)
1611 return (mask & (1 << 8));
1612 else
1613 return false;
1614 }
1615
1616 /* Helpers for manipulating stream out information so we can pack varyings
1617 * accordingly. Compute the src_offset for a given captured varying */
1618
1619 static struct pipe_stream_output *
1620 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1621 {
1622 for (unsigned i = 0; i < info->num_outputs; ++i) {
1623 if (info->output[i].register_index == loc)
1624 return &info->output[i];
1625 }
1626
1627 unreachable("Varying not captured");
1628 }
1629
1630 /* TODO: Integers */
1631 static enum mali_format
1632 pan_xfb_format(unsigned nr_components)
1633 {
1634 switch (nr_components) {
1635 case 1: return MALI_R32F;
1636 case 2: return MALI_RG32F;
1637 case 3: return MALI_RGB32F;
1638 case 4: return MALI_RGBA32F;
1639 default: unreachable("Invalid format");
1640 }
1641 }
1642
1643 void
1644 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1645 unsigned vertex_count,
1646 struct mali_vertex_tiler_postfix *vertex_postfix,
1647 struct mali_vertex_tiler_postfix *tiler_postfix,
1648 union midgard_primitive_size *primitive_size)
1649 {
1650 /* Load the shaders */
1651 struct panfrost_context *ctx = batch->ctx;
1652 struct panfrost_shader_state *vs, *fs;
1653 unsigned int num_gen_varyings = 0;
1654 size_t vs_size, fs_size;
1655
1656 /* Allocate the varying descriptor */
1657
1658 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1659 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1660 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
1661 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
1662
1663 struct panfrost_transfer trans = panfrost_allocate_transient(batch,
1664 vs_size +
1665 fs_size);
1666
1667 struct pipe_stream_output_info *so = &vs->stream_output;
1668
1669 /* Check if this varying is linked by us. This is the case for
1670 * general-purpose, non-captured varyings. If it is, link it. If it's
1671 * not, use the provided stream out information to determine the
1672 * offset, since it was already linked for us. */
1673
1674 for (unsigned i = 0; i < vs->varying_count; i++) {
1675 gl_varying_slot loc = vs->varyings_loc[i];
1676
1677 bool special = is_special_varying(loc);
1678 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1679
1680 if (captured) {
1681 struct pipe_stream_output *o = pan_get_so(so, loc);
1682
1683 unsigned dst_offset = o->dst_offset * 4; /* dwords */
1684 vs->varyings[i].src_offset = dst_offset;
1685 } else if (!special) {
1686 vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
1687 }
1688 }
1689
1690 /* Conversely, we need to set src_offset for the captured varyings.
1691 * Here, the layout is defined by the stream out info, not us */
1692
1693 /* Link up with fragment varyings */
1694 bool reads_point_coord = fs->reads_point_coord;
1695
1696 for (unsigned i = 0; i < fs->varying_count; i++) {
1697 gl_varying_slot loc = fs->varyings_loc[i];
1698 unsigned src_offset;
1699 signed vs_idx = -1;
1700
1701 /* Link up */
1702 for (unsigned j = 0; j < vs->varying_count; ++j) {
1703 if (vs->varyings_loc[j] == loc) {
1704 vs_idx = j;
1705 break;
1706 }
1707 }
1708
1709 /* Either assign or reuse */
1710 if (vs_idx >= 0)
1711 src_offset = vs->varyings[vs_idx].src_offset;
1712 else
1713 src_offset = 16 * (num_gen_varyings++);
1714
1715 fs->varyings[i].src_offset = src_offset;
1716
1717 if (has_point_coord(fs->point_sprite_mask, loc))
1718 reads_point_coord = true;
1719 }
1720
1721 memcpy(trans.cpu, vs->varyings, vs_size);
1722 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
1723
1724 union mali_attr varyings[PIPE_MAX_ATTRIBS] = {0};
1725
1726 /* Figure out how many streamout buffers could be bound */
1727 unsigned so_count = ctx->streamout.num_targets;
1728 for (unsigned i = 0; i < vs->varying_count; i++) {
1729 gl_varying_slot loc = vs->varyings_loc[i];
1730
1731 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1732 if (!captured) continue;
1733
1734 struct pipe_stream_output *o = pan_get_so(so, loc);
1735 so_count = MAX2(so_count, o->output_buffer + 1);
1736 }
1737
1738 signed idx = so_count;
1739 signed general = idx++;
1740 signed gl_Position = idx++;
1741 signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
1742 signed gl_PointCoord = reads_point_coord ? (idx++) : -1;
1743 signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
1744 signed gl_FragCoord = fs->reads_frag_coord ? (idx++) : -1;
1745
1746 /* Emit the stream out buffers */
1747
1748 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1749 ctx->vertex_count);
1750
1751 for (unsigned i = 0; i < so_count; ++i) {
1752 if (i < ctx->streamout.num_targets) {
1753 panfrost_emit_streamout(batch, &varyings[i],
1754 so->stride[i],
1755 ctx->streamout.offsets[i],
1756 out_count,
1757 ctx->streamout.targets[i]);
1758 } else {
1759 /* Emit a dummy buffer */
1760 panfrost_emit_varyings(batch, &varyings[i],
1761 so->stride[i] * 4,
1762 out_count);
1763
1764 /* Clear the attribute type */
1765 varyings[i].elements &= ~0xF;
1766 }
1767 }
1768
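/* The general-purpose varying buffer is packed as one vec4 (16 bytes) per
 * linked varying per vertex, matching the src_offsets assigned above */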
1769 panfrost_emit_varyings(batch, &varyings[general],
1770 num_gen_varyings * 16,
1771 vertex_count);
1772
1773 mali_ptr varyings_p;
1774
1775 /* fp32 vec4 gl_Position */
1776 varyings_p = panfrost_emit_varyings(batch, &varyings[gl_Position],
1777 sizeof(float) * 4, vertex_count);
1778 tiler_postfix->position_varying = varyings_p;
1779
1780
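/* 16-bit gl_PointSize per vertex */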
1781 if (panfrost_writes_point_size(ctx)) {
1782 varyings_p = panfrost_emit_varyings(batch,
1783 &varyings[gl_PointSize],
1784 2, vertex_count);
1785 primitive_size->pointer = varyings_p;
1786 }
1787
1788 if (reads_point_coord)
1789 varyings[gl_PointCoord].elements = MALI_VARYING_POINT_COORD;
1790
1791 if (fs->reads_face)
1792 varyings[gl_FrontFacing].elements = MALI_VARYING_FRONT_FACING;
1793
1794 if (fs->reads_frag_coord)
1795 varyings[gl_FragCoord].elements = MALI_VARYING_FRAG_COORD;
1796
1797 struct panfrost_device *device = pan_device(ctx->base.screen);
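/* Bifrost does not (yet) handle the FS reading these special varyings
 * through this path, as the assert below documents */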
1798 assert(!(device->quirks & IS_BIFROST) || !(reads_point_coord || fs->reads_face || fs->reads_frag_coord));
1799
1800 /* Now that the buffer assignments are known, link the varying meta to
1801 * the buffer in question. VARYING_SLOT_POS is mapped to gl_FragCoord
1802 * for fragment shaders but to gl_Position for vertex shaders */
1804
1805 panfrost_emit_varying_meta(trans.cpu, vs, general, gl_Position,
1806 gl_PointSize, gl_PointCoord,
1807 gl_FrontFacing);
1808
1809 panfrost_emit_varying_meta(trans.cpu + vs_size, fs, general,
1810 gl_FragCoord, gl_PointSize,
1811 gl_PointCoord, gl_FrontFacing);
1812
1813 /* Replace streamout */
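/* Point the captured varyings at their transform feedback buffer (for both
 * the vertex writer and any fragment reader) instead of the general buffer */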
1814
1815 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
1816 struct mali_attr_meta *ofs = ovs + vs->varying_count;
1817
1818 for (unsigned i = 0; i < vs->varying_count; i++) {
1819 gl_varying_slot loc = vs->varyings_loc[i];
1820
1821 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
1822 if (!captured)
1823 continue;
1824
1825 struct pipe_stream_output *o = pan_get_so(so, loc);
1826 ovs[i].index = o->output_buffer;
1827
1828 /* Set the type appropriately. TODO: Integer varyings XXX */
1829 assert(o->stream == 0);
1830 ovs[i].format = pan_xfb_format(o->num_components);
1831 ovs[i].swizzle = panfrost_get_default_swizzle(o->num_components);
1832
1833 /* Link to the fragment */
1834 signed fs_idx = -1;
1835
1836 /* Link up */
1837 for (unsigned j = 0; j < fs->varying_count; ++j) {
1838 if (fs->varyings_loc[j] == loc) {
1839 fs_idx = j;
1840 break;
1841 }
1842 }
1843
1844 if (fs_idx >= 0) {
1845 ofs[fs_idx].index = ovs[i].index;
1846 ofs[fs_idx].format = ovs[i].format;
1847 ofs[fs_idx].swizzle = ovs[i].swizzle;
1848 }
1849 }
1850
1851 /* Replace point sprite */
1852 for (unsigned i = 0; i < fs->varying_count; i++) {
1853 /* If we have a point sprite replacement, handle that here. We
1854 * have to translate location first. TODO: Flip y in shader.
1855 * We're already keying ... just time crunch .. */
1856
1857 if (has_point_coord(fs->point_sprite_mask,
1858 fs->varyings_loc[i])) {
1859 ofs[i].index = gl_PointCoord;
1860
1861 /* Swizzle out the z/w to 0/1 */
1862 ofs[i].format = MALI_RG16F;
1863 ofs[i].swizzle = panfrost_get_default_swizzle(2);
1864 }
1865 }
1866
1867 /* Fix up unaligned addresses */
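/* The low bits of the record address encode the attribute mode, so the base
 * must stay 64-byte aligned; move any misalignment into each referencing
 * varying's src_offset and grow the buffer to match */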
1868 for (unsigned i = 0; i < so_count; ++i) {
1869 if (varyings[i].elements < MALI_RECORD_SPECIAL)
1870 continue;
1871
1872 unsigned align = (varyings[i].elements & 63);
1873
1874 /* While we're at it, the SO buffers are linear */
1875
1876 if (!align) {
1877 varyings[i].elements |= MALI_ATTR_LINEAR;
1878 continue;
1879 }
1880
1881 /* We need to adjust alignment */
1882 varyings[i].elements &= ~63;
1883 varyings[i].elements |= MALI_ATTR_LINEAR;
1884 varyings[i].size += align;
1885
1886 for (unsigned v = 0; v < vs->varying_count; ++v) {
1887 if (ovs[v].index != i)
1888 continue;
1889
1890 ovs[v].src_offset = vs->varyings[v].src_offset + align;
1891 }
1892
1893 for (unsigned f = 0; f < fs->varying_count; ++f) {
1894 if (ofs[f].index != i)
1895 continue;
1896
1897 ofs[f].src_offset = fs->varyings[f].src_offset + align;
1898 }
1899 }
1900
1901 varyings_p = panfrost_upload_transient(batch, varyings,
1902 idx * sizeof(*varyings));
1903 vertex_postfix->varyings = varyings_p;
1904 tiler_postfix->varyings = varyings_p;
1905
1906 vertex_postfix->varying_meta = trans.gpu;
1907 tiler_postfix->varying_meta = trans.gpu + vs_size;
1908 }
1909
1910 void
1911 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1912 struct mali_vertex_tiler_prefix *vertex_prefix,
1913 struct mali_vertex_tiler_postfix *vertex_postfix,
1914 struct mali_vertex_tiler_prefix *tiler_prefix,
1915 struct mali_vertex_tiler_postfix *tiler_postfix,
1916 union midgard_primitive_size *primitive_size)
1917 {
1918 struct panfrost_context *ctx = batch->ctx;
1919 struct panfrost_device *device = pan_device(ctx->base.screen);
1920 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1921 struct bifrost_payload_vertex bifrost_vertex = {0,};
1922 struct bifrost_payload_tiler bifrost_tiler = {0,};
1923 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1924 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1925 void *vp, *tp;
1926 size_t vp_size, tp_size;
1927
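/* Bifrost and Midgard use different job payload layouts; build whichever
 * applies on the stack and hand it to panfrost_new_job below */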
1928 if (device->quirks & IS_BIFROST) {
1929 bifrost_vertex.prefix = *vertex_prefix;
1930 bifrost_vertex.postfix = *vertex_postfix;
1931 vp = &bifrost_vertex;
1932 vp_size = sizeof(bifrost_vertex);
1933
1934 bifrost_tiler.prefix = *tiler_prefix;
1935 bifrost_tiler.tiler.primitive_size = *primitive_size;
1936 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1937 bifrost_tiler.postfix = *tiler_postfix;
1938 tp = &bifrost_tiler;
1939 tp_size = sizeof(bifrost_tiler);
1940 } else {
1941 midgard_vertex.prefix = *vertex_prefix;
1942 midgard_vertex.postfix = *vertex_postfix;
1943 vp = &midgard_vertex;
1944 vp_size = sizeof(midgard_vertex);
1945
1946 midgard_tiler.prefix = *tiler_prefix;
1947 midgard_tiler.postfix = *tiler_postfix;
1948 midgard_tiler.primitive_size = *primitive_size;
1949 tp = &midgard_tiler;
1950 tp_size = sizeof(midgard_tiler);
1951 }
1952
1953 if (wallpapering) {
1954 /* Inject in reverse order, with "predicted" job indices.
1955 * THIS IS A HACK XXX */
1956 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1957 batch->job_index + 2, tp, tp_size, true);
1958 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1959 vp, vp_size, true);
1960 return;
1961 }
1962
1963 /* If rasterizer discard is enabled, only submit the vertex job */
1964
1965 bool rasterizer_discard = ctx->rasterizer &&
1966 ctx->rasterizer->base.rasterizer_discard;
1967
1968 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1969 vp, vp_size, false);
1970
1971 if (rasterizer_discard)
1972 return;
1973
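/* The tiler job depends on the vertex job via the returned job index, so
 * shading completes before tiling */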
1974 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, tp_size,
1975 false);
1976 }
1977
1978 /* TODO: stop hardcoding this */
1979 mali_ptr
1980 panfrost_emit_sample_locations(struct panfrost_batch *batch)
1981 {
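/* 48 hardcoded (x, y) sample position entries, i.e. the 96 uint16_t
 * uploaded below */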
1982 uint16_t locations[] = {
1983 128, 128,
1984 0, 256,
1985 0, 256,
1986 0, 256,
1987 0, 256,
1988 0, 256,
1989 0, 256,
1990 0, 256,
1991 0, 256,
1992 0, 256,
1993 0, 256,
1994 0, 256,
1995 0, 256,
1996 0, 256,
1997 0, 256,
1998 0, 256,
1999 0, 256,
2000 0, 256,
2001 0, 256,
2002 0, 256,
2003 0, 256,
2004 0, 256,
2005 0, 256,
2006 0, 256,
2007 0, 256,
2008 0, 256,
2009 0, 256,
2010 0, 256,
2011 0, 256,
2012 0, 256,
2013 0, 256,
2014 0, 256,
2015 128, 128,
2016 0, 0,
2017 0, 0,
2018 0, 0,
2019 0, 0,
2020 0, 0,
2021 0, 0,
2022 0, 0,
2023 0, 0,
2024 0, 0,
2025 0, 0,
2026 0, 0,
2027 0, 0,
2028 0, 0,
2029 0, 0,
2030 0, 0,
2031 };
2032
2033 return panfrost_upload_transient(batch, locations, 96 * sizeof(uint16_t));
2034 }