1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28 #include "util/u_helpers.h"
29
30 #include "panfrost-quirks.h"
31
32 #include "pan_pool.h"
33 #include "pan_bo.h"
34 #include "pan_cmdstream.h"
35 #include "pan_context.h"
36 #include "pan_job.h"
37
38 /* If a BO is accessed for a particular shader stage, will it be in the primary
39 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
40 * fragment will be primary, e.g. compute jobs will be considered
41 * "vertex/tiler" by analogy */
42
43 static inline uint32_t
44 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
45 {
46 assert(stage == PIPE_SHADER_FRAGMENT ||
47 stage == PIPE_SHADER_VERTEX ||
48 stage == PIPE_SHADER_COMPUTE);
49
50 return stage == PIPE_SHADER_FRAGMENT ?
51 PAN_BO_ACCESS_FRAGMENT :
52 PAN_BO_ACCESS_VERTEX_TILER;
53 }
54
55 mali_ptr
56 panfrost_vt_emit_shared_memory(struct panfrost_batch *batch)
57 {
58 struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
59
60 struct mali_shared_memory shared = {
61 .shared_workgroup_count = ~0,
62 };
63
64 if (batch->stack_size) {
65 struct panfrost_bo *stack =
66 panfrost_batch_get_scratchpad(batch, batch->stack_size,
67 dev->thread_tls_alloc,
68 dev->core_count);
69
70 shared.stack_shift = panfrost_get_stack_shift(batch->stack_size);
71 shared.scratchpad = stack->gpu;
72 }
73
74 return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
75 }
76
77 void
78 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
79 bool points,
80 union midgard_primitive_size *primitive_size)
81 {
82 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
83
84 if (!panfrost_writes_point_size(ctx)) {
85 float val = points ?
86 rasterizer->base.point_size :
87 rasterizer->base.line_width;
88
89 primitive_size->constant = val;
90 }
91 }
92
93 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
94 * good for the duration of the draw (transient), though it could last longer. Also gets
95 * the bounds on the index buffer for the range accessed by the draw. We do
96 * these operations together because there are natural optimizations which
97 * require them to be together. */
98
99 mali_ptr
100 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
101 const struct pipe_draw_info *info,
102 unsigned *min_index, unsigned *max_index)
103 {
104 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
105 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
106 off_t offset = info->start * info->index_size;
107 bool needs_indices = true;
108 mali_ptr out = 0;
109
110 if (info->max_index != ~0u) {
111 *min_index = info->min_index;
112 *max_index = info->max_index;
113 needs_indices = false;
114 }
115
116 if (!info->has_user_indices) {
117 /* Only resources can be directly mapped */
118 panfrost_batch_add_bo(batch, rsrc->bo,
119 PAN_BO_ACCESS_SHARED |
120 PAN_BO_ACCESS_READ |
121 PAN_BO_ACCESS_VERTEX_TILER);
122 out = rsrc->bo->gpu + offset;
123
124 /* Check the cache */
125 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
126 info->start,
127 info->count,
128 min_index,
129 max_index);
130 } else {
131 /* Otherwise, we need to upload to transient memory */
132 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
133 struct panfrost_transfer T =
134 panfrost_pool_alloc_aligned(&batch->pool,
135 info->count * info->index_size,
136 info->index_size);
137
138 memcpy(T.cpu, ibuf8 + offset, info->count * info->index_size);
139 out = T.gpu;
140 }
141
142 if (needs_indices) {
143 /* Fallback */
144 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
145
146 if (!info->has_user_indices)
147 panfrost_minmax_cache_add(rsrc->index_cache,
148 info->start, info->count,
149 *min_index, *max_index);
150 }
151
152 return out;
153 }
154
155 static unsigned
156 translate_tex_wrap(enum pipe_tex_wrap w)
157 {
158 switch (w) {
159 case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
160 case PIPE_TEX_WRAP_CLAMP: return MALI_WRAP_MODE_CLAMP;
161 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
162 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
163 case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
164 case PIPE_TEX_WRAP_MIRROR_CLAMP: return MALI_WRAP_MODE_MIRRORED_CLAMP;
165 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
166 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
167 default: unreachable("Invalid wrap");
168 }
169 }
170
171 /* The hardware compares in the wrong order, so we have to flip before
172 * encoding. Yes, really. */
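/* Concretely, flipping swaps the operand order of the comparison, so the
 * inequalities reverse direction (less <-> greater, less-equal <-> greater-equal)
 * while equal, not-equal, always and never are unchanged. */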
173
174 static enum mali_func
175 panfrost_sampler_compare_func(const struct pipe_sampler_state *cso)
176 {
177 if (!cso->compare_mode)
178 return MALI_FUNC_NEVER;
179
180 enum mali_func f = panfrost_translate_compare_func(cso->compare_func);
181 return panfrost_flip_compare_func(f);
182 }
183
184 static enum mali_mipmap_mode
185 pan_pipe_to_mipmode(enum pipe_tex_mipfilter f)
186 {
187 switch (f) {
188 case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
189 case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
190 case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE;
191 default: unreachable("Invalid");
192 }
193 }
194
195 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
196 struct mali_midgard_sampler_packed *hw)
197 {
198 pan_pack(hw, MIDGARD_SAMPLER, cfg) {
199 cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
200 cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
201 cfg.mipmap_mode = (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) ?
202 MALI_MIPMAP_MODE_TRILINEAR : MALI_MIPMAP_MODE_NEAREST;
203 cfg.normalized_coordinates = cso->normalized_coords;
204
205 cfg.lod_bias = FIXED_16(cso->lod_bias, true);
206
207 cfg.minimum_lod = FIXED_16(cso->min_lod, false);
208
209 /* If necessary, we disable mipmapping in the sampler descriptor by
210 * clamping the LOD as tight as possible (from 0 to epsilon,
211 * essentially -- remember these are fixed point numbers, so
212 * epsilon=1/256) */
213
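/* Worked example (assuming the 8.8 fixed-point LOD encoding implied by the
 * 1/256 epsilon above): with min_lod = 0 and MIPFILTER_NONE, the clamp
 * becomes [0, 1/256], so only mip level 0 is ever selected. */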
214 cfg.maximum_lod = (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) ?
215 cfg.minimum_lod + 1 :
216 FIXED_16(cso->max_lod, false);
217
218 cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s);
219 cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t);
220 cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r);
221
222 cfg.compare_function = panfrost_sampler_compare_func(cso);
223 cfg.seamless_cube_map = cso->seamless_cube_map;
224
225 cfg.border_color_r = cso->border_color.f[0];
226 cfg.border_color_g = cso->border_color.f[1];
227 cfg.border_color_b = cso->border_color.f[2];
228 cfg.border_color_a = cso->border_color.f[3];
229 }
230 }
231
232 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
233 struct mali_bifrost_sampler_packed *hw)
234 {
235 pan_pack(hw, BIFROST_SAMPLER, cfg) {
236 cfg.magnify_linear = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR;
237 cfg.minify_linear = cso->min_img_filter == PIPE_TEX_FILTER_LINEAR;
238 cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter);
239 cfg.normalized_coordinates = cso->normalized_coords;
240
241 cfg.lod_bias = FIXED_16(cso->lod_bias, true);
242 cfg.minimum_lod = FIXED_16(cso->min_lod, false);
243 cfg.maximum_lod = FIXED_16(cso->max_lod, false);
244
245 cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s);
246 cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t);
247 cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r);
248
249 cfg.compare_function = panfrost_sampler_compare_func(cso);
250 cfg.seamless_cube_map = cso->seamless_cube_map;
251 }
252 }
253
254 static bool
255 panfrost_fs_required(
256 struct panfrost_shader_state *fs,
257 struct panfrost_blend_final *blend,
258 unsigned rt_count)
259 {
260 /* If we generally have side effects */
261 if (fs->fs_sidefx)
262 return true;
263
264 /* If colour is written we need to execute */
265 for (unsigned i = 0; i < rt_count; ++i) {
266 if (!blend[i].no_colour)
267 return true;
268 }
269
270 /* If depth is written and not implied, we need to execute.
271 * TODO: Predicate on Z/S writes being enabled */
272 return (fs->writes_depth || fs->writes_stencil);
273 }
274
275 static void
276 panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
277 struct panfrost_blend_final *blend)
278 {
279 const struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
280 struct panfrost_shader_state *fs = panfrost_get_shader_state(batch->ctx, PIPE_SHADER_FRAGMENT);
281 unsigned rt_count = batch->key.nr_cbufs;
282
283 struct bifrost_blend_rt *brts = rts;
284
285 /* Disable blending for depth-only */
286
287 if (rt_count == 0) {
288 if (dev->quirks & IS_BIFROST) {
289 memset(brts, 0, sizeof(*brts));
290 brts[0].unk2 = 0x3;
291 } else {
292 pan_pack(rts, MIDGARD_BLEND_OPAQUE, cfg) {
293 cfg.equation = 0xf0122122; /* Replace */
294 }
295 }
296 }
297
298 for (unsigned i = 0; i < rt_count; ++i) {
299 struct mali_blend_flags_packed flags = {};
300
301 pan_pack(&flags, BLEND_FLAGS, cfg) {
302 if (blend[i].no_colour) {
303 cfg.enable = false;
304 break;
305 }
306
307 batch->draws |= (PIPE_CLEAR_COLOR0 << i);
308
309 cfg.srgb = util_format_is_srgb(batch->key.cbufs[i]->format);
310 cfg.load_destination = blend[i].load_dest;
311 cfg.dither_disable = !batch->ctx->blend->base.dither;
312
313 if (!(dev->quirks & IS_BIFROST))
314 cfg.midgard_blend_shader = blend[i].is_shader;
315 }
316
317 if (dev->quirks & IS_BIFROST) {
318 memset(brts + i, 0, sizeof(brts[i]));
319 brts[i].flags = flags.opaque[0];
320
321 if (blend[i].is_shader) {
322 /* The blend shader's address needs to be at
323 * the same top 32 bits as the fragment shader.
324 * TODO: Ensure that's always the case.
325 */
326 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
327 (fs->bo->gpu & (0xffffffffull << 32)));
328 brts[i].shader = blend[i].shader.gpu;
329 brts[i].unk2 = 0x0;
330 } else {
331 enum pipe_format format = batch->key.cbufs[i]->format;
332 const struct util_format_description *format_desc;
333 format_desc = util_format_description(format);
334
335 brts[i].equation = blend[i].equation.equation;
336
337 /* TODO: this is a bit more complicated */
338 brts[i].constant = blend[i].equation.constant;
339
340 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
341
342 /* 0x19 disables blending and forces REPLACE
343 * mode (equivalent to rgb_mode = alpha_mode =
344 * x122, colour mask = 0xF). 0x1a allows
345 * blending. */
346 brts[i].unk2 = blend[i].opaque ? 0x19 : 0x1a;
347
348 brts[i].shader_type = fs->blend_types[i];
349 }
350 } else {
351 pan_pack(rts, MIDGARD_BLEND_OPAQUE, cfg) {
352 cfg.flags = flags;
353
354 if (blend[i].is_shader) {
355 cfg.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
356 } else {
357 cfg.equation = blend[i].equation.equation.opaque[0];
358 cfg.constant = blend[i].equation.constant;
359 }
360 }
361
362 rts += MALI_MIDGARD_BLEND_LENGTH;
363 }
364 }
365 }
366
367 static void
368 panfrost_emit_frag_shader(struct panfrost_context *ctx,
369 struct mali_state_packed *fragmeta,
370 struct panfrost_blend_final *blend)
371 {
372 const struct panfrost_device *dev = pan_device(ctx->base.screen);
373 struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
374 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
375 const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
376 unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
377 bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
378
379 /* Built up here */
380 struct mali_shader_packed shader = fs->shader;
381 struct mali_preload_packed preload = fs->preload;
382 uint32_t properties;
383 struct mali_multisample_misc_packed multisample_misc;
384 struct mali_stencil_mask_misc_packed stencil_mask_misc;
385 union midgard_blend sfbd_blend = { 0 };
386
387 if (!panfrost_fs_required(fs, blend, rt_count)) {
388 if (dev->quirks & IS_BIFROST) {
389 pan_pack(&shader, SHADER, cfg) {}
390
391 pan_pack(&properties, BIFROST_PROPERTIES, cfg) {
392 cfg.unknown = 0x950020; /* XXX */
393 cfg.early_z_enable = true;
394 }
395
396 preload.opaque[0] = 0;
397 } else {
398 pan_pack(&shader, SHADER, cfg) {
399 cfg.shader = 0x1;
400 }
401
402 pan_pack(&properties, MIDGARD_PROPERTIES, cfg) {
403 cfg.work_register_count = 1;
404 cfg.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
405 cfg.early_z_enable = true;
406 }
407 }
408 } else if (dev->quirks & IS_BIFROST) {
409 bool no_blend = true;
410
411 for (unsigned i = 0; i < rt_count; ++i)
412 no_blend &= (!blend[i].load_dest | blend[i].no_colour);
413
414 pan_pack(&properties, BIFROST_PROPERTIES, cfg) {
415 cfg.early_z_enable = !fs->can_discard && !fs->writes_depth && no_blend;
416 }
417
418 /* Combine with prepacked properties */
419 properties |= fs->properties.opaque[0];
420 } else {
421 /* Reasons to disable early-Z from a shader perspective */
422 bool late_z = fs->can_discard || fs->writes_global ||
423 fs->writes_depth || fs->writes_stencil;
424
425 /* If either depth or stencil is enabled, discard matters */
426 bool zs_enabled =
427 (zsa->base.depth.enabled && zsa->base.depth.func != PIPE_FUNC_ALWAYS) ||
428 zsa->base.stencil[0].enabled;
429
430 bool has_blend_shader = false;
431
432 for (unsigned c = 0; c < rt_count; ++c)
433 has_blend_shader |= blend[c].is_shader;
434
435 pan_pack(&properties, MIDGARD_PROPERTIES, cfg) {
436 /* TODO: Reduce this limit? */
437 if (has_blend_shader)
438 cfg.work_register_count = MAX2(fs->work_reg_count, 8);
439 else
440 cfg.work_register_count = fs->work_reg_count;
441
442 cfg.early_z_enable = !(late_z || alpha_to_coverage);
443 cfg.reads_tilebuffer = fs->outputs_read || (!zs_enabled && fs->can_discard);
444 cfg.reads_depth_stencil = zs_enabled && fs->can_discard;
445 }
446
447 properties |= fs->properties.opaque[0];
448 }
449
450 pan_pack(&multisample_misc, MULTISAMPLE_MISC, cfg) {
451 bool msaa = rast->multisample;
452 cfg.multisample_enable = msaa;
453 cfg.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
454
455 /* EXT_shader_framebuffer_fetch requires per-sample */
456 bool per_sample = ctx->min_samples > 1 || fs->outputs_read;
457 cfg.evaluate_per_sample = msaa && per_sample;
458
459 if (dev->quirks & MIDGARD_SFBD) {
460 cfg.sfbd_load_destination = blend[0].load_dest;
461 cfg.sfbd_blend_shader = blend[0].is_shader;
462 }
463
464 cfg.depth_function = zsa->base.depth.enabled ?
465 panfrost_translate_compare_func(zsa->base.depth.func) :
466 MALI_FUNC_ALWAYS;
467
468 cfg.depth_write_mask = zsa->base.depth.writemask;
469 cfg.near_discard = rast->depth_clip_near;
470 cfg.far_discard = rast->depth_clip_far;
471 cfg.unknown_2 = true;
472 }
473
474 pan_pack(&stencil_mask_misc, STENCIL_MASK_MISC, cfg) {
475 cfg.stencil_mask_front = zsa->stencil_mask_front;
476 cfg.stencil_mask_back = zsa->stencil_mask_back;
477 cfg.stencil_enable = zsa->base.stencil[0].enabled;
478 cfg.alpha_to_coverage = alpha_to_coverage;
479
480 if (dev->quirks & MIDGARD_SFBD) {
481 cfg.sfbd_write_enable = !blend[0].no_colour;
482 cfg.sfbd_srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format);
483 cfg.sfbd_dither_disable = !ctx->blend->base.dither;
484 }
485
486 cfg.unknown_1 = 0x7;
487 cfg.depth_range_1 = cfg.depth_range_2 = rast->offset_tri;
488 cfg.single_sampled_lines = !rast->multisample;
489 }
490
491 if (dev->quirks & MIDGARD_SFBD) {
492 if (blend[0].is_shader) {
493 sfbd_blend.shader = blend[0].shader.gpu |
494 blend[0].shader.first_tag;
495 } else {
496 sfbd_blend.equation = blend[0].equation.equation;
497 sfbd_blend.constant = blend[0].equation.constant;
498 }
499 } else if (!(dev->quirks & IS_BIFROST)) {
500 /* Bug where MRT-capable hw apparently reads the last blend
501 * shader from here instead of the usual location? */
502
503 for (signed rt = ((signed) rt_count - 1); rt >= 0; --rt) {
504 if (!blend[rt].is_shader)
505 continue;
506
507 sfbd_blend.shader = blend[rt].shader.gpu |
508 blend[rt].shader.first_tag;
509 break;
510 }
511 }
512
513 pan_pack(fragmeta, STATE_OPAQUE, cfg) {
514 cfg.shader = fs->shader;
515 cfg.properties = properties;
516 cfg.depth_units = rast->offset_units * 2.0f;
517 cfg.depth_factor = rast->offset_scale;
518 cfg.multisample_misc = multisample_misc;
519 cfg.stencil_mask_misc = stencil_mask_misc;
520
521 cfg.stencil_front = zsa->stencil_front;
522 cfg.stencil_back = zsa->stencil_back;
523
524 /* Bottom bits for stencil ref, exactly one word */
525 bool back_enab = zsa->base.stencil[1].enabled;
526 cfg.stencil_front.opaque[0] |= ctx->stencil_ref.ref_value[0];
527 cfg.stencil_back.opaque[0] |= ctx->stencil_ref.ref_value[back_enab ? 1 : 0];
528
529 if (dev->quirks & IS_BIFROST)
530 cfg.preload = preload;
531 else
532 memcpy(&cfg.sfbd_blend, &sfbd_blend, sizeof(sfbd_blend));
533 }
534 }
535
536 mali_ptr
537 panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage)
538 {
539 struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage);
540
541 panfrost_batch_add_bo(batch, ss->bo,
542 PAN_BO_ACCESS_PRIVATE |
543 PAN_BO_ACCESS_READ |
544 PAN_BO_ACCESS_VERTEX_TILER);
545
546 panfrost_batch_add_bo(batch, pan_resource(ss->upload.rsrc)->bo,
547 PAN_BO_ACCESS_PRIVATE |
548 PAN_BO_ACCESS_READ |
549 PAN_BO_ACCESS_VERTEX_TILER);
550
551 return pan_resource(ss->upload.rsrc)->bo->gpu + ss->upload.offset;
552 }
553
554 mali_ptr
555 panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
556 {
557 struct panfrost_context *ctx = batch->ctx;
558 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
559
560 /* Add the shader BO to the batch. */
561 panfrost_batch_add_bo(batch, ss->bo,
562 PAN_BO_ACCESS_PRIVATE |
563 PAN_BO_ACCESS_READ |
564 PAN_BO_ACCESS_FRAGMENT);
565
566 struct panfrost_device *dev = pan_device(ctx->base.screen);
567 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
568 struct panfrost_transfer xfer;
569 unsigned rt_size;
570
571 if (dev->quirks & MIDGARD_SFBD)
572 rt_size = 0;
573 else if (dev->quirks & IS_BIFROST)
574 rt_size = sizeof(struct bifrost_blend_rt);
575 else
576 rt_size = sizeof(struct midgard_blend_rt);
577
578 unsigned desc_size = MALI_STATE_LENGTH + rt_size * rt_count;
579 xfer = panfrost_pool_alloc_aligned(&batch->pool, desc_size, MALI_STATE_LENGTH);
580
581 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
582
583 for (unsigned c = 0; c < ctx->pipe_framebuffer.nr_cbufs; ++c)
584 blend[c] = panfrost_get_blend_for_context(ctx, c);
585
586 panfrost_emit_frag_shader(ctx, (struct mali_state_packed *) xfer.cpu, blend);
587
588 if (!(dev->quirks & MIDGARD_SFBD))
589 panfrost_emit_blend(batch, xfer.cpu + MALI_STATE_LENGTH, blend);
590 else
591 batch->draws |= PIPE_CLEAR_COLOR0;
592
593 return xfer.gpu;
594 }
595
596 mali_ptr
597 panfrost_emit_viewport(struct panfrost_batch *batch)
598 {
599 struct panfrost_context *ctx = batch->ctx;
600 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
601 const struct pipe_scissor_state *ss = &ctx->scissor;
602 const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
603 const struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
604
605 /* Derive min/max from translate/scale. Note since |x| >= 0 by
606 * definition, we have that -|x| <= |x| hence translate - |scale| <=
607 * translate + |scale|, so the ordering is correct here. */
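/* For instance, a viewport covering x in [0, 800) has scale[0] = 400 and
 * translate[0] = 400, giving vp_minx = 0 and vp_maxx = 800; y behaves the
 * same way with scale[1]/translate[1]. */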
608 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
609 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
610 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
611 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
612 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
613 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
614
615 /* Scissor to the intersection of the viewport and the scissor, clamped
616 * to the framebuffer */
617
618 unsigned minx = MIN2(fb->width, vp_minx);
619 unsigned maxx = MIN2(fb->width, vp_maxx);
620 unsigned miny = MIN2(fb->height, vp_miny);
621 unsigned maxy = MIN2(fb->height, vp_maxy);
622
623 if (ss && rast->scissor) {
624 minx = MAX2(ss->minx, minx);
625 miny = MAX2(ss->miny, miny);
626 maxx = MIN2(ss->maxx, maxx);
627 maxy = MIN2(ss->maxy, maxy);
628 }
629
630 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool, MALI_VIEWPORT_LENGTH);
631
632 pan_pack(T.cpu, VIEWPORT, cfg) {
633 cfg.scissor_minimum_x = minx;
634 cfg.scissor_minimum_y = miny;
635 cfg.scissor_maximum_x = maxx - 1;
636 cfg.scissor_maximum_y = maxy - 1;
637
638 cfg.minimum_z = rast->depth_clip_near ? minz : -INFINITY;
639 cfg.maximum_z = rast->depth_clip_far ? maxz : INFINITY;
640 }
641
642 panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
643 return T.gpu;
644 }
645
646 static mali_ptr
647 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
648 enum pipe_shader_type st,
649 struct panfrost_constant_buffer *buf,
650 unsigned index)
651 {
652 struct pipe_constant_buffer *cb = &buf->cb[index];
653 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
654
655 if (rsrc) {
656 panfrost_batch_add_bo(batch, rsrc->bo,
657 PAN_BO_ACCESS_SHARED |
658 PAN_BO_ACCESS_READ |
659 panfrost_bo_access_for_stage(st));
660
661 /* Alignment guaranteed by
662 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
663 return rsrc->bo->gpu + cb->buffer_offset;
664 } else if (cb->user_buffer) {
665 return panfrost_pool_upload_aligned(&batch->pool,
666 cb->user_buffer +
667 cb->buffer_offset,
668 cb->buffer_size, 16);
669 } else {
670 unreachable("No constant buffer");
671 }
672 }
673
674 struct sysval_uniform {
675 union {
676 float f[4];
677 int32_t i[4];
678 uint32_t u[4];
679 uint64_t du[2];
680 };
681 };
682
683 static void
684 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
685 struct sysval_uniform *uniform)
686 {
687 struct panfrost_context *ctx = batch->ctx;
688 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
689
690 uniform->f[0] = vp->scale[0];
691 uniform->f[1] = vp->scale[1];
692 uniform->f[2] = vp->scale[2];
693 }
694
695 static void
696 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
697 struct sysval_uniform *uniform)
698 {
699 struct panfrost_context *ctx = batch->ctx;
700 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
701
702 uniform->f[0] = vp->translate[0];
703 uniform->f[1] = vp->translate[1];
704 uniform->f[2] = vp->translate[2];
705 }
706
707 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
708 enum pipe_shader_type st,
709 unsigned int sysvalid,
710 struct sysval_uniform *uniform)
711 {
712 struct panfrost_context *ctx = batch->ctx;
713 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
714 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
715 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
716 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
717
718 assert(dim);
719 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
720
721 if (dim > 1)
722 uniform->i[1] = u_minify(tex->texture->height0,
723 tex->u.tex.first_level);
724
725 if (dim > 2)
726 uniform->i[2] = u_minify(tex->texture->depth0,
727 tex->u.tex.first_level);
728
729 if (is_array)
730 uniform->i[dim] = tex->texture->array_size;
731 }
732
733 static void
734 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
735 enum pipe_shader_type st,
736 unsigned ssbo_id,
737 struct sysval_uniform *uniform)
738 {
739 struct panfrost_context *ctx = batch->ctx;
740
741 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
742 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
743
744 /* Compute address */
745 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
746
747 panfrost_batch_add_bo(batch, bo,
748 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
749 panfrost_bo_access_for_stage(st));
750
751 /* Upload address and size as sysval */
752 uniform->du[0] = bo->gpu + sb.buffer_offset;
753 uniform->u[2] = sb.buffer_size;
754 }
755
756 static void
757 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
758 enum pipe_shader_type st,
759 unsigned samp_idx,
760 struct sysval_uniform *uniform)
761 {
762 struct panfrost_context *ctx = batch->ctx;
763 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
764
765 uniform->f[0] = sampl->min_lod;
766 uniform->f[1] = sampl->max_lod;
767 uniform->f[2] = sampl->lod_bias;
768
769 /* Even without any errata, Midgard represents "no mipmapping" as
770 * fixing the LOD with the clamps; keep behaviour consistent. Cf.
771 * panfrost_create_sampler_state which also explains our choice of
772 * epsilon value (again to keep behaviour consistent) */
773
774 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
775 uniform->f[1] = uniform->f[0] + (1.0/256.0);
776 }
777
778 static void
779 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
780 struct sysval_uniform *uniform)
781 {
782 struct panfrost_context *ctx = batch->ctx;
783
784 uniform->u[0] = ctx->compute_grid->grid[0];
785 uniform->u[1] = ctx->compute_grid->grid[1];
786 uniform->u[2] = ctx->compute_grid->grid[2];
787 }
788
789 static void
790 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
791 struct panfrost_shader_state *ss,
792 enum pipe_shader_type st)
793 {
794 struct sysval_uniform *uniforms = (void *)buf;
795
796 for (unsigned i = 0; i < ss->sysval_count; ++i) {
797 int sysval = ss->sysval[i];
798
799 switch (PAN_SYSVAL_TYPE(sysval)) {
800 case PAN_SYSVAL_VIEWPORT_SCALE:
801 panfrost_upload_viewport_scale_sysval(batch,
802 &uniforms[i]);
803 break;
804 case PAN_SYSVAL_VIEWPORT_OFFSET:
805 panfrost_upload_viewport_offset_sysval(batch,
806 &uniforms[i]);
807 break;
808 case PAN_SYSVAL_TEXTURE_SIZE:
809 panfrost_upload_txs_sysval(batch, st,
810 PAN_SYSVAL_ID(sysval),
811 &uniforms[i]);
812 break;
813 case PAN_SYSVAL_SSBO:
814 panfrost_upload_ssbo_sysval(batch, st,
815 PAN_SYSVAL_ID(sysval),
816 &uniforms[i]);
817 break;
818 case PAN_SYSVAL_NUM_WORK_GROUPS:
819 panfrost_upload_num_work_groups_sysval(batch,
820 &uniforms[i]);
821 break;
822 case PAN_SYSVAL_SAMPLER:
823 panfrost_upload_sampler_sysval(batch, st,
824 PAN_SYSVAL_ID(sysval),
825 &uniforms[i]);
826 break;
827 default:
828 assert(0);
829 }
830 }
831 }
832
833 static const void *
834 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
835 unsigned index)
836 {
837 struct pipe_constant_buffer *cb = &buf->cb[index];
838 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
839
840 if (rsrc)
841 return rsrc->bo->cpu;
842 else if (cb->user_buffer)
843 return cb->user_buffer;
844 else
845 unreachable("No constant buffer");
846 }
847
848 mali_ptr
849 panfrost_emit_const_buf(struct panfrost_batch *batch,
850 enum pipe_shader_type stage,
851 mali_ptr *push_constants)
852 {
853 struct panfrost_context *ctx = batch->ctx;
854 struct panfrost_shader_variants *all = ctx->shader[stage];
855
856 if (!all)
857 return 0;
858
859 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
860
861 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
862
863 /* Uniforms are implicitly UBO #0 */
864 bool has_uniforms = buf->enabled_mask & (1 << 0);
865
866 /* Allocate room for the sysval and the uniforms */
867 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
868 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
869 size_t size = sys_size + uniform_size;
870 struct panfrost_transfer transfer =
871 panfrost_pool_alloc_aligned(&batch->pool, size, 16);
872
873 /* Upload sysvals requested by the shader */
874 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
875
876 /* Upload uniforms */
877 if (has_uniforms && uniform_size) {
878 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
879 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
880 }
881
882 /* Next up, attach UBOs. UBO #0 is the uniforms we just
883 * uploaded, so it's always included. The count is the highest UBO
884 * addressable -- gaps are included. */
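/* e.g. enabled_mask = 0x9 (UBOs 0 and 3 bound) gives ubo_count = 4; the
 * disabled UBOs 1 and 2 are emitted as null descriptors in the loop below. */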
885
886 unsigned ubo_count = 32 - __builtin_clz(buf->enabled_mask | 1);
887
888 size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
889 struct panfrost_transfer ubos =
890 panfrost_pool_alloc_aligned(&batch->pool, sz,
891 MALI_UNIFORM_BUFFER_LENGTH);
892
893 uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;
894
895 /* Upload uniforms as a UBO */
896
897 if (size) {
898 pan_pack(ubo_ptr, UNIFORM_BUFFER, cfg) {
899 cfg.entries = DIV_ROUND_UP(size, 16);
900 cfg.pointer = transfer.gpu;
901 }
902 } else {
903 *ubo_ptr = 0;
904 }
905
906 /* The rest are honest-to-goodness UBOs */
907
908 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
909 size_t usz = buf->cb[ubo].buffer_size;
910 bool enabled = buf->enabled_mask & (1 << ubo);
911 bool empty = usz == 0;
912
913 if (!enabled || empty) {
914 ubo_ptr[ubo] = 0;
915 continue;
916 }
917
918 pan_pack(ubo_ptr + ubo, UNIFORM_BUFFER, cfg) {
919 cfg.entries = DIV_ROUND_UP(usz, 16);
920 cfg.pointer = panfrost_map_constant_buffer_gpu(batch,
921 stage, buf, ubo);
922 }
923 }
924
925 *push_constants = transfer.gpu;
926
927 buf->dirty_mask = 0;
928 return ubos.gpu;
929 }
930
931 mali_ptr
932 panfrost_emit_shared_memory(struct panfrost_batch *batch,
933 const struct pipe_grid_info *info)
934 {
935 struct panfrost_context *ctx = batch->ctx;
936 struct panfrost_device *dev = pan_device(ctx->base.screen);
937 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
938 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
939 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
940 128));
941
942 unsigned log2_instances =
943 util_logbase2_ceil(info->grid[0]) +
944 util_logbase2_ceil(info->grid[1]) +
945 util_logbase2_ceil(info->grid[2]);
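/* Worked example: ss->shared_size = 200 rounds up to single_size = 256, and a
 * (3, 4, 5) grid gives log2_instances = 2 + 2 + 3 = 7, so shared_size is
 * 256 * 128 * core_count bytes (32 KiB per core). */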
946
947 unsigned shared_size = single_size * (1 << log2_instances) * dev->core_count;
948 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
949 shared_size,
950 1);
951
952 struct mali_shared_memory shared = {
953 .shared_memory = bo->gpu,
954 .shared_workgroup_count = log2_instances,
955 .shared_shift = util_logbase2(single_size) + 1
956 };
957
958 return panfrost_pool_upload_aligned(&batch->pool, &shared,
959 sizeof(shared), 64);
960 }
961
962 static mali_ptr
963 panfrost_get_tex_desc(struct panfrost_batch *batch,
964 enum pipe_shader_type st,
965 struct panfrost_sampler_view *view)
966 {
967 if (!view)
968 return (mali_ptr) 0;
969
970 struct pipe_sampler_view *pview = &view->base;
971 struct panfrost_resource *rsrc = pan_resource(pview->texture);
972
973 /* Add the BO to the job so it's retained until the job is done. */
974
975 panfrost_batch_add_bo(batch, rsrc->bo,
976 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
977 panfrost_bo_access_for_stage(st));
978
979 panfrost_batch_add_bo(batch, view->bo,
980 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
981 panfrost_bo_access_for_stage(st));
982
983 return view->bo->gpu;
984 }
985
986 static void
987 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
988 struct pipe_context *pctx)
989 {
990 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
991 if (view->texture_bo != rsrc->bo->gpu ||
992 view->modifier != rsrc->modifier) {
993 panfrost_bo_unreference(view->bo);
994 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
995 }
996 }
997
998 mali_ptr
999 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1000 enum pipe_shader_type stage)
1001 {
1002 struct panfrost_context *ctx = batch->ctx;
1003 struct panfrost_device *device = pan_device(ctx->base.screen);
1004
1005 if (!ctx->sampler_view_count[stage])
1006 return 0;
1007
1008 if (device->quirks & IS_BIFROST) {
1009 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
1010 MALI_BIFROST_TEXTURE_LENGTH *
1011 ctx->sampler_view_count[stage],
1012 MALI_BIFROST_TEXTURE_LENGTH);
1013
1014 struct mali_bifrost_texture_packed *out =
1015 (struct mali_bifrost_texture_packed *) T.cpu;
1016
1017 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1018 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1019 struct pipe_sampler_view *pview = &view->base;
1020 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1021
1022 panfrost_update_sampler_view(view, &ctx->base);
1023 out[i] = view->bifrost_descriptor;
1024
1025 /* Add the BOs to the job so they are retained until the job is done. */
1026
1027 panfrost_batch_add_bo(batch, rsrc->bo,
1028 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1029 panfrost_bo_access_for_stage(stage));
1030
1031 panfrost_batch_add_bo(batch, view->bo,
1032 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1033 panfrost_bo_access_for_stage(stage));
1034 }
1035
1036 return T.gpu;
1037 } else {
1038 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1039
1040 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1041 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1042
1043 panfrost_update_sampler_view(view, &ctx->base);
1044
1045 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1046 }
1047
1048 return panfrost_pool_upload_aligned(&batch->pool, trampolines,
1049 sizeof(uint64_t) *
1050 ctx->sampler_view_count[stage],
1051 sizeof(uint64_t));
1052 }
1053 }
1054
1055 mali_ptr
1056 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1057 enum pipe_shader_type stage)
1058 {
1059 struct panfrost_context *ctx = batch->ctx;
1060
1061 if (!ctx->sampler_count[stage])
1062 return 0;
1063
1064 size_t desc_size = MALI_BIFROST_SAMPLER_LENGTH;
1065 assert(MALI_BIFROST_SAMPLER_LENGTH == MALI_MIDGARD_SAMPLER_LENGTH);
1066
1067 size_t sz = desc_size * ctx->sampler_count[stage];
1068 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool, sz, desc_size);
1069 struct mali_midgard_sampler_packed *out = (struct mali_midgard_sampler_packed *) T.cpu;
1070
1071 for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
1072 out[i] = ctx->samplers[stage][i]->hw;
1073
1074 return T.gpu;
1075 }
1076
1077 mali_ptr
1078 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1079 mali_ptr *buffers)
1080 {
1081 struct panfrost_context *ctx = batch->ctx;
1082 struct panfrost_vertex_state *so = ctx->vertex;
1083 struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1084
1085 /* Worst case: everything is NPOT, which is only possible if instancing
1086 * is enabled. Otherwise a single record is guaranteed */
1087 struct panfrost_transfer S = panfrost_pool_alloc_aligned(&batch->pool,
1088 MALI_ATTRIBUTE_BUFFER_LENGTH * vs->attribute_count *
1089 (ctx->instance_count > 1 ? 2 : 1),
1090 MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
1091
1092 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
1093 MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
1094 MALI_ATTRIBUTE_LENGTH);
1095
1096 struct mali_attribute_buffer_packed *bufs =
1097 (struct mali_attribute_buffer_packed *) S.cpu;
1098
1099 struct mali_attribute_packed *out =
1100 (struct mali_attribute_packed *) T.cpu;
1101
1102 unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 };
1103 unsigned k = 0;
1104
1105 for (unsigned i = 0; i < so->num_elements; ++i) {
1106 /* We map buffers 1:1 with the attributes, which
1107 * means duplicating some vertex buffers (who cares? aside from
1108 * maybe some caching implications but I somehow doubt that
1109 * matters) */
1110
1111 struct pipe_vertex_element *elem = &so->pipe[i];
1112 unsigned vbi = elem->vertex_buffer_index;
1113 attrib_to_buffer[i] = k;
1114
1115 if (!(ctx->vb_mask & (1 << vbi)))
1116 continue;
1117
1118 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1119 struct panfrost_resource *rsrc;
1120
1121 rsrc = pan_resource(buf->buffer.resource);
1122 if (!rsrc)
1123 continue;
1124
1125 /* Add a dependency of the batch on the vertex buffer */
1126 panfrost_batch_add_bo(batch, rsrc->bo,
1127 PAN_BO_ACCESS_SHARED |
1128 PAN_BO_ACCESS_READ |
1129 PAN_BO_ACCESS_VERTEX_TILER);
1130
1131 /* Mask off lower bits, see offset fixup below */
1132 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1133 mali_ptr addr = raw_addr & ~63;
1134
1135 /* Since we advanced the base pointer, we shrink the buffer
1136 * size, but add the offset we subtracted */
1137 unsigned size = rsrc->base.width0 + (raw_addr - addr)
1138 - buf->buffer_offset;
1139
1140 /* When there is a divisor, the hardware-level divisor is
1141 * the product of the instance divisor and the padded count */
1142 unsigned divisor = elem->instance_divisor;
1143 unsigned hw_divisor = ctx->padded_count * divisor;
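/* e.g. padded_count = 32 with instance_divisor = 3 yields hw_divisor = 96
 * (not a power of two, so the magic-divisor path below is used), whereas
 * instance_divisor = 2 yields hw_divisor = 64 and divisor_r = ctz(64) = 6. */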
1144 unsigned stride = buf->stride;
1145
1146 /* If there's a divisor (>= 1) but no instancing, we want every
1147 * attribute to be the same */
1148
1149 if (divisor && ctx->instance_count == 1)
1150 stride = 0;
1151
1152 if (!divisor || ctx->instance_count <= 1) {
1153 pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
1154 if (ctx->instance_count > 1) {
1155 cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
1156 cfg.divisor = ctx->padded_count;
1157 }
1158
1159 cfg.pointer = addr;
1160 cfg.stride = stride;
1161 cfg.size = size;
1162 }
1163 } else if (util_is_power_of_two_or_zero(hw_divisor)) {
1164 pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
1165 cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
1166 cfg.pointer = addr;
1167 cfg.stride = stride;
1168 cfg.size = size;
1169 cfg.divisor_r = __builtin_ctz(hw_divisor);
1170 }
1171
1172 } else {
1173 unsigned shift = 0, extra_flags = 0;
1174
1175 unsigned magic_divisor =
1176 panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);
1177
1178 pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
1179 cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
1180 cfg.pointer = addr;
1181 cfg.stride = stride;
1182 cfg.size = size;
1183
1184 cfg.divisor_r = shift;
1185 cfg.divisor_e = extra_flags;
1186 }
1187
1188 pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
1189 cfg.divisor_numerator = magic_divisor;
1190 cfg.divisor = divisor;
1191 }
1192
1193 ++k;
1194 }
1195
1196 ++k;
1197 }
1198
1199 /* Add special gl_VertexID/gl_InstanceID buffers */
1200
1201 if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
1202 panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
1203
1204 pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
1205 cfg.buffer_index = k++;
1206 cfg.format = so->formats[PAN_VERTEX_ID];
1207 }
1208
1209 panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);
1210
1211 pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
1212 cfg.buffer_index = k++;
1213 cfg.format = so->formats[PAN_INSTANCE_ID];
1214 }
1215 }
1216
1217 /* Attribute addresses require 64-byte alignment, so let:
1218 *
1219 * base' = base & ~63 = base - (base & 63)
1220 * offset' = offset + (base & 63)
1221 *
1222 * Since base' + offset' = base + offset, these are equivalent
1223 * addressing modes and now base is 64 aligned.
1224 */
1225
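/* Worked example: with a 4 KiB-aligned BO and buffer_offset = 100, base & 63
 * is 36, so the buffer pointer above was rounded down by 36 bytes and each
 * attribute's src_offset below grows by the same 36 bytes. */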
1226 for (unsigned i = 0; i < so->num_elements; ++i) {
1227 unsigned vbi = so->pipe[i].vertex_buffer_index;
1228 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1229
1230 /* Adjust by the masked off bits of the offset. Make sure we
1231 * read src_offset from so->hw (which is not GPU visible)
1232 * rather than target (which is) due to caching effects */
1233
1234 unsigned src_offset = so->pipe[i].src_offset;
1235
1236 /* BOs aligned to 4k so guaranteed aligned to 64 */
1237 src_offset += (buf->buffer_offset & 63);
1238
1239 /* Also, somewhat obscurely, per-instance data needs to be
1240 * offset in response to a delayed start in an indexed draw */
1241
1242 if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
1243 src_offset -= buf->stride * ctx->offset_start;
1244
1245 pan_pack(out + i, ATTRIBUTE, cfg) {
1246 cfg.buffer_index = attrib_to_buffer[i];
1247 cfg.format = so->formats[i];
1248 cfg.offset = src_offset;
1249 }
1250 }
1251
1252 *buffers = S.gpu;
1253 return T.gpu;
1254 }
1255
1256 static mali_ptr
1257 panfrost_emit_varyings(struct panfrost_batch *batch,
1258 struct mali_attribute_buffer_packed *slot,
1259 unsigned stride, unsigned count)
1260 {
1261 unsigned size = stride * count;
1262 mali_ptr ptr = panfrost_pool_alloc_aligned(&batch->invisible_pool, size, 64).gpu;
1263
1264 pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
1265 cfg.stride = stride;
1266 cfg.size = size;
1267 cfg.pointer = ptr;
1268 }
1269
1270 return ptr;
1271 }
1272
1273 static unsigned
1274 panfrost_streamout_offset(unsigned stride, unsigned offset,
1275 struct pipe_stream_output_target *target)
1276 {
1277 return (target->buffer_offset + (offset * stride * 4)) & 63;
1278 }
1279
1280 static void
1281 panfrost_emit_streamout(struct panfrost_batch *batch,
1282 struct mali_attribute_buffer_packed *slot,
1283 unsigned stride_words, unsigned offset, unsigned count,
1284 struct pipe_stream_output_target *target)
1285 {
1286 unsigned stride = stride_words * 4;
1287 unsigned max_size = target->buffer_size;
1288 unsigned expected_size = stride * count;
1289
1290 /* Grab the BO and bind it to the batch */
1291 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1292
1293 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1294 * the perspective of the TILER and FRAGMENT.
1295 */
1296 panfrost_batch_add_bo(batch, bo,
1297 PAN_BO_ACCESS_SHARED |
1298 PAN_BO_ACCESS_RW |
1299 PAN_BO_ACCESS_VERTEX_TILER |
1300 PAN_BO_ACCESS_FRAGMENT);
1301
1302 /* We will have an offset applied to get alignment */
1303 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * stride);
1304
1305 pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
1306 cfg.pointer = (addr & ~63);
1307 cfg.stride = stride;
1308 cfg.size = MIN2(max_size, expected_size) + (addr & 63);
1309 }
1310 }
1311
1312 /* Helpers for manipulating stream out information so we can pack varyings
1313 * accordingly. Compute the src_offset for a given captured varying */
1314
1315 static struct pipe_stream_output *
1316 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1317 {
1318 for (unsigned i = 0; i < info->num_outputs; ++i) {
1319 if (info->output[i].register_index == loc)
1320 return &info->output[i];
1321 }
1322
1323 unreachable("Varying not captured");
1324 }
1325
1326 static unsigned
1327 pan_varying_size(enum mali_format fmt)
1328 {
1329 unsigned type = MALI_EXTRACT_TYPE(fmt);
1330 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1331 unsigned bits = MALI_EXTRACT_BITS(fmt);
1332 unsigned bpc = 0;
1333
1334 if (bits == MALI_CHANNEL_FLOAT) {
1335 /* No doubles */
1336 bool fp16 = (type == MALI_FORMAT_SINT);
1337 assert(fp16 || (type == MALI_FORMAT_UNORM));
1338
1339 bpc = fp16 ? 2 : 4;
1340 } else {
1341 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1342
1343 /* See the enums */
1344 bits = 1 << bits;
1345 assert(bits >= 8);
1346 bpc = bits / 8;
1347 }
1348
1349 return bpc * chan;
1350 }
1351
1352 /* Indices for named (non-XFB) varyings that are present. These are packed
1353 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1354 * PAN_VARY_*). This has the nice property that you can look up the buffer index
1355 * of a given special field given a shift S by:
1356 *
1357 * idx = popcount(P & ((1 << S) - 1))
1358 *
1359 * That is, look at all of the varyings that come earlier and count them; that
1360 * count is the index of this one. Likewise, the total number of special
1361 * buffers required is simply popcount(P)
1362 */
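/* Worked example: with GENERAL, POSITION, PSIZ and PNTCOORD present,
 * P = 0b1111, so the buffer index of PNTCOORD (S = 3) is
 * popcount(0b1111 & 0b0111) = 3 and popcount(P) = 4 buffers are needed. */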
1363
1364 enum pan_special_varying {
1365 PAN_VARY_GENERAL = 0,
1366 PAN_VARY_POSITION = 1,
1367 PAN_VARY_PSIZ = 2,
1368 PAN_VARY_PNTCOORD = 3,
1369 PAN_VARY_FACE = 4,
1370 PAN_VARY_FRAGCOORD = 5,
1371
1372 /* Keep last */
1373 PAN_VARY_MAX,
1374 };
1375
1376 /* Given a varying, figure out which index it corresponds to */
1377
1378 static inline unsigned
1379 pan_varying_index(unsigned present, enum pan_special_varying v)
1380 {
1381 unsigned mask = (1 << v) - 1;
1382 return util_bitcount(present & mask);
1383 }
1384
1385 /* Get the base offset for XFB buffers, which by convention come after
1386 * everything else. Wrapper function for semantic reasons; by construction this
1387 * is just popcount. */
1388
1389 static inline unsigned
1390 pan_xfb_base(unsigned present)
1391 {
1392 return util_bitcount(present);
1393 }
1394
1395 /* Computes the present mask for varyings so we can start emitting varying records */
1396
1397 static inline unsigned
1398 pan_varying_present(
1399 struct panfrost_shader_state *vs,
1400 struct panfrost_shader_state *fs,
1401 unsigned quirks,
1402 uint16_t point_coord_mask)
1403 {
1404 /* At the moment we always emit general and position buffers. Not
1405 * strictly necessary but usually harmless */
1406
1407 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1408
1409 /* Enable special buffers based on the shader info */
1410
1411 if (vs->writes_point_size)
1412 present |= (1 << PAN_VARY_PSIZ);
1413
1414 if (fs->reads_point_coord)
1415 present |= (1 << PAN_VARY_PNTCOORD);
1416
1417 if (fs->reads_face)
1418 present |= (1 << PAN_VARY_FACE);
1419
1420 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1421 present |= (1 << PAN_VARY_FRAGCOORD);
1422
1423 /* Also, if we have a point sprite, we need a point coord buffer */
1424
1425 for (unsigned i = 0; i < fs->varying_count; i++) {
1426 gl_varying_slot loc = fs->varyings_loc[i];
1427
1428 if (util_varying_is_point_coord(loc, point_coord_mask))
1429 present |= (1 << PAN_VARY_PNTCOORD);
1430 }
1431
1432 return present;
1433 }
1434
1435 /* Emitters for varying records */
1436
1437 static void
1438 pan_emit_vary(struct mali_attribute_packed *out,
1439 unsigned present, enum pan_special_varying buf,
1440 unsigned quirks, enum mali_format format,
1441 unsigned offset)
1442 {
1443 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1444 unsigned swizzle = quirks & HAS_SWIZZLES ?
1445 panfrost_get_default_swizzle(nr_channels) :
1446 panfrost_bifrost_swizzle(nr_channels);
1447
1448 pan_pack(out, ATTRIBUTE, cfg) {
1449 cfg.buffer_index = pan_varying_index(present, buf);
1450 cfg.unknown = quirks & IS_BIFROST ? 0x0 : 0x1;
1451 cfg.format = (format << 12) | swizzle;
1452 cfg.offset = offset;
1453 }
1454 }
1455
1456 /* General varying that is unused */
1457
1458 static void
1459 pan_emit_vary_only(struct mali_attribute_packed *out,
1460 unsigned present, unsigned quirks)
1461 {
1462 pan_emit_vary(out, present, 0, quirks, MALI_VARYING_DISCARD, 0);
1463 }
1464
1465 /* Special records */
1466
1467 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1468 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1469 [PAN_VARY_PSIZ] = MALI_R16F,
1470 [PAN_VARY_PNTCOORD] = MALI_R16F,
1471 [PAN_VARY_FACE] = MALI_R32I,
1472 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1473 };
1474
1475 static void
1476 pan_emit_vary_special(struct mali_attribute_packed *out,
1477 unsigned present, enum pan_special_varying buf,
1478 unsigned quirks)
1479 {
1480 assert(buf < PAN_VARY_MAX);
1481 pan_emit_vary(out, present, buf, quirks, pan_varying_formats[buf], 0);
1482 }
1483
1484 static enum mali_format
1485 pan_xfb_format(enum mali_format format, unsigned nr)
1486 {
1487 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1488 return MALI_R32F | MALI_NR_CHANNELS(nr);
1489 else
1490 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1491 }
1492
1493 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1494 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1495 * value. */
1496
1497 static void
1498 pan_emit_vary_xfb(struct mali_attribute_packed *out,
1499 unsigned present,
1500 unsigned max_xfb,
1501 unsigned *streamout_offsets,
1502 unsigned quirks,
1503 enum mali_format format,
1504 struct pipe_stream_output o)
1505 {
1506 unsigned swizzle = quirks & HAS_SWIZZLES ?
1507 panfrost_get_default_swizzle(o.num_components) :
1508 panfrost_bifrost_swizzle(o.num_components);
1509
1510 pan_pack(out, ATTRIBUTE, cfg) {
1511 /* XFB buffers come after everything else */
1512 cfg.buffer_index = pan_xfb_base(present) + o.output_buffer;
1513 cfg.unknown = quirks & IS_BIFROST ? 0x0 : 0x1;
1514
1515 /* Override number of channels and precision to highp */
1516 cfg.format = (pan_xfb_format(format, o.num_components) << 12) | swizzle;
1517
1518 /* Apply given offsets together */
1519 cfg.offset = (o.dst_offset * 4) /* dwords */
1520 + streamout_offsets[o.output_buffer];
1521 }
1522 }
1523
1524 /* Determine if we should capture a varying for XFB. This requires actually
1525 * having a buffer for it. If we don't capture it, we'll fall back to a general
1526 * varying path (linked or unlinked, possibly discarding the write) */
1527
1528 static bool
1529 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1530 unsigned loc, unsigned max_xfb)
1531 {
1532 if (!(xfb->so_mask & (1ll << loc)))
1533 return false;
1534
1535 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1536 return o->output_buffer < max_xfb;
1537 }
1538
1539 static void
1540 pan_emit_general_varying(struct mali_attribute_packed *out,
1541 struct panfrost_shader_state *other,
1542 struct panfrost_shader_state *xfb,
1543 gl_varying_slot loc,
1544 enum mali_format format,
1545 unsigned present,
1546 unsigned quirks,
1547 unsigned *gen_offsets,
1548 enum mali_format *gen_formats,
1549 unsigned *gen_stride,
1550 unsigned idx,
1551 bool should_alloc)
1552 {
1553 /* Check if we're linked */
1554 signed other_idx = -1;
1555
1556 for (unsigned j = 0; j < other->varying_count; ++j) {
1557 if (other->varyings_loc[j] == loc) {
1558 other_idx = j;
1559 break;
1560 }
1561 }
1562
1563 if (other_idx < 0) {
1564 pan_emit_vary_only(out, present, quirks);
1565 return;
1566 }
1567
1568 unsigned offset = gen_offsets[other_idx];
1569
1570 if (should_alloc) {
1571 /* We're linked, so allocate a space via a watermark allocation */
1572 enum mali_format alt = other->varyings[other_idx];
1573
1574 /* Do interpolation at minimum precision */
1575 unsigned size_main = pan_varying_size(format);
1576 unsigned size_alt = pan_varying_size(alt);
1577 unsigned size = MIN2(size_main, size_alt);
1578
1579 /* If a varying is marked for XFB but not actually captured, we
1580 * should match the format to the format that would otherwise
1581 * be used for XFB, since dEQP checks for invariance here. It's
1582 * unclear if this is required by the spec. */
1583
1584 if (xfb->so_mask & (1ull << loc)) {
1585 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1586 format = pan_xfb_format(format, o->num_components);
1587 size = pan_varying_size(format);
1588 } else if (size == size_alt) {
1589 format = alt;
1590 }
1591
1592 gen_offsets[idx] = *gen_stride;
1593 gen_formats[other_idx] = format;
1594 offset = *gen_stride;
1595 *gen_stride += size;
1596 }
1597
1598 pan_emit_vary(out, present, PAN_VARY_GENERAL, quirks, format, offset);
1599 }
1600
1601 /* Higher-level wrapper around all of the above, classifying a varying into one
1602 * of the above types */
1603
1604 static void
1605 panfrost_emit_varying(
1606 struct mali_attribute_packed *out,
1607 struct panfrost_shader_state *stage,
1608 struct panfrost_shader_state *other,
1609 struct panfrost_shader_state *xfb,
1610 unsigned present,
1611 uint16_t point_sprite_mask,
1612 unsigned max_xfb,
1613 unsigned *streamout_offsets,
1614 unsigned quirks,
1615 unsigned *gen_offsets,
1616 enum mali_format *gen_formats,
1617 unsigned *gen_stride,
1618 unsigned idx,
1619 bool should_alloc,
1620 bool is_fragment)
1621 {
1622 gl_varying_slot loc = stage->varyings_loc[idx];
1623 enum mali_format format = stage->varyings[idx];
1624
1625 /* Override format to match linkage */
1626 if (!should_alloc && gen_formats[idx])
1627 format = gen_formats[idx];
1628
1629 if (util_varying_is_point_coord(loc, point_sprite_mask)) {
1630 pan_emit_vary_special(out, present, PAN_VARY_PNTCOORD, quirks);
1631 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1632 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1633 pan_emit_vary_xfb(out, present, max_xfb, streamout_offsets, quirks, format, *o);
1634 } else if (loc == VARYING_SLOT_POS) {
1635 if (is_fragment)
1636 pan_emit_vary_special(out, present, PAN_VARY_FRAGCOORD, quirks);
1637 else
1638 pan_emit_vary_special(out, present, PAN_VARY_POSITION, quirks);
1639 } else if (loc == VARYING_SLOT_PSIZ) {
1640 pan_emit_vary_special(out, present, PAN_VARY_PSIZ, quirks);
1641 } else if (loc == VARYING_SLOT_PNTC) {
1642 pan_emit_vary_special(out, present, PAN_VARY_PNTCOORD, quirks);
1643 } else if (loc == VARYING_SLOT_FACE) {
1644 pan_emit_vary_special(out, present, PAN_VARY_FACE, quirks);
1645 } else {
1646 pan_emit_general_varying(out, other, xfb, loc, format, present,
1647 quirks, gen_offsets, gen_formats, gen_stride,
1648 idx, should_alloc);
1649 }
1650 }
1651
1652 static void
1653 pan_emit_special_input(struct mali_attribute_buffer_packed *out,
1654 unsigned present,
1655 enum pan_special_varying v,
1656 unsigned special)
1657 {
1658 if (present & (1 << v)) {
1659 unsigned idx = pan_varying_index(present, v);
1660
1661 pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
1662 cfg.special = special;
1663 cfg.type = 0;
1664 }
1665 }
1666 }
1667
1668 void
1669 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
1670 unsigned vertex_count,
1671 mali_ptr *vs_attribs,
1672 mali_ptr *fs_attribs,
1673 mali_ptr *buffers,
1674 mali_ptr *position,
1675 mali_ptr *psiz)
1676 {
1677 /* Load the shaders */
1678 struct panfrost_context *ctx = batch->ctx;
1679 struct panfrost_device *dev = pan_device(ctx->base.screen);
1680 struct panfrost_shader_state *vs, *fs;
1681 size_t vs_size, fs_size;
1682
1683 /* Allocate the varying descriptor */
1684
1685 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
1686 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
1687 vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
1688 fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;
1689
1690 struct panfrost_transfer trans = panfrost_pool_alloc_aligned(
1691 &batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);
1692
1693 struct pipe_stream_output_info *so = &vs->stream_output;
1694 uint16_t point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
1695 unsigned present = pan_varying_present(vs, fs, dev->quirks, point_coord_mask);
1696
1697 /* Check if this varying is linked by us. This is the case for
1698 * general-purpose, non-captured varyings. If it is, link it. If it's
1699 * not, use the provided stream out information to determine the
1700 * offset, since it was already linked for us. */
1701
1702 unsigned gen_offsets[32];
1703 enum mali_format gen_formats[32];
1704 memset(gen_offsets, 0, sizeof(gen_offsets));
1705 memset(gen_formats, 0, sizeof(gen_formats));
1706
1707 unsigned gen_stride = 0;
1708 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
1709 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
1710
1711 unsigned streamout_offsets[32];
1712
1713 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
1714 streamout_offsets[i] = panfrost_streamout_offset(
1715 so->stride[i],
1716 ctx->streamout.offsets[i],
1717 ctx->streamout.targets[i]);
1718 }
1719
1720 struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
1721 struct mali_attribute_packed *ofs = ovs + vs->varying_count;
1722
1723 for (unsigned i = 0; i < vs->varying_count; i++) {
1724 panfrost_emit_varying(ovs + i, vs, fs, vs, present, 0,
1725 ctx->streamout.num_targets, streamout_offsets,
1726 dev->quirks,
1727 gen_offsets, gen_formats, &gen_stride, i, true, false);
1728 }
1729
1730 for (unsigned i = 0; i < fs->varying_count; i++) {
1731 panfrost_emit_varying(ofs + i, fs, vs, vs, present, point_coord_mask,
1732 ctx->streamout.num_targets, streamout_offsets,
1733 dev->quirks,
1734 gen_offsets, gen_formats, &gen_stride, i, false, true);
1735 }
1736
1737 unsigned xfb_base = pan_xfb_base(present);
1738 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
1739 MALI_ATTRIBUTE_BUFFER_LENGTH * (xfb_base + ctx->streamout.num_targets),
1740 MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
1741 struct mali_attribute_buffer_packed *varyings =
1742 (struct mali_attribute_buffer_packed *) T.cpu;
1743
1744 /* Emit the stream out buffers */
1745
1746 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
1747 ctx->vertex_count);
1748
1749 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
1750 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
1751 so->stride[i],
1752 ctx->streamout.offsets[i],
1753 out_count,
1754 ctx->streamout.targets[i]);
1755 }
1756
1757 panfrost_emit_varyings(batch,
1758 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
1759 gen_stride, vertex_count);
1760
1761 /* fp32 vec4 gl_Position */
1762 *position = panfrost_emit_varyings(batch,
1763 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
1764 sizeof(float) * 4, vertex_count);
1765
1766 if (present & (1 << PAN_VARY_PSIZ)) {
1767 *psiz = panfrost_emit_varyings(batch,
1768 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
1769 2, vertex_count);
1770 }
1771
1772 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
1773 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
1774 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
1775
1776 *buffers = T.gpu;
1777 *vs_attribs = trans.gpu;
1778 *fs_attribs = trans.gpu + vs_size;
1779 }
1780
1781 void
1782 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1783 struct mali_vertex_tiler_prefix *vertex_prefix,
1784 struct mali_draw_packed *vertex_draw,
1785 struct mali_vertex_tiler_prefix *tiler_prefix,
1786 struct mali_draw_packed *tiler_draw,
1787 union midgard_primitive_size *primitive_size)
1788 {
1789 struct panfrost_context *ctx = batch->ctx;
1790 struct panfrost_device *device = pan_device(ctx->base.screen);
1791 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
1792 struct bifrost_payload_vertex bifrost_vertex = {0,};
1793 struct bifrost_payload_tiler bifrost_tiler = {0,};
1794 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
1795 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
1796 void *vp, *tp;
1797 size_t vp_size, tp_size;
1798
1799 if (device->quirks & IS_BIFROST) {
1800 bifrost_vertex.prefix = *vertex_prefix;
1801 memcpy(&bifrost_vertex.postfix, vertex_draw, MALI_DRAW_LENGTH);
1802 vp = &bifrost_vertex;
1803 vp_size = sizeof(bifrost_vertex);
1804
1805 bifrost_tiler.prefix = *tiler_prefix;
1806 bifrost_tiler.primitive_size = *primitive_size;
1807 bifrost_tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
1808 memcpy(&bifrost_tiler.postfix, tiler_draw, MALI_DRAW_LENGTH);
1809 tp = &bifrost_tiler;
1810 tp_size = sizeof(bifrost_tiler);
1811 } else {
1812 midgard_vertex.prefix = *vertex_prefix;
1813 memcpy(&midgard_vertex.postfix, vertex_draw, MALI_DRAW_LENGTH);
1814 vp = &midgard_vertex;
1815 vp_size = sizeof(midgard_vertex);
1816
1817 midgard_tiler.prefix = *tiler_prefix;
1818 memcpy(&midgard_tiler.postfix, tiler_draw, MALI_DRAW_LENGTH);
1819 midgard_tiler.primitive_size = *primitive_size;
1820 tp = &midgard_tiler;
1821 tp_size = sizeof(midgard_tiler);
1822 }
1823
1824 if (wallpapering) {
1825 /* Inject in reverse order, with "predicted" job indices.
1826 * THIS IS A HACK XXX */
1827 panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false,
1828 batch->scoreboard.job_index + 2, tp, tp_size, true);
1829 panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
1830 vp, vp_size, true);
1831 return;
1832 }
1833
1834 /* If rasterizer discard is enabled, only submit the vertex job */
1835
1836 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
1837 vp, vp_size, false);
1838
1839 if (ctx->rasterizer->base.rasterizer_discard)
1840 return;
1841
1842 panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tp, tp_size,
1843 false);
1844 }
1845
1846 /* TODO: stop hardcoding this */
1847 mali_ptr
1848 panfrost_emit_sample_locations(struct panfrost_batch *batch)
1849 {
1850 uint16_t locations[] = {
1851 128, 128,
1852 0, 256,
1853 0, 256,
1854 0, 256,
1855 0, 256,
1856 0, 256,
1857 0, 256,
1858 0, 256,
1859 0, 256,
1860 0, 256,
1861 0, 256,
1862 0, 256,
1863 0, 256,
1864 0, 256,
1865 0, 256,
1866 0, 256,
1867 0, 256,
1868 0, 256,
1869 0, 256,
1870 0, 256,
1871 0, 256,
1872 0, 256,
1873 0, 256,
1874 0, 256,
1875 0, 256,
1876 0, 256,
1877 0, 256,
1878 0, 256,
1879 0, 256,
1880 0, 256,
1881 0, 256,
1882 0, 256,
1883 128, 128,
1884 0, 0,
1885 0, 0,
1886 0, 0,
1887 0, 0,
1888 0, 0,
1889 0, 0,
1890 0, 0,
1891 0, 0,
1892 0, 0,
1893 0, 0,
1894 0, 0,
1895 0, 0,
1896 0, 0,
1897 0, 0,
1898 0, 0,
1899 };
1900
1901 return panfrost_pool_upload_aligned(&batch->pool, locations, 96 * sizeof(uint16_t), 64);
1902 }