panfrost: Extract panfrost_batch_reserve_framebuffer
[mesa.git] src/gallium/drivers/panfrost/pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_pool.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
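/* The per-thread stack size is handed to the hardware as a shift;
 * panfrost_get_stack_shift derives it from the batch's worst-case
 * stack_size, and the same shift sizes the scratchpad BO below */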
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
75 postfix->shared_memory = panfrost_batch_reserve_framebuffer(batch);
76 }
77
78 static void
79 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
80 struct mali_vertex_tiler_prefix *prefix,
81 struct mali_vertex_tiler_postfix *postfix)
82 {
83 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
84
85 postfix->gl_enables |= 0x7;
86 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
87 rasterizer && rasterizer->base.front_ccw);
88 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
89 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
90 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
91 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
92 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
93 rasterizer && rasterizer->base.flatshade_first);
94 }
95
96 void
97 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
98 struct mali_vertex_tiler_prefix *prefix,
99 union midgard_primitive_size *primitive_size)
100 {
101 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
102
103 if (!panfrost_writes_point_size(ctx)) {
104 bool points = prefix->draw_mode == MALI_POINTS;
105 float val = 0.0f;
106
107 if (rasterizer)
108 val = points ?
109 rasterizer->base.point_size :
110 rasterizer->base.line_width;
111
112 primitive_size->constant = val;
113 }
114 }
115
116 static void
117 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
118 struct mali_vertex_tiler_postfix *postfix)
119 {
120 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
121 if (ctx->occlusion_query) {
122 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
123 panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
124 PAN_BO_ACCESS_SHARED |
125 PAN_BO_ACCESS_RW |
126 PAN_BO_ACCESS_FRAGMENT);
127 } else {
128 postfix->occlusion_counter = 0;
129 }
130 }
131
132 void
133 panfrost_vt_init(struct panfrost_context *ctx,
134 enum pipe_shader_type stage,
135 struct mali_vertex_tiler_prefix *prefix,
136 struct mali_vertex_tiler_postfix *postfix)
137 {
138 struct panfrost_device *device = pan_device(ctx->base.screen);
139
140 if (!ctx->shader[stage])
141 return;
142
143 memset(prefix, 0, sizeof(*prefix));
144 memset(postfix, 0, sizeof(*postfix));
145
146 if (device->quirks & IS_BIFROST) {
147 postfix->gl_enables = 0x2;
148 panfrost_vt_emit_shared_memory(ctx, postfix);
149 } else {
150 postfix->gl_enables = 0x6;
151 panfrost_vt_attach_framebuffer(ctx, postfix);
152 }
153
154 if (stage == PIPE_SHADER_FRAGMENT) {
155 panfrost_vt_update_occlusion_query(ctx, postfix);
156 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
157 }
158 }
159
160 static unsigned
161 panfrost_translate_index_size(unsigned size)
162 {
163 switch (size) {
164 case 1:
165 return MALI_DRAW_INDEXED_UINT8;
166
167 case 2:
168 return MALI_DRAW_INDEXED_UINT16;
169
170 case 4:
171 return MALI_DRAW_INDEXED_UINT32;
172
173 default:
174 unreachable("Invalid index size");
175 }
176 }
177
178 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
179 * good for the duration of the draw (transient), though it could last longer. Also gets
180 * the bounds on the index buffer for the range accessed by the draw. We do
181 * these operations together because there are natural optimizations which
182 * require them to be together. */
183
184 static mali_ptr
185 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
186 const struct pipe_draw_info *info,
187 unsigned *min_index, unsigned *max_index)
188 {
189 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
190 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
191 off_t offset = info->start * info->index_size;
192 bool needs_indices = true;
193 mali_ptr out = 0;
194
195 if (info->max_index != ~0u) {
196 *min_index = info->min_index;
197 *max_index = info->max_index;
198 needs_indices = false;
199 }
200
201 if (!info->has_user_indices) {
202 /* Only resources can be directly mapped */
203 panfrost_batch_add_bo(batch, rsrc->bo,
204 PAN_BO_ACCESS_SHARED |
205 PAN_BO_ACCESS_READ |
206 PAN_BO_ACCESS_VERTEX_TILER);
207 out = rsrc->bo->gpu + offset;
208
209 /* Check the cache */
210 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
211 info->start,
212 info->count,
213 min_index,
214 max_index);
215 } else {
216 /* Otherwise, we need to upload to transient memory */
217 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
218 out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
219 info->count *
220 info->index_size);
221 }
222
223 if (needs_indices) {
224 /* Fallback */
225 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
226
227 if (!info->has_user_indices)
228 panfrost_minmax_cache_add(rsrc->index_cache,
229 info->start, info->count,
230 *min_index, *max_index);
231 }
232
233 return out;
234 }
235
236 void
237 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
238 const struct pipe_draw_info *info,
239 enum mali_draw_mode draw_mode,
240 struct mali_vertex_tiler_postfix *vertex_postfix,
241 struct mali_vertex_tiler_prefix *tiler_prefix,
242 struct mali_vertex_tiler_postfix *tiler_postfix,
243 unsigned *vertex_count,
244 unsigned *padded_count)
245 {
246 tiler_prefix->draw_mode = draw_mode;
247
248 unsigned draw_flags = 0;
249
250 if (panfrost_writes_point_size(ctx))
251 draw_flags |= MALI_DRAW_VARYING_SIZE;
252
253 if (info->primitive_restart)
254 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
255
256 /* These don't make much sense */
257
258 draw_flags |= 0x3000;
259
260 if (info->index_size) {
261 unsigned min_index = 0, max_index = 0;
262
263 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
264 info,
265 &min_index,
266 &max_index);
267
268 /* Use the corresponding values */
269 *vertex_count = max_index - min_index + 1;
270 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
271 tiler_prefix->offset_bias_correction = -min_index;
272 tiler_prefix->index_count = MALI_POSITIVE(info->count);
273 draw_flags |= panfrost_translate_index_size(info->index_size);
274 } else {
275 tiler_prefix->indices = 0;
276 *vertex_count = ctx->vertex_count;
277 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
278 tiler_prefix->offset_bias_correction = 0;
279 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
280 }
281
282 tiler_prefix->unknown_draw = draw_flags;
283
284 /* Encode the padded vertex count */
285
286 if (info->instance_count > 1) {
287 *padded_count = panfrost_padded_vertex_count(*vertex_count);
288
289 unsigned shift = __builtin_ctz(ctx->padded_count);
290 unsigned k = ctx->padded_count >> (shift + 1);
291
292 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
293 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
294 } else {
295 *padded_count = *vertex_count;
296
297 /* Reset instancing state */
298 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
299 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
300 }
301 }
302
303 static void
304 panfrost_shader_meta_init(struct panfrost_context *ctx,
305 enum pipe_shader_type st,
306 struct mali_shader_meta *meta)
307 {
308 const struct panfrost_device *dev = pan_device(ctx->base.screen);
309 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
310
311 memset(meta, 0, sizeof(*meta));
312 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
313 meta->attribute_count = ss->attribute_count;
314 meta->varying_count = ss->varying_count;
315 meta->texture_count = ctx->sampler_view_count[st];
316 meta->sampler_count = ctx->sampler_count[st];
317
318 if (dev->quirks & IS_BIFROST) {
319 if (st == PIPE_SHADER_VERTEX)
320 meta->bifrost1.unk1 = 0x800000;
321 else {
322 /* First clause ATEST |= 0x4000000.
323 * Less than 32 regs |= 0x200 */
324 meta->bifrost1.unk1 = 0x950020;
325 }
326
327 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
328 if (st == PIPE_SHADER_VERTEX)
329 meta->bifrost2.preload_regs = 0xC0;
330 else {
331 meta->bifrost2.preload_regs = 0x1;
332 SET_BIT(meta->bifrost2.preload_regs, 0x10, ss->reads_frag_coord);
333 }
334
335 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
336 ss->uniform_cutoff);
337 } else {
338 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
339 ss->uniform_cutoff);
340 meta->midgard1.work_count = ss->work_reg_count;
341
342 /* TODO: This is not conformant on ES3 */
343 meta->midgard1.flags_hi = MALI_SUPPRESS_INF_NAN;
344
345 meta->midgard1.flags_lo = 0x20;
346 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
347
348 SET_BIT(meta->midgard1.flags_hi, MALI_WRITES_GLOBAL, ss->writes_global);
349 }
350 }
351
352 static unsigned
353 panfrost_translate_compare_func(enum pipe_compare_func in)
354 {
355 switch (in) {
356 case PIPE_FUNC_NEVER:
357 return MALI_FUNC_NEVER;
358
359 case PIPE_FUNC_LESS:
360 return MALI_FUNC_LESS;
361
362 case PIPE_FUNC_EQUAL:
363 return MALI_FUNC_EQUAL;
364
365 case PIPE_FUNC_LEQUAL:
366 return MALI_FUNC_LEQUAL;
367
368 case PIPE_FUNC_GREATER:
369 return MALI_FUNC_GREATER;
370
371 case PIPE_FUNC_NOTEQUAL:
372 return MALI_FUNC_NOTEQUAL;
373
374 case PIPE_FUNC_GEQUAL:
375 return MALI_FUNC_GEQUAL;
376
377 case PIPE_FUNC_ALWAYS:
378 return MALI_FUNC_ALWAYS;
379
380 default:
381 unreachable("Invalid func");
382 }
383 }
384
385 static unsigned
386 panfrost_translate_stencil_op(enum pipe_stencil_op in)
387 {
388 switch (in) {
389 case PIPE_STENCIL_OP_KEEP:
390 return MALI_STENCIL_KEEP;
391
392 case PIPE_STENCIL_OP_ZERO:
393 return MALI_STENCIL_ZERO;
394
395 case PIPE_STENCIL_OP_REPLACE:
396 return MALI_STENCIL_REPLACE;
397
398 case PIPE_STENCIL_OP_INCR:
399 return MALI_STENCIL_INCR;
400
401 case PIPE_STENCIL_OP_DECR:
402 return MALI_STENCIL_DECR;
403
404 case PIPE_STENCIL_OP_INCR_WRAP:
405 return MALI_STENCIL_INCR_WRAP;
406
407 case PIPE_STENCIL_OP_DECR_WRAP:
408 return MALI_STENCIL_DECR_WRAP;
409
410 case PIPE_STENCIL_OP_INVERT:
411 return MALI_STENCIL_INVERT;
412
413 default:
414 unreachable("Invalid stencil op");
415 }
416 }
417
418 static unsigned
419 translate_tex_wrap(enum pipe_tex_wrap w)
420 {
421 switch (w) {
422 case PIPE_TEX_WRAP_REPEAT:
423 return MALI_WRAP_REPEAT;
424
425 case PIPE_TEX_WRAP_CLAMP:
426 return MALI_WRAP_CLAMP;
427
428 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
429 return MALI_WRAP_CLAMP_TO_EDGE;
430
431 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
432 return MALI_WRAP_CLAMP_TO_BORDER;
433
434 case PIPE_TEX_WRAP_MIRROR_REPEAT:
435 return MALI_WRAP_MIRRORED_REPEAT;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP:
438 return MALI_WRAP_MIRRORED_CLAMP;
439
440 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
441 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
442
443 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
444 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
445
446 default:
447 unreachable("Invalid wrap");
448 }
449 }
450
451 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
452 struct mali_sampler_descriptor *hw)
453 {
454 unsigned func = panfrost_translate_compare_func(cso->compare_func);
455 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
456 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
457 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
458 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
459 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
460 unsigned mip_filter = mip_linear ?
461 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
462 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
463
464 *hw = (struct mali_sampler_descriptor) {
465 .filter_mode = min_filter | mag_filter | mip_filter |
466 normalized,
467 .wrap_s = translate_tex_wrap(cso->wrap_s),
468 .wrap_t = translate_tex_wrap(cso->wrap_t),
469 .wrap_r = translate_tex_wrap(cso->wrap_r),
470 .compare_func = panfrost_flip_compare_func(func),
471 .border_color = {
472 cso->border_color.f[0],
473 cso->border_color.f[1],
474 cso->border_color.f[2],
475 cso->border_color.f[3]
476 },
477 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
478 .max_lod = FIXED_16(cso->max_lod, false),
479 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
480 .seamless_cube_map = cso->seamless_cube_map,
481 };
482
483 /* If necessary, we disable mipmapping in the sampler descriptor by
484 * clamping the LOD as tight as possible (from 0 to epsilon,
485 * essentially -- remember these are fixed point numbers, so
486 * epsilon=1/256) */
487
488 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
489 hw->max_lod = hw->min_lod + 1;
490 }
491
492 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
493 struct bifrost_sampler_descriptor *hw)
494 {
495 *hw = (struct bifrost_sampler_descriptor) {
496 .unk1 = 0x1,
497 .wrap_s = translate_tex_wrap(cso->wrap_s),
498 .wrap_t = translate_tex_wrap(cso->wrap_t),
499 .wrap_r = translate_tex_wrap(cso->wrap_r),
500 .unk8 = 0x8,
501 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
502 .norm_coords = cso->normalized_coords,
503 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
504 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
505 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
506 .max_lod = FIXED_16(cso->max_lod, false),
507 };
508
509 /* If necessary, we disable mipmapping in the sampler descriptor by
510 * clamping the LOD as tight as possible (from 0 to epsilon,
511 * essentially -- remember these are fixed point numbers, so
512 * epsilon=1/256) */
513
514 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
515 hw->max_lod = hw->min_lod + 1;
516 }
517
518 static void
519 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
520 struct mali_stencil_test *out)
521 {
522 out->ref = 0; /* Gallium gets it from elsewhere */
523
524 out->mask = in->valuemask;
525 out->func = panfrost_translate_compare_func(in->func);
526 out->sfail = panfrost_translate_stencil_op(in->fail_op);
527 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
528 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
529 }
530
531 static void
532 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
533 struct mali_shader_meta *fragmeta)
534 {
535 if (!ctx->rasterizer) {
536 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
537 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
538 fragmeta->depth_units = 0.0f;
539 fragmeta->depth_factor = 0.0f;
540 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
541 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
542 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, true);
543 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, true);
544 return;
545 }
546
547 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
548
549 bool msaa = rast->multisample;
550
551 /* TODO: Sample size */
552 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
553 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
554
555 SET_BIT(fragmeta->unknown2_3, MALI_PER_SAMPLE,
556 msaa && ctx->min_samples > 1);
557
558 fragmeta->depth_units = rast->offset_units * 2.0f;
559 fragmeta->depth_factor = rast->offset_scale;
560
561 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
562
563 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, rast->offset_tri);
564 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, rast->offset_tri);
565
566 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, rast->depth_clip_near);
567 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, rast->depth_clip_far);
568 }
569
570 static void
571 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
572 struct mali_shader_meta *fragmeta)
573 {
574 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
575 int zfunc = PIPE_FUNC_ALWAYS;
576
577 if (!zsa) {
578 struct pipe_stencil_state default_stencil = {
579 .enabled = 0,
580 .func = PIPE_FUNC_ALWAYS,
581 .fail_op = MALI_STENCIL_KEEP,
582 .zfail_op = MALI_STENCIL_KEEP,
583 .zpass_op = MALI_STENCIL_KEEP,
584 .writemask = 0xFF,
585 .valuemask = 0xFF
586 };
587
588 panfrost_make_stencil_state(&default_stencil,
589 &fragmeta->stencil_front);
590 fragmeta->stencil_mask_front = default_stencil.writemask;
591 fragmeta->stencil_back = fragmeta->stencil_front;
592 fragmeta->stencil_mask_back = default_stencil.writemask;
593 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
594 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
595 } else {
596 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
597 zsa->stencil[0].enabled);
598 panfrost_make_stencil_state(&zsa->stencil[0],
599 &fragmeta->stencil_front);
600 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
601 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
602
603 /* If back-stencil is not enabled, use the front values */
604
605 if (zsa->stencil[1].enabled) {
606 panfrost_make_stencil_state(&zsa->stencil[1],
607 &fragmeta->stencil_back);
608 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
609 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
610 } else {
611 fragmeta->stencil_back = fragmeta->stencil_front;
612 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
613 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
614 }
615
616 if (zsa->depth.enabled)
617 zfunc = zsa->depth.func;
618
619 /* Depth state (TODO: Refactor) */
620
621 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
622 zsa->depth.writemask);
623 }
624
625 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
626 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
627 }
628
629 static bool
630 panfrost_fs_required(
631 struct panfrost_shader_state *fs,
632 struct panfrost_blend_final *blend,
633 unsigned rt_count)
634 {
635 /* If we generally have side effects */
636 if (fs->fs_sidefx)
637 return true;
638
639 /* If colour is written we need to execute */
640 for (unsigned i = 0; i < rt_count; ++i) {
641 if (!blend[i].no_colour)
642 return true;
643 }
644
645 /* If depth is written and not implied we need to execute.
646 * TODO: Predicate on Z/S writes being enabled */
647 return (fs->writes_depth || fs->writes_stencil);
648 }
649
650 static void
651 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
652 struct mali_shader_meta *fragmeta,
653 void *rts)
654 {
655 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
656 const struct panfrost_device *dev = pan_device(ctx->base.screen);
657 struct panfrost_shader_state *fs;
658 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
659
660 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
661 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
662 !ctx->blend->base.dither);
663
664 SET_BIT(fragmeta->unknown2_4, MALI_ALPHA_TO_COVERAGE,
665 ctx->blend->base.alpha_to_coverage);
666
667 /* Get blending setup */
668 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
669
670 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
671 unsigned shader_offset = 0;
672 struct panfrost_bo *shader_bo = NULL;
673
674 for (unsigned c = 0; c < rt_count; ++c)
675 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
676 &shader_offset);
677
678 /* Disable shader execution if we can */
679 if (dev->quirks & MIDGARD_SHADERLESS
680 && !panfrost_fs_required(fs, blend, rt_count)) {
681 fragmeta->shader = 0;
682 fragmeta->attribute_count = 0;
683 fragmeta->varying_count = 0;
684 fragmeta->texture_count = 0;
685 fragmeta->sampler_count = 0;
686
687 /* This feature is not known to work on Bifrost */
688 fragmeta->midgard1.work_count = 1;
689 fragmeta->midgard1.uniform_count = 0;
690 fragmeta->midgard1.uniform_buffer_count = 0;
691 }
692
693 /* If there is a blend shader, work registers are shared. We impose 8
694 * work registers as a limit for blend shaders. Should be lower XXX */
695
696 if (!(dev->quirks & IS_BIFROST)) {
697 for (unsigned c = 0; c < rt_count; ++c) {
698 if (blend[c].is_shader) {
699 fragmeta->midgard1.work_count =
700 MAX2(fragmeta->midgard1.work_count, 8);
701 }
702 }
703 }
704
705 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
706 * copied to the blend_meta appended (by convention), but this is the
707 * field actually read by the hardware. (Or maybe both are read...?).
708 * Specify the last RTi with a blend shader. */
709
710 fragmeta->blend.shader = 0;
711
712 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
713 if (!blend[rt].is_shader)
714 continue;
715
716 fragmeta->blend.shader = blend[rt].shader.gpu |
717 blend[rt].shader.first_tag;
718 break;
719 }
720
721 if (dev->quirks & MIDGARD_SFBD) {
722 /* On platforms with only a single render target, the blend
723 * information is inside the shader meta itself. We additionally
724 * need to signal CAN_DISCARD for nontrivial blend modes (so
725 * we're able to read back the destination buffer) */
726
727 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
728 blend[0].is_shader);
729
730 if (!blend[0].is_shader) {
731 fragmeta->blend.equation = *blend[0].equation.equation;
732 fragmeta->blend.constant = blend[0].equation.constant;
733 }
734
735 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
736 !blend[0].no_blending || fs->can_discard);
737
738 batch->draws |= PIPE_CLEAR_COLOR0;
739 return;
740 }
741
742 if (dev->quirks & IS_BIFROST) {
743 bool no_blend = true;
744
745 for (unsigned i = 0; i < rt_count; ++i)
746 no_blend &= (blend[i].no_blending | blend[i].no_colour);
747
748 SET_BIT(fragmeta->bifrost1.unk1, MALI_BIFROST_EARLY_Z,
749 !fs->can_discard && !fs->writes_depth && no_blend);
750 }
751
752 /* Additional blend descriptor tacked on for jobs using MFBD */
753
754 for (unsigned i = 0; i < rt_count; ++i) {
755 unsigned flags = 0;
756
757 if (ctx->pipe_framebuffer.nr_cbufs > i && !blend[i].no_colour) {
758 flags = 0x200;
759 batch->draws |= (PIPE_CLEAR_COLOR0 << i);
760
761 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
762 (ctx->pipe_framebuffer.cbufs[i]) &&
763 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
764
765 SET_BIT(flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
766 SET_BIT(flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
767 SET_BIT(flags, MALI_BLEND_SRGB, is_srgb);
768 SET_BIT(flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
769 }
770
771 if (dev->quirks & IS_BIFROST) {
772 struct bifrost_blend_rt *brts = rts;
773
774 brts[i].flags = flags;
775
776 if (blend[i].is_shader) {
777 /* The blend shader's address needs to be at
778 * the same top 32 bit as the fragment shader.
779 * TODO: Ensure that's always the case.
780 */
781 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
782 (fs->bo->gpu & (0xffffffffull << 32)));
783 brts[i].shader = blend[i].shader.gpu;
784 brts[i].unk2 = 0x0;
785 } else if (ctx->pipe_framebuffer.nr_cbufs > i) {
786 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
787 const struct util_format_description *format_desc;
788 format_desc = util_format_description(format);
789
790 brts[i].equation = *blend[i].equation.equation;
791
792 /* TODO: this is a bit more complicated */
793 brts[i].constant = blend[i].equation.constant;
794
795 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
796
797 /* 0x19 disables blending and forces REPLACE
798 * mode (equivalent to rgb_mode = alpha_mode =
799 * 0x122, colour mask = 0xF). 0x1a allows
800 * blending. */
801 brts[i].unk2 = blend[i].no_blending ? 0x19 : 0x1a;
802
803 brts[i].shader_type = fs->blend_types[i];
804 } else {
805 /* Dummy attachment for depth-only */
806 brts[i].unk2 = 0x3;
807 brts[i].shader_type = fs->blend_types[i];
808 }
809 } else {
810 struct midgard_blend_rt *mrts = rts;
811 mrts[i].flags = flags;
812
813 if (blend[i].is_shader) {
814 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
815 } else {
816 mrts[i].blend.equation = *blend[i].equation.equation;
817 mrts[i].blend.constant = blend[i].equation.constant;
818 }
819 }
820 }
821 }
822
823 static void
824 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
825 struct mali_shader_meta *fragmeta,
826 void *rts)
827 {
828 const struct panfrost_device *dev = pan_device(ctx->base.screen);
829 struct panfrost_shader_state *fs;
830
831 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
832
833 bool msaa = ctx->rasterizer && ctx->rasterizer->base.multisample;
834 fragmeta->coverage_mask = (msaa ? ctx->sample_mask : ~0) & 0xF;
835
836 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10;
837 fragmeta->unknown2_4 = 0x4e0;
838
839 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
840 * is required (independent of 32-bit/64-bit descriptors), or why it's
841 * not used on later GPU revisions. Otherwise, all shader jobs fault on
842 * these earlier chips (perhaps this is a chicken bit of some kind).
843 * More investigation is needed. */
844
845 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
846
847 if (dev->quirks & IS_BIFROST) {
848 /* TODO */
849 } else {
850 /* Depending on whether it's legal in the given shader, we try to
851 * enable early-z testing. TODO: respect e-z force */
852
853 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
854 !fs->can_discard && !fs->writes_global &&
855 !fs->writes_depth && !fs->writes_stencil &&
856 !ctx->blend->base.alpha_to_coverage);
857
858 /* Add the writes Z/S flags if needed. */
859 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
860 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
861
862 /* Any time texturing is used, derivatives are implicitly calculated,
863 * so we need to enable helper invocations */
864
865 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
866 fs->helper_invocations);
867
868 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
869
870 bool depth_enabled = fs->writes_depth ||
871 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
872
873 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_TILEBUFFER,
874 fs->outputs_read || (!depth_enabled && fs->can_discard));
875 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
876 }
877
878 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
879 panfrost_frag_meta_zsa_update(ctx, fragmeta);
880 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
881 }
882
883 void
884 panfrost_emit_shader_meta(struct panfrost_batch *batch,
885 enum pipe_shader_type st,
886 struct mali_vertex_tiler_postfix *postfix)
887 {
888 struct panfrost_context *ctx = batch->ctx;
889 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
890
891 if (!ss) {
892 postfix->shader = 0;
893 return;
894 }
895
896 struct mali_shader_meta meta;
897
898 panfrost_shader_meta_init(ctx, st, &meta);
899
900 /* Add the shader BO to the batch. */
901 panfrost_batch_add_bo(batch, ss->bo,
902 PAN_BO_ACCESS_PRIVATE |
903 PAN_BO_ACCESS_READ |
904 panfrost_bo_access_for_stage(st));
905
906 mali_ptr shader_ptr;
907
908 if (st == PIPE_SHADER_FRAGMENT) {
909 struct panfrost_device *dev = pan_device(ctx->base.screen);
910 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
911 size_t desc_size = sizeof(meta);
912 void *rts = NULL;
913 struct panfrost_transfer xfer;
914 unsigned rt_size;
915
916 if (dev->quirks & MIDGARD_SFBD)
917 rt_size = 0;
918 else if (dev->quirks & IS_BIFROST)
919 rt_size = sizeof(struct bifrost_blend_rt);
920 else
921 rt_size = sizeof(struct midgard_blend_rt);
922
923 desc_size += rt_size * rt_count;
924
925 if (rt_size)
926 rts = rzalloc_size(ctx, rt_size * rt_count);
927
928 panfrost_frag_shader_meta_init(ctx, &meta, rts);
929
930 xfer = panfrost_pool_alloc(&batch->pool, desc_size);
931
932 memcpy(xfer.cpu, &meta, sizeof(meta));
933 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
934
935 if (rt_size)
936 ralloc_free(rts);
937
938 shader_ptr = xfer.gpu;
939 } else {
940 shader_ptr = panfrost_pool_upload(&batch->pool, &meta,
941 sizeof(meta));
942 }
943
944 postfix->shader = shader_ptr;
945 }
946
947 static void
948 panfrost_mali_viewport_init(struct panfrost_context *ctx,
949 struct mali_viewport *mvp)
950 {
951 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
952
953 /* Clip bounds are encoded as floats. The viewport itself is encoded as
954 * (somewhat) asymmetric ints. */
955
956 const struct pipe_scissor_state *ss = &ctx->scissor;
957
958 memset(mvp, 0, sizeof(*mvp));
959
960 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
961 * each direction. Clipping to the viewport in theory should work, but
962 * in practice causes issues when we're not explicitly trying to
963 * scissor */
964
965 *mvp = (struct mali_viewport) {
966 .clip_minx = -INFINITY,
967 .clip_miny = -INFINITY,
968 .clip_maxx = INFINITY,
969 .clip_maxy = INFINITY,
970 };
971
972 /* Always scissor to the viewport by default. */
973 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
974 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
975
976 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
977 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
978
979 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
980 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
981
982 /* Apply the scissor test */
983
984 unsigned minx, miny, maxx, maxy;
985
986 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
987 minx = MAX2(ss->minx, vp_minx);
988 miny = MAX2(ss->miny, vp_miny);
989 maxx = MIN2(ss->maxx, vp_maxx);
990 maxy = MIN2(ss->maxy, vp_maxy);
991 } else {
992 minx = vp_minx;
993 miny = vp_miny;
994 maxx = vp_maxx;
995 maxy = vp_maxy;
996 }
997
998 /* Hardware needs the min/max to be strictly ordered, so flip if we
999 * need to. The viewport transformation in the vertex shader will
1000 * handle the negatives if we don't */
1001
1002 if (miny > maxy) {
1003 unsigned temp = miny;
1004 miny = maxy;
1005 maxy = temp;
1006 }
1007
1008 if (minx > maxx) {
1009 unsigned temp = minx;
1010 minx = maxx;
1011 maxx = temp;
1012 }
1013
1014 if (minz > maxz) {
1015 float temp = minz;
1016 minz = maxz;
1017 maxz = temp;
1018 }
1019
1020 /* Clamp to the framebuffer size as a last check */
1021
1022 minx = MIN2(ctx->pipe_framebuffer.width, minx);
1023 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1024
1025 miny = MIN2(ctx->pipe_framebuffer.height, miny);
1026 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1027
1028 /* Upload */
1029
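/* viewport0 is the inclusive minimum corner; viewport1 is the maximum,
 * stored off-by-one via MALI_POSITIVE (panfrost_emit_viewport adds the
 * one back when growing the batch scissor) */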
1030 mvp->viewport0[0] = minx;
1031 mvp->viewport1[0] = MALI_POSITIVE(maxx);
1032
1033 mvp->viewport0[1] = miny;
1034 mvp->viewport1[1] = MALI_POSITIVE(maxy);
1035
1036 bool clip_near = true;
1037 bool clip_far = true;
1038
1039 if (ctx->rasterizer) {
1040 clip_near = ctx->rasterizer->base.depth_clip_near;
1041 clip_far = ctx->rasterizer->base.depth_clip_far;
1042 }
1043
1044 mvp->clip_minz = clip_near ? minz : -INFINITY;
1045 mvp->clip_maxz = clip_far ? maxz : INFINITY;
1046 }
1047
1048 void
1049 panfrost_emit_viewport(struct panfrost_batch *batch,
1050 struct mali_vertex_tiler_postfix *tiler_postfix)
1051 {
1052 struct panfrost_context *ctx = batch->ctx;
1053 struct mali_viewport mvp;
1054
1055 panfrost_mali_viewport_init(batch->ctx, &mvp);
1056
1057 /* Update the job, unless we're doing wallpapering (whose lack of
1058 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1059 * just... be faster :) */
1060
1061 if (!ctx->wallpaper_batch)
1062 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
1063 mvp.viewport0[1],
1064 mvp.viewport1[0] + 1,
1065 mvp.viewport1[1] + 1);
1066
1067 tiler_postfix->viewport = panfrost_pool_upload(&batch->pool, &mvp,
1068 sizeof(mvp));
1069 }
1070
1071 static mali_ptr
1072 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1073 enum pipe_shader_type st,
1074 struct panfrost_constant_buffer *buf,
1075 unsigned index)
1076 {
1077 struct pipe_constant_buffer *cb = &buf->cb[index];
1078 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1079
1080 if (rsrc) {
1081 panfrost_batch_add_bo(batch, rsrc->bo,
1082 PAN_BO_ACCESS_SHARED |
1083 PAN_BO_ACCESS_READ |
1084 panfrost_bo_access_for_stage(st));
1085
1086 /* Alignment guaranteed by
1087 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1088 return rsrc->bo->gpu + cb->buffer_offset;
1089 } else if (cb->user_buffer) {
1090 return panfrost_pool_upload(&batch->pool,
1091 cb->user_buffer +
1092 cb->buffer_offset,
1093 cb->buffer_size);
1094 } else {
1095 unreachable("No constant buffer");
1096 }
1097 }
1098
1099 struct sysval_uniform {
1100 union {
1101 float f[4];
1102 int32_t i[4];
1103 uint32_t u[4];
1104 uint64_t du[2];
1105 };
1106 };
1107
1108 static void
1109 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1110 struct sysval_uniform *uniform)
1111 {
1112 struct panfrost_context *ctx = batch->ctx;
1113 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1114
1115 uniform->f[0] = vp->scale[0];
1116 uniform->f[1] = vp->scale[1];
1117 uniform->f[2] = vp->scale[2];
1118 }
1119
1120 static void
1121 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1122 struct sysval_uniform *uniform)
1123 {
1124 struct panfrost_context *ctx = batch->ctx;
1125 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1126
1127 uniform->f[0] = vp->translate[0];
1128 uniform->f[1] = vp->translate[1];
1129 uniform->f[2] = vp->translate[2];
1130 }
1131
1132 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1133 enum pipe_shader_type st,
1134 unsigned int sysvalid,
1135 struct sysval_uniform *uniform)
1136 {
1137 struct panfrost_context *ctx = batch->ctx;
1138 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1139 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1140 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1141 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1142
1143 assert(dim);
1144 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1145
1146 if (dim > 1)
1147 uniform->i[1] = u_minify(tex->texture->height0,
1148 tex->u.tex.first_level);
1149
1150 if (dim > 2)
1151 uniform->i[2] = u_minify(tex->texture->depth0,
1152 tex->u.tex.first_level);
1153
1154 if (is_array)
1155 uniform->i[dim] = tex->texture->array_size;
1156 }
1157
1158 static void
1159 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1160 enum pipe_shader_type st,
1161 unsigned ssbo_id,
1162 struct sysval_uniform *uniform)
1163 {
1164 struct panfrost_context *ctx = batch->ctx;
1165
1166 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1167 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1168
1169 /* Compute address */
1170 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1171
1172 panfrost_batch_add_bo(batch, bo,
1173 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1174 panfrost_bo_access_for_stage(st));
1175
1176 /* Upload address and size as sysval */
1177 uniform->du[0] = bo->gpu + sb.buffer_offset;
1178 uniform->u[2] = sb.buffer_size;
1179 }
1180
1181 static void
1182 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1183 enum pipe_shader_type st,
1184 unsigned samp_idx,
1185 struct sysval_uniform *uniform)
1186 {
1187 struct panfrost_context *ctx = batch->ctx;
1188 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1189
1190 uniform->f[0] = sampl->min_lod;
1191 uniform->f[1] = sampl->max_lod;
1192 uniform->f[2] = sampl->lod_bias;
1193
1194 /* Even without any errata, Midgard represents "no mipmapping" as
1195 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1196 * panfrost_create_sampler_state which also explains our choice of
1197 * epsilon value (again to keep behaviour consistent) */
1198
1199 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1200 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1201 }
1202
1203 static void
1204 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1205 struct sysval_uniform *uniform)
1206 {
1207 struct panfrost_context *ctx = batch->ctx;
1208
1209 uniform->u[0] = ctx->compute_grid->grid[0];
1210 uniform->u[1] = ctx->compute_grid->grid[1];
1211 uniform->u[2] = ctx->compute_grid->grid[2];
1212 }
1213
1214 static void
1215 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1216 struct panfrost_shader_state *ss,
1217 enum pipe_shader_type st)
1218 {
1219 struct sysval_uniform *uniforms = (void *)buf;
1220
1221 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1222 int sysval = ss->sysval[i];
1223
1224 switch (PAN_SYSVAL_TYPE(sysval)) {
1225 case PAN_SYSVAL_VIEWPORT_SCALE:
1226 panfrost_upload_viewport_scale_sysval(batch,
1227 &uniforms[i]);
1228 break;
1229 case PAN_SYSVAL_VIEWPORT_OFFSET:
1230 panfrost_upload_viewport_offset_sysval(batch,
1231 &uniforms[i]);
1232 break;
1233 case PAN_SYSVAL_TEXTURE_SIZE:
1234 panfrost_upload_txs_sysval(batch, st,
1235 PAN_SYSVAL_ID(sysval),
1236 &uniforms[i]);
1237 break;
1238 case PAN_SYSVAL_SSBO:
1239 panfrost_upload_ssbo_sysval(batch, st,
1240 PAN_SYSVAL_ID(sysval),
1241 &uniforms[i]);
1242 break;
1243 case PAN_SYSVAL_NUM_WORK_GROUPS:
1244 panfrost_upload_num_work_groups_sysval(batch,
1245 &uniforms[i]);
1246 break;
1247 case PAN_SYSVAL_SAMPLER:
1248 panfrost_upload_sampler_sysval(batch, st,
1249 PAN_SYSVAL_ID(sysval),
1250 &uniforms[i]);
1251 break;
1252 default:
1253 assert(0);
1254 }
1255 }
1256 }
1257
1258 static const void *
1259 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1260 unsigned index)
1261 {
1262 struct pipe_constant_buffer *cb = &buf->cb[index];
1263 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1264
1265 if (rsrc)
1266 return rsrc->bo->cpu;
1267 else if (cb->user_buffer)
1268 return cb->user_buffer;
1269 else
1270 unreachable("No constant buffer");
1271 }
1272
1273 void
1274 panfrost_emit_const_buf(struct panfrost_batch *batch,
1275 enum pipe_shader_type stage,
1276 struct mali_vertex_tiler_postfix *postfix)
1277 {
1278 struct panfrost_context *ctx = batch->ctx;
1279 struct panfrost_shader_variants *all = ctx->shader[stage];
1280
1281 if (!all)
1282 return;
1283
1284 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1285
1286 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1287
1288 /* Uniforms are implicitly UBO #0 */
1289 bool has_uniforms = buf->enabled_mask & (1 << 0);
1290
1291 /* Allocate room for the sysval and the uniforms */
1292 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1293 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1294 size_t size = sys_size + uniform_size;
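/* Sysvals are laid out at the start of the allocation, followed by the
 * application's uniforms; the combined buffer becomes UBO #0 below */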
1295 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1296 size);
1297
1298 /* Upload sysvals requested by the shader */
1299 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1300
1301 /* Upload uniforms */
1302 if (has_uniforms && uniform_size) {
1303 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1304 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1305 }
1306
1307 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1308 * uploaded */
1309
1310 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1311 assert(ubo_count >= 1);
1312
1313 size_t sz = sizeof(uint64_t) * ubo_count;
1314 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1315 int uniform_count = ss->uniform_count;
1316
1317 /* Upload uniforms as a UBO */
1318 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1319
1320 /* The rest are honest-to-goodness UBOs */
1321
1322 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1323 size_t usz = buf->cb[ubo].buffer_size;
1324 bool enabled = buf->enabled_mask & (1 << ubo);
1325 bool empty = usz == 0;
1326
1327 if (!enabled || empty) {
1328 /* Stub out disabled UBOs to catch accesses */
1329 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1330 continue;
1331 }
1332
1333 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1334 buf, ubo);
1335
1336 unsigned bytes_per_field = 16;
1337 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1338 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1339 }
1340
1341 mali_ptr ubufs = panfrost_pool_upload(&batch->pool, ubos, sz);
1342 postfix->uniforms = transfer.gpu;
1343 postfix->uniform_buffers = ubufs;
1344
1345 buf->dirty_mask = 0;
1346 }
1347
1348 void
1349 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1350 const struct pipe_grid_info *info,
1351 struct midgard_payload_vertex_tiler *vtp)
1352 {
1353 struct panfrost_context *ctx = batch->ctx;
1354 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1355 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1356 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1357 128));
1358 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1359 info->grid[2] * 4;
1360 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1361 shared_size,
1362 1);
1363
1364 struct mali_shared_memory shared = {
1365 .shared_memory = bo->gpu,
1366 .shared_workgroup_count =
1367 util_logbase2_ceil(info->grid[0]) +
1368 util_logbase2_ceil(info->grid[1]) +
1369 util_logbase2_ceil(info->grid[2]),
1370 .shared_unk1 = 0x2,
1371 .shared_shift = util_logbase2(single_size) - 1
1372 };
1373
1374 vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared,
1375 sizeof(shared));
1376 }
1377
1378 static mali_ptr
1379 panfrost_get_tex_desc(struct panfrost_batch *batch,
1380 enum pipe_shader_type st,
1381 struct panfrost_sampler_view *view)
1382 {
1383 if (!view)
1384 return (mali_ptr) 0;
1385
1386 struct pipe_sampler_view *pview = &view->base;
1387 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1388
1389 /* Add the BO to the job so it's retained until the job is done. */
1390
1391 panfrost_batch_add_bo(batch, rsrc->bo,
1392 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1393 panfrost_bo_access_for_stage(st));
1394
1395 panfrost_batch_add_bo(batch, view->bo,
1396 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1397 panfrost_bo_access_for_stage(st));
1398
1399 return view->bo->gpu;
1400 }
1401
1402 static void
1403 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1404 struct pipe_context *pctx)
1405 {
1406 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1407 if (view->texture_bo != rsrc->bo->gpu ||
1408 view->layout != rsrc->layout) {
1409 panfrost_bo_unreference(view->bo);
1410 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1411 }
1412 }
1413
1414 void
1415 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1416 enum pipe_shader_type stage,
1417 struct mali_vertex_tiler_postfix *postfix)
1418 {
1419 struct panfrost_context *ctx = batch->ctx;
1420 struct panfrost_device *device = pan_device(ctx->base.screen);
1421
1422 if (!ctx->sampler_view_count[stage])
1423 return;
1424
1425 if (device->quirks & IS_BIFROST) {
1426 struct bifrost_texture_descriptor *descriptors;
1427
1428 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1429 ctx->sampler_view_count[stage]);
1430
1431 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1432 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1433 struct pipe_sampler_view *pview = &view->base;
1434 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1435 panfrost_update_sampler_view(view, &ctx->base);
1436
1437 /* Add the BOs to the job so they are retained until the job is done. */
1438
1439 panfrost_batch_add_bo(batch, rsrc->bo,
1440 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1441 panfrost_bo_access_for_stage(stage));
1442
1443 panfrost_batch_add_bo(batch, view->bo,
1444 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1445 panfrost_bo_access_for_stage(stage));
1446
1447 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1448 }
1449
1450 postfix->textures = panfrost_pool_upload(&batch->pool,
1451 descriptors,
1452 sizeof(struct bifrost_texture_descriptor) *
1453 ctx->sampler_view_count[stage]);
1454
1455 free(descriptors);
1456 } else {
1457 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1458
1459 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1460 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1461
1462 panfrost_update_sampler_view(view, &ctx->base);
1463
1464 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1465 }
1466
1467 postfix->textures = panfrost_pool_upload(&batch->pool,
1468 trampolines,
1469 sizeof(uint64_t) *
1470 ctx->sampler_view_count[stage]);
1471 }
1472 }
1473
1474 void
1475 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1476 enum pipe_shader_type stage,
1477 struct mali_vertex_tiler_postfix *postfix)
1478 {
1479 struct panfrost_context *ctx = batch->ctx;
1480 struct panfrost_device *device = pan_device(ctx->base.screen);
1481
1482 if (!ctx->sampler_count[stage])
1483 return;
1484
1485 if (device->quirks & IS_BIFROST) {
1486 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1487 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1488 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1489 transfer_size);
1490 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1491
1492 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1493 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1494
1495 postfix->sampler_descriptor = transfer.gpu;
1496 } else {
1497 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1498 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1499 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1500 transfer_size);
1501 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1502
1503 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1504 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1505
1506 postfix->sampler_descriptor = transfer.gpu;
1507 }
1508 }
1509
1510 void
1511 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1512 struct mali_vertex_tiler_postfix *vertex_postfix)
1513 {
1514 struct panfrost_context *ctx = batch->ctx;
1515
1516 if (!ctx->vertex)
1517 return;
1518
1519 struct panfrost_vertex_state *so = ctx->vertex;
1520
1521 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1522 vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
1523 sizeof(*so->hw) *
1524 PAN_MAX_ATTRIBUTE);
1525 }
1526
1527 void
1528 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1529 struct mali_vertex_tiler_postfix *vertex_postfix)
1530 {
1531 struct panfrost_context *ctx = batch->ctx;
1532 struct panfrost_vertex_state *so = ctx->vertex;
1533
1534 /* Staged mali_attr, and index into them. i =/= k, depending on the
1535 * vertex buffer mask and instancing. Twice as much room is allocated,
1536 * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1537 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1538 unsigned k = 0;
1539
1540 for (unsigned i = 0; i < so->num_elements; ++i) {
1541 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1542 * means duplicating some vertex buffers (who cares? aside from
1543 * maybe some caching implications but I somehow doubt that
1544 * matters) */
1545
1546 struct pipe_vertex_element *elem = &so->pipe[i];
1547 unsigned vbi = elem->vertex_buffer_index;
1548
1549 /* The exception to 1:1 mapping is that we can have multiple
1550 * entries (NPOT divisors), so we fix up anyway */
1551
1552 so->hw[i].index = k;
1553
1554 if (!(ctx->vb_mask & (1 << vbi)))
1555 continue;
1556
1557 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1558 struct panfrost_resource *rsrc;
1559
1560 rsrc = pan_resource(buf->buffer.resource);
1561 if (!rsrc)
1562 continue;
1563
1564 /* Align to 64 bytes by masking off the lower bits. This
1565 * will be adjusted back when we fixup the src_offset in
1566 * mali_attr_meta */
1567
1568 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1569 mali_ptr addr = raw_addr & ~63;
1570 unsigned chopped_addr = raw_addr - addr;
1571
1572 /* Add a dependency of the batch on the vertex buffer */
1573 panfrost_batch_add_bo(batch, rsrc->bo,
1574 PAN_BO_ACCESS_SHARED |
1575 PAN_BO_ACCESS_READ |
1576 PAN_BO_ACCESS_VERTEX_TILER);
1577
1578 /* Set common fields */
1579 attrs[k].elements = addr;
1580 attrs[k].stride = buf->stride;
1581
1582 /* Since we advanced the base pointer, we shrink the buffer
1583 * size */
1584 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1585
1586 /* We need to add the extra size we masked off (for
1587 * correctness) so the data doesn't get clamped away */
1588 attrs[k].size += chopped_addr;
1589
1590 /* For non-instancing make sure we initialize */
1591 attrs[k].shift = attrs[k].extra_flags = 0;
1592
1593 /* Instancing uses a dramatically different code path than
1594 * linear, so dispatch for the actual emission now that the
1595 * common code is finished */
1596
1597 unsigned divisor = elem->instance_divisor;
1598
1599 if (divisor && ctx->instance_count == 1) {
1600 /* Silly corner case where there's a divisor(=1) but
1601 * there's no legitimate instancing. So we want *every*
1602 * attribute to be the same. So set stride to zero so
1603 * we don't go anywhere. */
1604
1605 attrs[k].size = attrs[k].stride + chopped_addr;
1606 attrs[k].stride = 0;
1607 attrs[k++].elements |= MALI_ATTR_LINEAR;
1608 } else if (ctx->instance_count <= 1) {
1609 /* Normal, non-instanced attributes */
1610 attrs[k++].elements |= MALI_ATTR_LINEAR;
1611 } else {
1612 unsigned instance_shift = vertex_postfix->instance_shift;
1613 unsigned instance_odd = vertex_postfix->instance_odd;
1614
1615 k += panfrost_vertex_instanced(ctx->padded_count,
1616 instance_shift,
1617 instance_odd,
1618 divisor, &attrs[k]);
1619 }
1620 }
1621
1622 /* Add special gl_VertexID/gl_InstanceID buffers */
1623
1624 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1625 so->hw[PAN_VERTEX_ID].index = k++;
1626 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1627 so->hw[PAN_INSTANCE_ID].index = k++;
1628
1629 /* Upload whatever we emitted and go */
1630
1631 vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
1632 k * sizeof(*attrs));
1633 }
1634
1635 static mali_ptr
1636 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1637 unsigned stride, unsigned count)
1638 {
1639 /* Fill out the descriptor */
1640 slot->stride = stride;
1641 slot->size = stride * count;
1642 slot->shift = slot->extra_flags = 0;
1643
1644 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1645 slot->size);
1646
1647 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1648
1649 return transfer.gpu;
1650 }
1651
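/* Streamout records are aligned down to 64 bytes (see the ~63 mask in
 * panfrost_emit_streamout), so compute the sub-64-byte remainder of the
 * target's start; it is applied back as the varying's src_offset */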
1652 static unsigned
1653 panfrost_streamout_offset(unsigned stride, unsigned offset,
1654 struct pipe_stream_output_target *target)
1655 {
1656 return (target->buffer_offset + (offset * stride * 4)) & 63;
1657 }
1658
1659 static void
1660 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1661 unsigned stride, unsigned offset, unsigned count,
1662 struct pipe_stream_output_target *target)
1663 {
1664 /* Fill out the descriptor */
1665 slot->stride = stride * 4;
1666 slot->shift = slot->extra_flags = 0;
1667
1668 unsigned max_size = target->buffer_size;
1669 unsigned expected_size = slot->stride * count;
1670
1671 /* Grab the BO and bind it to the batch */
1672 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1673
1674 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1675 * the perspective of the TILER and FRAGMENT.
1676 */
1677 panfrost_batch_add_bo(batch, bo,
1678 PAN_BO_ACCESS_SHARED |
1679 PAN_BO_ACCESS_RW |
1680 PAN_BO_ACCESS_VERTEX_TILER |
1681 PAN_BO_ACCESS_FRAGMENT);
1682
1683 /* We will have an offset applied to get alignment */
1684 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1685 slot->elements = (addr & ~63) | MALI_ATTR_LINEAR;
1686 slot->size = MIN2(max_size, expected_size) + (addr & 63);
1687 }
1688
1689 static bool
1690 has_point_coord(unsigned mask, gl_varying_slot loc)
1691 {
1692 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1693 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1694 else if (loc == VARYING_SLOT_PNTC)
1695 return (mask & (1 << 8));
1696 else
1697 return false;
1698 }
1699
1700 /* Helpers for manipulating stream out information so we can pack varyings
1701 * accordingly. Compute the src_offset for a given captured varying */
1702
1703 static struct pipe_stream_output *
1704 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1705 {
1706 for (unsigned i = 0; i < info->num_outputs; ++i) {
1707 if (info->output[i].register_index == loc)
1708 return &info->output[i];
1709 }
1710
1711 unreachable("Varying not captured");
1712 }
1713
1714 static unsigned
1715 pan_varying_size(enum mali_format fmt)
1716 {
1717 unsigned type = MALI_EXTRACT_TYPE(fmt);
1718 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1719 unsigned bits = MALI_EXTRACT_BITS(fmt);
1720 unsigned bpc = 0;
1721
1722 if (bits == MALI_CHANNEL_FLOAT) {
1723 /* No doubles */
1724 bool fp16 = (type == MALI_FORMAT_SINT);
1725 assert(fp16 || (type == MALI_FORMAT_UNORM));
1726
1727 bpc = fp16 ? 2 : 4;
1728 } else {
1729 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1730
1731 /* See the enums */
1732 bits = 1 << bits;
1733 assert(bits >= 8);
1734 bpc = bits / 8;
1735 }
1736
1737 return bpc * chan;
1738 }
1739
1740 /* Indices for named (non-XFB) varyings that are present. These are packed
1741 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1742 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
1743 * of a given special field given a shift S by:
1744 *
1745 * idx = popcount(P & ((1 << S) - 1))
1746 *
1747 * That is... look at all of the varyings that come earlier and count them; that
1748 * count is the new index. Likewise, the total number of special
1749 * buffers required is simply popcount(P)
1750 */
1751
1752 enum pan_special_varying {
1753 PAN_VARY_GENERAL = 0,
1754 PAN_VARY_POSITION = 1,
1755 PAN_VARY_PSIZ = 2,
1756 PAN_VARY_PNTCOORD = 3,
1757 PAN_VARY_FACE = 4,
1758 PAN_VARY_FRAGCOORD = 5,
1759
1760 /* Keep last */
1761 PAN_VARY_MAX,
1762 };
1763
1764 /* Given a varying, figure out which index it corresponds to */
1765
1766 static inline unsigned
1767 pan_varying_index(unsigned present, enum pan_special_varying v)
1768 {
1769 unsigned mask = (1 << v) - 1;
1770 return util_bitcount(present & mask);
1771 }
1772
1773 /* Get the base offset for XFB buffers, which by convention come after
1774 * everything else. Wrapper function for semantic reasons; by construction this
1775 * is just popcount. */
1776
1777 static inline unsigned
1778 pan_xfb_base(unsigned present)
1779 {
1780 return util_bitcount(present);
1781 }
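
/* Worked example: with present = (1 << PAN_VARY_GENERAL) |
 * (1 << PAN_VARY_POSITION) | (1 << PAN_VARY_FACE) = 0b10011,
 * pan_varying_index(present, PAN_VARY_FACE) = popcount(0b00011) = 2, and the
 * first XFB buffer sits at pan_xfb_base(present) = popcount(0b10011) = 3. */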
1782
1783 /* Computes the present mask for varyings so we can start emitting varying records */
1784
1785 static inline unsigned
1786 pan_varying_present(
1787 struct panfrost_shader_state *vs,
1788 struct panfrost_shader_state *fs,
1789 unsigned quirks)
1790 {
1791 /* At the moment we always emit general and position buffers. Not
1792 * strictly necessary but usually harmless */
1793
1794 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1795
1796 /* Enable special buffers based on the shader info */
1797
1798 if (vs->writes_point_size)
1799 present |= (1 << PAN_VARY_PSIZ);
1800
1801 if (fs->reads_point_coord)
1802 present |= (1 << PAN_VARY_PNTCOORD);
1803
1804 if (fs->reads_face)
1805 present |= (1 << PAN_VARY_FACE);
1806
1807 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1808 present |= (1 << PAN_VARY_FRAGCOORD);
1809
1810 /* Also, if we have a point sprite, we need a point coord buffer */
1811
1812 for (unsigned i = 0; i < fs->varying_count; i++) {
1813 gl_varying_slot loc = fs->varyings_loc[i];
1814
1815 if (has_point_coord(fs->point_sprite_mask, loc))
1816 present |= (1 << PAN_VARY_PNTCOORD);
1817 }
1818
1819 return present;
1820 }
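
/* e.g. if the VS writes gl_PointSize and the FS reads gl_PointCoord (and no
 * other special inputs), this returns GENERAL | POSITION | PSIZ | PNTCOORD,
 * i.e. four varying buffers before any XFB targets. */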
1821
1822 /* Emitters for varying records */
1823
1824 static struct mali_attr_meta
1825 pan_emit_vary(unsigned present, enum pan_special_varying buf,
1826 unsigned quirks, enum mali_format format,
1827 unsigned offset)
1828 {
1829 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1830
1831 struct mali_attr_meta meta = {
1832 .index = pan_varying_index(present, buf),
1833 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1834 .swizzle = quirks & HAS_SWIZZLES ?
1835 panfrost_get_default_swizzle(nr_channels) :
1836 panfrost_bifrost_swizzle(nr_channels),
1837 .format = format,
1838 .src_offset = offset
1839 };
1840
1841 return meta;
1842 }
1843
1844 /* General varying that is unused */
1845
1846 static struct mali_attr_meta
1847 pan_emit_vary_only(unsigned present, unsigned quirks)
1848 {
1849 return pan_emit_vary(present, 0, quirks, MALI_VARYING_DISCARD, 0);
1850 }
1851
1852 /* Special records */
1853
1854 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1855 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1856 [PAN_VARY_PSIZ] = MALI_R16F,
1857 [PAN_VARY_PNTCOORD] = MALI_R16F,
1858 [PAN_VARY_FACE] = MALI_R32I,
1859 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1860 };
1861
1862 static struct mali_attr_meta
1863 pan_emit_vary_special(unsigned present, enum pan_special_varying buf,
1864 unsigned quirks)
1865 {
1866 assert(buf < PAN_VARY_MAX);
1867 return pan_emit_vary(present, buf, quirks, pan_varying_formats[buf], 0);
1868 }
1869
1870 static enum mali_format
1871 pan_xfb_format(enum mali_format format, unsigned nr)
1872 {
1873 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1874 return MALI_R32F | MALI_NR_CHANNELS(nr);
1875 else
1876 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1877 }
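
/* e.g. an fp16 vec3 varying captured for XFB is widened to
 * MALI_R32F | MALI_NR_CHANNELS(3), i.e. three 32-bit floats per vertex in the
 * capture buffer, since transform feedback stores floats at full precision. */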
1878
1879 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1880 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1881 * value. */
1882
1883 static struct mali_attr_meta
1884 pan_emit_vary_xfb(unsigned present,
1885 unsigned max_xfb,
1886 unsigned *streamout_offsets,
1887 unsigned quirks,
1888 enum mali_format format,
1889 struct pipe_stream_output o)
1890 {
1891 /* Construct an XFB record for the captured varying */
1892 struct mali_attr_meta meta = {
1893 /* XFB buffers come after everything else */
1894 .index = pan_xfb_base(present) + o.output_buffer,
1895
1896 /* As usual unknown bit */
1897 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1898
1899 /* Override swizzle with number of channels */
1900 .swizzle = quirks & HAS_SWIZZLES ?
1901 panfrost_get_default_swizzle(o.num_components) :
1902 panfrost_bifrost_swizzle(o.num_components),
1903
1904 /* Override number of channels and precision to highp */
1905 .format = pan_xfb_format(format, o.num_components),
1906
1907 /* Apply given offsets together */
1908 .src_offset = (o.dst_offset * 4) /* dwords */
1909 + streamout_offsets[o.output_buffer]
1910 };
1911
1912 return meta;
1913 }
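
/* e.g. o.dst_offset = 2 (dwords into the capture record) on a buffer whose
 * 64-byte misalignment was computed as 16 gives src_offset = 2 * 4 + 16 = 24
 * bytes. */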
1914
1915 /* Determine if we should capture a varying for XFB. This requires actually
1916 * having a buffer for it. If we don't capture it, we'll fall back to a general
1917 * varying path (linked or unlinked, possibly discarding the write) */
1918
1919 static bool
1920 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1921 unsigned loc, unsigned max_xfb)
1922 {
1923 if (!(xfb->so_mask & (1ll << loc)))
1924 return false;
1925
1926 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1927 return o->output_buffer < max_xfb;
1928 }
1929
1930 /* Higher-level wrapper around all of the above, classifying a varying into one
1931 * of the above types */
1932
1933 static struct mali_attr_meta
1934 panfrost_emit_varying(
1935 struct panfrost_shader_state *stage,
1936 struct panfrost_shader_state *other,
1937 struct panfrost_shader_state *xfb,
1938 unsigned present,
1939 unsigned max_xfb,
1940 unsigned *streamout_offsets,
1941 unsigned quirks,
1942 unsigned *gen_offsets,
1943 enum mali_format *gen_formats,
1944 unsigned *gen_stride,
1945 unsigned idx,
1946 bool should_alloc,
1947 bool is_fragment)
1948 {
1949 gl_varying_slot loc = stage->varyings_loc[idx];
1950 enum mali_format format = stage->varyings[idx];
1951
1952 /* Override format to match linkage */
1953 if (!should_alloc && gen_formats[idx])
1954 format = gen_formats[idx];
1955
1956 if (has_point_coord(stage->point_sprite_mask, loc)) {
1957 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1958 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1959 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1960 return pan_emit_vary_xfb(present, max_xfb, streamout_offsets, quirks, format, *o);
1961 } else if (loc == VARYING_SLOT_POS) {
1962 if (is_fragment)
1963 return pan_emit_vary_special(present, PAN_VARY_FRAGCOORD, quirks);
1964 else
1965 return pan_emit_vary_special(present, PAN_VARY_POSITION, quirks);
1966 } else if (loc == VARYING_SLOT_PSIZ) {
1967 return pan_emit_vary_special(present, PAN_VARY_PSIZ, quirks);
1968 } else if (loc == VARYING_SLOT_PNTC) {
1969 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1970 } else if (loc == VARYING_SLOT_FACE) {
1971 return pan_emit_vary_special(present, PAN_VARY_FACE, quirks);
1972 }
1973
1974 /* We've exhausted special cases, so it's otherwise a general varying. Check if we're linked */
1975 signed other_idx = -1;
1976
1977 for (unsigned j = 0; j < other->varying_count; ++j) {
1978 if (other->varyings_loc[j] == loc) {
1979 other_idx = j;
1980 break;
1981 }
1982 }
1983
1984 if (other_idx < 0)
1985 return pan_emit_vary_only(present, quirks);
1986
1987 unsigned offset = gen_offsets[other_idx];
1988
1989 if (should_alloc) {
1990 /* We're linked, so allocate a space via a watermark allocation */
1991 enum mali_format alt = other->varyings[other_idx];
1992
1993 /* Do interpolation at minimum precision */
1994 unsigned size_main = pan_varying_size(format);
1995 unsigned size_alt = pan_varying_size(alt);
1996 unsigned size = MIN2(size_main, size_alt);
1997
1998 /* If a varying is marked for XFB but not actually captured, we
1999 * should match the format to the format that would otherwise
2000 * be used for XFB, since dEQP checks for invariance here. It's
2001 * unclear if this is required by the spec. */
2002
2003 if (xfb->so_mask & (1ull << loc)) {
2004 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
2005 format = pan_xfb_format(format, o->num_components);
2006 size = pan_varying_size(format);
2007 } else if (size == size_alt) {
2008 format = alt;
2009 }
2010
2011 gen_offsets[idx] = *gen_stride;
2012 gen_formats[other_idx] = format;
2013 offset = *gen_stride;
2014 *gen_stride += size;
2015 }
2016
2017 return pan_emit_vary(present, PAN_VARY_GENERAL,
2018 quirks, format, offset);
2019 }
2020
2021 static void
2022 pan_emit_special_input(union mali_attr *varyings,
2023 unsigned present,
2024 enum pan_special_varying v,
2025 mali_ptr addr)
2026 {
2027 if (present & (1 << v)) {
2028 /* Ensure we write exactly once for performance and with fields
2029 * zeroed appropriately to avoid flakes */
2030
2031 union mali_attr s = {
2032 .elements = addr
2033 };
2034
2035 varyings[pan_varying_index(present, v)] = s;
2036 }
2037 }
2038
2039 void
2040 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2041 unsigned vertex_count,
2042 struct mali_vertex_tiler_postfix *vertex_postfix,
2043 struct mali_vertex_tiler_postfix *tiler_postfix,
2044 union midgard_primitive_size *primitive_size)
2045 {
2046 /* Load the shaders */
2047 struct panfrost_context *ctx = batch->ctx;
2048 struct panfrost_device *dev = pan_device(ctx->base.screen);
2049 struct panfrost_shader_state *vs, *fs;
2050 size_t vs_size, fs_size;
2051
2052 /* Allocate the varying descriptor */
2053
2054 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2055 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2056 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
2057 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
2058
2059 struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
2060 vs_size +
2061 fs_size);
2062
2063 struct pipe_stream_output_info *so = &vs->stream_output;
2064 unsigned present = pan_varying_present(vs, fs, dev->quirks);
2065
2066 /* Check if this varying is linked by us. This is the case for
2067 * general-purpose, non-captured varyings. If it is, link it. If it's
2068 * not, use the provided stream out information to determine the
2069 * offset, since it was already linked for us. */
2070
2071 unsigned gen_offsets[32];
2072 enum mali_format gen_formats[32];
2073 memset(gen_offsets, 0, sizeof(gen_offsets));
2074 memset(gen_formats, 0, sizeof(gen_formats));
2075
2076 unsigned gen_stride = 0;
2077 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
2078 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
2079
2080 unsigned streamout_offsets[32];
2081
2082 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2083 streamout_offsets[i] = panfrost_streamout_offset(
2084 so->stride[i],
2085 ctx->streamout.offsets[i],
2086 ctx->streamout.targets[i]);
2087 }
2088
2089 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
2090 struct mali_attr_meta *ofs = ovs + vs->varying_count;
2091
2092 for (unsigned i = 0; i < vs->varying_count; i++) {
2093 ovs[i] = panfrost_emit_varying(vs, fs, vs, present,
2094 ctx->streamout.num_targets, streamout_offsets,
2095 dev->quirks,
2096 gen_offsets, gen_formats, &gen_stride, i, true, false);
2097 }
2098
2099 for (unsigned i = 0; i < fs->varying_count; i++) {
2100 ofs[i] = panfrost_emit_varying(fs, vs, vs, present,
2101 ctx->streamout.num_targets, streamout_offsets,
2102 dev->quirks,
2103 gen_offsets, gen_formats, &gen_stride, i, false, true);
2104 }
2105
2106 unsigned xfb_base = pan_xfb_base(present);
2107 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
2108 sizeof(union mali_attr) * (xfb_base + ctx->streamout.num_targets));
2109 union mali_attr *varyings = (union mali_attr *) T.cpu;
2110
2111 /* Emit the stream out buffers */
2112
2113 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
2114 ctx->vertex_count);
2115
2116 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2117 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2118 so->stride[i],
2119 ctx->streamout.offsets[i],
2120 out_count,
2121 ctx->streamout.targets[i]);
2122 }
2123
2124 panfrost_emit_varyings(batch,
2125 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2126 gen_stride, vertex_count);
2127
2128 /* fp32 vec4 gl_Position */
2129 tiler_postfix->position_varying = panfrost_emit_varyings(batch,
2130 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2131 sizeof(float) * 4, vertex_count);
2132
2133 if (present & (1 << PAN_VARY_PSIZ)) {
2134 primitive_size->pointer = panfrost_emit_varyings(batch,
2135 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2136 2, vertex_count);
2137 }
2138
2139 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_VARYING_POINT_COORD);
2140 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_VARYING_FRONT_FACING);
2141 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_VARYING_FRAG_COORD);
2142
2143 vertex_postfix->varyings = T.gpu;
2144 tiler_postfix->varyings = T.gpu;
2145
2146 vertex_postfix->varying_meta = trans.gpu;
2147 tiler_postfix->varying_meta = trans.gpu + vs_size;
2148 }
2149
2150 void
2151 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2152 struct mali_vertex_tiler_prefix *vertex_prefix,
2153 struct mali_vertex_tiler_postfix *vertex_postfix,
2154 struct mali_vertex_tiler_prefix *tiler_prefix,
2155 struct mali_vertex_tiler_postfix *tiler_postfix,
2156 union midgard_primitive_size *primitive_size)
2157 {
2158 struct panfrost_context *ctx = batch->ctx;
2159 struct panfrost_device *device = pan_device(ctx->base.screen);
2160 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
2161 struct bifrost_payload_vertex bifrost_vertex = {0,};
2162 struct bifrost_payload_tiler bifrost_tiler = {0,};
2163 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
2164 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
2165 void *vp, *tp;
2166 size_t vp_size, tp_size;
2167
2168 if (device->quirks & IS_BIFROST) {
2169 bifrost_vertex.prefix = *vertex_prefix;
2170 bifrost_vertex.postfix = *vertex_postfix;
2171 vp = &bifrost_vertex;
2172 vp_size = sizeof(bifrost_vertex);
2173
2174 bifrost_tiler.prefix = *tiler_prefix;
2175 bifrost_tiler.tiler.primitive_size = *primitive_size;
2176 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
2177 bifrost_tiler.postfix = *tiler_postfix;
2178 tp = &bifrost_tiler;
2179 tp_size = sizeof(bifrost_tiler);
2180 } else {
2181 midgard_vertex.prefix = *vertex_prefix;
2182 midgard_vertex.postfix = *vertex_postfix;
2183 vp = &midgard_vertex;
2184 vp_size = sizeof(midgard_vertex);
2185
2186 midgard_tiler.prefix = *tiler_prefix;
2187 midgard_tiler.postfix = *tiler_postfix;
2188 midgard_tiler.primitive_size = *primitive_size;
2189 tp = &midgard_tiler;
2190 tp_size = sizeof(midgard_tiler);
2191 }
2192
2193 if (wallpapering) {
2194 /* Inject in reverse order, with "predicted" job indices.
2195 * THIS IS A HACK XXX */
2196 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false,
2197 batch->scoreboard.job_index + 2, tp, tp_size, true);
2198 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2199 vp, vp_size, true);
2200 return;
2201 }
2202
2203 /* If rasterizer discard is enabled, only submit the vertex job */
2204
2205 bool rasterizer_discard = ctx->rasterizer &&
2206 ctx->rasterizer->base.rasterizer_discard;
2207
2208 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2209 vp, vp_size, false);
2210
2211 if (rasterizer_discard)
2212 return;
2213
2214 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2215 false);
2216 }
2217
2218 /* TODO: stop hardcoding this */
2219 mali_ptr
2220 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2221 {
2222 uint16_t locations[] = {
2223 128, 128,
2224 0, 256,
2225 0, 256,
2226 0, 256,
2227 0, 256,
2228 0, 256,
2229 0, 256,
2230 0, 256,
2231 0, 256,
2232 0, 256,
2233 0, 256,
2234 0, 256,
2235 0, 256,
2236 0, 256,
2237 0, 256,
2238 0, 256,
2239 0, 256,
2240 0, 256,
2241 0, 256,
2242 0, 256,
2243 0, 256,
2244 0, 256,
2245 0, 256,
2246 0, 256,
2247 0, 256,
2248 0, 256,
2249 0, 256,
2250 0, 256,
2251 0, 256,
2252 0, 256,
2253 0, 256,
2254 0, 256,
2255 128, 128,
2256 0, 0,
2257 0, 0,
2258 0, 0,
2259 0, 0,
2260 0, 0,
2261 0, 0,
2262 0, 0,
2263 0, 0,
2264 0, 0,
2265 0, 0,
2266 0, 0,
2267 0, 0,
2268 0, 0,
2269 0, 0,
2270 0, 0,
2271 };
2272
2273 return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t));
2274 }