panfrost: Honour cso->compare_mode
mesa.git: src/gallium/drivers/panfrost/pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_pool.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
75 postfix->shared_memory = panfrost_batch_reserve_framebuffer(batch);
76 }
77
78 static void
79 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
80 struct mali_vertex_tiler_prefix *prefix,
81 struct mali_vertex_tiler_postfix *postfix)
82 {
83 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
84
85 postfix->gl_enables |= 0x7;
86 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
87 rasterizer && rasterizer->base.front_ccw);
88 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
89 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
90 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
91 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
92 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
93 rasterizer && rasterizer->base.flatshade_first);
94 }
95
96 void
97 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
98 struct mali_vertex_tiler_prefix *prefix,
99 union midgard_primitive_size *primitive_size)
100 {
101 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
102
103 if (!panfrost_writes_point_size(ctx)) {
104 bool points = prefix->draw_mode == MALI_POINTS;
105 float val = 0.0f;
106
107 if (rasterizer)
108 val = points ?
109 rasterizer->base.point_size :
110 rasterizer->base.line_width;
111
112 primitive_size->constant = val;
113 }
114 }
115
116 static void
117 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
118 struct mali_vertex_tiler_postfix *postfix)
119 {
120 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
121 if (ctx->occlusion_query) {
122 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
123 panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
124 PAN_BO_ACCESS_SHARED |
125 PAN_BO_ACCESS_RW |
126 PAN_BO_ACCESS_FRAGMENT);
127 } else {
128 postfix->occlusion_counter = 0;
129 }
130 }
131
132 void
133 panfrost_vt_init(struct panfrost_context *ctx,
134 enum pipe_shader_type stage,
135 struct mali_vertex_tiler_prefix *prefix,
136 struct mali_vertex_tiler_postfix *postfix)
137 {
138 struct panfrost_device *device = pan_device(ctx->base.screen);
139
140 if (!ctx->shader[stage])
141 return;
142
143 memset(prefix, 0, sizeof(*prefix));
144 memset(postfix, 0, sizeof(*postfix));
145
146 if (device->quirks & IS_BIFROST) {
147 postfix->gl_enables = 0x2;
148 panfrost_vt_emit_shared_memory(ctx, postfix);
149 } else {
150 postfix->gl_enables = 0x6;
151 panfrost_vt_attach_framebuffer(ctx, postfix);
152 }
153
154 if (stage == PIPE_SHADER_FRAGMENT) {
155 panfrost_vt_update_occlusion_query(ctx, postfix);
156 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
157 }
158 }
159
160 static unsigned
161 panfrost_translate_index_size(unsigned size)
162 {
163 switch (size) {
164 case 1:
165 return MALI_DRAW_INDEXED_UINT8;
166
167 case 2:
168 return MALI_DRAW_INDEXED_UINT16;
169
170 case 4:
171 return MALI_DRAW_INDEXED_UINT32;
172
173 default:
174 unreachable("Invalid index size");
175 }
176 }
177
178 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
179 * good for the duration of the draw (transient), could last longer. Also get
180 * the bounds on the index buffer for the range accessed by the draw. We do
181 * these operations together because there are natural optimizations which
182 * require them to be together. */
183
184 static mali_ptr
185 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
186 const struct pipe_draw_info *info,
187 unsigned *min_index, unsigned *max_index)
188 {
189 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
190 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
191 off_t offset = info->start * info->index_size;
192 bool needs_indices = true;
193 mali_ptr out = 0;
194
195 if (info->max_index != ~0u) {
196 *min_index = info->min_index;
197 *max_index = info->max_index;
198 needs_indices = false;
199 }
200
201 if (!info->has_user_indices) {
202 /* Only resources can be directly mapped */
203 panfrost_batch_add_bo(batch, rsrc->bo,
204 PAN_BO_ACCESS_SHARED |
205 PAN_BO_ACCESS_READ |
206 PAN_BO_ACCESS_VERTEX_TILER);
207 out = rsrc->bo->gpu + offset;
208
209 /* Check the cache */
210 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
211 info->start,
212 info->count,
213 min_index,
214 max_index);
215 } else {
216 /* Otherwise, we need to upload to transient memory */
217 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
218 out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
219 info->count *
220 info->index_size);
221 }
222
223 if (needs_indices) {
224 /* Fallback */
225 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
226
227 if (!info->has_user_indices)
228 panfrost_minmax_cache_add(rsrc->index_cache,
229 info->start, info->count,
230 *min_index, *max_index);
231 }
232
233 return out;
234 }
235
236 void
237 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
238 const struct pipe_draw_info *info,
239 enum mali_draw_mode draw_mode,
240 struct mali_vertex_tiler_postfix *vertex_postfix,
241 struct mali_vertex_tiler_prefix *tiler_prefix,
242 struct mali_vertex_tiler_postfix *tiler_postfix,
243 unsigned *vertex_count,
244 unsigned *padded_count)
245 {
246 tiler_prefix->draw_mode = draw_mode;
247
248 unsigned draw_flags = 0;
249
250 if (panfrost_writes_point_size(ctx))
251 draw_flags |= MALI_DRAW_VARYING_SIZE;
252
253 if (info->primitive_restart)
254 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
255
256 /* These don't make much sense */
257
258 draw_flags |= 0x3000;
259
260 if (info->index_size) {
261 unsigned min_index = 0, max_index = 0;
262
263 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
264 info,
265 &min_index,
266 &max_index);
267
268 /* Use the corresponding values */
269 *vertex_count = max_index - min_index + 1;
270 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
271 tiler_prefix->offset_bias_correction = -min_index;
272 tiler_prefix->index_count = MALI_POSITIVE(info->count);
273 draw_flags |= panfrost_translate_index_size(info->index_size);
274 } else {
275 tiler_prefix->indices = 0;
276 *vertex_count = ctx->vertex_count;
277 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
278 tiler_prefix->offset_bias_correction = 0;
279 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
280 }
281
282 tiler_prefix->unknown_draw = draw_flags;
283
284 /* Encode the padded vertex count */
285
286 if (info->instance_count > 1) {
287 *padded_count = panfrost_padded_vertex_count(*vertex_count);
288
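                /* The padded count is decomposed as (2k + 1) << shift below;
                 * e.g. (illustrative only) a padded count of 12 = 3 << 2 gives
                 * shift = 2 and k = 1. */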
289 unsigned shift = __builtin_ctz(ctx->padded_count);
290 unsigned k = ctx->padded_count >> (shift + 1);
291
292 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
293 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
294 } else {
295 *padded_count = *vertex_count;
296
297 /* Reset instancing state */
298 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
299 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
300 }
301 }
302
303 static void
304 panfrost_shader_meta_init(struct panfrost_context *ctx,
305 enum pipe_shader_type st,
306 struct mali_shader_meta *meta)
307 {
308 const struct panfrost_device *dev = pan_device(ctx->base.screen);
309 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
310
311 memset(meta, 0, sizeof(*meta));
312 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
313 meta->attribute_count = ss->attribute_count;
314 meta->varying_count = ss->varying_count;
315 meta->texture_count = ctx->sampler_view_count[st];
316 meta->sampler_count = ctx->sampler_count[st];
317
318 if (dev->quirks & IS_BIFROST) {
319 if (st == PIPE_SHADER_VERTEX)
320 meta->bifrost1.unk1 = 0x800000;
321 else {
322 /* First clause ATEST |= 0x4000000.
323 * Less than 32 regs |= 0x200 */
324 meta->bifrost1.unk1 = 0x950020;
325 }
326
327 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
328 if (st == PIPE_SHADER_VERTEX)
329 meta->bifrost2.preload_regs = 0xC0;
330 else {
331 meta->bifrost2.preload_regs = 0x1;
332 SET_BIT(meta->bifrost2.preload_regs, 0x10, ss->reads_frag_coord);
333 }
334
335 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
336 ss->uniform_cutoff);
337 } else {
338 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
339 ss->uniform_cutoff);
340 meta->midgard1.work_count = ss->work_reg_count;
341
342 /* TODO: This is not conformant on ES3 */
343 meta->midgard1.flags_hi = MALI_SUPPRESS_INF_NAN;
344
345 meta->midgard1.flags_lo = 0x20;
346 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
347
348 SET_BIT(meta->midgard1.flags_hi, MALI_WRITES_GLOBAL, ss->writes_global);
349 }
350 }
351
352 static unsigned
353 panfrost_translate_compare_func(enum pipe_compare_func in)
354 {
355 switch (in) {
356 case PIPE_FUNC_NEVER:
357 return MALI_FUNC_NEVER;
358
359 case PIPE_FUNC_LESS:
360 return MALI_FUNC_LESS;
361
362 case PIPE_FUNC_EQUAL:
363 return MALI_FUNC_EQUAL;
364
365 case PIPE_FUNC_LEQUAL:
366 return MALI_FUNC_LEQUAL;
367
368 case PIPE_FUNC_GREATER:
369 return MALI_FUNC_GREATER;
370
371 case PIPE_FUNC_NOTEQUAL:
372 return MALI_FUNC_NOTEQUAL;
373
374 case PIPE_FUNC_GEQUAL:
375 return MALI_FUNC_GEQUAL;
376
377 case PIPE_FUNC_ALWAYS:
378 return MALI_FUNC_ALWAYS;
379
380 default:
381 unreachable("Invalid func");
382 }
383 }
384
385 static unsigned
386 panfrost_translate_stencil_op(enum pipe_stencil_op in)
387 {
388 switch (in) {
389 case PIPE_STENCIL_OP_KEEP:
390 return MALI_STENCIL_KEEP;
391
392 case PIPE_STENCIL_OP_ZERO:
393 return MALI_STENCIL_ZERO;
394
395 case PIPE_STENCIL_OP_REPLACE:
396 return MALI_STENCIL_REPLACE;
397
398 case PIPE_STENCIL_OP_INCR:
399 return MALI_STENCIL_INCR;
400
401 case PIPE_STENCIL_OP_DECR:
402 return MALI_STENCIL_DECR;
403
404 case PIPE_STENCIL_OP_INCR_WRAP:
405 return MALI_STENCIL_INCR_WRAP;
406
407 case PIPE_STENCIL_OP_DECR_WRAP:
408 return MALI_STENCIL_DECR_WRAP;
409
410 case PIPE_STENCIL_OP_INVERT:
411 return MALI_STENCIL_INVERT;
412
413 default:
414 unreachable("Invalid stencil op");
415 }
416 }
417
418 static unsigned
419 translate_tex_wrap(enum pipe_tex_wrap w)
420 {
421 switch (w) {
422 case PIPE_TEX_WRAP_REPEAT:
423 return MALI_WRAP_REPEAT;
424
425 case PIPE_TEX_WRAP_CLAMP:
426 return MALI_WRAP_CLAMP;
427
428 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
429 return MALI_WRAP_CLAMP_TO_EDGE;
430
431 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
432 return MALI_WRAP_CLAMP_TO_BORDER;
433
434 case PIPE_TEX_WRAP_MIRROR_REPEAT:
435 return MALI_WRAP_MIRRORED_REPEAT;
436
437 case PIPE_TEX_WRAP_MIRROR_CLAMP:
438 return MALI_WRAP_MIRRORED_CLAMP;
439
440 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
441 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
442
443 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
444 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
445
446 default:
447 unreachable("Invalid wrap");
448 }
449 }
450
451 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
452 struct mali_sampler_descriptor *hw)
453 {
454 unsigned func = panfrost_translate_compare_func(cso->compare_func);
455 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
456 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
457 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
458 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
459 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
460 unsigned mip_filter = mip_linear ?
461 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
462 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
463
464 *hw = (struct mali_sampler_descriptor) {
465 .filter_mode = min_filter | mag_filter | mip_filter |
466 normalized,
467 .wrap_s = translate_tex_wrap(cso->wrap_s),
468 .wrap_t = translate_tex_wrap(cso->wrap_t),
469 .wrap_r = translate_tex_wrap(cso->wrap_r),
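                /* Honour cso->compare_mode: when shadow comparison is
                 * disabled, MALI_FUNC_NEVER appears to act as the "off"
                 * encoding; when enabled, the Gallium func is flipped (see
                 * panfrost_flip_compare_func), as the hardware seems to
                 * compare the operands in the opposite order. */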
470 .compare_func = cso->compare_mode ?
471 panfrost_flip_compare_func(func) :
472 MALI_FUNC_NEVER,
473 .border_color = {
474 cso->border_color.f[0],
475 cso->border_color.f[1],
476 cso->border_color.f[2],
477 cso->border_color.f[3]
478 },
479 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
480 .max_lod = FIXED_16(cso->max_lod, false),
481 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
482 .seamless_cube_map = cso->seamless_cube_map,
483 };
484
485 /* If necessary, we disable mipmapping in the sampler descriptor by
486 * clamping the LOD as tight as possible (from 0 to epsilon,
487 * essentially -- remember these are fixed point numbers, so
488 * epsilon=1/256) */
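
        /* Concretely (illustrative numbers, assuming FIXED_16 is 8.8 fixed
         * point): with min_lod = 0.0 this yields min_lod = 0 and max_lod = 1,
         * i.e. an LOD range of [0, 1/256], so only the base level is ever
         * selected. */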
489
490 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
491 hw->max_lod = hw->min_lod + 1;
492 }
493
494 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
495 struct bifrost_sampler_descriptor *hw)
496 {
497 *hw = (struct bifrost_sampler_descriptor) {
498 .unk1 = 0x1,
499 .wrap_s = translate_tex_wrap(cso->wrap_s),
500 .wrap_t = translate_tex_wrap(cso->wrap_t),
501 .wrap_r = translate_tex_wrap(cso->wrap_r),
502 .unk8 = 0x8,
503 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
504 .norm_coords = cso->normalized_coords,
505 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
506 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
507 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
508 .max_lod = FIXED_16(cso->max_lod, false),
509 };
510
511 /* If necessary, we disable mipmapping in the sampler descriptor by
512 * clamping the LOD as tight as possible (from 0 to epsilon,
513 * essentially -- remember these are fixed point numbers, so
514 * epsilon=1/256) */
515
516 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
517 hw->max_lod = hw->min_lod + 1;
518 }
519
520 static void
521 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
522 struct mali_stencil_test *out)
523 {
524 out->ref = 0; /* Gallium gets it from elsewhere */
525
526 out->mask = in->valuemask;
527 out->func = panfrost_translate_compare_func(in->func);
528 out->sfail = panfrost_translate_stencil_op(in->fail_op);
529 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
530 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
531 }
532
533 static void
534 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
535 struct mali_shader_meta *fragmeta)
536 {
537 if (!ctx->rasterizer) {
538 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
539 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
540 fragmeta->depth_units = 0.0f;
541 fragmeta->depth_factor = 0.0f;
542 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
543 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
544 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, true);
545 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, true);
546 return;
547 }
548
549 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
550
551 bool msaa = rast->multisample;
552
553 /* TODO: Sample size */
554 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
555 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
556
557 SET_BIT(fragmeta->unknown2_3, MALI_PER_SAMPLE,
558 msaa && ctx->min_samples > 1);
559
560 fragmeta->depth_units = rast->offset_units * 2.0f;
561 fragmeta->depth_factor = rast->offset_scale;
562
563 /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
564
565 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, rast->offset_tri);
566 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, rast->offset_tri);
567
568 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, rast->depth_clip_near);
569 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, rast->depth_clip_far);
570 }
571
572 static void
573 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
574 struct mali_shader_meta *fragmeta)
575 {
576 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
577 int zfunc = PIPE_FUNC_ALWAYS;
578
579 if (!zsa) {
580 struct pipe_stencil_state default_stencil = {
581 .enabled = 0,
582 .func = PIPE_FUNC_ALWAYS,
583 .fail_op = MALI_STENCIL_KEEP,
584 .zfail_op = MALI_STENCIL_KEEP,
585 .zpass_op = MALI_STENCIL_KEEP,
586 .writemask = 0xFF,
587 .valuemask = 0xFF
588 };
589
590 panfrost_make_stencil_state(&default_stencil,
591 &fragmeta->stencil_front);
592 fragmeta->stencil_mask_front = default_stencil.writemask;
593 fragmeta->stencil_back = fragmeta->stencil_front;
594 fragmeta->stencil_mask_back = default_stencil.writemask;
595 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
596 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
597 } else {
598 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
599 zsa->stencil[0].enabled);
600 panfrost_make_stencil_state(&zsa->stencil[0],
601 &fragmeta->stencil_front);
602 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
603 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
604
605 /* If back-stencil is not enabled, use the front values */
606
607 if (zsa->stencil[1].enabled) {
608 panfrost_make_stencil_state(&zsa->stencil[1],
609 &fragmeta->stencil_back);
610 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
611 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
612 } else {
613 fragmeta->stencil_back = fragmeta->stencil_front;
614 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
615 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
616 }
617
618 if (zsa->depth.enabled)
619 zfunc = zsa->depth.func;
620
621 /* Depth state (TODO: Refactor) */
622
623 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
624 zsa->depth.writemask);
625 }
626
627 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
628 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
629 }
630
631 static bool
632 panfrost_fs_required(
633 struct panfrost_shader_state *fs,
634 struct panfrost_blend_final *blend,
635 unsigned rt_count)
636 {
637 /* If we generally have side effects */
638 if (fs->fs_sidefx)
639 return true;
640
641 /* If colour is written we need to execute */
642 for (unsigned i = 0; i < rt_count; ++i) {
643 if (!blend[i].no_colour)
644 return true;
645 }
646
647 /* If depth is written and not implied we need to execute.
648 * TODO: Predicate on Z/S writes being enabled */
649 return (fs->writes_depth || fs->writes_stencil);
650 }
651
652 static void
653 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
654 struct mali_shader_meta *fragmeta,
655 void *rts)
656 {
657 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
658 const struct panfrost_device *dev = pan_device(ctx->base.screen);
659 struct panfrost_shader_state *fs;
660 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
661
662 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
663 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
664 !ctx->blend->base.dither);
665
666 SET_BIT(fragmeta->unknown2_4, MALI_ALPHA_TO_COVERAGE,
667 ctx->blend->base.alpha_to_coverage);
668
669 /* Get blending setup */
670 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
671
672 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
673 unsigned shader_offset = 0;
674 struct panfrost_bo *shader_bo = NULL;
675
676 for (unsigned c = 0; c < rt_count; ++c)
677 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
678 &shader_offset);
679
680 /* Disable shader execution if we can */
681 if (dev->quirks & MIDGARD_SHADERLESS
682 && !panfrost_fs_required(fs, blend, rt_count)) {
683 fragmeta->shader = 0;
684 fragmeta->attribute_count = 0;
685 fragmeta->varying_count = 0;
686 fragmeta->texture_count = 0;
687 fragmeta->sampler_count = 0;
688
689 /* This feature is not known to work on Bifrost */
690 fragmeta->midgard1.work_count = 1;
691 fragmeta->midgard1.uniform_count = 0;
692 fragmeta->midgard1.uniform_buffer_count = 0;
693 }
694
695 /* If there is a blend shader, work registers are shared. We impose 8
696 * work registers as a limit for blend shaders. Should be lower XXX */
697
698 if (!(dev->quirks & IS_BIFROST)) {
699 for (unsigned c = 0; c < rt_count; ++c) {
700 if (blend[c].is_shader) {
701 fragmeta->midgard1.work_count =
702 MAX2(fragmeta->midgard1.work_count, 8);
703 }
704 }
705 }
706
707 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
708 * copied to the blend_meta appended (by convention), but this is the
709 * field actually read by the hardware. (Or maybe both are read...?).
710 * Specify the last RTi with a blend shader. */
711
712 fragmeta->blend.shader = 0;
713
714 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
715 if (!blend[rt].is_shader)
716 continue;
717
718 fragmeta->blend.shader = blend[rt].shader.gpu |
719 blend[rt].shader.first_tag;
720 break;
721 }
722
723 if (dev->quirks & MIDGARD_SFBD) {
724 /* On single render target (SFBD) platforms, the blend
725 * information is inside the shader meta itself. We additionally
726 * need to signal CAN_DISCARD for nontrivial blend modes (so
727 * we're able to read back the destination buffer) */
728
729 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
730 blend[0].is_shader);
731
732 if (!blend[0].is_shader) {
733 fragmeta->blend.equation = *blend[0].equation.equation;
734 fragmeta->blend.constant = blend[0].equation.constant;
735 }
736
737 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
738 !blend[0].no_blending || fs->can_discard);
739
740 batch->draws |= PIPE_CLEAR_COLOR0;
741 return;
742 }
743
744 if (dev->quirks & IS_BIFROST) {
745 bool no_blend = true;
746
747 for (unsigned i = 0; i < rt_count; ++i)
748 no_blend &= (blend[i].no_blending | blend[i].no_colour);
749
750 SET_BIT(fragmeta->bifrost1.unk1, MALI_BIFROST_EARLY_Z,
751 !fs->can_discard && !fs->writes_depth && no_blend);
752 }
753
754 /* Additional blend descriptor tacked on for jobs using MFBD */
755
756 for (unsigned i = 0; i < rt_count; ++i) {
757 unsigned flags = 0;
758
759 if (ctx->pipe_framebuffer.nr_cbufs > i && !blend[i].no_colour) {
760 flags = 0x200;
761 batch->draws |= (PIPE_CLEAR_COLOR0 << i);
762
763 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
764 (ctx->pipe_framebuffer.cbufs[i]) &&
765 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
766
767 SET_BIT(flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
768 SET_BIT(flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
769 SET_BIT(flags, MALI_BLEND_SRGB, is_srgb);
770 SET_BIT(flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
771 }
772
773 if (dev->quirks & IS_BIFROST) {
774 struct bifrost_blend_rt *brts = rts;
775
776 brts[i].flags = flags;
777
778 if (blend[i].is_shader) {
779 /* The blend shader's address needs to be at
780 * the same top 32 bit as the fragment shader.
781 * TODO: Ensure that's always the case.
782 */
783 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
784 (fs->bo->gpu & (0xffffffffull << 32)));
785 brts[i].shader = blend[i].shader.gpu;
786 brts[i].unk2 = 0x0;
787 } else if (ctx->pipe_framebuffer.nr_cbufs > i) {
788 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
789 const struct util_format_description *format_desc;
790 format_desc = util_format_description(format);
791
792 brts[i].equation = *blend[i].equation.equation;
793
794 /* TODO: this is a bit more complicated */
795 brts[i].constant = blend[i].equation.constant;
796
797 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
798
799 /* 0x19 disables blending and forces REPLACE
800 * mode (equivalent to rgb_mode = alpha_mode =
801 * x122, colour mask = 0xF). 0x1a allows
802 * blending. */
803 brts[i].unk2 = blend[i].no_blending ? 0x19 : 0x1a;
804
805 brts[i].shader_type = fs->blend_types[i];
806 } else {
807 /* Dummy attachment for depth-only */
808 brts[i].unk2 = 0x3;
809 brts[i].shader_type = fs->blend_types[i];
810 }
811 } else {
812 struct midgard_blend_rt *mrts = rts;
813 mrts[i].flags = flags;
814
815 if (blend[i].is_shader) {
816 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
817 } else {
818 mrts[i].blend.equation = *blend[i].equation.equation;
819 mrts[i].blend.constant = blend[i].equation.constant;
820 }
821 }
822 }
823 }
824
825 static void
826 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
827 struct mali_shader_meta *fragmeta,
828 void *rts)
829 {
830 const struct panfrost_device *dev = pan_device(ctx->base.screen);
831 struct panfrost_shader_state *fs;
832
833 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
834
835 bool msaa = ctx->rasterizer && ctx->rasterizer->base.multisample;
836 fragmeta->coverage_mask = (msaa ? ctx->sample_mask : ~0) & 0xF;
837
838 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10;
839 fragmeta->unknown2_4 = 0x4e0;
840
841 /* unknown2_4 has the 0x10 bit set on T6XX and T720. We don't know why this
842 * is required (independent of 32-bit/64-bit descriptors), or why it's
843 * not used on later GPU revisions. Otherwise, all shader jobs fault on
844 * these earlier chips (perhaps this is a chicken bit of some kind).
845 * More investigation is needed. */
846
847 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
848
849 if (dev->quirks & IS_BIFROST) {
850 /* TODO */
851 } else {
852 /* Depending on whether it's legal to do so in the given shader, we try to
853 * enable early-z testing. TODO: respect e-z force */
854
855 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
856 !fs->can_discard && !fs->writes_global &&
857 !fs->writes_depth && !fs->writes_stencil &&
858 !ctx->blend->base.alpha_to_coverage);
859
860 /* Add the writes Z/S flags if needed. */
861 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
862 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
863
864 /* Any time texturing is used, derivatives are implicitly calculated,
865 * so we need to enable helper invocations */
866
867 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
868 fs->helper_invocations);
869
870 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
871
872 bool depth_enabled = fs->writes_depth ||
873 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
874
875 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_TILEBUFFER,
876 fs->outputs_read || (!depth_enabled && fs->can_discard));
877 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
878 }
879
880 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
881 panfrost_frag_meta_zsa_update(ctx, fragmeta);
882 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
883 }
884
885 void
886 panfrost_emit_shader_meta(struct panfrost_batch *batch,
887 enum pipe_shader_type st,
888 struct mali_vertex_tiler_postfix *postfix)
889 {
890 struct panfrost_context *ctx = batch->ctx;
891 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
892
893 if (!ss) {
894 postfix->shader = 0;
895 return;
896 }
897
898 struct mali_shader_meta meta;
899
900 panfrost_shader_meta_init(ctx, st, &meta);
901
902 /* Add the shader BO to the batch. */
903 panfrost_batch_add_bo(batch, ss->bo,
904 PAN_BO_ACCESS_PRIVATE |
905 PAN_BO_ACCESS_READ |
906 panfrost_bo_access_for_stage(st));
907
908 mali_ptr shader_ptr;
909
910 if (st == PIPE_SHADER_FRAGMENT) {
911 struct panfrost_device *dev = pan_device(ctx->base.screen);
912 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
913 size_t desc_size = sizeof(meta);
914 void *rts = NULL;
915 struct panfrost_transfer xfer;
916 unsigned rt_size;
917
918 if (dev->quirks & MIDGARD_SFBD)
919 rt_size = 0;
920 else if (dev->quirks & IS_BIFROST)
921 rt_size = sizeof(struct bifrost_blend_rt);
922 else
923 rt_size = sizeof(struct midgard_blend_rt);
924
925 desc_size += rt_size * rt_count;
926
927 if (rt_size)
928 rts = rzalloc_size(ctx, rt_size * rt_count);
929
930 panfrost_frag_shader_meta_init(ctx, &meta, rts);
931
932 xfer = panfrost_pool_alloc(&batch->pool, desc_size);
933
934 memcpy(xfer.cpu, &meta, sizeof(meta));
935 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
936
937 if (rt_size)
938 ralloc_free(rts);
939
940 shader_ptr = xfer.gpu;
941 } else {
942 shader_ptr = panfrost_pool_upload(&batch->pool, &meta,
943 sizeof(meta));
944 }
945
946 postfix->shader = shader_ptr;
947 }
948
949 static void
950 panfrost_mali_viewport_init(struct panfrost_context *ctx,
951 struct mali_viewport *mvp)
952 {
953 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
954
955 /* Clip bounds are encoded as floats. The viewport itself is encoded as
956 * (somewhat) asymmetric ints. */
957
958 const struct pipe_scissor_state *ss = &ctx->scissor;
959
960 memset(mvp, 0, sizeof(*mvp));
961
962 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
963 * each direction. Clipping to the viewport in theory should work, but
964 * in practice causes issues when we're not explicitly trying to
965 * scissor */
966
967 *mvp = (struct mali_viewport) {
968 .clip_minx = -INFINITY,
969 .clip_miny = -INFINITY,
970 .clip_maxx = INFINITY,
971 .clip_maxy = INFINITY,
972 };
973
974 /* Always scissor to the viewport by default. */
975 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
976 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
977
978 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
979 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
980
981 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
982 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
983
984 /* Apply the scissor test */
985
986 unsigned minx, miny, maxx, maxy;
987
988 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
989 minx = MAX2(ss->minx, vp_minx);
990 miny = MAX2(ss->miny, vp_miny);
991 maxx = MIN2(ss->maxx, vp_maxx);
992 maxy = MIN2(ss->maxy, vp_maxy);
993 } else {
994 minx = vp_minx;
995 miny = vp_miny;
996 maxx = vp_maxx;
997 maxy = vp_maxy;
998 }
999
1000 /* Hardware needs the min/max to be strictly ordered, so flip if we
1001 * need to. The viewport transformation in the vertex shader will
1002 * handle the negatives if we don't */
1003
1004 if (miny > maxy) {
1005 unsigned temp = miny;
1006 miny = maxy;
1007 maxy = temp;
1008 }
1009
1010 if (minx > maxx) {
1011 unsigned temp = minx;
1012 minx = maxx;
1013 maxx = temp;
1014 }
1015
1016 if (minz > maxz) {
1017 float temp = minz;
1018 minz = maxz;
1019 maxz = temp;
1020 }
1021
1022 /* Clamp to the framebuffer size as a last check */
1023
1024 minx = MIN2(ctx->pipe_framebuffer.width, minx);
1025 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1026
1027 miny = MIN2(ctx->pipe_framebuffer.height, miny);
1028 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1029
1030 /* Upload */
1031
1032 mvp->viewport0[0] = minx;
1033 mvp->viewport1[0] = MALI_POSITIVE(maxx);
1034
1035 mvp->viewport0[1] = miny;
1036 mvp->viewport1[1] = MALI_POSITIVE(maxy);
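
        /* viewport1 is encoded off-by-one (MALI_POSITIVE subtracts one),
         * which is why panfrost_emit_viewport adds the one back when unioning
         * the batch scissor. */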
1037
1038 bool clip_near = true;
1039 bool clip_far = true;
1040
1041 if (ctx->rasterizer) {
1042 clip_near = ctx->rasterizer->base.depth_clip_near;
1043 clip_far = ctx->rasterizer->base.depth_clip_far;
1044 }
1045
1046 mvp->clip_minz = clip_near ? minz : -INFINITY;
1047 mvp->clip_maxz = clip_far ? maxz : INFINITY;
1048 }
1049
1050 void
1051 panfrost_emit_viewport(struct panfrost_batch *batch,
1052 struct mali_vertex_tiler_postfix *tiler_postfix)
1053 {
1054 struct panfrost_context *ctx = batch->ctx;
1055 struct mali_viewport mvp;
1056
1057 panfrost_mali_viewport_init(batch->ctx, &mvp);
1058
1059 /* Update the job, unless we're doing wallpapering (whose lack of
1060 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1061 * just... be faster :) */
1062
1063 if (!ctx->wallpaper_batch)
1064 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
1065 mvp.viewport0[1],
1066 mvp.viewport1[0] + 1,
1067 mvp.viewport1[1] + 1);
1068
1069 tiler_postfix->viewport = panfrost_pool_upload(&batch->pool, &mvp,
1070 sizeof(mvp));
1071 }
1072
1073 static mali_ptr
1074 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1075 enum pipe_shader_type st,
1076 struct panfrost_constant_buffer *buf,
1077 unsigned index)
1078 {
1079 struct pipe_constant_buffer *cb = &buf->cb[index];
1080 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1081
1082 if (rsrc) {
1083 panfrost_batch_add_bo(batch, rsrc->bo,
1084 PAN_BO_ACCESS_SHARED |
1085 PAN_BO_ACCESS_READ |
1086 panfrost_bo_access_for_stage(st));
1087
1088 /* Alignment guaranteed by
1089 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1090 return rsrc->bo->gpu + cb->buffer_offset;
1091 } else if (cb->user_buffer) {
1092 return panfrost_pool_upload(&batch->pool,
1093 cb->user_buffer +
1094 cb->buffer_offset,
1095 cb->buffer_size);
1096 } else {
1097 unreachable("No constant buffer");
1098 }
1099 }
1100
1101 struct sysval_uniform {
1102 union {
1103 float f[4];
1104 int32_t i[4];
1105 uint32_t u[4];
1106 uint64_t du[2];
1107 };
1108 };
1109
1110 static void
1111 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1112 struct sysval_uniform *uniform)
1113 {
1114 struct panfrost_context *ctx = batch->ctx;
1115 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1116
1117 uniform->f[0] = vp->scale[0];
1118 uniform->f[1] = vp->scale[1];
1119 uniform->f[2] = vp->scale[2];
1120 }
1121
1122 static void
1123 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1124 struct sysval_uniform *uniform)
1125 {
1126 struct panfrost_context *ctx = batch->ctx;
1127 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1128
1129 uniform->f[0] = vp->translate[0];
1130 uniform->f[1] = vp->translate[1];
1131 uniform->f[2] = vp->translate[2];
1132 }
1133
1134 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1135 enum pipe_shader_type st,
1136 unsigned int sysvalid,
1137 struct sysval_uniform *uniform)
1138 {
1139 struct panfrost_context *ctx = batch->ctx;
1140 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1141 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1142 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1143 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1144
1145 assert(dim);
1146 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1147
1148 if (dim > 1)
1149 uniform->i[1] = u_minify(tex->texture->height0,
1150 tex->u.tex.first_level);
1151
1152 if (dim > 2)
1153 uniform->i[2] = u_minify(tex->texture->depth0,
1154 tex->u.tex.first_level);
1155
1156 if (is_array)
1157 uniform->i[dim] = tex->texture->array_size;
1158 }
1159
1160 static void
1161 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1162 enum pipe_shader_type st,
1163 unsigned ssbo_id,
1164 struct sysval_uniform *uniform)
1165 {
1166 struct panfrost_context *ctx = batch->ctx;
1167
1168 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1169 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1170
1171 /* Compute address */
1172 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1173
1174 panfrost_batch_add_bo(batch, bo,
1175 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1176 panfrost_bo_access_for_stage(st));
1177
1178 /* Upload address and size as sysval */
1179 uniform->du[0] = bo->gpu + sb.buffer_offset;
1180 uniform->u[2] = sb.buffer_size;
1181 }
1182
1183 static void
1184 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1185 enum pipe_shader_type st,
1186 unsigned samp_idx,
1187 struct sysval_uniform *uniform)
1188 {
1189 struct panfrost_context *ctx = batch->ctx;
1190 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1191
1192 uniform->f[0] = sampl->min_lod;
1193 uniform->f[1] = sampl->max_lod;
1194 uniform->f[2] = sampl->lod_bias;
1195
1196 /* Even without any errata, Midgard represents "no mipmapping" as
1197 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1198 * panfrost_create_sampler_state which also explains our choice of
1199 * epsilon value (again to keep behaviour consistent) */
1200
1201 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1202 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1203 }
1204
1205 static void
1206 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1207 struct sysval_uniform *uniform)
1208 {
1209 struct panfrost_context *ctx = batch->ctx;
1210
1211 uniform->u[0] = ctx->compute_grid->grid[0];
1212 uniform->u[1] = ctx->compute_grid->grid[1];
1213 uniform->u[2] = ctx->compute_grid->grid[2];
1214 }
1215
1216 static void
1217 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1218 struct panfrost_shader_state *ss,
1219 enum pipe_shader_type st)
1220 {
1221 struct sysval_uniform *uniforms = (void *)buf;
1222
1223 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1224 int sysval = ss->sysval[i];
1225
1226 switch (PAN_SYSVAL_TYPE(sysval)) {
1227 case PAN_SYSVAL_VIEWPORT_SCALE:
1228 panfrost_upload_viewport_scale_sysval(batch,
1229 &uniforms[i]);
1230 break;
1231 case PAN_SYSVAL_VIEWPORT_OFFSET:
1232 panfrost_upload_viewport_offset_sysval(batch,
1233 &uniforms[i]);
1234 break;
1235 case PAN_SYSVAL_TEXTURE_SIZE:
1236 panfrost_upload_txs_sysval(batch, st,
1237 PAN_SYSVAL_ID(sysval),
1238 &uniforms[i]);
1239 break;
1240 case PAN_SYSVAL_SSBO:
1241 panfrost_upload_ssbo_sysval(batch, st,
1242 PAN_SYSVAL_ID(sysval),
1243 &uniforms[i]);
1244 break;
1245 case PAN_SYSVAL_NUM_WORK_GROUPS:
1246 panfrost_upload_num_work_groups_sysval(batch,
1247 &uniforms[i]);
1248 break;
1249 case PAN_SYSVAL_SAMPLER:
1250 panfrost_upload_sampler_sysval(batch, st,
1251 PAN_SYSVAL_ID(sysval),
1252 &uniforms[i]);
1253 break;
1254 default:
1255 assert(0);
1256 }
1257 }
1258 }
1259
1260 static const void *
1261 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1262 unsigned index)
1263 {
1264 struct pipe_constant_buffer *cb = &buf->cb[index];
1265 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1266
1267 if (rsrc)
1268 return rsrc->bo->cpu;
1269 else if (cb->user_buffer)
1270 return cb->user_buffer;
1271 else
1272 unreachable("No constant buffer");
1273 }
1274
1275 void
1276 panfrost_emit_const_buf(struct panfrost_batch *batch,
1277 enum pipe_shader_type stage,
1278 struct mali_vertex_tiler_postfix *postfix)
1279 {
1280 struct panfrost_context *ctx = batch->ctx;
1281 struct panfrost_shader_variants *all = ctx->shader[stage];
1282
1283 if (!all)
1284 return;
1285
1286 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1287
1288 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1289
1290 /* Uniforms are implicitly UBO #0 */
1291 bool has_uniforms = buf->enabled_mask & (1 << 0);
1292
1293 /* Allocate room for the sysval and the uniforms */
1294 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1295 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1296 size_t size = sys_size + uniform_size;
1297 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1298 size);
1299
1300 /* Upload sysvals requested by the shader */
1301 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1302
1303 /* Upload uniforms */
1304 if (has_uniforms && uniform_size) {
1305 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1306 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1307 }
1308
1309 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1310 * uploaded */
1311
1312 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1313 assert(ubo_count >= 1);
1314
1315 size_t sz = sizeof(uint64_t) * ubo_count;
1316 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1317 int uniform_count = ss->uniform_count;
1318
1319 /* Upload uniforms as a UBO */
1320 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1321
1322 /* The rest are honest-to-goodness UBOs */
1323
1324 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1325 size_t usz = buf->cb[ubo].buffer_size;
1326 bool enabled = buf->enabled_mask & (1 << ubo);
1327 bool empty = usz == 0;
1328
1329 if (!enabled || empty) {
1330 /* Stub out disabled UBOs to catch accesses */
1331 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1332 continue;
1333 }
1334
1335 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1336 buf, ubo);
1337
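                /* UBO sizes are encoded as a count of 16-byte fields, rounded
                 * up; e.g. (illustrative) a 100-byte UBO is advertised as
                 * ALIGN_POT(100, 16) / 16 = 7 fields, never truncated. */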
1338 unsigned bytes_per_field = 16;
1339 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1340 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1341 }
1342
1343 mali_ptr ubufs = panfrost_pool_upload(&batch->pool, ubos, sz);
1344 postfix->uniforms = transfer.gpu;
1345 postfix->uniform_buffers = ubufs;
1346
1347 buf->dirty_mask = 0;
1348 }
1349
1350 void
1351 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1352 const struct pipe_grid_info *info,
1353 struct midgard_payload_vertex_tiler *vtp)
1354 {
1355 struct panfrost_context *ctx = batch->ctx;
1356 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1357 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1358 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1359 128));
1360 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1361 info->grid[2] * 4;
1362 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1363 shared_size,
1364 1);
1365
1366 struct mali_shared_memory shared = {
1367 .shared_memory = bo->gpu,
1368 .shared_workgroup_count =
1369 util_logbase2_ceil(info->grid[0]) +
1370 util_logbase2_ceil(info->grid[1]) +
1371 util_logbase2_ceil(info->grid[2]),
1372 .shared_unk1 = 0x2,
1373 .shared_shift = util_logbase2(single_size) - 1
1374 };
1375
1376 vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared,
1377 sizeof(shared));
1378 }
1379
1380 static mali_ptr
1381 panfrost_get_tex_desc(struct panfrost_batch *batch,
1382 enum pipe_shader_type st,
1383 struct panfrost_sampler_view *view)
1384 {
1385 if (!view)
1386 return (mali_ptr) 0;
1387
1388 struct pipe_sampler_view *pview = &view->base;
1389 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1390
1391 /* Add the BO to the job so it's retained until the job is done. */
1392
1393 panfrost_batch_add_bo(batch, rsrc->bo,
1394 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1395 panfrost_bo_access_for_stage(st));
1396
1397 panfrost_batch_add_bo(batch, view->bo,
1398 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1399 panfrost_bo_access_for_stage(st));
1400
1401 return view->bo->gpu;
1402 }
1403
1404 static void
1405 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1406 struct pipe_context *pctx)
1407 {
1408 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1409 if (view->texture_bo != rsrc->bo->gpu ||
1410 view->layout != rsrc->layout) {
1411 panfrost_bo_unreference(view->bo);
1412 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1413 }
1414 }
1415
1416 void
1417 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1418 enum pipe_shader_type stage,
1419 struct mali_vertex_tiler_postfix *postfix)
1420 {
1421 struct panfrost_context *ctx = batch->ctx;
1422 struct panfrost_device *device = pan_device(ctx->base.screen);
1423
1424 if (!ctx->sampler_view_count[stage])
1425 return;
1426
1427 if (device->quirks & IS_BIFROST) {
1428 struct bifrost_texture_descriptor *descriptors;
1429
1430 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1431 ctx->sampler_view_count[stage]);
1432
1433 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1434 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1435 struct pipe_sampler_view *pview = &view->base;
1436 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1437 panfrost_update_sampler_view(view, &ctx->base);
1438
1439 /* Add the BOs to the job so they are retained until the job is done. */
1440
1441 panfrost_batch_add_bo(batch, rsrc->bo,
1442 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1443 panfrost_bo_access_for_stage(stage));
1444
1445 panfrost_batch_add_bo(batch, view->bo,
1446 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1447 panfrost_bo_access_for_stage(stage));
1448
1449 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1450 }
1451
1452 postfix->textures = panfrost_pool_upload(&batch->pool,
1453 descriptors,
1454 sizeof(struct bifrost_texture_descriptor) *
1455 ctx->sampler_view_count[stage]);
1456
1457 free(descriptors);
1458 } else {
1459 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1460
1461 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1462 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1463
1464 panfrost_update_sampler_view(view, &ctx->base);
1465
1466 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1467 }
1468
1469 postfix->textures = panfrost_pool_upload(&batch->pool,
1470 trampolines,
1471 sizeof(uint64_t) *
1472 ctx->sampler_view_count[stage]);
1473 }
1474 }
1475
1476 void
1477 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1478 enum pipe_shader_type stage,
1479 struct mali_vertex_tiler_postfix *postfix)
1480 {
1481 struct panfrost_context *ctx = batch->ctx;
1482 struct panfrost_device *device = pan_device(ctx->base.screen);
1483
1484 if (!ctx->sampler_count[stage])
1485 return;
1486
1487 if (device->quirks & IS_BIFROST) {
1488 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1489 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1490 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1491 transfer_size);
1492 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1493
1494 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1495 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1496
1497 postfix->sampler_descriptor = transfer.gpu;
1498 } else {
1499 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1500 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1501 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1502 transfer_size);
1503 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1504
1505 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1506 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1507
1508 postfix->sampler_descriptor = transfer.gpu;
1509 }
1510 }
1511
1512 void
1513 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1514 struct mali_vertex_tiler_postfix *vertex_postfix)
1515 {
1516 struct panfrost_context *ctx = batch->ctx;
1517
1518 if (!ctx->vertex)
1519 return;
1520
1521 struct panfrost_vertex_state *so = ctx->vertex;
1522
1523 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1524 vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
1525 sizeof(*so->hw) *
1526 PAN_MAX_ATTRIBUTE);
1527 }
1528
1529 void
1530 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1531 struct mali_vertex_tiler_postfix *vertex_postfix)
1532 {
1533 struct panfrost_context *ctx = batch->ctx;
1534 struct panfrost_vertex_state *so = ctx->vertex;
1535
1536 /* Staged mali_attr, and index into them. i =/= k, depending on the
1537 * vertex buffer mask and instancing. Twice as much room is allocated,
1538 * for a worst case of NPOT_DIVIDEs which take up an extra slot */
1539 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1540 unsigned k = 0;
1541
1542 for (unsigned i = 0; i < so->num_elements; ++i) {
1543 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1544 * means duplicating some vertex buffers (who cares? aside from
1545 * maybe some caching implications but I somehow doubt that
1546 * matters) */
1547
1548 struct pipe_vertex_element *elem = &so->pipe[i];
1549 unsigned vbi = elem->vertex_buffer_index;
1550
1551 /* The exception to 1:1 mapping is that we can have multiple
1552 * entries (NPOT divisors), so we fixup anyways */
1553
1554 so->hw[i].index = k;
1555
1556 if (!(ctx->vb_mask & (1 << vbi)))
1557 continue;
1558
1559 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1560 struct panfrost_resource *rsrc;
1561
1562 rsrc = pan_resource(buf->buffer.resource);
1563 if (!rsrc)
1564 continue;
1565
1566 /* Align to 64 bytes by masking off the lower bits. This
1567 * will be adjusted back when we fixup the src_offset in
1568 * mali_attr_meta */
1569
1570 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1571 mali_ptr addr = raw_addr & ~63;
1572 unsigned chopped_addr = raw_addr - addr;
1573
1574 /* Add a dependency of the batch on the vertex buffer */
1575 panfrost_batch_add_bo(batch, rsrc->bo,
1576 PAN_BO_ACCESS_SHARED |
1577 PAN_BO_ACCESS_READ |
1578 PAN_BO_ACCESS_VERTEX_TILER);
1579
1580 /* Set common fields */
1581 attrs[k].elements = addr;
1582 attrs[k].stride = buf->stride;
1583
1584 /* Since we advanced the base pointer, we shrink the buffer
1585 * size */
1586 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1587
1588 /* We need to add the extra size we masked off (for
1589 * correctness) so the data doesn't get clamped away */
1590 attrs[k].size += chopped_addr;
1591
1592 /* For non-instancing make sure we initialize */
1593 attrs[k].shift = attrs[k].extra_flags = 0;
1594
1595 /* Instancing uses a dramatically different code path than
1596 * linear, so dispatch for the actual emission now that the
1597 * common code is finished */
1598
1599 unsigned divisor = elem->instance_divisor;
1600
1601 if (divisor && ctx->instance_count == 1) {
1602 /* Silly corner case where there's a divisor(=1) but
1603 * there's no legitimate instancing. So we want *every*
1604 * attribute to be the same. So set stride to zero so
1605 * we don't go anywhere. */
1606
1607 attrs[k].size = attrs[k].stride + chopped_addr;
1608 attrs[k].stride = 0;
1609 attrs[k++].elements |= MALI_ATTR_LINEAR;
1610 } else if (ctx->instance_count <= 1) {
1611 /* Normal, non-instanced attributes */
1612 attrs[k++].elements |= MALI_ATTR_LINEAR;
1613 } else {
1614 unsigned instance_shift = vertex_postfix->instance_shift;
1615 unsigned instance_odd = vertex_postfix->instance_odd;
1616
1617 k += panfrost_vertex_instanced(ctx->padded_count,
1618 instance_shift,
1619 instance_odd,
1620 divisor, &attrs[k]);
1621 }
1622 }
1623
1624 /* Add special gl_VertexID/gl_InstanceID buffers */
1625
1626 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1627 so->hw[PAN_VERTEX_ID].index = k++;
1628 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1629 so->hw[PAN_INSTANCE_ID].index = k++;
1630
1631 /* Upload whatever we emitted and go */
1632
1633 vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
1634 k * sizeof(*attrs));
1635 }
1636
1637 static mali_ptr
1638 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1639 unsigned stride, unsigned count)
1640 {
1641 /* Fill out the descriptor */
1642 slot->stride = stride;
1643 slot->size = stride * count;
1644 slot->shift = slot->extra_flags = 0;
1645
1646 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1647 slot->size);
1648
1649 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1650
1651 return transfer.gpu;
1652 }
1653
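/* Computes the misalignment, in bytes, of a streamout write within its
 * 64-byte-aligned chunk; the corresponding address is rounded down to 64 bytes
 * in panfrost_emit_streamout, so this slack is applied back as an offset. */
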
1654 static unsigned
1655 panfrost_streamout_offset(unsigned stride, unsigned offset,
1656 struct pipe_stream_output_target *target)
1657 {
1658 return (target->buffer_offset + (offset * stride * 4)) & 63;
1659 }
1660
1661 static void
1662 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1663 unsigned stride, unsigned offset, unsigned count,
1664 struct pipe_stream_output_target *target)
1665 {
1666 /* Fill out the descriptor */
1667 slot->stride = stride * 4;
1668 slot->shift = slot->extra_flags = 0;
1669
1670 unsigned max_size = target->buffer_size;
1671 unsigned expected_size = slot->stride * count;
1672
1673 /* Grab the BO and bind it to the batch */
1674 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1675
1676 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1677 * the perspective of the TILER and FRAGMENT.
1678 */
1679 panfrost_batch_add_bo(batch, bo,
1680 PAN_BO_ACCESS_SHARED |
1681 PAN_BO_ACCESS_RW |
1682 PAN_BO_ACCESS_VERTEX_TILER |
1683 PAN_BO_ACCESS_FRAGMENT);
1684
1685 /* We will have an offset applied to get alignment */
1686 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1687 slot->elements = (addr & ~63) | MALI_ATTR_LINEAR;
1688 slot->size = MIN2(max_size, expected_size) + (addr & 63);
1689 }
1690
1691 static bool
1692 has_point_coord(unsigned mask, gl_varying_slot loc)
1693 {
1694 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1695 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1696 else if (loc == VARYING_SLOT_PNTC)
1697 return (mask & (1 << 8));
1698 else
1699 return false;
1700 }
1701
1702 /* Helpers for manipulating stream out information so we can pack varyings
1703 * accordingly. Compute the src_offset for a given captured varying */
1704
1705 static struct pipe_stream_output *
1706 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1707 {
1708 for (unsigned i = 0; i < info->num_outputs; ++i) {
1709 if (info->output[i].register_index == loc)
1710 return &info->output[i];
1711 }
1712
1713 unreachable("Varying not captured");
1714 }
1715
1716 static unsigned
1717 pan_varying_size(enum mali_format fmt)
1718 {
1719 unsigned type = MALI_EXTRACT_TYPE(fmt);
1720 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1721 unsigned bits = MALI_EXTRACT_BITS(fmt);
1722 unsigned bpc = 0;
1723
1724 if (bits == MALI_CHANNEL_FLOAT) {
1725 /* No doubles */
1726 bool fp16 = (type == MALI_FORMAT_SINT);
1727 assert(fp16 || (type == MALI_FORMAT_UNORM));
1728
1729 bpc = fp16 ? 2 : 4;
1730 } else {
1731 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1732
1733 /* See the enums */
1734 bits = 1 << bits;
1735 assert(bits >= 8);
1736 bpc = bits / 8;
1737 }
1738
1739 return bpc * chan;
1740 }
1741
1742 /* Indices for named (non-XFB) varyings that are present. These are packed
1743 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1744 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
1745 * of a given special field given a shift S by:
1746 *
1747 * idx = popcount(P & ((1 << S) - 1))
1748 *
1749 * That is, count all of the varyings that come earlier; that count is the
1750 * index of this one. Likewise, the total number of special
1751 * buffers required is simply popcount(P)
1752 */
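/* Worked example: with P = 0b1011 (general, position and point coord
 * present), the buffer index for PAN_VARY_PNTCOORD (S = 3) is
 *
 *   popcount(0b1011 & ((1 << 3) - 1)) = popcount(0b0011) = 2
 *
 * and popcount(P) = 3 special buffers are needed in total. */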
1753
1754 enum pan_special_varying {
1755 PAN_VARY_GENERAL = 0,
1756 PAN_VARY_POSITION = 1,
1757 PAN_VARY_PSIZ = 2,
1758 PAN_VARY_PNTCOORD = 3,
1759 PAN_VARY_FACE = 4,
1760 PAN_VARY_FRAGCOORD = 5,
1761
1762 /* Keep last */
1763 PAN_VARY_MAX,
1764 };
1765
1766 /* Given a varying, figure out which index it corresponds to */
1767
1768 static inline unsigned
1769 pan_varying_index(unsigned present, enum pan_special_varying v)
1770 {
1771 unsigned mask = (1 << v) - 1;
1772 return util_bitcount(present & mask);
1773 }
1774
1775 /* Get the base offset for XFB buffers, which by convention come after
1776 * everything else. Wrapper function for semantic reasons; by construction this
1777 * is just popcount. */
1778
1779 static inline unsigned
1780 pan_xfb_base(unsigned present)
1781 {
1782 return util_bitcount(present);
1783 }
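/* Continuing the example above, present = 0b1011 gives pan_xfb_base() = 3,
 * so the first transform feedback buffer uses record index 3. */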
1784
1785 /* Computes the present mask for varyings so we can start emitting varying records */
1786
1787 static inline unsigned
1788 pan_varying_present(
1789 struct panfrost_shader_state *vs,
1790 struct panfrost_shader_state *fs,
1791 unsigned quirks)
1792 {
1793 /* At the moment we always emit general and position buffers. Not
1794 * strictly necessary but usually harmless */
1795
1796 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1797
1798 /* Enable special buffers based on the shader info */
1799
1800 if (vs->writes_point_size)
1801 present |= (1 << PAN_VARY_PSIZ);
1802
1803 if (fs->reads_point_coord)
1804 present |= (1 << PAN_VARY_PNTCOORD);
1805
1806 if (fs->reads_face)
1807 present |= (1 << PAN_VARY_FACE);
1808
1809 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1810 present |= (1 << PAN_VARY_FRAGCOORD);
1811
1812 /* Also, if we have a point sprite, we need a point coord buffer */
1813
1814 for (unsigned i = 0; i < fs->varying_count; i++) {
1815 gl_varying_slot loc = fs->varyings_loc[i];
1816
1817 if (has_point_coord(fs->point_sprite_mask, loc))
1818 present |= (1 << PAN_VARY_PNTCOORD);
1819 }
1820
1821 return present;
1822 }
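/* As an example of the rules above: a vertex shader writing gl_PointSize
 * paired with a fragment shader reading gl_PointCoord gives
 *
 *   present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION) |
 *             (1 << PAN_VARY_PSIZ)    | (1 << PAN_VARY_PNTCOORD)
 *
 * i.e. four varying buffers before any XFB buffers. */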
1823
1824 /* Emitters for varying records */
1825
1826 static struct mali_attr_meta
1827 pan_emit_vary(unsigned present, enum pan_special_varying buf,
1828 unsigned quirks, enum mali_format format,
1829 unsigned offset)
1830 {
1831 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1832
1833 struct mali_attr_meta meta = {
1834 .index = pan_varying_index(present, buf),
1835 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1836 .swizzle = quirks & HAS_SWIZZLES ?
1837 panfrost_get_default_swizzle(nr_channels) :
1838 panfrost_bifrost_swizzle(nr_channels),
1839 .format = format,
1840 .src_offset = offset
1841 };
1842
1843 return meta;
1844 }
1845
1846 /* General varying that is unused */
1847
1848 static struct mali_attr_meta
1849 pan_emit_vary_only(unsigned present, unsigned quirks)
1850 {
1851 return pan_emit_vary(present, 0, quirks, MALI_VARYING_DISCARD, 0);
1852 }
1853
1854 /* Special records */
1855
1856 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1857 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1858 [PAN_VARY_PSIZ] = MALI_R16F,
1859 [PAN_VARY_PNTCOORD] = MALI_R16F,
1860 [PAN_VARY_FACE] = MALI_R32I,
1861 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1862 };
1863
1864 static struct mali_attr_meta
1865 pan_emit_vary_special(unsigned present, enum pan_special_varying buf,
1866 unsigned quirks)
1867 {
1868 assert(buf < PAN_VARY_MAX);
1869 return pan_emit_vary(present, buf, quirks, pan_varying_formats[buf], 0);
1870 }
1871
1872 static enum mali_format
1873 pan_xfb_format(enum mali_format format, unsigned nr)
1874 {
1875 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1876 return MALI_R32F | MALI_NR_CHANNELS(nr);
1877 else
1878 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1879 }
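/* For example (a sketch, assuming MALI_R16F decodes as a float type): a
 * varying stored as MALI_R16F but captured with 3 components is promoted to
 * MALI_R32F | MALI_NR_CHANNELS(3), so transform feedback always writes full
 * 32-bit values regardless of the interpolation precision. */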
1880
1881 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1882 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1883 * value. */
1884
1885 static struct mali_attr_meta
1886 pan_emit_vary_xfb(unsigned present,
1887 unsigned max_xfb,
1888 unsigned *streamout_offsets,
1889 unsigned quirks,
1890 enum mali_format format,
1891 struct pipe_stream_output o)
1892 {
1893 /* Construct a record for the captured varying */
1894 struct mali_attr_meta meta = {
1895 /* XFB buffers come after everything else */
1896 .index = pan_xfb_base(present) + o.output_buffer,
1897
1898 /* As usual unknown bit */
1899 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1900
1901 /* Override swizzle with number of channels */
1902 .swizzle = quirks & HAS_SWIZZLES ?
1903 panfrost_get_default_swizzle(o.num_components) :
1904 panfrost_bifrost_swizzle(o.num_components),
1905
1906 /* Override number of channels and precision to highp */
1907 .format = pan_xfb_format(format, o.num_components),
1908
1909 /* Apply given offsets together */
1910 .src_offset = (o.dst_offset * 4) /* dwords */
1911 + streamout_offsets[o.output_buffer]
1912 };
1913
1914 return meta;
1915 }
1916
1917 /* Determine if we should capture a varying for XFB. This requires actually
1918 * having a buffer for it. If we don't capture it, we'll fall back to a general
1919 * varying path (linked or unlinked, possibly discarding the write) */
1920
1921 static bool
1922 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1923 unsigned loc, unsigned max_xfb)
1924 {
1925 if (!(xfb->so_mask & (1ll << loc)))
1926 return false;
1927
1928 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1929 return o->output_buffer < max_xfb;
1930 }
1931
1932 /* Higher-level wrapper around all of the emitters above, classifying a varying
1933 * into one of the cases they handle */
1934
1935 static struct mali_attr_meta
1936 panfrost_emit_varying(
1937 struct panfrost_shader_state *stage,
1938 struct panfrost_shader_state *other,
1939 struct panfrost_shader_state *xfb,
1940 unsigned present,
1941 unsigned max_xfb,
1942 unsigned *streamout_offsets,
1943 unsigned quirks,
1944 unsigned *gen_offsets,
1945 enum mali_format *gen_formats,
1946 unsigned *gen_stride,
1947 unsigned idx,
1948 bool should_alloc,
1949 bool is_fragment)
1950 {
1951 gl_varying_slot loc = stage->varyings_loc[idx];
1952 enum mali_format format = stage->varyings[idx];
1953
1954 /* Override format to match linkage */
1955 if (!should_alloc && gen_formats[idx])
1956 format = gen_formats[idx];
1957
1958 if (has_point_coord(stage->point_sprite_mask, loc)) {
1959 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1960 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1961 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1962 return pan_emit_vary_xfb(present, max_xfb, streamout_offsets, quirks, format, *o);
1963 } else if (loc == VARYING_SLOT_POS) {
1964 if (is_fragment)
1965 return pan_emit_vary_special(present, PAN_VARY_FRAGCOORD, quirks);
1966 else
1967 return pan_emit_vary_special(present, PAN_VARY_POSITION, quirks);
1968 } else if (loc == VARYING_SLOT_PSIZ) {
1969 return pan_emit_vary_special(present, PAN_VARY_PSIZ, quirks);
1970 } else if (loc == VARYING_SLOT_PNTC) {
1971 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1972 } else if (loc == VARYING_SLOT_FACE) {
1973 return pan_emit_vary_special(present, PAN_VARY_FACE, quirks);
1974 }
1975
1976 /* We've exhausted special cases, so it's otherwise a general varying. Check if we're linked */
1977 signed other_idx = -1;
1978
1979 for (unsigned j = 0; j < other->varying_count; ++j) {
1980 if (other->varyings_loc[j] == loc) {
1981 other_idx = j;
1982 break;
1983 }
1984 }
1985
1986 if (other_idx < 0)
1987 return pan_emit_vary_only(present, quirks);
1988
1989 unsigned offset = gen_offsets[other_idx];
1990
1991 if (should_alloc) {
1992 /* We're linked, so allocate space via a watermark allocation */
1993 enum mali_format alt = other->varyings[other_idx];
1994
1995 /* Do interpolation at minimum precision */
1996 unsigned size_main = pan_varying_size(format);
1997 unsigned size_alt = pan_varying_size(alt);
1998 unsigned size = MIN2(size_main, size_alt);
1999
2000 /* If a varying is marked for XFB but not actually captured, we
2001 * should match the format to the format that would otherwise
2002 * be used for XFB, since dEQP checks for invariance here. It's
2003 * unclear if this is required by the spec. */
2004
2005 if (xfb->so_mask & (1ull << loc)) {
2006 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
2007 format = pan_xfb_format(format, o->num_components);
2008 size = pan_varying_size(format);
2009 } else if (size == size_alt) {
2010 format = alt;
2011 }
2012
2013 gen_offsets[idx] = *gen_stride;
2014 gen_formats[other_idx] = format;
2015 offset = *gen_stride;
2016 *gen_stride += size;
2017 }
2018
2019 return pan_emit_vary(present, PAN_VARY_GENERAL,
2020 quirks, format, offset);
2021 }
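/* A sketch of the linkage above: if this stage declares a varying at fp32
 * (4 bytes/channel) while the other stage uses fp16 (2 bytes/channel), MIN2
 * selects the fp16 size; since size == size_alt, the fp16 format is recorded
 * in gen_formats[] and both stages' records end up agreeing on the smaller
 * layout and offset. */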
2022
2023 static void
2024 pan_emit_special_input(union mali_attr *varyings,
2025 unsigned present,
2026 enum pan_special_varying v,
2027 mali_ptr addr)
2028 {
2029 if (present & (1 << v)) {
2030 /* Ensure we write exactly once for performance and with fields
2031 * zeroed appropriately to avoid flakes */
2032
2033 union mali_attr s = {
2034 .elements = addr
2035 };
2036
2037 varyings[pan_varying_index(present, v)] = s;
2038 }
2039 }
2040
2041 void
2042 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2043 unsigned vertex_count,
2044 struct mali_vertex_tiler_postfix *vertex_postfix,
2045 struct mali_vertex_tiler_postfix *tiler_postfix,
2046 union midgard_primitive_size *primitive_size)
2047 {
2048 /* Load the shaders */
2049 struct panfrost_context *ctx = batch->ctx;
2050 struct panfrost_device *dev = pan_device(ctx->base.screen);
2051 struct panfrost_shader_state *vs, *fs;
2052 size_t vs_size, fs_size;
2053
2054 /* Allocate the varying descriptor */
2055
2056 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2057 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2058 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
2059 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
2060
2061 struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
2062 vs_size +
2063 fs_size);
2064
2065 struct pipe_stream_output_info *so = &vs->stream_output;
2066 unsigned present = pan_varying_present(vs, fs, dev->quirks);
2067
2068 /* Check if this varying is linked by us. This is the case for
2069 * general-purpose, non-captured varyings. If it is, link it. If it's
2070 * not, use the provided stream out information to determine the
2071 * offset, since it was already linked for us. */
2072
2073 unsigned gen_offsets[32];
2074 enum mali_format gen_formats[32];
2075 memset(gen_offsets, 0, sizeof(gen_offsets));
2076 memset(gen_formats, 0, sizeof(gen_formats));
2077
2078 unsigned gen_stride = 0;
2079 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
2080 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
2081
2082 unsigned streamout_offsets[32];
2083
2084 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2085 streamout_offsets[i] = panfrost_streamout_offset(
2086 so->stride[i],
2087 ctx->streamout.offsets[i],
2088 ctx->streamout.targets[i]);
2089 }
2090
2091 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
2092 struct mali_attr_meta *ofs = ovs + vs->varying_count;
2093
2094 for (unsigned i = 0; i < vs->varying_count; i++) {
2095 ovs[i] = panfrost_emit_varying(vs, fs, vs, present,
2096 ctx->streamout.num_targets, streamout_offsets,
2097 dev->quirks,
2098 gen_offsets, gen_formats, &gen_stride, i, true, false);
2099 }
2100
2101 for (unsigned i = 0; i < fs->varying_count; i++) {
2102 ofs[i] = panfrost_emit_varying(fs, vs, vs, present,
2103 ctx->streamout.num_targets, streamout_offsets,
2104 dev->quirks,
2105 gen_offsets, gen_formats, &gen_stride, i, false, true);
2106 }
2107
2108 unsigned xfb_base = pan_xfb_base(present);
2109 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
2110 sizeof(union mali_attr) * (xfb_base + ctx->streamout.num_targets));
2111 union mali_attr *varyings = (union mali_attr *) T.cpu;
2112
2113 /* Emit the stream out buffers */
2114
2115 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
2116 ctx->vertex_count);
2117
2118 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2119 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2120 so->stride[i],
2121 ctx->streamout.offsets[i],
2122 out_count,
2123 ctx->streamout.targets[i]);
2124 }
2125
2126 panfrost_emit_varyings(batch,
2127 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2128 gen_stride, vertex_count);
2129
2130 /* fp32 vec4 gl_Position */
2131 tiler_postfix->position_varying = panfrost_emit_varyings(batch,
2132 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2133 sizeof(float) * 4, vertex_count);
2134
2135 if (present & (1 << PAN_VARY_PSIZ)) {
2136 primitive_size->pointer = panfrost_emit_varyings(batch,
2137 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2138 2, vertex_count);
2139 }
2140
2141 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_VARYING_POINT_COORD);
2142 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_VARYING_FRONT_FACING);
2143 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_VARYING_FRAG_COORD);
2144
2145 vertex_postfix->varyings = T.gpu;
2146 tiler_postfix->varyings = T.gpu;
2147
2148 vertex_postfix->varying_meta = trans.gpu;
2149 tiler_postfix->varying_meta = trans.gpu + vs_size;
2150 }
2151
2152 void
2153 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2154 struct mali_vertex_tiler_prefix *vertex_prefix,
2155 struct mali_vertex_tiler_postfix *vertex_postfix,
2156 struct mali_vertex_tiler_prefix *tiler_prefix,
2157 struct mali_vertex_tiler_postfix *tiler_postfix,
2158 union midgard_primitive_size *primitive_size)
2159 {
2160 struct panfrost_context *ctx = batch->ctx;
2161 struct panfrost_device *device = pan_device(ctx->base.screen);
2162 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
2163 struct bifrost_payload_vertex bifrost_vertex = {0,};
2164 struct bifrost_payload_tiler bifrost_tiler = {0,};
2165 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
2166 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
2167 void *vp, *tp;
2168 size_t vp_size, tp_size;
2169
2170 if (device->quirks & IS_BIFROST) {
2171 bifrost_vertex.prefix = *vertex_prefix;
2172 bifrost_vertex.postfix = *vertex_postfix;
2173 vp = &bifrost_vertex;
2174 vp_size = sizeof(bifrost_vertex);
2175
2176 bifrost_tiler.prefix = *tiler_prefix;
2177 bifrost_tiler.tiler.primitive_size = *primitive_size;
2178 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
2179 bifrost_tiler.postfix = *tiler_postfix;
2180 tp = &bifrost_tiler;
2181 tp_size = sizeof(bifrost_tiler);
2182 } else {
2183 midgard_vertex.prefix = *vertex_prefix;
2184 midgard_vertex.postfix = *vertex_postfix;
2185 vp = &midgard_vertex;
2186 vp_size = sizeof(midgard_vertex);
2187
2188 midgard_tiler.prefix = *tiler_prefix;
2189 midgard_tiler.postfix = *tiler_postfix;
2190 midgard_tiler.primitive_size = *primitive_size;
2191 tp = &midgard_tiler;
2192 tp_size = sizeof(midgard_tiler);
2193 }
2194
2195 if (wallpapering) {
2196 /* Inject in reverse order, with "predicted" job indices.
2197 * THIS IS A HACK XXX */
2198 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false,
2199 batch->scoreboard.job_index + 2, tp, tp_size, true);
2200 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2201 vp, vp_size, true);
2202 return;
2203 }
2204
2205 /* If rasterizer discard is enabled, only submit the vertex job */
2206
2207 bool rasterizer_discard = ctx->rasterizer &&
2208 ctx->rasterizer->base.rasterizer_discard;
2209
2210 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2211 vp, vp_size, false);
2212
2213 if (rasterizer_discard)
2214 return;
2215
2216 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2217 false);
2218 }
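/* A sketch of the intended ordering above (assuming the index argument of
 * panfrost_new_job() names the job this one depends on): the tiler job is
 * submitted with the vertex job's index as its dependency, so tiling only
 * starts once vertex shading for the draw has completed. */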
2219
2220 /* TODO: stop hardcoding this */
2221 mali_ptr
2222 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2223 {
2224 uint16_t locations[] = {
2225 128, 128,
2226 0, 256,
2227 0, 256,
2228 0, 256,
2229 0, 256,
2230 0, 256,
2231 0, 256,
2232 0, 256,
2233 0, 256,
2234 0, 256,
2235 0, 256,
2236 0, 256,
2237 0, 256,
2238 0, 256,
2239 0, 256,
2240 0, 256,
2241 0, 256,
2242 0, 256,
2243 0, 256,
2244 0, 256,
2245 0, 256,
2246 0, 256,
2247 0, 256,
2248 0, 256,
2249 0, 256,
2250 0, 256,
2251 0, 256,
2252 0, 256,
2253 0, 256,
2254 0, 256,
2255 0, 256,
2256 0, 256,
2257 128, 128,
2258 0, 0,
2259 0, 0,
2260 0, 0,
2261 0, 0,
2262 0, 0,
2263 0, 0,
2264 0, 0,
2265 0, 0,
2266 0, 0,
2267 0, 0,
2268 0, 0,
2269 0, 0,
2270 0, 0,
2271 0, 0,
2272 0, 0,
2273 };
2274
2275 return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t));
2276 }