panfrost: Add a bitset of render targets read by shaders
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_pool.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
 77         /* If we haven't already, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_pool_alloc(&batch->pool, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query) {
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
140 PAN_BO_ACCESS_SHARED |
141 PAN_BO_ACCESS_RW |
142 PAN_BO_ACCESS_FRAGMENT);
143 } else {
144 postfix->occlusion_counter = 0;
145 }
146 }
147
148 void
149 panfrost_vt_init(struct panfrost_context *ctx,
150 enum pipe_shader_type stage,
151 struct mali_vertex_tiler_prefix *prefix,
152 struct mali_vertex_tiler_postfix *postfix)
153 {
154 struct panfrost_device *device = pan_device(ctx->base.screen);
155
156 if (!ctx->shader[stage])
157 return;
158
159 memset(prefix, 0, sizeof(*prefix));
160 memset(postfix, 0, sizeof(*postfix));
161
162 if (device->quirks & IS_BIFROST) {
163 postfix->gl_enables = 0x2;
164 panfrost_vt_emit_shared_memory(ctx, postfix);
165 } else {
166 postfix->gl_enables = 0x6;
167 panfrost_vt_attach_framebuffer(ctx, postfix);
168 }
169
170 if (stage == PIPE_SHADER_FRAGMENT) {
171 panfrost_vt_update_occlusion_query(ctx, postfix);
172 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
173 }
174 }
175
176 static unsigned
177 panfrost_translate_index_size(unsigned size)
178 {
179 switch (size) {
180 case 1:
181 return MALI_DRAW_INDEXED_UINT8;
182
183 case 2:
184 return MALI_DRAW_INDEXED_UINT16;
185
186 case 4:
187 return MALI_DRAW_INDEXED_UINT32;
188
189 default:
190 unreachable("Invalid index size");
191 }
192 }
193
 194 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
 195  * good for the duration of the draw (transient), though it could last longer.
 196  * Also gets the bounds on the index buffer for the range accessed by the draw.
 197  * We do these operations together because there are natural optimizations which
 198  * require them to be together. */
199
200 static mali_ptr
201 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
202 const struct pipe_draw_info *info,
203 unsigned *min_index, unsigned *max_index)
204 {
205 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
206 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
207 off_t offset = info->start * info->index_size;
208 bool needs_indices = true;
209 mali_ptr out = 0;
210
211 if (info->max_index != ~0u) {
212 *min_index = info->min_index;
213 *max_index = info->max_index;
214 needs_indices = false;
215 }
216
217 if (!info->has_user_indices) {
218 /* Only resources can be directly mapped */
219 panfrost_batch_add_bo(batch, rsrc->bo,
220 PAN_BO_ACCESS_SHARED |
221 PAN_BO_ACCESS_READ |
222 PAN_BO_ACCESS_VERTEX_TILER);
223 out = rsrc->bo->gpu + offset;
224
225 /* Check the cache */
226 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
227 info->start,
228 info->count,
229 min_index,
230 max_index);
231 } else {
232 /* Otherwise, we need to upload to transient memory */
233 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
234 out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
235 info->count *
236 info->index_size);
237 }
238
239 if (needs_indices) {
240 /* Fallback */
241 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
242
243 if (!info->has_user_indices)
244 panfrost_minmax_cache_add(rsrc->index_cache,
245 info->start, info->count,
246 *min_index, *max_index);
247 }
248
249 return out;
250 }
251
252 void
253 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
254 const struct pipe_draw_info *info,
255 enum mali_draw_mode draw_mode,
256 struct mali_vertex_tiler_postfix *vertex_postfix,
257 struct mali_vertex_tiler_prefix *tiler_prefix,
258 struct mali_vertex_tiler_postfix *tiler_postfix,
259 unsigned *vertex_count,
260 unsigned *padded_count)
261 {
262 tiler_prefix->draw_mode = draw_mode;
263
264 unsigned draw_flags = 0;
265
266 if (panfrost_writes_point_size(ctx))
267 draw_flags |= MALI_DRAW_VARYING_SIZE;
268
269 if (info->primitive_restart)
270 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
271
 272         /* These don't make much sense */
273
274 draw_flags |= 0x3000;
275
276 if (info->index_size) {
277 unsigned min_index = 0, max_index = 0;
278
279 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
280 info,
281 &min_index,
282 &max_index);
283
284 /* Use the corresponding values */
285 *vertex_count = max_index - min_index + 1;
286 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
287 tiler_prefix->offset_bias_correction = -min_index;
288 tiler_prefix->index_count = MALI_POSITIVE(info->count);
289 draw_flags |= panfrost_translate_index_size(info->index_size);
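                /* Worked example (illustrative values, not from this commit):
                 * if the referenced indices span [min_index, max_index] =
                 * [100, 149], then 149 - 100 + 1 = 50 vertices are shaded,
                 * attribute fetch begins at 100 + index_bias, and
                 * offset_bias_correction = -100 keeps the fetched indices
                 * lined up with those attributes. */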
290 } else {
291 tiler_prefix->indices = 0;
292 *vertex_count = ctx->vertex_count;
293 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
294 tiler_prefix->offset_bias_correction = 0;
295 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
296 }
297
298 tiler_prefix->unknown_draw = draw_flags;
299
300 /* Encode the padded vertex count */
301
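        /* The instance_shift/instance_odd fields below encode the padded count
         * as (2 * instance_odd + 1) << instance_shift. Sketch with illustrative
         * numbers (not from this commit): a padded count of 24 gives
         * shift = ctz(24) = 3 and odd = 24 >> 4 = 1, since 24 = (2 * 1 + 1) << 3. */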
302 if (info->instance_count > 1) {
303 *padded_count = panfrost_padded_vertex_count(*vertex_count);
304
305 unsigned shift = __builtin_ctz(ctx->padded_count);
306 unsigned k = ctx->padded_count >> (shift + 1);
307
308 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
309 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
310 } else {
311 *padded_count = *vertex_count;
312
313 /* Reset instancing state */
314 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
315 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
316 }
317 }
318
319 static void
320 panfrost_shader_meta_init(struct panfrost_context *ctx,
321 enum pipe_shader_type st,
322 struct mali_shader_meta *meta)
323 {
324 const struct panfrost_device *dev = pan_device(ctx->base.screen);
325 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
326
327 memset(meta, 0, sizeof(*meta));
328 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
329 meta->attribute_count = ss->attribute_count;
330 meta->varying_count = ss->varying_count;
331 meta->texture_count = ctx->sampler_view_count[st];
332 meta->sampler_count = ctx->sampler_count[st];
333
334 if (dev->quirks & IS_BIFROST) {
335 if (st == PIPE_SHADER_VERTEX)
336 meta->bifrost1.unk1 = 0x800000;
337 else {
338 /* First clause ATEST |= 0x4000000.
339 * Less than 32 regs |= 0x200 */
340 meta->bifrost1.unk1 = 0x950020;
341 }
342
343 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
344 if (st == PIPE_SHADER_VERTEX)
345 meta->bifrost2.preload_regs = 0xC0;
346 else {
347 meta->bifrost2.preload_regs = 0x1;
348 SET_BIT(meta->bifrost2.preload_regs, 0x10, ss->reads_frag_coord);
349 }
350
351 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
352 ss->uniform_cutoff);
353 } else {
354 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
355 ss->uniform_cutoff);
356 meta->midgard1.work_count = ss->work_reg_count;
357
358 /* TODO: This is not conformant on ES3 */
359 meta->midgard1.flags_hi = MALI_SUPPRESS_INF_NAN;
360
361 meta->midgard1.flags_lo = 0x20;
362 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
363
364 SET_BIT(meta->midgard1.flags_hi, MALI_WRITES_GLOBAL, ss->writes_global);
365 }
366 }
367
368 static unsigned
369 panfrost_translate_compare_func(enum pipe_compare_func in)
370 {
371 switch (in) {
372 case PIPE_FUNC_NEVER:
373 return MALI_FUNC_NEVER;
374
375 case PIPE_FUNC_LESS:
376 return MALI_FUNC_LESS;
377
378 case PIPE_FUNC_EQUAL:
379 return MALI_FUNC_EQUAL;
380
381 case PIPE_FUNC_LEQUAL:
382 return MALI_FUNC_LEQUAL;
383
384 case PIPE_FUNC_GREATER:
385 return MALI_FUNC_GREATER;
386
387 case PIPE_FUNC_NOTEQUAL:
388 return MALI_FUNC_NOTEQUAL;
389
390 case PIPE_FUNC_GEQUAL:
391 return MALI_FUNC_GEQUAL;
392
393 case PIPE_FUNC_ALWAYS:
394 return MALI_FUNC_ALWAYS;
395
396 default:
397 unreachable("Invalid func");
398 }
399 }
400
401 static unsigned
402 panfrost_translate_stencil_op(enum pipe_stencil_op in)
403 {
404 switch (in) {
405 case PIPE_STENCIL_OP_KEEP:
406 return MALI_STENCIL_KEEP;
407
408 case PIPE_STENCIL_OP_ZERO:
409 return MALI_STENCIL_ZERO;
410
411 case PIPE_STENCIL_OP_REPLACE:
412 return MALI_STENCIL_REPLACE;
413
414 case PIPE_STENCIL_OP_INCR:
415 return MALI_STENCIL_INCR;
416
417 case PIPE_STENCIL_OP_DECR:
418 return MALI_STENCIL_DECR;
419
420 case PIPE_STENCIL_OP_INCR_WRAP:
421 return MALI_STENCIL_INCR_WRAP;
422
423 case PIPE_STENCIL_OP_DECR_WRAP:
424 return MALI_STENCIL_DECR_WRAP;
425
426 case PIPE_STENCIL_OP_INVERT:
427 return MALI_STENCIL_INVERT;
428
429 default:
430 unreachable("Invalid stencil op");
431 }
432 }
433
434 static unsigned
435 translate_tex_wrap(enum pipe_tex_wrap w)
436 {
437 switch (w) {
438 case PIPE_TEX_WRAP_REPEAT:
439 return MALI_WRAP_REPEAT;
440
441 case PIPE_TEX_WRAP_CLAMP:
442 return MALI_WRAP_CLAMP;
443
444 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
445 return MALI_WRAP_CLAMP_TO_EDGE;
446
447 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
448 return MALI_WRAP_CLAMP_TO_BORDER;
449
450 case PIPE_TEX_WRAP_MIRROR_REPEAT:
451 return MALI_WRAP_MIRRORED_REPEAT;
452
453 case PIPE_TEX_WRAP_MIRROR_CLAMP:
454 return MALI_WRAP_MIRRORED_CLAMP;
455
456 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
457 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
458
459 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
460 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
461
462 default:
463 unreachable("Invalid wrap");
464 }
465 }
466
467 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
468 struct mali_sampler_descriptor *hw)
469 {
470 unsigned func = panfrost_translate_compare_func(cso->compare_func);
471 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
472 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
473 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
474 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
475 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
476 unsigned mip_filter = mip_linear ?
477 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
478 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
479
480 *hw = (struct mali_sampler_descriptor) {
481 .filter_mode = min_filter | mag_filter | mip_filter |
482 normalized,
483 .wrap_s = translate_tex_wrap(cso->wrap_s),
484 .wrap_t = translate_tex_wrap(cso->wrap_t),
485 .wrap_r = translate_tex_wrap(cso->wrap_r),
486 .compare_func = panfrost_flip_compare_func(func),
487 .border_color = {
488 cso->border_color.f[0],
489 cso->border_color.f[1],
490 cso->border_color.f[2],
491 cso->border_color.f[3]
492 },
493 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
494 .max_lod = FIXED_16(cso->max_lod, false),
495 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
496 .seamless_cube_map = cso->seamless_cube_map,
497 };
498
499 /* If necessary, we disable mipmapping in the sampler descriptor by
500 * clamping the LOD as tight as possible (from 0 to epsilon,
501 * essentially -- remember these are fixed point numbers, so
502 * epsilon=1/256) */
503
504 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
505 hw->max_lod = hw->min_lod + 1;
506 }
507
508 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
509 struct bifrost_sampler_descriptor *hw)
510 {
511 *hw = (struct bifrost_sampler_descriptor) {
512 .unk1 = 0x1,
513 .wrap_s = translate_tex_wrap(cso->wrap_s),
514 .wrap_t = translate_tex_wrap(cso->wrap_t),
515 .wrap_r = translate_tex_wrap(cso->wrap_r),
516 .unk8 = 0x8,
517 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
518 .norm_coords = cso->normalized_coords,
519 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
520 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
521 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
522 .max_lod = FIXED_16(cso->max_lod, false),
523 };
524
525 /* If necessary, we disable mipmapping in the sampler descriptor by
526 * clamping the LOD as tight as possible (from 0 to epsilon,
527 * essentially -- remember these are fixed point numbers, so
528 * epsilon=1/256) */
529
530 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
531 hw->max_lod = hw->min_lod + 1;
532 }
533
534 static void
535 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
536 struct mali_stencil_test *out)
537 {
538 out->ref = 0; /* Gallium gets it from elsewhere */
539
540 out->mask = in->valuemask;
541 out->func = panfrost_translate_compare_func(in->func);
542 out->sfail = panfrost_translate_stencil_op(in->fail_op);
543 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
544 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
545 }
546
547 static void
548 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
549 struct mali_shader_meta *fragmeta)
550 {
551 if (!ctx->rasterizer) {
552 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
553 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
554 fragmeta->depth_units = 0.0f;
555 fragmeta->depth_factor = 0.0f;
556 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
557 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
558 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, true);
559 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, true);
560 return;
561 }
562
563 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
564
565 bool msaa = rast->multisample;
566
567 /* TODO: Sample size */
568 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
569 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
570 fragmeta->depth_units = rast->offset_units * 2.0f;
571 fragmeta->depth_factor = rast->offset_scale;
572
 573         /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
574
575 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, rast->offset_tri);
576 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, rast->offset_tri);
577
578 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, rast->depth_clip_near);
579 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, rast->depth_clip_far);
580 }
581
582 static void
583 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
584 struct mali_shader_meta *fragmeta)
585 {
586 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
587 int zfunc = PIPE_FUNC_ALWAYS;
588
589 if (!zsa) {
590 struct pipe_stencil_state default_stencil = {
591 .enabled = 0,
592 .func = PIPE_FUNC_ALWAYS,
593 .fail_op = MALI_STENCIL_KEEP,
594 .zfail_op = MALI_STENCIL_KEEP,
595 .zpass_op = MALI_STENCIL_KEEP,
596 .writemask = 0xFF,
597 .valuemask = 0xFF
598 };
599
600 panfrost_make_stencil_state(&default_stencil,
601 &fragmeta->stencil_front);
602 fragmeta->stencil_mask_front = default_stencil.writemask;
603 fragmeta->stencil_back = fragmeta->stencil_front;
604 fragmeta->stencil_mask_back = default_stencil.writemask;
605 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
606 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
607 } else {
608 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
609 zsa->stencil[0].enabled);
610 panfrost_make_stencil_state(&zsa->stencil[0],
611 &fragmeta->stencil_front);
612 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
613 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
614
615 /* If back-stencil is not enabled, use the front values */
616
617 if (zsa->stencil[1].enabled) {
618 panfrost_make_stencil_state(&zsa->stencil[1],
619 &fragmeta->stencil_back);
620 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
621 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
622 } else {
623 fragmeta->stencil_back = fragmeta->stencil_front;
624 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
625 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
626 }
627
628 if (zsa->depth.enabled)
629 zfunc = zsa->depth.func;
630
631 /* Depth state (TODO: Refactor) */
632
633 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
634 zsa->depth.writemask);
635 }
636
637 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
638 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
639 }
640
641 static bool
642 panfrost_fs_required(
643 struct panfrost_shader_state *fs,
644 struct panfrost_blend_final *blend,
645 unsigned rt_count)
646 {
647 /* If we generally have side effects */
648 if (fs->fs_sidefx)
649 return true;
650
651 /* If colour is written we need to execute */
652 for (unsigned i = 0; i < rt_count; ++i) {
653 if (!blend[i].no_colour)
654 return true;
655 }
656
657 /* If depth is written and not implied we need to execute.
658 * TODO: Predicate on Z/S writes being enabled */
659 return (fs->writes_depth || fs->writes_stencil);
660 }
661
662 static void
663 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
664 struct mali_shader_meta *fragmeta,
665 void *rts)
666 {
667 const struct panfrost_device *dev = pan_device(ctx->base.screen);
668 struct panfrost_shader_state *fs;
669 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
670
671 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
672 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
673 !ctx->blend->base.dither);
674
675 SET_BIT(fragmeta->unknown2_4, MALI_ALPHA_TO_COVERAGE,
676 ctx->blend->base.alpha_to_coverage);
677
678 /* Get blending setup */
679 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
680
681 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
682 unsigned shader_offset = 0;
683 struct panfrost_bo *shader_bo = NULL;
684
685 for (unsigned c = 0; c < rt_count; ++c)
686 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
687 &shader_offset);
688
689 /* Disable shader execution if we can */
690 if (dev->quirks & MIDGARD_SHADERLESS
691 && !panfrost_fs_required(fs, blend, rt_count)) {
692 fragmeta->shader = 0;
693 fragmeta->attribute_count = 0;
694 fragmeta->varying_count = 0;
695 fragmeta->texture_count = 0;
696 fragmeta->sampler_count = 0;
697
698 /* This feature is not known to work on Bifrost */
699 fragmeta->midgard1.work_count = 1;
700 fragmeta->midgard1.uniform_count = 0;
701 fragmeta->midgard1.uniform_buffer_count = 0;
702 }
703
704 /* If there is a blend shader, work registers are shared. We impose 8
705 * work registers as a limit for blend shaders. Should be lower XXX */
706
707 if (!(dev->quirks & IS_BIFROST)) {
708 for (unsigned c = 0; c < rt_count; ++c) {
709 if (blend[c].is_shader) {
710 fragmeta->midgard1.work_count =
711 MAX2(fragmeta->midgard1.work_count, 8);
712 }
713 }
714 }
715
716 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
717 * copied to the blend_meta appended (by convention), but this is the
718 * field actually read by the hardware. (Or maybe both are read...?).
719 * Specify the last RTi with a blend shader. */
720
721 fragmeta->blend.shader = 0;
722
723 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
724 if (!blend[rt].is_shader)
725 continue;
726
727 fragmeta->blend.shader = blend[rt].shader.gpu |
728 blend[rt].shader.first_tag;
729 break;
730 }
731
732 if (dev->quirks & MIDGARD_SFBD) {
 733                 /* On platforms with only a single render target (SFBD), the blend
734 * information is inside the shader meta itself. We additionally
735 * need to signal CAN_DISCARD for nontrivial blend modes (so
736 * we're able to read back the destination buffer) */
737
738 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
739 blend[0].is_shader);
740
741 if (!blend[0].is_shader) {
742 fragmeta->blend.equation = *blend[0].equation.equation;
743 fragmeta->blend.constant = blend[0].equation.constant;
744 }
745
746 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
747 !blend[0].no_blending || fs->can_discard);
748 return;
749 }
750
751 if (dev->quirks & IS_BIFROST) {
752 bool no_blend = true;
753
754 for (unsigned i = 0; i < rt_count; ++i)
755 no_blend &= (blend[i].no_blending | blend[i].no_colour);
756
757 SET_BIT(fragmeta->bifrost1.unk1, MALI_BIFROST_EARLY_Z,
758 !fs->can_discard && !fs->writes_depth && no_blend);
759 }
760
761 /* Additional blend descriptor tacked on for jobs using MFBD */
762
763 for (unsigned i = 0; i < rt_count; ++i) {
764 unsigned flags = 0;
765
766 if (ctx->pipe_framebuffer.nr_cbufs > i && !blend[i].no_colour) {
767 flags = 0x200;
768
769 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
770 (ctx->pipe_framebuffer.cbufs[i]) &&
771 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
772
773 SET_BIT(flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
774 SET_BIT(flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
775 SET_BIT(flags, MALI_BLEND_SRGB, is_srgb);
776 SET_BIT(flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
777 }
778
779 if (dev->quirks & IS_BIFROST) {
780 struct bifrost_blend_rt *brts = rts;
781
782 brts[i].flags = flags;
783
784 if (blend[i].is_shader) {
785 /* The blend shader's address needs to be at
 786                                  * the same top 32 bits as the fragment shader.
787 * TODO: Ensure that's always the case.
788 */
789 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
790 (fs->bo->gpu & (0xffffffffull << 32)));
791 brts[i].shader = blend[i].shader.gpu;
792 brts[i].unk2 = 0x0;
793 } else if (ctx->pipe_framebuffer.nr_cbufs > i) {
794 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
795 const struct util_format_description *format_desc;
796 format_desc = util_format_description(format);
797
798 brts[i].equation = *blend[i].equation.equation;
799
800 /* TODO: this is a bit more complicated */
801 brts[i].constant = blend[i].equation.constant;
802
803 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
804
805 /* 0x19 disables blending and forces REPLACE
806 * mode (equivalent to rgb_mode = alpha_mode =
807 * x122, colour mask = 0xF). 0x1a allows
808 * blending. */
809 brts[i].unk2 = blend[i].no_blending ? 0x19 : 0x1a;
810
811 brts[i].shader_type = fs->blend_types[i];
812 } else {
813 /* Dummy attachment for depth-only */
814 brts[i].unk2 = 0x3;
815 brts[i].shader_type = fs->blend_types[i];
816 }
817 } else {
818 struct midgard_blend_rt *mrts = rts;
819 mrts[i].flags = flags;
820
821 if (blend[i].is_shader) {
822 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
823 } else {
824 mrts[i].blend.equation = *blend[i].equation.equation;
825 mrts[i].blend.constant = blend[i].equation.constant;
826 }
827 }
828 }
829 }
830
831 static void
832 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
833 struct mali_shader_meta *fragmeta,
834 void *rts)
835 {
836 const struct panfrost_device *dev = pan_device(ctx->base.screen);
837 struct panfrost_shader_state *fs;
838
839 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
840
841 bool msaa = ctx->rasterizer && ctx->rasterizer->base.multisample;
842 fragmeta->coverage_mask = (msaa ? ctx->sample_mask : ~0) & 0xF;
843
844 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10;
845 fragmeta->unknown2_4 = 0x4e0;
846
847 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
848 * is required (independent of 32-bit/64-bit descriptors), or why it's
849 * not used on later GPU revisions. Otherwise, all shader jobs fault on
850 * these earlier chips (perhaps this is a chicken bit of some kind).
851 * More investigation is needed. */
852
853 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
854
855 if (dev->quirks & IS_BIFROST) {
856 /* TODO */
857 } else {
 858                 /* Depending on whether it's legal to do so in the given shader, we
 859                  * try to enable early-z testing. TODO: respect e-z force */
860
861 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
862 !fs->can_discard && !fs->writes_global &&
863 !fs->writes_depth && !fs->writes_stencil &&
864 !ctx->blend->base.alpha_to_coverage);
865
866 /* Add the writes Z/S flags if needed. */
867 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
868 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
869
870 /* Any time texturing is used, derivatives are implicitly calculated,
871 * so we need to enable helper invocations */
872
873 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
874 fs->helper_invocations);
875
876 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
877
878 bool depth_enabled = fs->writes_depth ||
879 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
880
881 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_TILEBUFFER,
882 fs->outputs_read || (!depth_enabled && fs->can_discard));
883 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
884 }
885
886 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
887 panfrost_frag_meta_zsa_update(ctx, fragmeta);
888 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
889 }
890
891 void
892 panfrost_emit_shader_meta(struct panfrost_batch *batch,
893 enum pipe_shader_type st,
894 struct mali_vertex_tiler_postfix *postfix)
895 {
896 struct panfrost_context *ctx = batch->ctx;
897 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
898
899 if (!ss) {
900 postfix->shader = 0;
901 return;
902 }
903
904 struct mali_shader_meta meta;
905
906 panfrost_shader_meta_init(ctx, st, &meta);
907
908 /* Add the shader BO to the batch. */
909 panfrost_batch_add_bo(batch, ss->bo,
910 PAN_BO_ACCESS_PRIVATE |
911 PAN_BO_ACCESS_READ |
912 panfrost_bo_access_for_stage(st));
913
914 mali_ptr shader_ptr;
915
916 if (st == PIPE_SHADER_FRAGMENT) {
917 struct panfrost_device *dev = pan_device(ctx->base.screen);
918 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
919 size_t desc_size = sizeof(meta);
920 void *rts = NULL;
921 struct panfrost_transfer xfer;
922 unsigned rt_size;
923
924 if (dev->quirks & MIDGARD_SFBD)
925 rt_size = 0;
926 else if (dev->quirks & IS_BIFROST)
927 rt_size = sizeof(struct bifrost_blend_rt);
928 else
929 rt_size = sizeof(struct midgard_blend_rt);
930
931 desc_size += rt_size * rt_count;
932
933 if (rt_size)
934 rts = rzalloc_size(ctx, rt_size * rt_count);
935
936 panfrost_frag_shader_meta_init(ctx, &meta, rts);
937
938 xfer = panfrost_pool_alloc(&batch->pool, desc_size);
939
940 memcpy(xfer.cpu, &meta, sizeof(meta));
941 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
942
943 if (rt_size)
944 ralloc_free(rts);
945
946 shader_ptr = xfer.gpu;
947 } else {
948 shader_ptr = panfrost_pool_upload(&batch->pool, &meta,
949 sizeof(meta));
950 }
951
952 postfix->shader = shader_ptr;
953 }
954
955 static void
956 panfrost_mali_viewport_init(struct panfrost_context *ctx,
957 struct mali_viewport *mvp)
958 {
959 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
960
961 /* Clip bounds are encoded as floats. The viewport itself is encoded as
962 * (somewhat) asymmetric ints. */
963
964 const struct pipe_scissor_state *ss = &ctx->scissor;
965
966 memset(mvp, 0, sizeof(*mvp));
967
968 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
969 * each direction. Clipping to the viewport in theory should work, but
970 * in practice causes issues when we're not explicitly trying to
971 * scissor */
972
973 *mvp = (struct mali_viewport) {
974 .clip_minx = -INFINITY,
975 .clip_miny = -INFINITY,
976 .clip_maxx = INFINITY,
977 .clip_maxy = INFINITY,
978 };
979
980 /* Always scissor to the viewport by default. */
981 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
982 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
983
984 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
985 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
986
987 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
988 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
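        /* Illustrative example (values not from this commit): for a viewport
         * covering a full 800x600 framebuffer, gallium hands us
         * translate = (400, 300, 0.5) and scale = (400, -300, 0.5), so the
         * math above yields vp_minx = 0, vp_maxx = 800, vp_miny = 0,
         * vp_maxy = 600, minz = 0 and maxz = 1. */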
989
990 /* Apply the scissor test */
991
992 unsigned minx, miny, maxx, maxy;
993
994 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
995 minx = MAX2(ss->minx, vp_minx);
996 miny = MAX2(ss->miny, vp_miny);
997 maxx = MIN2(ss->maxx, vp_maxx);
998 maxy = MIN2(ss->maxy, vp_maxy);
999 } else {
1000 minx = vp_minx;
1001 miny = vp_miny;
1002 maxx = vp_maxx;
1003 maxy = vp_maxy;
1004 }
1005
1006 /* Hardware needs the min/max to be strictly ordered, so flip if we
1007 * need to. The viewport transformation in the vertex shader will
1008 * handle the negatives if we don't */
1009
1010 if (miny > maxy) {
1011 unsigned temp = miny;
1012 miny = maxy;
1013 maxy = temp;
1014 }
1015
1016 if (minx > maxx) {
1017 unsigned temp = minx;
1018 minx = maxx;
1019 maxx = temp;
1020 }
1021
1022 if (minz > maxz) {
1023 float temp = minz;
1024 minz = maxz;
1025 maxz = temp;
1026 }
1027
1028 /* Clamp to the framebuffer size as a last check */
1029
1030 minx = MIN2(ctx->pipe_framebuffer.width, minx);
1031 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1032
1033 miny = MIN2(ctx->pipe_framebuffer.height, miny);
1034 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1035
1036 /* Upload */
1037
1038 mvp->viewport0[0] = minx;
1039 mvp->viewport1[0] = MALI_POSITIVE(maxx);
1040
1041 mvp->viewport0[1] = miny;
1042 mvp->viewport1[1] = MALI_POSITIVE(maxy);
1043
1044 bool clip_near = true;
1045 bool clip_far = true;
1046
1047 if (ctx->rasterizer) {
1048 clip_near = ctx->rasterizer->base.depth_clip_near;
1049 clip_far = ctx->rasterizer->base.depth_clip_far;
1050 }
1051
1052 mvp->clip_minz = clip_near ? minz : -INFINITY;
1053 mvp->clip_maxz = clip_far ? maxz : INFINITY;
1054 }
1055
1056 void
1057 panfrost_emit_viewport(struct panfrost_batch *batch,
1058 struct mali_vertex_tiler_postfix *tiler_postfix)
1059 {
1060 struct panfrost_context *ctx = batch->ctx;
1061 struct mali_viewport mvp;
1062
1063 panfrost_mali_viewport_init(batch->ctx, &mvp);
1064
1065 /* Update the job, unless we're doing wallpapering (whose lack of
1066 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1067 * just... be faster :) */
1068
1069 if (!ctx->wallpaper_batch)
1070 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
1071 mvp.viewport0[1],
1072 mvp.viewport1[0] + 1,
1073 mvp.viewport1[1] + 1);
1074
1075 tiler_postfix->viewport = panfrost_pool_upload(&batch->pool, &mvp,
1076 sizeof(mvp));
1077 }
1078
1079 static mali_ptr
1080 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1081 enum pipe_shader_type st,
1082 struct panfrost_constant_buffer *buf,
1083 unsigned index)
1084 {
1085 struct pipe_constant_buffer *cb = &buf->cb[index];
1086 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1087
1088 if (rsrc) {
1089 panfrost_batch_add_bo(batch, rsrc->bo,
1090 PAN_BO_ACCESS_SHARED |
1091 PAN_BO_ACCESS_READ |
1092 panfrost_bo_access_for_stage(st));
1093
 1094                 /* Alignment guaranteed by
1095 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1096 return rsrc->bo->gpu + cb->buffer_offset;
1097 } else if (cb->user_buffer) {
1098 return panfrost_pool_upload(&batch->pool,
1099 cb->user_buffer +
1100 cb->buffer_offset,
1101 cb->buffer_size);
1102 } else {
1103 unreachable("No constant buffer");
1104 }
1105 }
1106
1107 struct sysval_uniform {
1108 union {
1109 float f[4];
1110 int32_t i[4];
1111 uint32_t u[4];
1112 uint64_t du[2];
1113 };
1114 };
1115
1116 static void
1117 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1118 struct sysval_uniform *uniform)
1119 {
1120 struct panfrost_context *ctx = batch->ctx;
1121 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1122
1123 uniform->f[0] = vp->scale[0];
1124 uniform->f[1] = vp->scale[1];
1125 uniform->f[2] = vp->scale[2];
1126 }
1127
1128 static void
1129 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1130 struct sysval_uniform *uniform)
1131 {
1132 struct panfrost_context *ctx = batch->ctx;
1133 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1134
1135 uniform->f[0] = vp->translate[0];
1136 uniform->f[1] = vp->translate[1];
1137 uniform->f[2] = vp->translate[2];
1138 }
1139
1140 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1141 enum pipe_shader_type st,
1142 unsigned int sysvalid,
1143 struct sysval_uniform *uniform)
1144 {
1145 struct panfrost_context *ctx = batch->ctx;
1146 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1147 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1148 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1149 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1150
1151 assert(dim);
1152 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1153
1154 if (dim > 1)
1155 uniform->i[1] = u_minify(tex->texture->height0,
1156 tex->u.tex.first_level);
1157
1158 if (dim > 2)
1159 uniform->i[2] = u_minify(tex->texture->depth0,
1160 tex->u.tex.first_level);
1161
1162 if (is_array)
1163 uniform->i[dim] = tex->texture->array_size;
1164 }
1165
1166 static void
1167 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1168 enum pipe_shader_type st,
1169 unsigned ssbo_id,
1170 struct sysval_uniform *uniform)
1171 {
1172 struct panfrost_context *ctx = batch->ctx;
1173
1174 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1175 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1176
1177 /* Compute address */
1178 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1179
1180 panfrost_batch_add_bo(batch, bo,
1181 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1182 panfrost_bo_access_for_stage(st));
1183
1184 /* Upload address and size as sysval */
1185 uniform->du[0] = bo->gpu + sb.buffer_offset;
1186 uniform->u[2] = sb.buffer_size;
1187 }
1188
1189 static void
1190 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1191 enum pipe_shader_type st,
1192 unsigned samp_idx,
1193 struct sysval_uniform *uniform)
1194 {
1195 struct panfrost_context *ctx = batch->ctx;
1196 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1197
1198 uniform->f[0] = sampl->min_lod;
1199 uniform->f[1] = sampl->max_lod;
1200 uniform->f[2] = sampl->lod_bias;
1201
1202 /* Even without any errata, Midgard represents "no mipmapping" as
1203 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1204 * panfrost_create_sampler_state which also explains our choice of
1205 * epsilon value (again to keep behaviour consistent) */
1206
1207 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1208 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1209 }
1210
1211 static void
1212 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1213 struct sysval_uniform *uniform)
1214 {
1215 struct panfrost_context *ctx = batch->ctx;
1216
1217 uniform->u[0] = ctx->compute_grid->grid[0];
1218 uniform->u[1] = ctx->compute_grid->grid[1];
1219 uniform->u[2] = ctx->compute_grid->grid[2];
1220 }
1221
1222 static void
1223 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1224 struct panfrost_shader_state *ss,
1225 enum pipe_shader_type st)
1226 {
1227 struct sysval_uniform *uniforms = (void *)buf;
1228
1229 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1230 int sysval = ss->sysval[i];
1231
1232 switch (PAN_SYSVAL_TYPE(sysval)) {
1233 case PAN_SYSVAL_VIEWPORT_SCALE:
1234 panfrost_upload_viewport_scale_sysval(batch,
1235 &uniforms[i]);
1236 break;
1237 case PAN_SYSVAL_VIEWPORT_OFFSET:
1238 panfrost_upload_viewport_offset_sysval(batch,
1239 &uniforms[i]);
1240 break;
1241 case PAN_SYSVAL_TEXTURE_SIZE:
1242 panfrost_upload_txs_sysval(batch, st,
1243 PAN_SYSVAL_ID(sysval),
1244 &uniforms[i]);
1245 break;
1246 case PAN_SYSVAL_SSBO:
1247 panfrost_upload_ssbo_sysval(batch, st,
1248 PAN_SYSVAL_ID(sysval),
1249 &uniforms[i]);
1250 break;
1251 case PAN_SYSVAL_NUM_WORK_GROUPS:
1252 panfrost_upload_num_work_groups_sysval(batch,
1253 &uniforms[i]);
1254 break;
1255 case PAN_SYSVAL_SAMPLER:
1256 panfrost_upload_sampler_sysval(batch, st,
1257 PAN_SYSVAL_ID(sysval),
1258 &uniforms[i]);
1259 break;
1260 default:
1261 assert(0);
1262 }
1263 }
1264 }
1265
1266 static const void *
1267 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1268 unsigned index)
1269 {
1270 struct pipe_constant_buffer *cb = &buf->cb[index];
1271 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1272
1273 if (rsrc)
1274 return rsrc->bo->cpu;
1275 else if (cb->user_buffer)
1276 return cb->user_buffer;
1277 else
1278 unreachable("No constant buffer");
1279 }
1280
1281 void
1282 panfrost_emit_const_buf(struct panfrost_batch *batch,
1283 enum pipe_shader_type stage,
1284 struct mali_vertex_tiler_postfix *postfix)
1285 {
1286 struct panfrost_context *ctx = batch->ctx;
1287 struct panfrost_shader_variants *all = ctx->shader[stage];
1288
1289 if (!all)
1290 return;
1291
1292 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1293
1294 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1295
1296 /* Uniforms are implicitly UBO #0 */
1297 bool has_uniforms = buf->enabled_mask & (1 << 0);
1298
1299 /* Allocate room for the sysval and the uniforms */
1300 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1301 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1302 size_t size = sys_size + uniform_size;
1303 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1304 size);
1305
1306 /* Upload sysvals requested by the shader */
1307 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1308
1309 /* Upload uniforms */
1310 if (has_uniforms && uniform_size) {
1311 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1312 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1313 }
1314
1315 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1316 * uploaded */
1317
1318 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1319 assert(ubo_count >= 1);
1320
1321 size_t sz = sizeof(uint64_t) * ubo_count;
1322 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1323 int uniform_count = ss->uniform_count;
1324
1325 /* Upload uniforms as a UBO */
1326 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1327
1328 /* The rest are honest-to-goodness UBOs */
1329
1330 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1331 size_t usz = buf->cb[ubo].buffer_size;
1332 bool enabled = buf->enabled_mask & (1 << ubo);
1333 bool empty = usz == 0;
1334
1335 if (!enabled || empty) {
1336 /* Stub out disabled UBOs to catch accesses */
1337 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1338 continue;
1339 }
1340
1341 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1342 buf, ubo);
1343
1344 unsigned bytes_per_field = 16;
1345 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1346 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
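                /* Sketch with illustrative numbers (not from this commit): a
                 * 100-byte UBO is padded to 112 bytes, i.e. 112 / 16 = 7 of
                 * the 16-byte fields recorded in the descriptor. */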
1347 }
1348
1349 mali_ptr ubufs = panfrost_pool_upload(&batch->pool, ubos, sz);
1350 postfix->uniforms = transfer.gpu;
1351 postfix->uniform_buffers = ubufs;
1352
1353 buf->dirty_mask = 0;
1354 }
1355
1356 void
1357 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1358 const struct pipe_grid_info *info,
1359 struct midgard_payload_vertex_tiler *vtp)
1360 {
1361 struct panfrost_context *ctx = batch->ctx;
1362 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1363 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1364 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1365 128));
1366 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1367 info->grid[2] * 4;
1368 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1369 shared_size,
1370 1);
1371
1372 struct mali_shared_memory shared = {
1373 .shared_memory = bo->gpu,
1374 .shared_workgroup_count =
1375 util_logbase2_ceil(info->grid[0]) +
1376 util_logbase2_ceil(info->grid[1]) +
1377 util_logbase2_ceil(info->grid[2]),
1378 .shared_unk1 = 0x2,
1379 .shared_shift = util_logbase2(single_size) - 1
1380 };
1381
1382 vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared,
1383 sizeof(shared));
1384 }
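/* Illustrative example for the helper above (numbers not from this commit):
 * with ss->shared_size <= 128 and an 8x8x1 grid, single_size = 128, the BO
 * holds 128 * 8 * 8 * 1 * 4 = 32768 bytes, shared_workgroup_count =
 * 3 + 3 + 0 = 6, and shared_shift = log2(128) - 1 = 6. */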
1385
1386 static mali_ptr
1387 panfrost_get_tex_desc(struct panfrost_batch *batch,
1388 enum pipe_shader_type st,
1389 struct panfrost_sampler_view *view)
1390 {
1391 if (!view)
1392 return (mali_ptr) 0;
1393
1394 struct pipe_sampler_view *pview = &view->base;
1395 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1396
1397 /* Add the BO to the job so it's retained until the job is done. */
1398
1399 panfrost_batch_add_bo(batch, rsrc->bo,
1400 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1401 panfrost_bo_access_for_stage(st));
1402
1403 panfrost_batch_add_bo(batch, view->bo,
1404 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1405 panfrost_bo_access_for_stage(st));
1406
1407 return view->bo->gpu;
1408 }
1409
1410 static void
1411 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1412 struct pipe_context *pctx)
1413 {
1414 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1415 if (view->texture_bo != rsrc->bo->gpu ||
1416 view->layout != rsrc->layout) {
1417 panfrost_bo_unreference(view->bo);
1418 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1419 }
1420 }
1421
1422 void
1423 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1424 enum pipe_shader_type stage,
1425 struct mali_vertex_tiler_postfix *postfix)
1426 {
1427 struct panfrost_context *ctx = batch->ctx;
1428 struct panfrost_device *device = pan_device(ctx->base.screen);
1429
1430 if (!ctx->sampler_view_count[stage])
1431 return;
1432
1433 if (device->quirks & IS_BIFROST) {
1434 struct bifrost_texture_descriptor *descriptors;
1435
1436 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1437 ctx->sampler_view_count[stage]);
1438
1439 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1440 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1441 struct pipe_sampler_view *pview = &view->base;
1442 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1443 panfrost_update_sampler_view(view, &ctx->base);
1444
1445 /* Add the BOs to the job so they are retained until the job is done. */
1446
1447 panfrost_batch_add_bo(batch, rsrc->bo,
1448 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1449 panfrost_bo_access_for_stage(stage));
1450
1451 panfrost_batch_add_bo(batch, view->bo,
1452 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1453 panfrost_bo_access_for_stage(stage));
1454
1455 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1456 }
1457
1458 postfix->textures = panfrost_pool_upload(&batch->pool,
1459 descriptors,
1460 sizeof(struct bifrost_texture_descriptor) *
1461 ctx->sampler_view_count[stage]);
1462
1463 free(descriptors);
1464 } else {
1465 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1466
1467 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1468 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1469
1470 panfrost_update_sampler_view(view, &ctx->base);
1471
1472 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1473 }
1474
1475 postfix->textures = panfrost_pool_upload(&batch->pool,
1476 trampolines,
1477 sizeof(uint64_t) *
1478 ctx->sampler_view_count[stage]);
1479 }
1480 }
1481
1482 void
1483 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1484 enum pipe_shader_type stage,
1485 struct mali_vertex_tiler_postfix *postfix)
1486 {
1487 struct panfrost_context *ctx = batch->ctx;
1488 struct panfrost_device *device = pan_device(ctx->base.screen);
1489
1490 if (!ctx->sampler_count[stage])
1491 return;
1492
1493 if (device->quirks & IS_BIFROST) {
1494 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1495 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1496 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1497 transfer_size);
1498 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1499
1500 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1501 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1502
1503 postfix->sampler_descriptor = transfer.gpu;
1504 } else {
1505 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1506 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1507 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1508 transfer_size);
1509 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1510
1511 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1512 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1513
1514 postfix->sampler_descriptor = transfer.gpu;
1515 }
1516 }
1517
1518 void
1519 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1520 struct mali_vertex_tiler_postfix *vertex_postfix)
1521 {
1522 struct panfrost_context *ctx = batch->ctx;
1523
1524 if (!ctx->vertex)
1525 return;
1526
1527 struct panfrost_vertex_state *so = ctx->vertex;
1528
1529 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1530 vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
1531 sizeof(*so->hw) *
1532 PAN_MAX_ATTRIBUTE);
1533 }
1534
1535 void
1536 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1537 struct mali_vertex_tiler_postfix *vertex_postfix)
1538 {
1539 struct panfrost_context *ctx = batch->ctx;
1540 struct panfrost_vertex_state *so = ctx->vertex;
1541
1542 /* Staged mali_attr, and index into them. i =/= k, depending on the
1543 * vertex buffer mask and instancing. Twice as much room is allocated,
 1544          * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1545 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1546 unsigned k = 0;
1547
1548 for (unsigned i = 0; i < so->num_elements; ++i) {
1549 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1550 * means duplicating some vertex buffers (who cares? aside from
1551 * maybe some caching implications but I somehow doubt that
1552 * matters) */
1553
1554 struct pipe_vertex_element *elem = &so->pipe[i];
1555 unsigned vbi = elem->vertex_buffer_index;
1556
1557 /* The exception to 1:1 mapping is that we can have multiple
1558 * entries (NPOT divisors), so we fixup anyways */
1559
1560 so->hw[i].index = k;
1561
1562 if (!(ctx->vb_mask & (1 << vbi)))
1563 continue;
1564
1565 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1566 struct panfrost_resource *rsrc;
1567
1568 rsrc = pan_resource(buf->buffer.resource);
1569 if (!rsrc)
1570 continue;
1571
1572 /* Align to 64 bytes by masking off the lower bits. This
1573 * will be adjusted back when we fixup the src_offset in
1574 * mali_attr_meta */
1575
1576 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1577 mali_ptr addr = raw_addr & ~63;
1578 unsigned chopped_addr = raw_addr - addr;
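                /* Illustrative example (not from this commit): with a
                 * 64-byte-aligned BO and buffer_offset = 52, addr drops back
                 * to the BO base, chopped_addr = 52, those 52 bytes are added
                 * back to the size below, and the attribute's src_offset
                 * fixup later re-applies the 52-byte offset. */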
1579
1580 /* Add a dependency of the batch on the vertex buffer */
1581 panfrost_batch_add_bo(batch, rsrc->bo,
1582 PAN_BO_ACCESS_SHARED |
1583 PAN_BO_ACCESS_READ |
1584 PAN_BO_ACCESS_VERTEX_TILER);
1585
1586 /* Set common fields */
1587 attrs[k].elements = addr;
1588 attrs[k].stride = buf->stride;
1589
1590 /* Since we advanced the base pointer, we shrink the buffer
1591 * size */
1592 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1593
1594 /* We need to add the extra size we masked off (for
1595 * correctness) so the data doesn't get clamped away */
1596 attrs[k].size += chopped_addr;
1597
1598 /* For non-instancing make sure we initialize */
1599 attrs[k].shift = attrs[k].extra_flags = 0;
1600
1601 /* Instancing uses a dramatically different code path than
1602 * linear, so dispatch for the actual emission now that the
1603 * common code is finished */
1604
1605 unsigned divisor = elem->instance_divisor;
1606
1607 if (divisor && ctx->instance_count == 1) {
1608 /* Silly corner case where there's a divisor(=1) but
1609 * there's no legitimate instancing. So we want *every*
1610 * attribute to be the same. So set stride to zero so
1611 * we don't go anywhere. */
1612
1613 attrs[k].size = attrs[k].stride + chopped_addr;
1614 attrs[k].stride = 0;
1615 attrs[k++].elements |= MALI_ATTR_LINEAR;
1616 } else if (ctx->instance_count <= 1) {
1617 /* Normal, non-instanced attributes */
1618 attrs[k++].elements |= MALI_ATTR_LINEAR;
1619 } else {
1620 unsigned instance_shift = vertex_postfix->instance_shift;
1621 unsigned instance_odd = vertex_postfix->instance_odd;
1622
1623 k += panfrost_vertex_instanced(ctx->padded_count,
1624 instance_shift,
1625 instance_odd,
1626 divisor, &attrs[k]);
1627 }
1628 }
1629
1630 /* Add special gl_VertexID/gl_InstanceID buffers */
1631
1632 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1633 so->hw[PAN_VERTEX_ID].index = k++;
1634 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1635 so->hw[PAN_INSTANCE_ID].index = k++;
1636
1637 /* Upload whatever we emitted and go */
1638
1639 vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
1640 k * sizeof(*attrs));
1641 }
1642
1643 static mali_ptr
1644 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1645 unsigned stride, unsigned count)
1646 {
1647 /* Fill out the descriptor */
1648 slot->stride = stride;
1649 slot->size = stride * count;
1650 slot->shift = slot->extra_flags = 0;
1651
1652 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1653 slot->size);
1654
1655 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1656
1657 return transfer.gpu;
1658 }
1659
1660 static unsigned
1661 panfrost_streamout_offset(unsigned stride, unsigned offset,
1662 struct pipe_stream_output_target *target)
1663 {
1664 return (target->buffer_offset + (offset * stride * 4)) & 63;
1665 }
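/* Illustrative example for the helper above (numbers not from this commit):
 * with buffer_offset = 16, offset = 5 vertices already written and a stride
 * of 4 words, the byte position is 16 + 5 * 16 = 96, so the record is
 * misaligned by 96 & 63 = 32 bytes relative to the 64-byte-aligned base
 * chosen in panfrost_emit_streamout() below. */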
1666
1667 static void
1668 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1669 unsigned stride, unsigned offset, unsigned count,
1670 struct pipe_stream_output_target *target)
1671 {
1672 /* Fill out the descriptor */
1673 slot->stride = stride * 4;
1674 slot->shift = slot->extra_flags = 0;
1675
1676 unsigned max_size = target->buffer_size;
1677 unsigned expected_size = slot->stride * count;
1678
1679 /* Grab the BO and bind it to the batch */
1680 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1681
1682 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1683 * the perspective of the TILER and FRAGMENT.
1684 */
1685 panfrost_batch_add_bo(batch, bo,
1686 PAN_BO_ACCESS_SHARED |
1687 PAN_BO_ACCESS_RW |
1688 PAN_BO_ACCESS_VERTEX_TILER |
1689 PAN_BO_ACCESS_FRAGMENT);
1690
1691 /* We will have an offset applied to get alignment */
1692 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1693 slot->elements = (addr & ~63) | MALI_ATTR_LINEAR;
1694 slot->size = MIN2(max_size, expected_size) + (addr & 63);
1695 }
1696
1697 static bool
1698 has_point_coord(unsigned mask, gl_varying_slot loc)
1699 {
1700 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1701 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1702 else if (loc == VARYING_SLOT_PNTC)
1703 return (mask & (1 << 8));
1704 else
1705 return false;
1706 }
1707
1708 /* Helpers for manipulating stream out information so we can pack varyings
1709 * accordingly. Compute the src_offset for a given captured varying */
1710
1711 static struct pipe_stream_output *
1712 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1713 {
1714 for (unsigned i = 0; i < info->num_outputs; ++i) {
1715 if (info->output[i].register_index == loc)
1716 return &info->output[i];
1717 }
1718
1719 unreachable("Varying not captured");
1720 }
1721
1722 static unsigned
1723 pan_varying_size(enum mali_format fmt)
1724 {
1725 unsigned type = MALI_EXTRACT_TYPE(fmt);
1726 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1727 unsigned bits = MALI_EXTRACT_BITS(fmt);
1728 unsigned bpc = 0;
1729
1730 if (bits == MALI_CHANNEL_FLOAT) {
1731 /* No doubles */
1732 bool fp16 = (type == MALI_FORMAT_SINT);
1733 assert(fp16 || (type == MALI_FORMAT_UNORM));
1734
1735 bpc = fp16 ? 2 : 4;
1736 } else {
1737 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1738
1739 /* See the enums */
1740 bits = 1 << bits;
1741 assert(bits >= 8);
1742 bpc = bits / 8;
1743 }
1744
1745 return bpc * chan;
1746 }
1747
1748 /* Indices for named (non-XFB) varyings that are present. These are packed
1749  * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1750  * PAN_VARY_*). This has the nice property that you can look up the buffer
1751  * index of a given special field given a shift S by:
1752  *
1753  * idx = popcount(P & ((1 << S) - 1))
1754  *
1755  * That is, count the varyings that come earlier; that count is the index of
1756  * this one. Likewise, the total number of special buffers required is simply
1757  * popcount(P)
1758 */
1759
1760 enum pan_special_varying {
1761 PAN_VARY_GENERAL = 0,
1762 PAN_VARY_POSITION = 1,
1763 PAN_VARY_PSIZ = 2,
1764 PAN_VARY_PNTCOORD = 3,
1765 PAN_VARY_FACE = 4,
1766 PAN_VARY_FRAGCOORD = 5,
1767
1768 /* Keep last */
1769 PAN_VARY_MAX,
1770 };
1771
1772 /* Given a varying, figure out which index it corresponds to */
1773
1774 static inline unsigned
1775 pan_varying_index(unsigned present, enum pan_special_varying v)
1776 {
1777 unsigned mask = (1 << v) - 1;
1778 return util_bitcount(present & mask);
1779 }
1780
1781 /* Get the base offset for XFB buffers, which by convention come after
1782 * everything else. Wrapper function for semantic reasons; by construction this
1783 * is just popcount. */
1784
1785 static inline unsigned
1786 pan_xfb_base(unsigned present)
1787 {
1788 return util_bitcount(present);
1789 }
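
/* A worked example of the scheme above (the masks depend on the shaders, so
 * this is just a sketch): with general, position and point size present,
 *
 *      P = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION) |
 *          (1 << PAN_VARY_PSIZ) = 0b111
 *
 *      pan_varying_index(P, PAN_VARY_PSIZ) = popcount(0b011) = 2
 *      pan_xfb_base(P) = popcount(P) = 3
 *
 * so the point size buffer is record #2 and XFB buffers start at record #3. */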
1790
1791 /* Computes the present mask for varyings so we can start emitting varying records */
1792
1793 static inline unsigned
1794 pan_varying_present(
1795 struct panfrost_shader_state *vs,
1796 struct panfrost_shader_state *fs,
1797 unsigned quirks)
1798 {
1799 /* At the moment we always emit general and position buffers. Not
1800 * strictly necessary but usually harmless */
1801
1802 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1803
1804 /* Enable special buffers by the shader info */
1805
1806 if (vs->writes_point_size)
1807 present |= (1 << PAN_VARY_PSIZ);
1808
1809 if (fs->reads_point_coord)
1810 present |= (1 << PAN_VARY_PNTCOORD);
1811
1812 if (fs->reads_face)
1813 present |= (1 << PAN_VARY_FACE);
1814
1815 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1816 present |= (1 << PAN_VARY_FRAGCOORD);
1817
1818 /* Also, if we have a point sprite, we need a point coord buffer */
1819
1820 for (unsigned i = 0; i < fs->varying_count; i++) {
1821 gl_varying_slot loc = fs->varyings_loc[i];
1822
1823 if (has_point_coord(fs->point_sprite_mask, loc))
1824 present |= (1 << PAN_VARY_PNTCOORD);
1825 }
1826
1827 return present;
1828 }
1829
1830 /* Emitters for varying records */
1831
1832 static struct mali_attr_meta
1833 pan_emit_vary(unsigned present, enum pan_special_varying buf,
1834 unsigned quirks, enum mali_format format,
1835 unsigned offset)
1836 {
1837 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1838
1839 struct mali_attr_meta meta = {
1840 .index = pan_varying_index(present, buf),
1841 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1842 .swizzle = quirks & HAS_SWIZZLES ?
1843 panfrost_get_default_swizzle(nr_channels) :
1844 panfrost_bifrost_swizzle(nr_channels),
1845 .format = format,
1846 .src_offset = offset
1847 };
1848
1849 return meta;
1850 }
1851
1852 /* General varying that the other stage does not consume, so its value is discarded */
1853
1854 static struct mali_attr_meta
1855 pan_emit_vary_only(unsigned present, unsigned quirks)
1856 {
1857 return pan_emit_vary(present, 0, quirks, MALI_VARYING_DISCARD, 0);
1858 }
1859
1860 /* Special records */
1861
1862 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1863 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1864 [PAN_VARY_PSIZ] = MALI_R16F,
1865 [PAN_VARY_PNTCOORD] = MALI_R16F,
1866 [PAN_VARY_FACE] = MALI_R32I,
1867 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1868 };
1869
1870 static struct mali_attr_meta
1871 pan_emit_vary_special(unsigned present, enum pan_special_varying buf,
1872 unsigned quirks)
1873 {
1874 assert(buf < PAN_VARY_MAX);
1875 return pan_emit_vary(present, buf, quirks, pan_varying_formats[buf], 0);
1876 }
1877
1878 static enum mali_format
1879 pan_xfb_format(enum mali_format format, unsigned nr)
1880 {
1881 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1882 return MALI_R32F | MALI_NR_CHANNELS(nr);
1883 else
1884 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1885 }
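
/* For example, a varying emitted as fp16 but captured with three components
 * would be promoted here to a three-channel 32-bit float, since transform
 * feedback always writes back at highp precision (see pan_emit_vary_xfb). */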
1886
1887 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1888 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1889 * value. */
1890
1891 static struct mali_attr_meta
1892 pan_emit_vary_xfb(unsigned present,
1893 unsigned max_xfb,
1894 unsigned *streamout_offsets,
1895 unsigned quirks,
1896 enum mali_format format,
1897 struct pipe_stream_output o)
1898 {
1899 	/* Construct a record for the captured varying */
1900 struct mali_attr_meta meta = {
1901 /* XFB buffers come after everything else */
1902 .index = pan_xfb_base(present) + o.output_buffer,
1903
1904 /* As usual unknown bit */
1905 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1906
1907 /* Override swizzle with number of channels */
1908 .swizzle = quirks & HAS_SWIZZLES ?
1909 panfrost_get_default_swizzle(o.num_components) :
1910 panfrost_bifrost_swizzle(o.num_components),
1911
1912 /* Override number of channels and precision to highp */
1913 .format = pan_xfb_format(format, o.num_components),
1914
1915 /* Apply given offsets together */
1916 .src_offset = (o.dst_offset * 4) /* dwords */
1917 + streamout_offsets[o.output_buffer]
1918 };
1919
1920 return meta;
1921 }
1922
1923 /* Determine if we should capture a varying for XFB. This requires actually
1924  * having a buffer for it. If we don't capture it, we'll fall back to a general
1925 * varying path (linked or unlinked, possibly discarding the write) */
1926
1927 static bool
1928 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1929 unsigned loc, unsigned max_xfb)
1930 {
1931 	if (!(xfb->so_mask & (1ull << loc)))
1932 return false;
1933
1934 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1935 return o->output_buffer < max_xfb;
1936 }
1937
1938 /* Higher-level wrapper around all of the above, classifying a varying into one
1939  * of the cases handled above */
1940
1941 static struct mali_attr_meta
1942 panfrost_emit_varying(
1943 struct panfrost_shader_state *stage,
1944 struct panfrost_shader_state *other,
1945 struct panfrost_shader_state *xfb,
1946 unsigned present,
1947 unsigned max_xfb,
1948 unsigned *streamout_offsets,
1949 unsigned quirks,
1950 unsigned *gen_offsets,
1951 enum mali_format *gen_formats,
1952 unsigned *gen_stride,
1953 unsigned idx,
1954 bool should_alloc,
1955 bool is_fragment)
1956 {
1957 gl_varying_slot loc = stage->varyings_loc[idx];
1958 enum mali_format format = stage->varyings[idx];
1959
1960 /* Override format to match linkage */
1961 if (!should_alloc && gen_formats[idx])
1962 format = gen_formats[idx];
1963
1964 if (has_point_coord(stage->point_sprite_mask, loc)) {
1965 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1966 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1967 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1968 return pan_emit_vary_xfb(present, max_xfb, streamout_offsets, quirks, format, *o);
1969 } else if (loc == VARYING_SLOT_POS) {
1970 if (is_fragment)
1971 return pan_emit_vary_special(present, PAN_VARY_FRAGCOORD, quirks);
1972 else
1973 return pan_emit_vary_special(present, PAN_VARY_POSITION, quirks);
1974 } else if (loc == VARYING_SLOT_PSIZ) {
1975 return pan_emit_vary_special(present, PAN_VARY_PSIZ, quirks);
1976 } else if (loc == VARYING_SLOT_PNTC) {
1977 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1978 } else if (loc == VARYING_SLOT_FACE) {
1979 return pan_emit_vary_special(present, PAN_VARY_FACE, quirks);
1980 }
1981
1982 	/* Special cases are exhausted, so this is a general varying. Check if we're linked */
1983 signed other_idx = -1;
1984
1985 for (unsigned j = 0; j < other->varying_count; ++j) {
1986 if (other->varyings_loc[j] == loc) {
1987 other_idx = j;
1988 break;
1989 }
1990 }
1991
1992 if (other_idx < 0)
1993 return pan_emit_vary_only(present, quirks);
1994
1995 unsigned offset = gen_offsets[other_idx];
1996
1997 if (should_alloc) {
1998 		/* We're linked, so allocate space via a watermark allocation */
1999 enum mali_format alt = other->varyings[other_idx];
2000
2001 /* Do interpolation at minimum precision */
2002 unsigned size_main = pan_varying_size(format);
2003 unsigned size_alt = pan_varying_size(alt);
2004 unsigned size = MIN2(size_main, size_alt);
2005
2006 /* If a varying is marked for XFB but not actually captured, we
2007 * should match the format to the format that would otherwise
2008 * be used for XFB, since dEQP checks for invariance here. It's
2009 * unclear if this is required by the spec. */
2010
2011 if (xfb->so_mask & (1ull << loc)) {
2012 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
2013 format = pan_xfb_format(format, o->num_components);
2014 size = pan_varying_size(format);
2015 } else if (size == size_alt) {
2016 format = alt;
2017 }
2018
2019 gen_offsets[idx] = *gen_stride;
2020 gen_formats[other_idx] = format;
2021 offset = *gen_stride;
2022 *gen_stride += size;
2023 }
2024
2025 return pan_emit_vary(present, PAN_VARY_GENERAL,
2026 quirks, format, offset);
2027 }
2028
2029 static void
2030 pan_emit_special_input(union mali_attr *varyings,
2031 unsigned present,
2032 enum pan_special_varying v,
2033 mali_ptr addr)
2034 {
2035 if (present & (1 << v)) {
2036 		/* Write the record exactly once, with the remaining fields
2037 		 * zeroed, both for performance and to avoid flakes from stale data */
2038
2039 union mali_attr s = {
2040 .elements = addr
2041 };
2042
2043 varyings[pan_varying_index(present, v)] = s;
2044 }
2045 }
2046
2047 void
2048 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2049 unsigned vertex_count,
2050 struct mali_vertex_tiler_postfix *vertex_postfix,
2051 struct mali_vertex_tiler_postfix *tiler_postfix,
2052 union midgard_primitive_size *primitive_size)
2053 {
2054 /* Load the shaders */
2055 struct panfrost_context *ctx = batch->ctx;
2056 struct panfrost_device *dev = pan_device(ctx->base.screen);
2057 struct panfrost_shader_state *vs, *fs;
2058 size_t vs_size, fs_size;
2059
2060 /* Allocate the varying descriptor */
2061
2062 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2063 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2064 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
2065 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
2066
2067 struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
2068 vs_size +
2069 fs_size);
2070
2071 struct pipe_stream_output_info *so = &vs->stream_output;
2072 unsigned present = pan_varying_present(vs, fs, dev->quirks);
2073
2074 /* Check if this varying is linked by us. This is the case for
2075 * general-purpose, non-captured varyings. If it is, link it. If it's
2076 * not, use the provided stream out information to determine the
2077 * offset, since it was already linked for us. */
2078
2079 unsigned gen_offsets[32];
2080 enum mali_format gen_formats[32];
2081 memset(gen_offsets, 0, sizeof(gen_offsets));
2082 memset(gen_formats, 0, sizeof(gen_formats));
2083
2084 unsigned gen_stride = 0;
2085 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
2086 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
2087
2088 unsigned streamout_offsets[32];
2089
2090 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2091 streamout_offsets[i] = panfrost_streamout_offset(
2092 so->stride[i],
2093 ctx->streamout.offsets[i],
2094 ctx->streamout.targets[i]);
2095 }
2096
2097 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
2098 struct mali_attr_meta *ofs = ovs + vs->varying_count;
2099
2100 for (unsigned i = 0; i < vs->varying_count; i++) {
2101 ovs[i] = panfrost_emit_varying(vs, fs, vs, present,
2102 ctx->streamout.num_targets, streamout_offsets,
2103 dev->quirks,
2104 gen_offsets, gen_formats, &gen_stride, i, true, false);
2105 }
2106
2107 for (unsigned i = 0; i < fs->varying_count; i++) {
2108 ofs[i] = panfrost_emit_varying(fs, vs, vs, present,
2109 ctx->streamout.num_targets, streamout_offsets,
2110 dev->quirks,
2111 gen_offsets, gen_formats, &gen_stride, i, false, true);
2112 }
2113
2114 unsigned xfb_base = pan_xfb_base(present);
2115 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
2116 sizeof(union mali_attr) * (xfb_base + ctx->streamout.num_targets));
2117 union mali_attr *varyings = (union mali_attr *) T.cpu;
2118
2119 /* Emit the stream out buffers */
2120
2121 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
2122 ctx->vertex_count);
2123
2124 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2125 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2126 so->stride[i],
2127 ctx->streamout.offsets[i],
2128 out_count,
2129 ctx->streamout.targets[i]);
2130 }
2131
2132 panfrost_emit_varyings(batch,
2133 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2134 gen_stride, vertex_count);
2135
2136 /* fp32 vec4 gl_Position */
2137 tiler_postfix->position_varying = panfrost_emit_varyings(batch,
2138 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2139 sizeof(float) * 4, vertex_count);
2140
2141 if (present & (1 << PAN_VARY_PSIZ)) {
2142 primitive_size->pointer = panfrost_emit_varyings(batch,
2143 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2144 2, vertex_count);
2145 }
2146
2147 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_VARYING_POINT_COORD);
2148 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_VARYING_FRONT_FACING);
2149 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_VARYING_FRAG_COORD);
2150
2151 vertex_postfix->varyings = T.gpu;
2152 tiler_postfix->varyings = T.gpu;
2153
2154 vertex_postfix->varying_meta = trans.gpu;
2155 tiler_postfix->varying_meta = trans.gpu + vs_size;
2156 }
2157
2158 void
2159 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2160 struct mali_vertex_tiler_prefix *vertex_prefix,
2161 struct mali_vertex_tiler_postfix *vertex_postfix,
2162 struct mali_vertex_tiler_prefix *tiler_prefix,
2163 struct mali_vertex_tiler_postfix *tiler_postfix,
2164 union midgard_primitive_size *primitive_size)
2165 {
2166 struct panfrost_context *ctx = batch->ctx;
2167 struct panfrost_device *device = pan_device(ctx->base.screen);
2168 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
2169 struct bifrost_payload_vertex bifrost_vertex = {0,};
2170 struct bifrost_payload_tiler bifrost_tiler = {0,};
2171 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
2172 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
2173 void *vp, *tp;
2174 size_t vp_size, tp_size;
2175
2176 if (device->quirks & IS_BIFROST) {
2177 bifrost_vertex.prefix = *vertex_prefix;
2178 bifrost_vertex.postfix = *vertex_postfix;
2179 vp = &bifrost_vertex;
2180 vp_size = sizeof(bifrost_vertex);
2181
2182 bifrost_tiler.prefix = *tiler_prefix;
2183 bifrost_tiler.tiler.primitive_size = *primitive_size;
2184 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
2185 bifrost_tiler.postfix = *tiler_postfix;
2186 tp = &bifrost_tiler;
2187 tp_size = sizeof(bifrost_tiler);
2188 } else {
2189 midgard_vertex.prefix = *vertex_prefix;
2190 midgard_vertex.postfix = *vertex_postfix;
2191 vp = &midgard_vertex;
2192 vp_size = sizeof(midgard_vertex);
2193
2194 midgard_tiler.prefix = *tiler_prefix;
2195 midgard_tiler.postfix = *tiler_postfix;
2196 midgard_tiler.primitive_size = *primitive_size;
2197 tp = &midgard_tiler;
2198 tp_size = sizeof(midgard_tiler);
2199 }
2200
2201 if (wallpapering) {
2202 /* Inject in reverse order, with "predicted" job indices.
2203 * THIS IS A HACK XXX */
2204 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false,
2205 batch->scoreboard.job_index + 2, tp, tp_size, true);
2206 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2207 vp, vp_size, true);
2208 return;
2209 }
2210
2211 	/* If rasterizer discard is enabled, only submit the vertex job */
2212
2213 bool rasterizer_discard = ctx->rasterizer &&
2214 ctx->rasterizer->base.rasterizer_discard;
2215
2216 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2217 vp, vp_size, false);
2218
2219 if (rasterizer_discard)
2220 return;
2221
2222 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2223 false);
2224 }
2225
2226 /* TODO: stop hardcoding this */
2227 mali_ptr
2228 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2229 {
2230 uint16_t locations[] = {
2231 128, 128,
2232 0, 256,
2233 0, 256,
2234 0, 256,
2235 0, 256,
2236 0, 256,
2237 0, 256,
2238 0, 256,
2239 0, 256,
2240 0, 256,
2241 0, 256,
2242 0, 256,
2243 0, 256,
2244 0, 256,
2245 0, 256,
2246 0, 256,
2247 0, 256,
2248 0, 256,
2249 0, 256,
2250 0, 256,
2251 0, 256,
2252 0, 256,
2253 0, 256,
2254 0, 256,
2255 0, 256,
2256 0, 256,
2257 0, 256,
2258 0, 256,
2259 0, 256,
2260 0, 256,
2261 0, 256,
2262 0, 256,
2263 128, 128,
2264 0, 0,
2265 0, 0,
2266 0, 0,
2267 0, 0,
2268 0, 0,
2269 0, 0,
2270 0, 0,
2271 0, 0,
2272 0, 0,
2273 0, 0,
2274 0, 0,
2275 0, 0,
2276 0, 0,
2277 0, 0,
2278 0, 0,
2279 };
2280
2281 return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t));
2282 }