panfrost: Handle per-sample shading
[mesa.git] / src/gallium/drivers/panfrost/pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_prim.h"
27 #include "util/u_vbuf.h"
28
29 #include "panfrost-quirks.h"
30
31 #include "pan_pool.h"
32 #include "pan_bo.h"
33 #include "pan_cmdstream.h"
34 #include "pan_context.h"
35 #include "pan_job.h"
36
37 /* If a BO is accessed for a particular shader stage, will it be in the primary
38 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
39 * fragment will be primary, e.g. compute jobs will be considered
40 * "vertex/tiler" by analogy */
41
42 static inline uint32_t
43 panfrost_bo_access_for_stage(enum pipe_shader_type stage)
44 {
45 assert(stage == PIPE_SHADER_FRAGMENT ||
46 stage == PIPE_SHADER_VERTEX ||
47 stage == PIPE_SHADER_COMPUTE);
48
49 return stage == PIPE_SHADER_FRAGMENT ?
50 PAN_BO_ACCESS_FRAGMENT :
51 PAN_BO_ACCESS_VERTEX_TILER;
52 }
53
54 static void
55 panfrost_vt_emit_shared_memory(struct panfrost_context *ctx,
56 struct mali_vertex_tiler_postfix *postfix)
57 {
58 struct panfrost_device *dev = pan_device(ctx->base.screen);
59 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
60
61 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
62 struct mali_shared_memory shared = {
63 .stack_shift = shift,
64 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
65 .shared_workgroup_count = ~0,
66 };
67 postfix->shared_memory = panfrost_pool_upload(&batch->pool, &shared, sizeof(shared));
68 }
69
70 static void
71 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
72 struct mali_vertex_tiler_postfix *postfix)
73 {
74 struct panfrost_device *dev = pan_device(ctx->base.screen);
75 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
76
77         /* If we haven't already, reserve space for the framebuffer */
78
79 if (!batch->framebuffer.gpu) {
80 unsigned size = (dev->quirks & MIDGARD_SFBD) ?
81 sizeof(struct mali_single_framebuffer) :
82 sizeof(struct mali_framebuffer);
83
84 batch->framebuffer = panfrost_pool_alloc(&batch->pool, size);
85
86 /* Tag the pointer */
87 if (!(dev->quirks & MIDGARD_SFBD))
88 batch->framebuffer.gpu |= MALI_MFBD;
89 }
90
91 postfix->shared_memory = batch->framebuffer.gpu;
92 }
93
94 static void
95 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
96 struct mali_vertex_tiler_prefix *prefix,
97 struct mali_vertex_tiler_postfix *postfix)
98 {
99 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
100
101 postfix->gl_enables |= 0x7;
102 SET_BIT(postfix->gl_enables, MALI_FRONT_CCW_TOP,
103 rasterizer && rasterizer->base.front_ccw);
104 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_FRONT,
105 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
106 SET_BIT(postfix->gl_enables, MALI_CULL_FACE_BACK,
107 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
108 SET_BIT(prefix->unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
109 rasterizer && rasterizer->base.flatshade_first);
110 }
111
112 void
113 panfrost_vt_update_primitive_size(struct panfrost_context *ctx,
114 struct mali_vertex_tiler_prefix *prefix,
115 union midgard_primitive_size *primitive_size)
116 {
117 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
118
119 if (!panfrost_writes_point_size(ctx)) {
120 bool points = prefix->draw_mode == MALI_POINTS;
121 float val = 0.0f;
122
123 if (rasterizer)
124 val = points ?
125 rasterizer->base.point_size :
126 rasterizer->base.line_width;
127
128 primitive_size->constant = val;
129 }
130 }
131
132 static void
133 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
134 struct mali_vertex_tiler_postfix *postfix)
135 {
136 SET_BIT(postfix->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
137 if (ctx->occlusion_query) {
138 postfix->occlusion_counter = ctx->occlusion_query->bo->gpu;
139 panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
140 PAN_BO_ACCESS_SHARED |
141 PAN_BO_ACCESS_RW |
142 PAN_BO_ACCESS_FRAGMENT);
143 } else {
144 postfix->occlusion_counter = 0;
145 }
146 }
147
148 void
149 panfrost_vt_init(struct panfrost_context *ctx,
150 enum pipe_shader_type stage,
151 struct mali_vertex_tiler_prefix *prefix,
152 struct mali_vertex_tiler_postfix *postfix)
153 {
154 struct panfrost_device *device = pan_device(ctx->base.screen);
155
156 if (!ctx->shader[stage])
157 return;
158
159 memset(prefix, 0, sizeof(*prefix));
160 memset(postfix, 0, sizeof(*postfix));
161
162 if (device->quirks & IS_BIFROST) {
163 postfix->gl_enables = 0x2;
164 panfrost_vt_emit_shared_memory(ctx, postfix);
165 } else {
166 postfix->gl_enables = 0x6;
167 panfrost_vt_attach_framebuffer(ctx, postfix);
168 }
169
170 if (stage == PIPE_SHADER_FRAGMENT) {
171 panfrost_vt_update_occlusion_query(ctx, postfix);
172 panfrost_vt_update_rasterizer(ctx, prefix, postfix);
173 }
174 }
175
176 static unsigned
177 panfrost_translate_index_size(unsigned size)
178 {
179 switch (size) {
180 case 1:
181 return MALI_DRAW_INDEXED_UINT8;
182
183 case 2:
184 return MALI_DRAW_INDEXED_UINT16;
185
186 case 4:
187 return MALI_DRAW_INDEXED_UINT32;
188
189 default:
190 unreachable("Invalid index size");
191 }
192 }
193
194 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
195 * good for the duration of the draw (transient), could last longer. Also get
196 * the bounds on the index buffer for the range accessed by the draw. We do
197 * these operations together because there are natural optimizations which
198 * require them to be together. */
199
200 static mali_ptr
201 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
202 const struct pipe_draw_info *info,
203 unsigned *min_index, unsigned *max_index)
204 {
205 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
206 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
207 off_t offset = info->start * info->index_size;
208 bool needs_indices = true;
209 mali_ptr out = 0;
210
211 if (info->max_index != ~0u) {
212 *min_index = info->min_index;
213 *max_index = info->max_index;
214 needs_indices = false;
215 }
216
217 if (!info->has_user_indices) {
218 /* Only resources can be directly mapped */
219 panfrost_batch_add_bo(batch, rsrc->bo,
220 PAN_BO_ACCESS_SHARED |
221 PAN_BO_ACCESS_READ |
222 PAN_BO_ACCESS_VERTEX_TILER);
223 out = rsrc->bo->gpu + offset;
224
225 /* Check the cache */
226 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
227 info->start,
228 info->count,
229 min_index,
230 max_index);
231 } else {
232 /* Otherwise, we need to upload to transient memory */
233 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
234 out = panfrost_pool_upload(&batch->pool, ibuf8 + offset,
235 info->count *
236 info->index_size);
237 }
238
239 if (needs_indices) {
240 /* Fallback */
241 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
242
243 if (!info->has_user_indices)
244 panfrost_minmax_cache_add(rsrc->index_cache,
245 info->start, info->count,
246 *min_index, *max_index);
247 }
248
249 return out;
250 }
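
/* Illustrative example (values invented for clarity, not from a real trace):
 * for an indexed draw where the state tracker does not supply bounds
 * (info->max_index == ~0u), with info->start = 0, info->count = 3 and a
 * 16-bit index buffer containing { 5, 7, 6 }, this returns *min_index = 5
 * and *max_index = 7. Resource-backed index buffers cache that result in
 * rsrc->index_cache, so repeating the draw over the same range skips the CPU
 * scan; user index buffers always take the u_vbuf_get_minmax_index fallback
 * and are never cached. */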
251
252 void
253 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
254 const struct pipe_draw_info *info,
255 enum mali_draw_mode draw_mode,
256 struct mali_vertex_tiler_postfix *vertex_postfix,
257 struct mali_vertex_tiler_prefix *tiler_prefix,
258 struct mali_vertex_tiler_postfix *tiler_postfix,
259 unsigned *vertex_count,
260 unsigned *padded_count)
261 {
262 tiler_prefix->draw_mode = draw_mode;
263
264 unsigned draw_flags = 0;
265
266 if (panfrost_writes_point_size(ctx))
267 draw_flags |= MALI_DRAW_VARYING_SIZE;
268
269 if (info->primitive_restart)
270 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
271
272         /* These don't make much sense */
273
274 draw_flags |= 0x3000;
275
276 if (info->index_size) {
277 unsigned min_index = 0, max_index = 0;
278
279 tiler_prefix->indices = panfrost_get_index_buffer_bounded(ctx,
280 info,
281 &min_index,
282 &max_index);
283
284 /* Use the corresponding values */
285 *vertex_count = max_index - min_index + 1;
286 tiler_postfix->offset_start = vertex_postfix->offset_start = min_index + info->index_bias;
287 tiler_prefix->offset_bias_correction = -min_index;
288 tiler_prefix->index_count = MALI_POSITIVE(info->count);
289 draw_flags |= panfrost_translate_index_size(info->index_size);
290 } else {
291 tiler_prefix->indices = 0;
292 *vertex_count = ctx->vertex_count;
293 tiler_postfix->offset_start = vertex_postfix->offset_start = info->start;
294 tiler_prefix->offset_bias_correction = 0;
295 tiler_prefix->index_count = MALI_POSITIVE(ctx->vertex_count);
296 }
297
298 tiler_prefix->unknown_draw = draw_flags;
299
300 /* Encode the padded vertex count */
301
302 if (info->instance_count > 1) {
303 *padded_count = panfrost_padded_vertex_count(*vertex_count);
304
305 unsigned shift = __builtin_ctz(ctx->padded_count);
306 unsigned k = ctx->padded_count >> (shift + 1);
307
308 tiler_postfix->instance_shift = vertex_postfix->instance_shift = shift;
309 tiler_postfix->instance_odd = vertex_postfix->instance_odd = k;
310 } else {
311 *padded_count = *vertex_count;
312
313 /* Reset instancing state */
314 tiler_postfix->instance_shift = vertex_postfix->instance_shift = 0;
315 tiler_postfix->instance_odd = vertex_postfix->instance_odd = 0;
316 }
317 }
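
/* Rough worked example of the fields set above (illustrative values):
 * with the { 5, 7, 6 } index buffer from before and info->index_bias = 100,
 * we get *vertex_count = 7 - 5 + 1 = 3, offset_start = 5 + 100 = 105 and
 * offset_bias_correction = -5. For instancing, the padded count is
 * decomposed as padded = (2 * odd + 1) << shift; e.g. a padded count of 12
 * gives shift = ctz(12) = 2 and odd = 12 >> 3 = 1, since (2 * 1 + 1) << 2
 * = 12. */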
318
319 static void
320 panfrost_shader_meta_init(struct panfrost_context *ctx,
321 enum pipe_shader_type st,
322 struct mali_shader_meta *meta)
323 {
324 const struct panfrost_device *dev = pan_device(ctx->base.screen);
325 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
326
327 memset(meta, 0, sizeof(*meta));
328 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
329 meta->attribute_count = ss->attribute_count;
330 meta->varying_count = ss->varying_count;
331 meta->texture_count = ctx->sampler_view_count[st];
332 meta->sampler_count = ctx->sampler_count[st];
333
334 if (dev->quirks & IS_BIFROST) {
335 if (st == PIPE_SHADER_VERTEX)
336 meta->bifrost1.unk1 = 0x800000;
337 else {
338 /* First clause ATEST |= 0x4000000.
339 * Less than 32 regs |= 0x200 */
340 meta->bifrost1.unk1 = 0x950020;
341 }
342
343 meta->bifrost1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
344 if (st == PIPE_SHADER_VERTEX)
345 meta->bifrost2.preload_regs = 0xC0;
346 else {
347 meta->bifrost2.preload_regs = 0x1;
348 SET_BIT(meta->bifrost2.preload_regs, 0x10, ss->reads_frag_coord);
349 }
350
351 meta->bifrost2.uniform_count = MIN2(ss->uniform_count,
352 ss->uniform_cutoff);
353 } else {
354 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
355 ss->uniform_cutoff);
356 meta->midgard1.work_count = ss->work_reg_count;
357
358 /* TODO: This is not conformant on ES3 */
359 meta->midgard1.flags_hi = MALI_SUPPRESS_INF_NAN;
360
361 meta->midgard1.flags_lo = 0x20;
362 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
363
364 SET_BIT(meta->midgard1.flags_hi, MALI_WRITES_GLOBAL, ss->writes_global);
365 }
366 }
367
368 static unsigned
369 panfrost_translate_compare_func(enum pipe_compare_func in)
370 {
371 switch (in) {
372 case PIPE_FUNC_NEVER:
373 return MALI_FUNC_NEVER;
374
375 case PIPE_FUNC_LESS:
376 return MALI_FUNC_LESS;
377
378 case PIPE_FUNC_EQUAL:
379 return MALI_FUNC_EQUAL;
380
381 case PIPE_FUNC_LEQUAL:
382 return MALI_FUNC_LEQUAL;
383
384 case PIPE_FUNC_GREATER:
385 return MALI_FUNC_GREATER;
386
387 case PIPE_FUNC_NOTEQUAL:
388 return MALI_FUNC_NOTEQUAL;
389
390 case PIPE_FUNC_GEQUAL:
391 return MALI_FUNC_GEQUAL;
392
393 case PIPE_FUNC_ALWAYS:
394 return MALI_FUNC_ALWAYS;
395
396 default:
397 unreachable("Invalid func");
398 }
399 }
400
401 static unsigned
402 panfrost_translate_stencil_op(enum pipe_stencil_op in)
403 {
404 switch (in) {
405 case PIPE_STENCIL_OP_KEEP:
406 return MALI_STENCIL_KEEP;
407
408 case PIPE_STENCIL_OP_ZERO:
409 return MALI_STENCIL_ZERO;
410
411 case PIPE_STENCIL_OP_REPLACE:
412 return MALI_STENCIL_REPLACE;
413
414 case PIPE_STENCIL_OP_INCR:
415 return MALI_STENCIL_INCR;
416
417 case PIPE_STENCIL_OP_DECR:
418 return MALI_STENCIL_DECR;
419
420 case PIPE_STENCIL_OP_INCR_WRAP:
421 return MALI_STENCIL_INCR_WRAP;
422
423 case PIPE_STENCIL_OP_DECR_WRAP:
424 return MALI_STENCIL_DECR_WRAP;
425
426 case PIPE_STENCIL_OP_INVERT:
427 return MALI_STENCIL_INVERT;
428
429 default:
430 unreachable("Invalid stencil op");
431 }
432 }
433
434 static unsigned
435 translate_tex_wrap(enum pipe_tex_wrap w)
436 {
437 switch (w) {
438 case PIPE_TEX_WRAP_REPEAT:
439 return MALI_WRAP_REPEAT;
440
441 case PIPE_TEX_WRAP_CLAMP:
442 return MALI_WRAP_CLAMP;
443
444 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
445 return MALI_WRAP_CLAMP_TO_EDGE;
446
447 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
448 return MALI_WRAP_CLAMP_TO_BORDER;
449
450 case PIPE_TEX_WRAP_MIRROR_REPEAT:
451 return MALI_WRAP_MIRRORED_REPEAT;
452
453 case PIPE_TEX_WRAP_MIRROR_CLAMP:
454 return MALI_WRAP_MIRRORED_CLAMP;
455
456 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
457 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
458
459 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
460 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
461
462 default:
463 unreachable("Invalid wrap");
464 }
465 }
466
467 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
468 struct mali_sampler_descriptor *hw)
469 {
470 unsigned func = panfrost_translate_compare_func(cso->compare_func);
471 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
472 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
473 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
474 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
475 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
476 unsigned mip_filter = mip_linear ?
477 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
478 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
479
480 *hw = (struct mali_sampler_descriptor) {
481 .filter_mode = min_filter | mag_filter | mip_filter |
482 normalized,
483 .wrap_s = translate_tex_wrap(cso->wrap_s),
484 .wrap_t = translate_tex_wrap(cso->wrap_t),
485 .wrap_r = translate_tex_wrap(cso->wrap_r),
486 .compare_func = panfrost_flip_compare_func(func),
487 .border_color = {
488 cso->border_color.f[0],
489 cso->border_color.f[1],
490 cso->border_color.f[2],
491 cso->border_color.f[3]
492 },
493 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
494 .max_lod = FIXED_16(cso->max_lod, false),
495 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
496 .seamless_cube_map = cso->seamless_cube_map,
497 };
498
499 /* If necessary, we disable mipmapping in the sampler descriptor by
500 * clamping the LOD as tight as possible (from 0 to epsilon,
501 * essentially -- remember these are fixed point numbers, so
502 * epsilon=1/256) */
503
504 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
505 hw->max_lod = hw->min_lod + 1;
506 }
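
/* Worked example of the LOD clamp above, assuming FIXED_16 packs the LOD in
 * unsigned 8.8 fixed point (consistent with the epsilon = 1/256 note): a
 * sampler with min_lod = 0.0 and mipmapping disabled ends up with
 * min_lod = 0x0000 and max_lod = 0x0001, i.e. a [0, 1/256] range that pins
 * sampling to the base level. */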
507
508 void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
509 struct bifrost_sampler_descriptor *hw)
510 {
511 *hw = (struct bifrost_sampler_descriptor) {
512 .unk1 = 0x1,
513 .wrap_s = translate_tex_wrap(cso->wrap_s),
514 .wrap_t = translate_tex_wrap(cso->wrap_t),
515 .wrap_r = translate_tex_wrap(cso->wrap_r),
516 .unk8 = 0x8,
517 .min_filter = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST,
518 .norm_coords = cso->normalized_coords,
519 .mip_filter = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR,
520 .mag_filter = cso->mag_img_filter == PIPE_TEX_FILTER_LINEAR,
521 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
522 .max_lod = FIXED_16(cso->max_lod, false),
523 };
524
525 /* If necessary, we disable mipmapping in the sampler descriptor by
526 * clamping the LOD as tight as possible (from 0 to epsilon,
527 * essentially -- remember these are fixed point numbers, so
528 * epsilon=1/256) */
529
530 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
531 hw->max_lod = hw->min_lod + 1;
532 }
533
534 static void
535 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
536 struct mali_stencil_test *out)
537 {
538 out->ref = 0; /* Gallium gets it from elsewhere */
539
540 out->mask = in->valuemask;
541 out->func = panfrost_translate_compare_func(in->func);
542 out->sfail = panfrost_translate_stencil_op(in->fail_op);
543 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
544 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
545 }
546
547 static void
548 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
549 struct mali_shader_meta *fragmeta)
550 {
551 if (!ctx->rasterizer) {
552 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
553 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
554 fragmeta->depth_units = 0.0f;
555 fragmeta->depth_factor = 0.0f;
556 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
557 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
558 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, true);
559 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, true);
560 return;
561 }
562
563 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
564
565 bool msaa = rast->multisample;
566
567 /* TODO: Sample size */
568 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
569 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
570
571 SET_BIT(fragmeta->unknown2_3, MALI_PER_SAMPLE,
572 msaa && ctx->min_samples > 1);
573
574 fragmeta->depth_units = rast->offset_units * 2.0f;
575 fragmeta->depth_factor = rast->offset_scale;
576
577         /* XXX: Which bit is which? Does this maybe allow offsetting not-tri? */
578
579 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, rast->offset_tri);
580 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, rast->offset_tri);
581
582 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_NEAR, rast->depth_clip_near);
583 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_CLIP_FAR, rast->depth_clip_far);
584 }
585
586 static void
587 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
588 struct mali_shader_meta *fragmeta)
589 {
590 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
591 int zfunc = PIPE_FUNC_ALWAYS;
592
593 if (!zsa) {
594 struct pipe_stencil_state default_stencil = {
595 .enabled = 0,
596 .func = PIPE_FUNC_ALWAYS,
597 .fail_op = MALI_STENCIL_KEEP,
598 .zfail_op = MALI_STENCIL_KEEP,
599 .zpass_op = MALI_STENCIL_KEEP,
600 .writemask = 0xFF,
601 .valuemask = 0xFF
602 };
603
604 panfrost_make_stencil_state(&default_stencil,
605 &fragmeta->stencil_front);
606 fragmeta->stencil_mask_front = default_stencil.writemask;
607 fragmeta->stencil_back = fragmeta->stencil_front;
608 fragmeta->stencil_mask_back = default_stencil.writemask;
609 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
610 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
611 } else {
612 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
613 zsa->stencil[0].enabled);
614 panfrost_make_stencil_state(&zsa->stencil[0],
615 &fragmeta->stencil_front);
616 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
617 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
618
619 /* If back-stencil is not enabled, use the front values */
620
621 if (zsa->stencil[1].enabled) {
622 panfrost_make_stencil_state(&zsa->stencil[1],
623 &fragmeta->stencil_back);
624 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
625 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
626 } else {
627 fragmeta->stencil_back = fragmeta->stencil_front;
628 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
629 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
630 }
631
632 if (zsa->depth.enabled)
633 zfunc = zsa->depth.func;
634
635 /* Depth state (TODO: Refactor) */
636
637 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
638 zsa->depth.writemask);
639 }
640
641 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
642 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
643 }
644
645 static bool
646 panfrost_fs_required(
647 struct panfrost_shader_state *fs,
648 struct panfrost_blend_final *blend,
649 unsigned rt_count)
650 {
651 /* If we generally have side effects */
652 if (fs->fs_sidefx)
653 return true;
654
655 /* If colour is written we need to execute */
656 for (unsigned i = 0; i < rt_count; ++i) {
657 if (!blend[i].no_colour)
658 return true;
659 }
660
661 /* If depth is written and not implied we need to execute.
662 * TODO: Predicate on Z/S writes being enabled */
663 return (fs->writes_depth || fs->writes_stencil);
664 }
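
/* Example of when this lets the fragment shader be skipped entirely: a
 * depth-only pass (e.g. shadow mapping) where every render target has
 * no_colour set, the shader has no side effects and does not write Z/S.
 * panfrost_fs_required() then returns false, and on Midgard hardware with
 * MIDGARD_SHADERLESS the setup in panfrost_frag_meta_blend_update() below
 * nulls out the shader pointer and most of the associated counts. */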
665
666 static void
667 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
668 struct mali_shader_meta *fragmeta,
669 void *rts)
670 {
671 const struct panfrost_device *dev = pan_device(ctx->base.screen);
672 struct panfrost_shader_state *fs;
673 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
674
675 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
676 (dev->quirks & MIDGARD_SFBD) && ctx->blend &&
677 !ctx->blend->base.dither);
678
679 SET_BIT(fragmeta->unknown2_4, MALI_ALPHA_TO_COVERAGE,
680 ctx->blend->base.alpha_to_coverage);
681
682 /* Get blending setup */
683 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
684
685 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
686 unsigned shader_offset = 0;
687 struct panfrost_bo *shader_bo = NULL;
688
689 for (unsigned c = 0; c < rt_count; ++c)
690 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
691 &shader_offset);
692
693 /* Disable shader execution if we can */
694 if (dev->quirks & MIDGARD_SHADERLESS
695 && !panfrost_fs_required(fs, blend, rt_count)) {
696 fragmeta->shader = 0;
697 fragmeta->attribute_count = 0;
698 fragmeta->varying_count = 0;
699 fragmeta->texture_count = 0;
700 fragmeta->sampler_count = 0;
701
702 /* This feature is not known to work on Bifrost */
703 fragmeta->midgard1.work_count = 1;
704 fragmeta->midgard1.uniform_count = 0;
705 fragmeta->midgard1.uniform_buffer_count = 0;
706 }
707
708 /* If there is a blend shader, work registers are shared. We impose 8
709 * work registers as a limit for blend shaders. Should be lower XXX */
710
711 if (!(dev->quirks & IS_BIFROST)) {
712 for (unsigned c = 0; c < rt_count; ++c) {
713 if (blend[c].is_shader) {
714 fragmeta->midgard1.work_count =
715 MAX2(fragmeta->midgard1.work_count, 8);
716 }
717 }
718 }
719
720 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
721 * copied to the blend_meta appended (by convention), but this is the
722 * field actually read by the hardware. (Or maybe both are read...?).
723 * Specify the last RTi with a blend shader. */
724
725 fragmeta->blend.shader = 0;
726
727 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
728 if (!blend[rt].is_shader)
729 continue;
730
731 fragmeta->blend.shader = blend[rt].shader.gpu |
732 blend[rt].shader.first_tag;
733 break;
734 }
735
736 if (dev->quirks & MIDGARD_SFBD) {
737                 /* On platforms with only a single render target, the blend
738                  * information is inside the shader meta itself. We additionally
739 * need to signal CAN_DISCARD for nontrivial blend modes (so
740 * we're able to read back the destination buffer) */
741
742 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
743 blend[0].is_shader);
744
745 if (!blend[0].is_shader) {
746 fragmeta->blend.equation = *blend[0].equation.equation;
747 fragmeta->blend.constant = blend[0].equation.constant;
748 }
749
750 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
751 !blend[0].no_blending || fs->can_discard);
752 return;
753 }
754
755 if (dev->quirks & IS_BIFROST) {
756 bool no_blend = true;
757
758 for (unsigned i = 0; i < rt_count; ++i)
759 no_blend &= (blend[i].no_blending | blend[i].no_colour);
760
761 SET_BIT(fragmeta->bifrost1.unk1, MALI_BIFROST_EARLY_Z,
762 !fs->can_discard && !fs->writes_depth && no_blend);
763 }
764
765 /* Additional blend descriptor tacked on for jobs using MFBD */
766
767 for (unsigned i = 0; i < rt_count; ++i) {
768 unsigned flags = 0;
769
770 if (ctx->pipe_framebuffer.nr_cbufs > i && !blend[i].no_colour) {
771 flags = 0x200;
772
773 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
774 (ctx->pipe_framebuffer.cbufs[i]) &&
775 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
776
777 SET_BIT(flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
778 SET_BIT(flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
779 SET_BIT(flags, MALI_BLEND_SRGB, is_srgb);
780 SET_BIT(flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
781 }
782
783 if (dev->quirks & IS_BIFROST) {
784 struct bifrost_blend_rt *brts = rts;
785
786 brts[i].flags = flags;
787
788 if (blend[i].is_shader) {
789 /* The blend shader's address needs to be at
790 * the same top 32 bit as the fragment shader.
791 * TODO: Ensure that's always the case.
792 */
793 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
794 (fs->bo->gpu & (0xffffffffull << 32)));
795 brts[i].shader = blend[i].shader.gpu;
796 brts[i].unk2 = 0x0;
797 } else if (ctx->pipe_framebuffer.nr_cbufs > i) {
798 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
799 const struct util_format_description *format_desc;
800 format_desc = util_format_description(format);
801
802 brts[i].equation = *blend[i].equation.equation;
803
804 /* TODO: this is a bit more complicated */
805 brts[i].constant = blend[i].equation.constant;
806
807 brts[i].format = panfrost_format_to_bifrost_blend(format_desc);
808
809 /* 0x19 disables blending and forces REPLACE
810 * mode (equivalent to rgb_mode = alpha_mode =
811 * x122, colour mask = 0xF). 0x1a allows
812 * blending. */
813 brts[i].unk2 = blend[i].no_blending ? 0x19 : 0x1a;
814
815 brts[i].shader_type = fs->blend_types[i];
816 } else {
817 /* Dummy attachment for depth-only */
818 brts[i].unk2 = 0x3;
819 brts[i].shader_type = fs->blend_types[i];
820 }
821 } else {
822 struct midgard_blend_rt *mrts = rts;
823 mrts[i].flags = flags;
824
825 if (blend[i].is_shader) {
826 mrts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
827 } else {
828 mrts[i].blend.equation = *blend[i].equation.equation;
829 mrts[i].blend.constant = blend[i].equation.constant;
830 }
831 }
832 }
833 }
834
835 static void
836 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
837 struct mali_shader_meta *fragmeta,
838 void *rts)
839 {
840 const struct panfrost_device *dev = pan_device(ctx->base.screen);
841 struct panfrost_shader_state *fs;
842
843 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
844
845 bool msaa = ctx->rasterizer && ctx->rasterizer->base.multisample;
846 fragmeta->coverage_mask = (msaa ? ctx->sample_mask : ~0) & 0xF;
847
848 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x10;
849 fragmeta->unknown2_4 = 0x4e0;
850
851 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
852 * is required (independent of 32-bit/64-bit descriptors), or why it's
853 * not used on later GPU revisions. Otherwise, all shader jobs fault on
854 * these earlier chips (perhaps this is a chicken bit of some kind).
855 * More investigation is needed. */
856
857 SET_BIT(fragmeta->unknown2_4, 0x10, dev->quirks & MIDGARD_SFBD);
858
859 if (dev->quirks & IS_BIFROST) {
860 /* TODO */
861 } else {
862                 /* When it is legal to do so in the given shader, we try to
863                  * enable early-z testing. TODO: respect e-z force */
864
865 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
866 !fs->can_discard && !fs->writes_global &&
867 !fs->writes_depth && !fs->writes_stencil &&
868 !ctx->blend->base.alpha_to_coverage);
869
870 /* Add the writes Z/S flags if needed. */
871 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
872 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
873
874 /* Any time texturing is used, derivatives are implicitly calculated,
875 * so we need to enable helper invocations */
876
877 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
878 fs->helper_invocations);
879
880 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
881
882 bool depth_enabled = fs->writes_depth ||
883 (zsa && zsa->depth.enabled && zsa->depth.func != PIPE_FUNC_ALWAYS);
884
885 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_TILEBUFFER,
886 fs->outputs_read || (!depth_enabled && fs->can_discard));
887 SET_BIT(fragmeta->midgard1.flags_lo, MALI_READS_ZS, depth_enabled && fs->can_discard);
888 }
889
890 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
891 panfrost_frag_meta_zsa_update(ctx, fragmeta);
892 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
893 }
894
895 void
896 panfrost_emit_shader_meta(struct panfrost_batch *batch,
897 enum pipe_shader_type st,
898 struct mali_vertex_tiler_postfix *postfix)
899 {
900 struct panfrost_context *ctx = batch->ctx;
901 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
902
903 if (!ss) {
904 postfix->shader = 0;
905 return;
906 }
907
908 struct mali_shader_meta meta;
909
910 panfrost_shader_meta_init(ctx, st, &meta);
911
912 /* Add the shader BO to the batch. */
913 panfrost_batch_add_bo(batch, ss->bo,
914 PAN_BO_ACCESS_PRIVATE |
915 PAN_BO_ACCESS_READ |
916 panfrost_bo_access_for_stage(st));
917
918 mali_ptr shader_ptr;
919
920 if (st == PIPE_SHADER_FRAGMENT) {
921 struct panfrost_device *dev = pan_device(ctx->base.screen);
922 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
923 size_t desc_size = sizeof(meta);
924 void *rts = NULL;
925 struct panfrost_transfer xfer;
926 unsigned rt_size;
927
928 if (dev->quirks & MIDGARD_SFBD)
929 rt_size = 0;
930 else if (dev->quirks & IS_BIFROST)
931 rt_size = sizeof(struct bifrost_blend_rt);
932 else
933 rt_size = sizeof(struct midgard_blend_rt);
934
935 desc_size += rt_size * rt_count;
936
937 if (rt_size)
938 rts = rzalloc_size(ctx, rt_size * rt_count);
939
940 panfrost_frag_shader_meta_init(ctx, &meta, rts);
941
942 xfer = panfrost_pool_alloc(&batch->pool, desc_size);
943
944 memcpy(xfer.cpu, &meta, sizeof(meta));
945 memcpy(xfer.cpu + sizeof(meta), rts, rt_size * rt_count);
946
947 if (rt_size)
948 ralloc_free(rts);
949
950 shader_ptr = xfer.gpu;
951 } else {
952 shader_ptr = panfrost_pool_upload(&batch->pool, &meta,
953 sizeof(meta));
954 }
955
956 postfix->shader = shader_ptr;
957 }
958
959 static void
960 panfrost_mali_viewport_init(struct panfrost_context *ctx,
961 struct mali_viewport *mvp)
962 {
963 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
964
965 /* Clip bounds are encoded as floats. The viewport itself is encoded as
966 * (somewhat) asymmetric ints. */
967
968 const struct pipe_scissor_state *ss = &ctx->scissor;
969
970 memset(mvp, 0, sizeof(*mvp));
971
972 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
973 * each direction. Clipping to the viewport in theory should work, but
974 * in practice causes issues when we're not explicitly trying to
975 * scissor */
976
977 *mvp = (struct mali_viewport) {
978 .clip_minx = -INFINITY,
979 .clip_miny = -INFINITY,
980 .clip_maxx = INFINITY,
981 .clip_maxy = INFINITY,
982 };
983
984 /* Always scissor to the viewport by default. */
985 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
986 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
987
988 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
989 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
990
991 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
992 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
993
994 /* Apply the scissor test */
995
996 unsigned minx, miny, maxx, maxy;
997
998 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
999 minx = MAX2(ss->minx, vp_minx);
1000 miny = MAX2(ss->miny, vp_miny);
1001 maxx = MIN2(ss->maxx, vp_maxx);
1002 maxy = MIN2(ss->maxy, vp_maxy);
1003 } else {
1004 minx = vp_minx;
1005 miny = vp_miny;
1006 maxx = vp_maxx;
1007 maxy = vp_maxy;
1008 }
1009
1010 /* Hardware needs the min/max to be strictly ordered, so flip if we
1011 * need to. The viewport transformation in the vertex shader will
1012 * handle the negatives if we don't */
1013
1014 if (miny > maxy) {
1015 unsigned temp = miny;
1016 miny = maxy;
1017 maxy = temp;
1018 }
1019
1020 if (minx > maxx) {
1021 unsigned temp = minx;
1022 minx = maxx;
1023 maxx = temp;
1024 }
1025
1026 if (minz > maxz) {
1027 float temp = minz;
1028 minz = maxz;
1029 maxz = temp;
1030 }
1031
1032 /* Clamp to the framebuffer size as a last check */
1033
1034 minx = MIN2(ctx->pipe_framebuffer.width, minx);
1035 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1036
1037 miny = MIN2(ctx->pipe_framebuffer.height, miny);
1038 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1039
1040 /* Upload */
1041
1042 mvp->viewport0[0] = minx;
1043 mvp->viewport1[0] = MALI_POSITIVE(maxx);
1044
1045 mvp->viewport0[1] = miny;
1046 mvp->viewport1[1] = MALI_POSITIVE(maxy);
1047
1048 bool clip_near = true;
1049 bool clip_far = true;
1050
1051 if (ctx->rasterizer) {
1052 clip_near = ctx->rasterizer->base.depth_clip_near;
1053 clip_far = ctx->rasterizer->base.depth_clip_far;
1054 }
1055
1056 mvp->clip_minz = clip_near ? minz : -INFINITY;
1057 mvp->clip_maxz = clip_far ? maxz : INFINITY;
1058 }
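
/* Rough example, assuming MALI_POSITIVE() is the usual minus-one encoding:
 * a 1920x1080 framebuffer with a default viewport and no scissor yields
 * viewport0 = { 0, 0 } and viewport1 = { 1919, 1079 } (inclusive maxima),
 * with clip_minz/clip_maxz spanning the depth range (e.g. [0, 1]) when
 * depth clipping is enabled. */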
1059
1060 void
1061 panfrost_emit_viewport(struct panfrost_batch *batch,
1062 struct mali_vertex_tiler_postfix *tiler_postfix)
1063 {
1064 struct panfrost_context *ctx = batch->ctx;
1065 struct mali_viewport mvp;
1066
1067 panfrost_mali_viewport_init(batch->ctx, &mvp);
1068
1069 /* Update the job, unless we're doing wallpapering (whose lack of
1070 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1071 * just... be faster :) */
1072
1073 if (!ctx->wallpaper_batch)
1074 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
1075 mvp.viewport0[1],
1076 mvp.viewport1[0] + 1,
1077 mvp.viewport1[1] + 1);
1078
1079 tiler_postfix->viewport = panfrost_pool_upload(&batch->pool, &mvp,
1080 sizeof(mvp));
1081 }
1082
1083 static mali_ptr
1084 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
1085 enum pipe_shader_type st,
1086 struct panfrost_constant_buffer *buf,
1087 unsigned index)
1088 {
1089 struct pipe_constant_buffer *cb = &buf->cb[index];
1090 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1091
1092 if (rsrc) {
1093 panfrost_batch_add_bo(batch, rsrc->bo,
1094 PAN_BO_ACCESS_SHARED |
1095 PAN_BO_ACCESS_READ |
1096 panfrost_bo_access_for_stage(st));
1097
1098                 /* Alignment guaranteed by
1099 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
1100 return rsrc->bo->gpu + cb->buffer_offset;
1101 } else if (cb->user_buffer) {
1102 return panfrost_pool_upload(&batch->pool,
1103 cb->user_buffer +
1104 cb->buffer_offset,
1105 cb->buffer_size);
1106 } else {
1107 unreachable("No constant buffer");
1108 }
1109 }
1110
1111 struct sysval_uniform {
1112 union {
1113 float f[4];
1114 int32_t i[4];
1115 uint32_t u[4];
1116 uint64_t du[2];
1117 };
1118 };
1119
1120 static void
1121 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
1122 struct sysval_uniform *uniform)
1123 {
1124 struct panfrost_context *ctx = batch->ctx;
1125 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1126
1127 uniform->f[0] = vp->scale[0];
1128 uniform->f[1] = vp->scale[1];
1129 uniform->f[2] = vp->scale[2];
1130 }
1131
1132 static void
1133 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
1134 struct sysval_uniform *uniform)
1135 {
1136 struct panfrost_context *ctx = batch->ctx;
1137 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1138
1139 uniform->f[0] = vp->translate[0];
1140 uniform->f[1] = vp->translate[1];
1141 uniform->f[2] = vp->translate[2];
1142 }
1143
1144 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
1145 enum pipe_shader_type st,
1146 unsigned int sysvalid,
1147 struct sysval_uniform *uniform)
1148 {
1149 struct panfrost_context *ctx = batch->ctx;
1150 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
1151 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
1152 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
1153 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
1154
1155 assert(dim);
1156 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
1157
1158 if (dim > 1)
1159 uniform->i[1] = u_minify(tex->texture->height0,
1160 tex->u.tex.first_level);
1161
1162 if (dim > 2)
1163 uniform->i[2] = u_minify(tex->texture->depth0,
1164 tex->u.tex.first_level);
1165
1166 if (is_array)
1167 uniform->i[dim] = tex->texture->array_size;
1168 }
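
/* Example of the resulting texture-size sysval layout: a 256x128 2D array
 * texture with 4 layers sampled at first_level = 0 gives
 * uniform->i[] = { 256, 128, 4 } (dim = 2, array size in slot i[dim]);
 * sampling at first_level = 1 would instead give { 128, 64, 4 }, since only
 * the width/height are minified. */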
1169
1170 static void
1171 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
1172 enum pipe_shader_type st,
1173 unsigned ssbo_id,
1174 struct sysval_uniform *uniform)
1175 {
1176 struct panfrost_context *ctx = batch->ctx;
1177
1178 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
1179 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
1180
1181 /* Compute address */
1182 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
1183
1184 panfrost_batch_add_bo(batch, bo,
1185 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
1186 panfrost_bo_access_for_stage(st));
1187
1188 /* Upload address and size as sysval */
1189 uniform->du[0] = bo->gpu + sb.buffer_offset;
1190 uniform->u[2] = sb.buffer_size;
1191 }
1192
1193 static void
1194 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
1195 enum pipe_shader_type st,
1196 unsigned samp_idx,
1197 struct sysval_uniform *uniform)
1198 {
1199 struct panfrost_context *ctx = batch->ctx;
1200 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
1201
1202 uniform->f[0] = sampl->min_lod;
1203 uniform->f[1] = sampl->max_lod;
1204 uniform->f[2] = sampl->lod_bias;
1205
1206 /* Even without any errata, Midgard represents "no mipmapping" as
1207 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
1208 * panfrost_create_sampler_state which also explains our choice of
1209 * epsilon value (again to keep behaviour consistent) */
1210
1211 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1212 uniform->f[1] = uniform->f[0] + (1.0/256.0);
1213 }
1214
1215 static void
1216 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
1217 struct sysval_uniform *uniform)
1218 {
1219 struct panfrost_context *ctx = batch->ctx;
1220
1221 uniform->u[0] = ctx->compute_grid->grid[0];
1222 uniform->u[1] = ctx->compute_grid->grid[1];
1223 uniform->u[2] = ctx->compute_grid->grid[2];
1224 }
1225
1226 static void
1227 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
1228 struct panfrost_shader_state *ss,
1229 enum pipe_shader_type st)
1230 {
1231 struct sysval_uniform *uniforms = (void *)buf;
1232
1233 for (unsigned i = 0; i < ss->sysval_count; ++i) {
1234 int sysval = ss->sysval[i];
1235
1236 switch (PAN_SYSVAL_TYPE(sysval)) {
1237 case PAN_SYSVAL_VIEWPORT_SCALE:
1238 panfrost_upload_viewport_scale_sysval(batch,
1239 &uniforms[i]);
1240 break;
1241 case PAN_SYSVAL_VIEWPORT_OFFSET:
1242 panfrost_upload_viewport_offset_sysval(batch,
1243 &uniforms[i]);
1244 break;
1245 case PAN_SYSVAL_TEXTURE_SIZE:
1246 panfrost_upload_txs_sysval(batch, st,
1247 PAN_SYSVAL_ID(sysval),
1248 &uniforms[i]);
1249 break;
1250 case PAN_SYSVAL_SSBO:
1251 panfrost_upload_ssbo_sysval(batch, st,
1252 PAN_SYSVAL_ID(sysval),
1253 &uniforms[i]);
1254 break;
1255 case PAN_SYSVAL_NUM_WORK_GROUPS:
1256 panfrost_upload_num_work_groups_sysval(batch,
1257 &uniforms[i]);
1258 break;
1259 case PAN_SYSVAL_SAMPLER:
1260 panfrost_upload_sampler_sysval(batch, st,
1261 PAN_SYSVAL_ID(sysval),
1262 &uniforms[i]);
1263 break;
1264 default:
1265 assert(0);
1266 }
1267 }
1268 }
1269
1270 static const void *
1271 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1272 unsigned index)
1273 {
1274 struct pipe_constant_buffer *cb = &buf->cb[index];
1275 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1276
1277 if (rsrc)
1278 return rsrc->bo->cpu;
1279 else if (cb->user_buffer)
1280 return cb->user_buffer;
1281 else
1282 unreachable("No constant buffer");
1283 }
1284
1285 void
1286 panfrost_emit_const_buf(struct panfrost_batch *batch,
1287 enum pipe_shader_type stage,
1288 struct mali_vertex_tiler_postfix *postfix)
1289 {
1290 struct panfrost_context *ctx = batch->ctx;
1291 struct panfrost_shader_variants *all = ctx->shader[stage];
1292
1293 if (!all)
1294 return;
1295
1296 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1297
1298 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1299
1300 /* Uniforms are implicitly UBO #0 */
1301 bool has_uniforms = buf->enabled_mask & (1 << 0);
1302
1303 /* Allocate room for the sysval and the uniforms */
1304 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1305 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1306 size_t size = sys_size + uniform_size;
1307 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1308 size);
1309
1310 /* Upload sysvals requested by the shader */
1311 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1312
1313 /* Upload uniforms */
1314 if (has_uniforms && uniform_size) {
1315 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1316 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1317 }
1318
1319 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1320 * uploaded */
1321
1322 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1323 assert(ubo_count >= 1);
1324
1325 size_t sz = sizeof(uint64_t) * ubo_count;
1326 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1327 int uniform_count = ss->uniform_count;
1328
1329 /* Upload uniforms as a UBO */
1330 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
1331
1332 /* The rest are honest-to-goodness UBOs */
1333
1334 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1335 size_t usz = buf->cb[ubo].buffer_size;
1336 bool enabled = buf->enabled_mask & (1 << ubo);
1337 bool empty = usz == 0;
1338
1339 if (!enabled || empty) {
1340 /* Stub out disabled UBOs to catch accesses */
1341 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1342 continue;
1343 }
1344
1345 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1346 buf, ubo);
1347
1348 unsigned bytes_per_field = 16;
1349 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1350 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1351 }
1352
1353 mali_ptr ubufs = panfrost_pool_upload(&batch->pool, ubos, sz);
1354 postfix->uniforms = transfer.gpu;
1355 postfix->uniform_buffers = ubufs;
1356
1357 buf->dirty_mask = 0;
1358 }
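
/* Sketch of the UBO descriptor encoding used above (sizes are expressed as a
 * count of 16-byte fields): a 208-byte UBO aligns to 208 bytes and becomes
 * MALI_MAKE_UBO(208 / 16 = 13, gpu), while a disabled or empty slot is
 * stubbed as MALI_MAKE_UBO(0, 0xDEAD0000) so stray accesses are easy to
 * spot. UBO #0 always points at the sysval + uniform upload made earlier in
 * this function. */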
1359
1360 void
1361 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1362 const struct pipe_grid_info *info,
1363 struct midgard_payload_vertex_tiler *vtp)
1364 {
1365 struct panfrost_context *ctx = batch->ctx;
1366 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1367 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1368 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1369 128));
1370 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1371 info->grid[2] * 4;
1372 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1373 shared_size,
1374 1);
1375
1376 struct mali_shared_memory shared = {
1377 .shared_memory = bo->gpu,
1378 .shared_workgroup_count =
1379 util_logbase2_ceil(info->grid[0]) +
1380 util_logbase2_ceil(info->grid[1]) +
1381 util_logbase2_ceil(info->grid[2]),
1382 .shared_unk1 = 0x2,
1383 .shared_shift = util_logbase2(single_size) - 1
1384 };
1385
1386 vtp->postfix.shared_memory = panfrost_pool_upload(&batch->pool, &shared,
1387 sizeof(shared));
1388 }
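
/* Worked sizing example (illustrative numbers): ss->shared_size = 200 gives
 * single_size = next_pow2(MAX2(200, 128)) = 256, so a 4x4x1 grid allocates
 * 256 * 4 * 4 * 1 * 4 = 16384 bytes. shared_workgroup_count becomes
 * ceil(log2(4)) + ceil(log2(4)) + ceil(log2(1)) = 4, and
 * shared_shift = log2(256) - 1 = 7. */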
1389
1390 static mali_ptr
1391 panfrost_get_tex_desc(struct panfrost_batch *batch,
1392 enum pipe_shader_type st,
1393 struct panfrost_sampler_view *view)
1394 {
1395 if (!view)
1396 return (mali_ptr) 0;
1397
1398 struct pipe_sampler_view *pview = &view->base;
1399 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1400
1401 /* Add the BO to the job so it's retained until the job is done. */
1402
1403 panfrost_batch_add_bo(batch, rsrc->bo,
1404 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1405 panfrost_bo_access_for_stage(st));
1406
1407 panfrost_batch_add_bo(batch, view->bo,
1408 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1409 panfrost_bo_access_for_stage(st));
1410
1411 return view->bo->gpu;
1412 }
1413
1414 static void
1415 panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1416 struct pipe_context *pctx)
1417 {
1418 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
1419 if (view->texture_bo != rsrc->bo->gpu ||
1420 view->layout != rsrc->layout) {
1421 panfrost_bo_unreference(view->bo);
1422 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1423 }
1424 }
1425
1426 void
1427 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1428 enum pipe_shader_type stage,
1429 struct mali_vertex_tiler_postfix *postfix)
1430 {
1431 struct panfrost_context *ctx = batch->ctx;
1432 struct panfrost_device *device = pan_device(ctx->base.screen);
1433
1434 if (!ctx->sampler_view_count[stage])
1435 return;
1436
1437 if (device->quirks & IS_BIFROST) {
1438 struct bifrost_texture_descriptor *descriptors;
1439
1440 descriptors = malloc(sizeof(struct bifrost_texture_descriptor) *
1441 ctx->sampler_view_count[stage]);
1442
1443 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1444 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1445 struct pipe_sampler_view *pview = &view->base;
1446 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1447 panfrost_update_sampler_view(view, &ctx->base);
1448
1449 /* Add the BOs to the job so they are retained until the job is done. */
1450
1451 panfrost_batch_add_bo(batch, rsrc->bo,
1452 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1453 panfrost_bo_access_for_stage(stage));
1454
1455 panfrost_batch_add_bo(batch, view->bo,
1456 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1457 panfrost_bo_access_for_stage(stage));
1458
1459 memcpy(&descriptors[i], view->bifrost_descriptor, sizeof(*view->bifrost_descriptor));
1460 }
1461
1462 postfix->textures = panfrost_pool_upload(&batch->pool,
1463 descriptors,
1464 sizeof(struct bifrost_texture_descriptor) *
1465 ctx->sampler_view_count[stage]);
1466
1467 free(descriptors);
1468 } else {
1469 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1470
1471 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
1472 struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
1473
1474 panfrost_update_sampler_view(view, &ctx->base);
1475
1476 trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
1477 }
1478
1479 postfix->textures = panfrost_pool_upload(&batch->pool,
1480 trampolines,
1481 sizeof(uint64_t) *
1482 ctx->sampler_view_count[stage]);
1483 }
1484 }
1485
1486 void
1487 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1488 enum pipe_shader_type stage,
1489 struct mali_vertex_tiler_postfix *postfix)
1490 {
1491 struct panfrost_context *ctx = batch->ctx;
1492 struct panfrost_device *device = pan_device(ctx->base.screen);
1493
1494 if (!ctx->sampler_count[stage])
1495 return;
1496
1497 if (device->quirks & IS_BIFROST) {
1498 size_t desc_size = sizeof(struct bifrost_sampler_descriptor);
1499 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1500 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1501 transfer_size);
1502 struct bifrost_sampler_descriptor *desc = (struct bifrost_sampler_descriptor *)transfer.cpu;
1503
1504 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1505 desc[i] = ctx->samplers[stage][i]->bifrost_hw;
1506
1507 postfix->sampler_descriptor = transfer.gpu;
1508 } else {
1509 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1510 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1511 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1512 transfer_size);
1513 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1514
1515 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1516 desc[i] = ctx->samplers[stage][i]->midgard_hw;
1517
1518 postfix->sampler_descriptor = transfer.gpu;
1519 }
1520 }
1521
1522 void
1523 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1524 struct mali_vertex_tiler_postfix *vertex_postfix)
1525 {
1526 struct panfrost_context *ctx = batch->ctx;
1527
1528 if (!ctx->vertex)
1529 return;
1530
1531 struct panfrost_vertex_state *so = ctx->vertex;
1532
1533 panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
1534 vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
1535 sizeof(*so->hw) *
1536 PAN_MAX_ATTRIBUTE);
1537 }
1538
1539 void
1540 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1541 struct mali_vertex_tiler_postfix *vertex_postfix)
1542 {
1543 struct panfrost_context *ctx = batch->ctx;
1544 struct panfrost_vertex_state *so = ctx->vertex;
1545
1546 /* Staged mali_attr, and index into them. i =/= k, depending on the
1547 * vertex buffer mask and instancing. Twice as much room is allocated,
1548          * for a worst case of NPOT_DIVIDEs, which take up an extra slot */
1549 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1550 unsigned k = 0;
1551
1552 for (unsigned i = 0; i < so->num_elements; ++i) {
1553 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1554 * means duplicating some vertex buffers (who cares? aside from
1555 * maybe some caching implications but I somehow doubt that
1556 * matters) */
1557
1558 struct pipe_vertex_element *elem = &so->pipe[i];
1559 unsigned vbi = elem->vertex_buffer_index;
1560
1561 /* The exception to 1:1 mapping is that we can have multiple
1562                  * entries (NPOT divisors), so we fix up anyway */
1563
1564 so->hw[i].index = k;
1565
1566 if (!(ctx->vb_mask & (1 << vbi)))
1567 continue;
1568
1569 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1570 struct panfrost_resource *rsrc;
1571
1572 rsrc = pan_resource(buf->buffer.resource);
1573 if (!rsrc)
1574 continue;
1575
1576 /* Align to 64 bytes by masking off the lower bits. This
1577                  * will be adjusted back when we fix up the src_offset in
1578 * mali_attr_meta */
1579
1580 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1581 mali_ptr addr = raw_addr & ~63;
1582 unsigned chopped_addr = raw_addr - addr;
1583
1584 /* Add a dependency of the batch on the vertex buffer */
1585 panfrost_batch_add_bo(batch, rsrc->bo,
1586 PAN_BO_ACCESS_SHARED |
1587 PAN_BO_ACCESS_READ |
1588 PAN_BO_ACCESS_VERTEX_TILER);
1589
1590 /* Set common fields */
1591 attrs[k].elements = addr;
1592 attrs[k].stride = buf->stride;
1593
1594 /* Since we advanced the base pointer, we shrink the buffer
1595 * size */
1596 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1597
1598 /* We need to add the extra size we masked off (for
1599 * correctness) so the data doesn't get clamped away */
1600 attrs[k].size += chopped_addr;
1601
1602 /* For non-instancing make sure we initialize */
1603 attrs[k].shift = attrs[k].extra_flags = 0;
1604
1605 /* Instancing uses a dramatically different code path than
1606 * linear, so dispatch for the actual emission now that the
1607 * common code is finished */
1608
1609 unsigned divisor = elem->instance_divisor;
1610
1611 if (divisor && ctx->instance_count == 1) {
1612 /* Silly corner case where there's a divisor(=1) but
1613 * there's no legitimate instancing. So we want *every*
1614 * attribute to be the same. So set stride to zero so
1615 * we don't go anywhere. */
1616
1617 attrs[k].size = attrs[k].stride + chopped_addr;
1618 attrs[k].stride = 0;
1619 attrs[k++].elements |= MALI_ATTR_LINEAR;
1620 } else if (ctx->instance_count <= 1) {
1621 /* Normal, non-instanced attributes */
1622 attrs[k++].elements |= MALI_ATTR_LINEAR;
1623 } else {
1624 unsigned instance_shift = vertex_postfix->instance_shift;
1625 unsigned instance_odd = vertex_postfix->instance_odd;
1626
1627 k += panfrost_vertex_instanced(ctx->padded_count,
1628 instance_shift,
1629 instance_odd,
1630 divisor, &attrs[k]);
1631 }
1632 }
1633
1634 /* Add special gl_VertexID/gl_InstanceID buffers */
1635
1636 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1637 so->hw[PAN_VERTEX_ID].index = k++;
1638 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1639 so->hw[PAN_INSTANCE_ID].index = k++;
1640
1641 /* Upload whatever we emitted and go */
1642
1643 vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
1644 k * sizeof(*attrs));
1645 }
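
/* Example of the 64-byte alignment dance above (addresses invented): a
 * vertex buffer at raw_addr = 0x10000025 is emitted with addr = 0x10000000
 * and chopped_addr = 0x25 (37 bytes). Those 37 bytes are added back to
 * attrs[k].size here and re-added to src_offset when the attribute metadata
 * is fixed up (see panfrost_vertex_state_upd_attr_offs), so the GPU still
 * reads from the intended offset. */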
1646
1647 static mali_ptr
1648 panfrost_emit_varyings(struct panfrost_batch *batch, union mali_attr *slot,
1649 unsigned stride, unsigned count)
1650 {
1651 /* Fill out the descriptor */
1652 slot->stride = stride;
1653 slot->size = stride * count;
1654 slot->shift = slot->extra_flags = 0;
1655
1656 struct panfrost_transfer transfer = panfrost_pool_alloc(&batch->pool,
1657 slot->size);
1658
1659 slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
1660
1661 return transfer.gpu;
1662 }
1663
1664 static unsigned
1665 panfrost_streamout_offset(unsigned stride, unsigned offset,
1666 struct pipe_stream_output_target *target)
1667 {
1668 return (target->buffer_offset + (offset * stride * 4)) & 63;
1669 }
1670
1671 static void
1672 panfrost_emit_streamout(struct panfrost_batch *batch, union mali_attr *slot,
1673 unsigned stride, unsigned offset, unsigned count,
1674 struct pipe_stream_output_target *target)
1675 {
1676 /* Fill out the descriptor */
1677 slot->stride = stride * 4;
1678 slot->shift = slot->extra_flags = 0;
1679
1680 unsigned max_size = target->buffer_size;
1681 unsigned expected_size = slot->stride * count;
1682
1683 /* Grab the BO and bind it to the batch */
1684 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1685
1686 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1687 * the perspective of the TILER and FRAGMENT.
1688 */
1689 panfrost_batch_add_bo(batch, bo,
1690 PAN_BO_ACCESS_SHARED |
1691 PAN_BO_ACCESS_RW |
1692 PAN_BO_ACCESS_VERTEX_TILER |
1693 PAN_BO_ACCESS_FRAGMENT);
1694
1695 /* We will have an offset applied to get alignment */
1696 mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
1697 slot->elements = (addr & ~63) | MALI_ATTR_LINEAR;
1698 slot->size = MIN2(max_size, expected_size) + (addr & 63);
1699 }
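
/* Worked example, assuming the BO base itself is 64-byte aligned: a stream
 * output target with buffer_offset = 0x70, offset = 0 and a 4-dword stride
 * gives slot->stride = 16 and addr = bo->gpu + 0x70. The record base drops
 * the low 0x30 (48) bytes, which are added back to slot->size;
 * panfrost_streamout_offset() returns the same 48 so the varying record can
 * skip them via src_offset. */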
1700
1701 static bool
1702 has_point_coord(unsigned mask, gl_varying_slot loc)
1703 {
1704 if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7))
1705 return (mask & (1 << (loc - VARYING_SLOT_TEX0)));
1706 else if (loc == VARYING_SLOT_PNTC)
1707 return (mask & (1 << 8));
1708 else
1709 return false;
1710 }
1711
1712 /* Helpers for manipulating stream out information so we can pack varyings
1713 * accordingly. Compute the src_offset for a given captured varying */
1714
1715 static struct pipe_stream_output *
1716 pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1717 {
1718 for (unsigned i = 0; i < info->num_outputs; ++i) {
1719 if (info->output[i].register_index == loc)
1720 return &info->output[i];
1721 }
1722
1723 unreachable("Varying not captured");
1724 }
1725
1726 static unsigned
1727 pan_varying_size(enum mali_format fmt)
1728 {
1729 unsigned type = MALI_EXTRACT_TYPE(fmt);
1730 unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
1731 unsigned bits = MALI_EXTRACT_BITS(fmt);
1732 unsigned bpc = 0;
1733
1734 if (bits == MALI_CHANNEL_FLOAT) {
1735 /* No doubles */
1736 bool fp16 = (type == MALI_FORMAT_SINT);
1737 assert(fp16 || (type == MALI_FORMAT_UNORM));
1738
1739 bpc = fp16 ? 2 : 4;
1740 } else {
1741 assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
1742
1743 /* See the enums */
1744 bits = 1 << bits;
1745 assert(bits >= 8);
1746 bpc = bits / 8;
1747 }
1748
1749 return bpc * chan;
1750 }
1751
1752 /* Indices for named (non-XFB) varyings that are present. These are packed
1753 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1754 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
1755 * of a given special field given a shift S by:
1756 *
1757 * idx = popcount(P & ((1 << S) - 1))
1758 *
1759 * That is, count the varyings that come before this one; that count is the
1760 * new index. Likewise, the total number of special buffers required is
1761 * simply popcount(P)
1762 */
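/* For example: if present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION)
 * | (1 << PAN_VARY_PNTCOORD) = 0b1011, the buffer index of PNTCOORD (S = 3)
 * is popcount(0b1011 & 0b0111) = 2, i.e. it sits right after the general and
 * position buffers. */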
1763
1764 enum pan_special_varying {
1765 PAN_VARY_GENERAL = 0,
1766 PAN_VARY_POSITION = 1,
1767 PAN_VARY_PSIZ = 2,
1768 PAN_VARY_PNTCOORD = 3,
1769 PAN_VARY_FACE = 4,
1770 PAN_VARY_FRAGCOORD = 5,
1771
1772 /* Keep last */
1773 PAN_VARY_MAX,
1774 };
1775
1776 /* Given a varying, figure out which index it corresponds to */
1777
1778 static inline unsigned
1779 pan_varying_index(unsigned present, enum pan_special_varying v)
1780 {
1781 unsigned mask = (1 << v) - 1;
1782 return util_bitcount(present & mask);
1783 }
1784
1785 /* Get the base offset for XFB buffers, which by convention come after
1786 * everything else. Wrapper function for semantic reasons; by construction this
1787 * is just popcount. */
1788
1789 static inline unsigned
1790 pan_xfb_base(unsigned present)
1791 {
1792 return util_bitcount(present);
1793 }
1794
1795 /* Computes the present mask for varyings so we can start emitting varying records */
1796
1797 static inline unsigned
1798 pan_varying_present(
1799 struct panfrost_shader_state *vs,
1800 struct panfrost_shader_state *fs,
1801 unsigned quirks)
1802 {
1803 /* At the moment we always emit general and position buffers. Not
1804 * strictly necessary but usually harmless */
1805
1806 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1807
1808 /* Enable special buffers by the shader info */
1809
1810 if (vs->writes_point_size)
1811 present |= (1 << PAN_VARY_PSIZ);
1812
1813 if (fs->reads_point_coord)
1814 present |= (1 << PAN_VARY_PNTCOORD);
1815
1816 if (fs->reads_face)
1817 present |= (1 << PAN_VARY_FACE);
1818
1819 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1820 present |= (1 << PAN_VARY_FRAGCOORD);
1821
1822 /* Also, if we have a point sprite, we need a point coord buffer */
1823
1824 for (unsigned i = 0; i < fs->varying_count; i++) {
1825 gl_varying_slot loc = fs->varyings_loc[i];
1826
1827 if (has_point_coord(fs->point_sprite_mask, loc))
1828 present |= (1 << PAN_VARY_PNTCOORD);
1829 }
1830
1831 return present;
1832 }
1833
1834 /* Emitters for varying records */
1835
1836 static struct mali_attr_meta
1837 pan_emit_vary(unsigned present, enum pan_special_varying buf,
1838 unsigned quirks, enum mali_format format,
1839 unsigned offset)
1840 {
1841 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
1842
1843 struct mali_attr_meta meta = {
1844 .index = pan_varying_index(present, buf),
1845 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1846 .swizzle = quirks & HAS_SWIZZLES ?
1847 panfrost_get_default_swizzle(nr_channels) :
1848 panfrost_bifrost_swizzle(nr_channels),
1849 .format = format,
1850 .src_offset = offset
1851 };
1852
1853 return meta;
1854 }
1855
1856 /* General varying that is unused */
1857
1858 static struct mali_attr_meta
1859 pan_emit_vary_only(unsigned present, unsigned quirks)
1860 {
1861 return pan_emit_vary(present, 0, quirks, MALI_VARYING_DISCARD, 0);
1862 }
1863
1864 /* Special records */
1865
1866 static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
1867 [PAN_VARY_POSITION] = MALI_VARYING_POS,
1868 [PAN_VARY_PSIZ] = MALI_R16F,
1869 [PAN_VARY_PNTCOORD] = MALI_R16F,
1870 [PAN_VARY_FACE] = MALI_R32I,
1871 [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
1872 };
1873
1874 static struct mali_attr_meta
1875 pan_emit_vary_special(unsigned present, enum pan_special_varying buf,
1876 unsigned quirks)
1877 {
1878 assert(buf < PAN_VARY_MAX);
1879 return pan_emit_vary(present, buf, quirks, pan_varying_formats[buf], 0);
1880 }
1881
1882 static enum mali_format
1883 pan_xfb_format(enum mali_format format, unsigned nr)
1884 {
1885 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1886 return MALI_R32F | MALI_NR_CHANNELS(nr);
1887 else
1888 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1889 }
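/* For instance, a varying captured with three components whose channels are
 * floats would be rewritten by pan_xfb_format into a 3-channel 32-bit float
 * format, so the data landing in the transform feedback buffer is always
 * full precision. */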
1890
1891 /* Transform feedback records. Note struct pipe_stream_output is (if packed as
1892 * a bitfield) 32-bit, smaller than a 64-bit pointer, so we may as well pass
1893 * it by value. */
1894
1895 static struct mali_attr_meta
1896 pan_emit_vary_xfb(unsigned present,
1897 unsigned max_xfb,
1898 unsigned *streamout_offsets,
1899 unsigned quirks,
1900 enum mali_format format,
1901 struct pipe_stream_output o)
1902 {
1903 /* Construct a transform feedback record for the captured varying */
1904 struct mali_attr_meta meta = {
1905 /* XFB buffers come after everything else */
1906 .index = pan_xfb_base(present) + o.output_buffer,
1907
1908 /* As usual unknown bit */
1909 .unknown1 = quirks & IS_BIFROST ? 0x0 : 0x2,
1910
1911 /* Override swizzle with number of channels */
1912 .swizzle = quirks & HAS_SWIZZLES ?
1913 panfrost_get_default_swizzle(o.num_components) :
1914 panfrost_bifrost_swizzle(o.num_components),
1915
1916 /* Override number of channels and precision to highp */
1917 .format = pan_xfb_format(format, o.num_components),
1918
1919 /* Apply given offsets together */
1920 .src_offset = (o.dst_offset * 4) /* dwords */
1921 + streamout_offsets[o.output_buffer]
1922 };
1923
1924 return meta;
1925 }
1926
1927 /* Determine if we should capture a varying for XFB. This requires actually
1928 * having a buffer for it. If we don't capture it, we'll fall back to a general
1929 * varying path (linked or unlinked, possibly discarding the write) */
1930
1931 static bool
1932 panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1933 unsigned loc, unsigned max_xfb)
1934 {
1935 if (!(xfb->so_mask & (1ll << loc)))
1936 return false;
1937
1938 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1939 return o->output_buffer < max_xfb;
1940 }
1941
1942 /* Higher-level wrapper around all of the above, classifying a varying into one
1943 * of the above types */
1944
1945 static struct mali_attr_meta
1946 panfrost_emit_varying(
1947 struct panfrost_shader_state *stage,
1948 struct panfrost_shader_state *other,
1949 struct panfrost_shader_state *xfb,
1950 unsigned present,
1951 unsigned max_xfb,
1952 unsigned *streamout_offsets,
1953 unsigned quirks,
1954 unsigned *gen_offsets,
1955 enum mali_format *gen_formats,
1956 unsigned *gen_stride,
1957 unsigned idx,
1958 bool should_alloc,
1959 bool is_fragment)
1960 {
1961 gl_varying_slot loc = stage->varyings_loc[idx];
1962 enum mali_format format = stage->varyings[idx];
1963
1964 /* Override format to match linkage */
1965 if (!should_alloc && gen_formats[idx])
1966 format = gen_formats[idx];
1967
1968 if (has_point_coord(stage->point_sprite_mask, loc)) {
1969 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1970 } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
1971 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1972 return pan_emit_vary_xfb(present, max_xfb, streamout_offsets, quirks, format, *o);
1973 } else if (loc == VARYING_SLOT_POS) {
1974 if (is_fragment)
1975 return pan_emit_vary_special(present, PAN_VARY_FRAGCOORD, quirks);
1976 else
1977 return pan_emit_vary_special(present, PAN_VARY_POSITION, quirks);
1978 } else if (loc == VARYING_SLOT_PSIZ) {
1979 return pan_emit_vary_special(present, PAN_VARY_PSIZ, quirks);
1980 } else if (loc == VARYING_SLOT_PNTC) {
1981 return pan_emit_vary_special(present, PAN_VARY_PNTCOORD, quirks);
1982 } else if (loc == VARYING_SLOT_FACE) {
1983 return pan_emit_vary_special(present, PAN_VARY_FACE, quirks);
1984 }
1985
1986 /* We've exhausted the special cases, so this is a general varying. Check whether it is linked with the other stage */
1987 signed other_idx = -1;
1988
1989 for (unsigned j = 0; j < other->varying_count; ++j) {
1990 if (other->varyings_loc[j] == loc) {
1991 other_idx = j;
1992 break;
1993 }
1994 }
1995
1996 if (other_idx < 0)
1997 return pan_emit_vary_only(present, quirks);
1998
1999 unsigned offset = gen_offsets[other_idx];
2000
2001 if (should_alloc) {
2002 /* We're linked, so allocate space via a watermark allocation */
2003 enum mali_format alt = other->varyings[other_idx];
2004
2005 /* Do interpolation at minimum precision */
2006 unsigned size_main = pan_varying_size(format);
2007 unsigned size_alt = pan_varying_size(alt);
2008 unsigned size = MIN2(size_main, size_alt);
2009
2010 /* If a varying is marked for XFB but not actually captured, we
2011 * should match the format to the format that would otherwise
2012 * be used for XFB, since dEQP checks for invariance here. It's
2013 * unclear if this is required by the spec. */
2014
2015 if (xfb->so_mask & (1ull << loc)) {
2016 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
2017 format = pan_xfb_format(format, o->num_components);
2018 size = pan_varying_size(format);
2019 } else if (size == size_alt) {
2020 format = alt;
2021 }
2022
2023 gen_offsets[idx] = *gen_stride;
2024 gen_formats[other_idx] = format;
2025 offset = *gen_stride;
2026 *gen_stride += size;
2027 }
2028
2029 return pan_emit_vary(present, PAN_VARY_GENERAL,
2030 quirks, format, offset);
2031 }
2032
2033 static void
2034 pan_emit_special_input(union mali_attr *varyings,
2035 unsigned present,
2036 enum pan_special_varying v,
2037 mali_ptr addr)
2038 {
2039 if (present & (1 << v)) {
2040 /* Write the record exactly once for performance, with the remaining
2041 * fields zeroed to avoid flaky behaviour from uninitialized data */
2042
2043 union mali_attr s = {
2044 .elements = addr
2045 };
2046
2047 varyings[pan_varying_index(present, v)] = s;
2048 }
2049 }
2050
2051 void
2052 panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
2053 unsigned vertex_count,
2054 struct mali_vertex_tiler_postfix *vertex_postfix,
2055 struct mali_vertex_tiler_postfix *tiler_postfix,
2056 union midgard_primitive_size *primitive_size)
2057 {
2058 /* Load the shaders */
2059 struct panfrost_context *ctx = batch->ctx;
2060 struct panfrost_device *dev = pan_device(ctx->base.screen);
2061 struct panfrost_shader_state *vs, *fs;
2062 size_t vs_size, fs_size;
2063
2064 /* Allocate the varying descriptor */
2065
2066 vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
2067 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
2068 vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
2069 fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
2070
2071 struct panfrost_transfer trans = panfrost_pool_alloc(&batch->pool,
2072 vs_size +
2073 fs_size);
2074
2075 struct pipe_stream_output_info *so = &vs->stream_output;
2076 unsigned present = pan_varying_present(vs, fs, dev->quirks);
2077
2078 /* Check if this varying is linked by us. This is the case for
2079 * general-purpose, non-captured varyings. If it is, link it. If it's
2080 * not, use the provided stream out information to determine the
2081 * offset, since it was already linked for us. */
2082
2083 unsigned gen_offsets[32];
2084 enum mali_format gen_formats[32];
2085 memset(gen_offsets, 0, sizeof(gen_offsets));
2086 memset(gen_formats, 0, sizeof(gen_formats));
2087
2088 unsigned gen_stride = 0;
2089 assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
2090 assert(fs->varying_count < ARRAY_SIZE(gen_offsets));
2091
2092 unsigned streamout_offsets[32];
2093
2094 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2095 streamout_offsets[i] = panfrost_streamout_offset(
2096 so->stride[i],
2097 ctx->streamout.offsets[i],
2098 ctx->streamout.targets[i]);
2099 }
2100
2101 struct mali_attr_meta *ovs = (struct mali_attr_meta *)trans.cpu;
2102 struct mali_attr_meta *ofs = ovs + vs->varying_count;
2103
2104 for (unsigned i = 0; i < vs->varying_count; i++) {
2105 ovs[i] = panfrost_emit_varying(vs, fs, vs, present,
2106 ctx->streamout.num_targets, streamout_offsets,
2107 dev->quirks,
2108 gen_offsets, gen_formats, &gen_stride, i, true, false);
2109 }
2110
2111 for (unsigned i = 0; i < fs->varying_count; i++) {
2112 ofs[i] = panfrost_emit_varying(fs, vs, vs, present,
2113 ctx->streamout.num_targets, streamout_offsets,
2114 dev->quirks,
2115 gen_offsets, gen_formats, &gen_stride, i, false, true);
2116 }
2117
2118 unsigned xfb_base = pan_xfb_base(present);
2119 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool,
2120 sizeof(union mali_attr) * (xfb_base + ctx->streamout.num_targets));
2121 union mali_attr *varyings = (union mali_attr *) T.cpu;
2122
2123 /* Emit the stream out buffers */
2124
2125 unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
2126 ctx->vertex_count);
2127
2128 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
2129 panfrost_emit_streamout(batch, &varyings[xfb_base + i],
2130 so->stride[i],
2131 ctx->streamout.offsets[i],
2132 out_count,
2133 ctx->streamout.targets[i]);
2134 }
2135
2136 panfrost_emit_varyings(batch,
2137 &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
2138 gen_stride, vertex_count);
2139
2140 /* fp32 vec4 gl_Position */
2141 tiler_postfix->position_varying = panfrost_emit_varyings(batch,
2142 &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
2143 sizeof(float) * 4, vertex_count);
2144
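        /* gl_PointSize is a single fp16 value (MALI_R16F in
         * pan_varying_formats above), hence the 2-byte stride */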
2145 if (present & (1 << PAN_VARY_PSIZ)) {
2146 primitive_size->pointer = panfrost_emit_varyings(batch,
2147 &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
2148 2, vertex_count);
2149 }
2150
2151 pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_VARYING_POINT_COORD);
2152 pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_VARYING_FRONT_FACING);
2153 pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_VARYING_FRAG_COORD);
2154
2155 vertex_postfix->varyings = T.gpu;
2156 tiler_postfix->varyings = T.gpu;
2157
2158 vertex_postfix->varying_meta = trans.gpu;
2159 tiler_postfix->varying_meta = trans.gpu + vs_size;
2160 }
2161
2162 void
2163 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
2164 struct mali_vertex_tiler_prefix *vertex_prefix,
2165 struct mali_vertex_tiler_postfix *vertex_postfix,
2166 struct mali_vertex_tiler_prefix *tiler_prefix,
2167 struct mali_vertex_tiler_postfix *tiler_postfix,
2168 union midgard_primitive_size *primitive_size)
2169 {
2170 struct panfrost_context *ctx = batch->ctx;
2171 struct panfrost_device *device = pan_device(ctx->base.screen);
2172 bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;
2173 struct bifrost_payload_vertex bifrost_vertex = {0,};
2174 struct bifrost_payload_tiler bifrost_tiler = {0,};
2175 struct midgard_payload_vertex_tiler midgard_vertex = {0,};
2176 struct midgard_payload_vertex_tiler midgard_tiler = {0,};
2177 void *vp, *tp;
2178 size_t vp_size, tp_size;
2179
2180 if (device->quirks & IS_BIFROST) {
2181 bifrost_vertex.prefix = *vertex_prefix;
2182 bifrost_vertex.postfix = *vertex_postfix;
2183 vp = &bifrost_vertex;
2184 vp_size = sizeof(bifrost_vertex);
2185
2186 bifrost_tiler.prefix = *tiler_prefix;
2187 bifrost_tiler.tiler.primitive_size = *primitive_size;
2188 bifrost_tiler.tiler.tiler_meta = panfrost_batch_get_tiler_meta(batch, ~0);
2189 bifrost_tiler.postfix = *tiler_postfix;
2190 tp = &bifrost_tiler;
2191 tp_size = sizeof(bifrost_tiler);
2192 } else {
2193 midgard_vertex.prefix = *vertex_prefix;
2194 midgard_vertex.postfix = *vertex_postfix;
2195 vp = &midgard_vertex;
2196 vp_size = sizeof(midgard_vertex);
2197
2198 midgard_tiler.prefix = *tiler_prefix;
2199 midgard_tiler.postfix = *tiler_postfix;
2200 midgard_tiler.primitive_size = *primitive_size;
2201 tp = &midgard_tiler;
2202 tp_size = sizeof(midgard_tiler);
2203 }
2204
2205 if (wallpapering) {
2206 /* Inject in reverse order, with "predicted" job indices.
2207 * THIS IS A HACK XXX */
2208 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false,
2209 batch->scoreboard.job_index + 2, tp, tp_size, true);
2210 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2211 vp, vp_size, true);
2212 return;
2213 }
2214
2215 /* If rasterizer discard is enabled, only submit the vertex job */
2216
2217 bool rasterizer_discard = ctx->rasterizer &&
2218 ctx->rasterizer->base.rasterizer_discard;
2219
2220 unsigned vertex = panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_VERTEX, false, 0,
2221 vp, vp_size, false);
2222
2223 if (rasterizer_discard)
2224 return;
2225
2226 panfrost_new_job(&batch->pool, &batch->scoreboard, JOB_TYPE_TILER, false, vertex, tp, tp_size,
2227 false);
2228 }
2229
2230 /* TODO: stop hardcoding this */
2231 mali_ptr
2232 panfrost_emit_sample_locations(struct panfrost_batch *batch)
2233 {
2234 uint16_t locations[] = {
2235 128, 128,
2236 0, 256,
2237 0, 256,
2238 0, 256,
2239 0, 256,
2240 0, 256,
2241 0, 256,
2242 0, 256,
2243 0, 256,
2244 0, 256,
2245 0, 256,
2246 0, 256,
2247 0, 256,
2248 0, 256,
2249 0, 256,
2250 0, 256,
2251 0, 256,
2252 0, 256,
2253 0, 256,
2254 0, 256,
2255 0, 256,
2256 0, 256,
2257 0, 256,
2258 0, 256,
2259 0, 256,
2260 0, 256,
2261 0, 256,
2262 0, 256,
2263 0, 256,
2264 0, 256,
2265 0, 256,
2266 0, 256,
2267 128, 128,
2268 0, 0,
2269 0, 0,
2270 0, 0,
2271 0, 0,
2272 0, 0,
2273 0, 0,
2274 0, 0,
2275 0, 0,
2276 0, 0,
2277 0, 0,
2278 0, 0,
2279 0, 0,
2280 0, 0,
2281 0, 0,
2282 0, 0,
2283 };
2284
2285 return panfrost_pool_upload(&batch->pool, locations, 96 * sizeof(uint16_t));
2286 }