panfrost: Move panfrost_emit_vertex_data() to pan_cmdstream.c
[mesa.git] / src / gallium / drivers / panfrost / pan_cmdstream.c
1 /*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "util/macros.h"
26 #include "util/u_vbuf.h"
27
28 #include "panfrost-quirks.h"
29
30 #include "pan_allocate.h"
31 #include "pan_bo.h"
32 #include "pan_cmdstream.h"
33 #include "pan_context.h"
34 #include "pan_job.h"
35
36 /* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
37 * framebuffer */
38
39 void
40 panfrost_vt_attach_framebuffer(struct panfrost_context *ctx,
41 struct midgard_payload_vertex_tiler *vt)
42 {
43 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
44 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
45
46 /* If we haven't already, reserve space for the framebuffer */
47
48 if (!batch->framebuffer.gpu) {
49 unsigned size = (screen->quirks & MIDGARD_SFBD) ?
50 sizeof(struct mali_single_framebuffer) :
51 sizeof(struct mali_framebuffer);
52
53 batch->framebuffer = panfrost_allocate_transient(batch, size);
54
55 /* Tag the pointer */
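/* (The tag in the pointer's low bits presumably lets the hardware tell the
 * multi-target MFBD layout apart from the single-target SFBD one) */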
56 if (!(screen->quirks & MIDGARD_SFBD))
57 batch->framebuffer.gpu |= MALI_MFBD;
58 }
59
60 vt->postfix.shared_memory = batch->framebuffer.gpu;
61 }
62
63 void
64 panfrost_vt_update_rasterizer(struct panfrost_context *ctx,
65 struct midgard_payload_vertex_tiler *tp)
66 {
67 struct panfrost_rasterizer *rasterizer = ctx->rasterizer;
68
69 tp->gl_enables |= 0x7;
70 SET_BIT(tp->gl_enables, MALI_FRONT_CCW_TOP,
71 rasterizer && rasterizer->base.front_ccw);
72 SET_BIT(tp->gl_enables, MALI_CULL_FACE_FRONT,
73 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_FRONT));
74 SET_BIT(tp->gl_enables, MALI_CULL_FACE_BACK,
75 rasterizer && (rasterizer->base.cull_face & PIPE_FACE_BACK));
76 SET_BIT(tp->prefix.unknown_draw, MALI_DRAW_FLATSHADE_FIRST,
77 rasterizer && rasterizer->base.flatshade_first);
78
79 if (!panfrost_writes_point_size(ctx)) {
80 bool points = tp->prefix.draw_mode == MALI_POINTS;
81 float val = 0.0f;
82
83 if (rasterizer)
84 val = points ?
85 rasterizer->base.point_size :
86 rasterizer->base.line_width;
87
88 tp->primitive_size.constant = val;
89 }
90 }
91
92 void
93 panfrost_vt_update_occlusion_query(struct panfrost_context *ctx,
94 struct midgard_payload_vertex_tiler *tp)
95 {
96 SET_BIT(tp->gl_enables, MALI_OCCLUSION_QUERY, ctx->occlusion_query);
97 if (ctx->occlusion_query)
98 tp->postfix.occlusion_counter = ctx->occlusion_query->bo->gpu;
99 else
100 tp->postfix.occlusion_counter = 0;
101 }
102
103 static unsigned
104 panfrost_translate_index_size(unsigned size)
105 {
106 switch (size) {
107 case 1:
108 return MALI_DRAW_INDEXED_UINT8;
109
110 case 2:
111 return MALI_DRAW_INDEXED_UINT16;
112
113 case 4:
114 return MALI_DRAW_INDEXED_UINT32;
115
116 default:
117 unreachable("Invalid index size");
118 }
119 }
120
121 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
122 * good for the duration of the draw (transient), though it may last longer.
123 * Also gets the bounds on the index buffer for the range accessed by the
124 * draw. We do these operations together because there are natural
125 * optimizations which require them to be together. */
126
127 static mali_ptr
128 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
129 const struct pipe_draw_info *info,
130 unsigned *min_index, unsigned *max_index)
131 {
132 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
133 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
134 off_t offset = info->start * info->index_size;
135 bool needs_indices = true;
136 mali_ptr out = 0;
137
138 if (info->max_index != ~0u) {
139 *min_index = info->min_index;
140 *max_index = info->max_index;
141 needs_indices = false;
142 }
143
144 if (!info->has_user_indices) {
145 /* Only resources can be directly mapped */
146 panfrost_batch_add_bo(batch, rsrc->bo,
147 PAN_BO_ACCESS_SHARED |
148 PAN_BO_ACCESS_READ |
149 PAN_BO_ACCESS_VERTEX_TILER);
150 out = rsrc->bo->gpu + offset;
151
152 /* Check the cache */
153 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
154 info->start,
155 info->count,
156 min_index,
157 max_index);
158 } else {
159 /* Otherwise, we need to upload to transient memory */
160 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
161 out = panfrost_upload_transient(batch, ibuf8 + offset,
162 info->count *
163 info->index_size);
164 }
165
166 if (needs_indices) {
167 /* Fallback */
168 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
169
170 if (!info->has_user_indices)
171 panfrost_minmax_cache_add(rsrc->index_cache,
172 info->start, info->count,
173 *min_index, *max_index);
174 }
175
176 return out;
177 }
178
179 void
180 panfrost_vt_set_draw_info(struct panfrost_context *ctx,
181 const struct pipe_draw_info *info,
182 enum mali_draw_mode draw_mode,
183 struct midgard_payload_vertex_tiler *vp,
184 struct midgard_payload_vertex_tiler *tp,
185 unsigned *vertex_count,
186 unsigned *padded_count)
187 {
188 tp->prefix.draw_mode = draw_mode;
189
190 unsigned draw_flags = 0;
191
192 if (panfrost_writes_point_size(ctx))
193 draw_flags |= MALI_DRAW_VARYING_SIZE;
194
195 if (info->primitive_restart)
196 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
197
198 /* These don't make much sense */
199
200 draw_flags |= 0x3000;
201
202 if (info->index_size) {
203 unsigned min_index = 0, max_index = 0;
204
205 tp->prefix.indices = panfrost_get_index_buffer_bounded(ctx,
206 info,
207 &min_index,
208 &max_index);
209
210 /* Use the corresponding values */
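/* (Only vertices in [min_index, max_index] are processed: vertex_count covers
 * that window, offset_start points at its first vertex plus any index bias,
 * and offset_bias_correction shifts the fetched index values back down by
 * min_index) */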
211 *vertex_count = max_index - min_index + 1;
212 tp->offset_start = vp->offset_start = min_index + info->index_bias;
213 tp->prefix.offset_bias_correction = -min_index;
214 tp->prefix.index_count = MALI_POSITIVE(info->count);
215 draw_flags |= panfrost_translate_index_size(info->index_size);
216 } else {
217 tp->prefix.indices = 0;
218 *vertex_count = ctx->vertex_count;
219 tp->offset_start = vp->offset_start = info->start;
220 tp->prefix.offset_bias_correction = 0;
221 tp->prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
222 }
223
224 tp->prefix.unknown_draw = draw_flags;
225
226 /* Encode the padded vertex count */
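/* The hardware wants the padded count split into an odd factor and a
 * power-of-two shift, padded = (2k + 1) << shift; e.g. 28 = 7 << 2 is stored
 * as shift = 2, k = 3. Presumably this lets instance divisions be done with
 * shifts and a small multiply. */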
227
228 if (info->instance_count > 1) {
229 *padded_count = panfrost_padded_vertex_count(*vertex_count);
230
231 unsigned shift = __builtin_ctz(ctx->padded_count);
232 unsigned k = ctx->padded_count >> (shift + 1);
233
234 tp->instance_shift = vp->instance_shift = shift;
235 tp->instance_odd = vp->instance_odd = k;
236 } else {
237 *padded_count = *vertex_count;
238
239 /* Reset instancing state */
240 tp->instance_shift = vp->instance_shift = 0;
241 tp->instance_odd = vp->instance_odd = 0;
242 }
243 }
244
245 static void
246 panfrost_shader_meta_init(struct panfrost_context *ctx,
247 enum pipe_shader_type st,
248 struct mali_shader_meta *meta)
249 {
250 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
251
252 memset(meta, 0, sizeof(*meta));
253 meta->shader = (ss->bo ? ss->bo->gpu : 0) | ss->first_tag;
254 meta->midgard1.uniform_count = MIN2(ss->uniform_count,
255 ss->uniform_cutoff);
256 meta->midgard1.work_count = ss->work_reg_count;
257 meta->attribute_count = ss->attribute_count;
258 meta->varying_count = ss->varying_count;
259 meta->midgard1.flags_hi = 0x8; /* XXX */
260 meta->midgard1.flags_lo = 0x220;
261 meta->texture_count = ctx->sampler_view_count[st];
262 meta->sampler_count = ctx->sampler_count[st];
263 meta->midgard1.uniform_buffer_count = panfrost_ubo_count(ctx, st);
264 }
265
266 static unsigned
267 panfrost_translate_compare_func(enum pipe_compare_func in)
268 {
269 switch (in) {
270 case PIPE_FUNC_NEVER:
271 return MALI_FUNC_NEVER;
272
273 case PIPE_FUNC_LESS:
274 return MALI_FUNC_LESS;
275
276 case PIPE_FUNC_EQUAL:
277 return MALI_FUNC_EQUAL;
278
279 case PIPE_FUNC_LEQUAL:
280 return MALI_FUNC_LEQUAL;
281
282 case PIPE_FUNC_GREATER:
283 return MALI_FUNC_GREATER;
284
285 case PIPE_FUNC_NOTEQUAL:
286 return MALI_FUNC_NOTEQUAL;
287
288 case PIPE_FUNC_GEQUAL:
289 return MALI_FUNC_GEQUAL;
290
291 case PIPE_FUNC_ALWAYS:
292 return MALI_FUNC_ALWAYS;
293
294 default:
295 unreachable("Invalid func");
296 }
297 }
298
299 static unsigned
300 panfrost_translate_stencil_op(enum pipe_stencil_op in)
301 {
302 switch (in) {
303 case PIPE_STENCIL_OP_KEEP:
304 return MALI_STENCIL_KEEP;
305
306 case PIPE_STENCIL_OP_ZERO:
307 return MALI_STENCIL_ZERO;
308
309 case PIPE_STENCIL_OP_REPLACE:
310 return MALI_STENCIL_REPLACE;
311
312 case PIPE_STENCIL_OP_INCR:
313 return MALI_STENCIL_INCR;
314
315 case PIPE_STENCIL_OP_DECR:
316 return MALI_STENCIL_DECR;
317
318 case PIPE_STENCIL_OP_INCR_WRAP:
319 return MALI_STENCIL_INCR_WRAP;
320
321 case PIPE_STENCIL_OP_DECR_WRAP:
322 return MALI_STENCIL_DECR_WRAP;
323
324 case PIPE_STENCIL_OP_INVERT:
325 return MALI_STENCIL_INVERT;
326
327 default:
328 unreachable("Invalid stencil op");
329 }
330 }
331
332 static unsigned
333 translate_tex_wrap(enum pipe_tex_wrap w)
334 {
335 switch (w) {
336 case PIPE_TEX_WRAP_REPEAT:
337 return MALI_WRAP_REPEAT;
338
339 case PIPE_TEX_WRAP_CLAMP:
340 return MALI_WRAP_CLAMP;
341
342 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
343 return MALI_WRAP_CLAMP_TO_EDGE;
344
345 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
346 return MALI_WRAP_CLAMP_TO_BORDER;
347
348 case PIPE_TEX_WRAP_MIRROR_REPEAT:
349 return MALI_WRAP_MIRRORED_REPEAT;
350
351 case PIPE_TEX_WRAP_MIRROR_CLAMP:
352 return MALI_WRAP_MIRRORED_CLAMP;
353
354 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
356
357 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
359
360 default:
361 unreachable("Invalid wrap");
362 }
363 }
364
365 void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
366 struct mali_sampler_descriptor *hw)
367 {
368 unsigned func = panfrost_translate_compare_func(cso->compare_func);
369 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
370 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
371 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
372 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
373 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
374 unsigned mip_filter = mip_linear ?
375 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
376 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
377
378 *hw = (struct mali_sampler_descriptor) {
379 .filter_mode = min_filter | mag_filter | mip_filter |
380 normalized,
381 .wrap_s = translate_tex_wrap(cso->wrap_s),
382 .wrap_t = translate_tex_wrap(cso->wrap_t),
383 .wrap_r = translate_tex_wrap(cso->wrap_r),
384 .compare_func = panfrost_flip_compare_func(func),
385 .border_color = {
386 cso->border_color.f[0],
387 cso->border_color.f[1],
388 cso->border_color.f[2],
389 cso->border_color.f[3]
390 },
391 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
392 .max_lod = FIXED_16(cso->max_lod, false),
393 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
394 .seamless_cube_map = cso->seamless_cube_map,
395 };
396
397 /* If necessary, we disable mipmapping in the sampler descriptor by
398 * clamping the LOD as tight as possible (from 0 to epsilon,
399 * essentially -- remember these are fixed point numbers, so
400 * epsilon=1/256) */
401
402 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
403 hw->max_lod = hw->min_lod + 1;
404 }
405
406 static void
407 panfrost_make_stencil_state(const struct pipe_stencil_state *in,
408 struct mali_stencil_test *out)
409 {
410 out->ref = 0; /* Gallium gets it from elsewhere */
411
412 out->mask = in->valuemask;
413 out->func = panfrost_translate_compare_func(in->func);
414 out->sfail = panfrost_translate_stencil_op(in->fail_op);
415 out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
416 out->dppass = panfrost_translate_stencil_op(in->zpass_op);
417 }
418
419 static void
420 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
421 struct mali_shader_meta *fragmeta)
422 {
423 if (!ctx->rasterizer) {
424 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, true);
425 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, false);
426 fragmeta->depth_units = 0.0f;
427 fragmeta->depth_factor = 0.0f;
428 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A, false);
429 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B, false);
430 return;
431 }
432
433 bool msaa = ctx->rasterizer->base.multisample;
434
435 /* TODO: Sample size */
436 SET_BIT(fragmeta->unknown2_3, MALI_HAS_MSAA, msaa);
437 SET_BIT(fragmeta->unknown2_4, MALI_NO_MSAA, !msaa);
438 fragmeta->depth_units = ctx->rasterizer->base.offset_units * 2.0f;
439 fragmeta->depth_factor = ctx->rasterizer->base.offset_scale;
440
441 /* XXX: Which bit is which? Does this maybe allow offsetting non-tris? */
442
443 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_A,
444 ctx->rasterizer->base.offset_tri);
445 SET_BIT(fragmeta->unknown2_4, MALI_DEPTH_RANGE_B,
446 ctx->rasterizer->base.offset_tri);
447 }
448
449 static void
450 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
451 struct mali_shader_meta *fragmeta)
452 {
453 const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
454 int zfunc = PIPE_FUNC_ALWAYS;
455
456 if (!zsa) {
457 struct pipe_stencil_state default_stencil = {
458 .enabled = 0,
459 .func = PIPE_FUNC_ALWAYS,
460 .fail_op = MALI_STENCIL_KEEP,
461 .zfail_op = MALI_STENCIL_KEEP,
462 .zpass_op = MALI_STENCIL_KEEP,
463 .writemask = 0xFF,
464 .valuemask = 0xFF
465 };
466
467 panfrost_make_stencil_state(&default_stencil,
468 &fragmeta->stencil_front);
469 fragmeta->stencil_mask_front = default_stencil.writemask;
470 fragmeta->stencil_back = fragmeta->stencil_front;
471 fragmeta->stencil_mask_back = default_stencil.writemask;
472 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
473 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
474 } else {
475 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
476 zsa->stencil[0].enabled);
477 panfrost_make_stencil_state(&zsa->stencil[0],
478 &fragmeta->stencil_front);
479 fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
480 fragmeta->stencil_front.ref = ctx->stencil_ref.ref_value[0];
481
482 /* If back-stencil is not enabled, use the front values */
483
484 if (zsa->stencil[1].enabled) {
485 panfrost_make_stencil_state(&zsa->stencil[1],
486 &fragmeta->stencil_back);
487 fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
488 fragmeta->stencil_back.ref = ctx->stencil_ref.ref_value[1];
489 } else {
490 fragmeta->stencil_back = fragmeta->stencil_front;
491 fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
492 fragmeta->stencil_back.ref = fragmeta->stencil_front.ref;
493 }
494
495 if (zsa->depth.enabled)
496 zfunc = zsa->depth.func;
497
498 /* Depth state (TODO: Refactor) */
499
500 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
501 zsa->depth.writemask);
502 }
503
504 fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
505 fragmeta->unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(zfunc));
506 }
507
508 static void
509 panfrost_frag_meta_blend_update(struct panfrost_context *ctx,
510 struct mali_shader_meta *fragmeta,
511 struct midgard_blend_rt *rts)
512 {
513 const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
514
515 SET_BIT(fragmeta->unknown2_4, MALI_NO_DITHER,
516 (screen->quirks & MIDGARD_SFBD) && ctx->blend &&
517 !ctx->blend->base.dither);
518
519 /* Get blending setup */
520 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
521
522 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
523 unsigned shader_offset = 0;
524 struct panfrost_bo *shader_bo = NULL;
525
526 for (unsigned c = 0; c < rt_count; ++c)
527 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
528 &shader_offset);
529
530 /* If there is a blend shader, work registers are shared. XXX: opt */
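/* (Midgard has 16 work registers total, so assume the blend shader may need
 * all of them) */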
531
532 for (unsigned c = 0; c < rt_count; ++c) {
533 if (blend[c].is_shader)
534 fragmeta->midgard1.work_count = 16;
535 }
536
537 /* Even on MFBD, the shader descriptor gets blend shaders. It's *also*
538 * copied to the blend_meta appended (by convention), but this is the
539 * field actually read by the hardware. (Or maybe both are read...?).
540 * Specify the last RTi with a blend shader. */
541
542 fragmeta->blend.shader = 0;
543
544 for (signed rt = (rt_count - 1); rt >= 0; --rt) {
545 if (!blend[rt].is_shader)
546 continue;
547
548 fragmeta->blend.shader = blend[rt].shader.gpu |
549 blend[rt].shader.first_tag;
550 break;
551 }
552
553 if (screen->quirks & MIDGARD_SFBD) {
554 /* On single render target (SFBD) platforms, the blend
555 * information lives inside the shader meta itself. We additionally
556 * need to signal CAN_DISCARD for nontrivial blend modes (so
557 * we're able to read back the destination buffer) */
558
559 SET_BIT(fragmeta->unknown2_3, MALI_HAS_BLEND_SHADER,
560 blend[0].is_shader);
561
562 if (!blend[0].is_shader) {
563 fragmeta->blend.equation = *blend[0].equation.equation;
564 fragmeta->blend.constant = blend[0].equation.constant;
565 }
566
567 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD,
568 !blend[0].no_blending);
569 return;
570 }
571
572 /* Additional blend descriptor tacked on for jobs using MFBD */
573
574 for (unsigned i = 0; i < rt_count; ++i) {
575 rts[i].flags = 0x200;
576
577 bool is_srgb = (ctx->pipe_framebuffer.nr_cbufs > i) &&
578 (ctx->pipe_framebuffer.cbufs[i]) &&
579 util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
580
581 SET_BIT(rts[i].flags, MALI_BLEND_MRT_SHADER, blend[i].is_shader);
582 SET_BIT(rts[i].flags, MALI_BLEND_LOAD_TIB, !blend[i].no_blending);
583 SET_BIT(rts[i].flags, MALI_BLEND_SRGB, is_srgb);
584 SET_BIT(rts[i].flags, MALI_BLEND_NO_DITHER, !ctx->blend->base.dither);
585
586 if (blend[i].is_shader) {
587 rts[i].blend.shader = blend[i].shader.gpu | blend[i].shader.first_tag;
588 } else {
589 rts[i].blend.equation = *blend[i].equation.equation;
590 rts[i].blend.constant = blend[i].equation.constant;
591 }
592 }
593 }
594
595 static void
596 panfrost_frag_shader_meta_init(struct panfrost_context *ctx,
597 struct mali_shader_meta *fragmeta,
598 struct midgard_blend_rt *rts)
599 {
600 const struct panfrost_screen *screen = pan_screen(ctx->base.screen);
601 struct panfrost_shader_state *fs;
602
603 fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
604
605 fragmeta->alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000);
606 fragmeta->unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010;
607 fragmeta->unknown2_4 = 0x4e0;
608
609 /* unknown2_4 has 0x10 bit set on T6XX and T720. We don't know why this
610 * is required (independent of 32-bit/64-bit descriptors), or why it's
611 * not used on later GPU revisions. Otherwise, all shader jobs fault on
612 * these earlier chips (perhaps this is a chicken bit of some kind).
613 * More investigation is needed. */
614
615 SET_BIT(fragmeta->unknown2_4, 0x10, screen->quirks & MIDGARD_SFBD);
616
617 /* Depending on whether it's legal to do so in the given shader, we
618 * try to enable early-z testing (or forward-pixel kill?) */
619
620 SET_BIT(fragmeta->midgard1.flags_lo, MALI_EARLY_Z,
621 !fs->can_discard && !fs->writes_depth);
622
623 /* Add the writes Z/S flags if needed. */
624 SET_BIT(fragmeta->midgard1.flags_lo, MALI_WRITES_Z, fs->writes_depth);
625 SET_BIT(fragmeta->midgard1.flags_hi, MALI_WRITES_S, fs->writes_stencil);
626
627 /* Any time texturing is used, derivatives are implicitly calculated,
628 * so we need to enable helper invocations */
629
630 SET_BIT(fragmeta->midgard1.flags_lo, MALI_HELPER_INVOCATIONS,
631 fs->helper_invocations);
632
633 /* CAN_DISCARD should be set if the fragment shader possibly contains a
634 * 'discard' instruction. It is likely this is related to optimizations
635 * related to forward-pixel kill, as per "Mali Performance 3: Is
636 * EGL_BUFFER_PRESERVED a good thing?" by Peter Harris */
637
638 SET_BIT(fragmeta->unknown2_3, MALI_CAN_DISCARD, fs->can_discard);
639 SET_BIT(fragmeta->midgard1.flags_lo, 0x400, fs->can_discard);
640
641 panfrost_frag_meta_rasterizer_update(ctx, fragmeta);
642 panfrost_frag_meta_zsa_update(ctx, fragmeta);
643 panfrost_frag_meta_blend_update(ctx, fragmeta, rts);
644 }
645
646 void
647 panfrost_emit_shader_meta(struct panfrost_batch *batch,
648 enum pipe_shader_type st,
649 struct midgard_payload_vertex_tiler *vtp)
650 {
651 struct panfrost_context *ctx = batch->ctx;
652 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st);
653
654 if (!ss) {
655 vtp->postfix.shader = 0;
656 return;
657 }
658
659 struct mali_shader_meta meta;
660
661 panfrost_shader_meta_init(ctx, st, &meta);
662
663 /* Add the shader BO to the batch. */
664 panfrost_batch_add_bo(batch, ss->bo,
665 PAN_BO_ACCESS_PRIVATE |
666 PAN_BO_ACCESS_READ |
667 panfrost_bo_access_for_stage(st));
668
669 mali_ptr shader_ptr;
670
671 if (st == PIPE_SHADER_FRAGMENT) {
672 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
673 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
674 size_t desc_size = sizeof(meta);
675 struct midgard_blend_rt rts[4];
676 struct panfrost_transfer xfer;
677
678 assert(rt_count <= ARRAY_SIZE(rts));
679
680 panfrost_frag_shader_meta_init(ctx, &meta, rts);
681
682 if (!(screen->quirks & MIDGARD_SFBD))
683 desc_size += sizeof(*rts) * rt_count;
684
685 xfer = panfrost_allocate_transient(batch, desc_size);
686
687 memcpy(xfer.cpu, &meta, sizeof(meta));
688 if (!(screen->quirks & MIDGARD_SFBD))
        memcpy(xfer.cpu + sizeof(meta), rts, sizeof(*rts) * rt_count);
689
690 shader_ptr = xfer.gpu;
691 } else {
692 shader_ptr = panfrost_upload_transient(batch, &meta,
693 sizeof(meta));
694 }
695
696 vtp->postfix.shader = shader_ptr;
697 }
698
699 static void
700 panfrost_mali_viewport_init(struct panfrost_context *ctx,
701 struct mali_viewport *mvp)
702 {
703 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
704
705 /* Clip bounds are encoded as floats. The viewport itself is encoded as
706 * (somewhat) asymmetric ints. */
707
708 const struct pipe_scissor_state *ss = &ctx->scissor;
709
710 memset(mvp, 0, sizeof(*mvp));
711
712 /* By default, do no viewport clipping, i.e. clip to (-inf, inf) in
713 * each direction. Clipping to the viewport in theory should work, but
714 * in practice causes issues when we're not explicitly trying to
715 * scissor */
716
717 *mvp = (struct mali_viewport) {
718 .clip_minx = -INFINITY,
719 .clip_miny = -INFINITY,
720 .clip_maxx = INFINITY,
721 .clip_maxy = INFINITY,
722 };
723
724 /* Always scissor to the viewport by default. */
725 float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
726 float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
727
728 float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
729 float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
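/* Gallium's viewport transform maps NDC [-1, 1] through scale/translate, so
 * the screen-space bounds are translate +/- |scale|; e.g. a full-window
 * viewport of width w has translate[0] = scale[0] = w/2, recovering [0, w]
 * here */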
730
731 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
732 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
733
734 /* Apply the scissor test */
735
736 unsigned minx, miny, maxx, maxy;
737
738 if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
739 minx = MAX2(ss->minx, vp_minx);
740 miny = MAX2(ss->miny, vp_miny);
741 maxx = MIN2(ss->maxx, vp_maxx);
742 maxy = MIN2(ss->maxy, vp_maxy);
743 } else {
744 minx = vp_minx;
745 miny = vp_miny;
746 maxx = vp_maxx;
747 maxy = vp_maxy;
748 }
749
750 /* Hardware needs the min/max to be strictly ordered, so flip if we
751 * need to. The viewport transformation in the vertex shader will
752 * handle the negatives if we don't */
753
754 if (miny > maxy) {
755 unsigned temp = miny;
756 miny = maxy;
757 maxy = temp;
758 }
759
760 if (minx > maxx) {
761 unsigned temp = minx;
762 minx = maxx;
763 maxx = temp;
764 }
765
766 if (minz > maxz) {
767 float temp = minz;
768 minz = maxz;
769 maxz = temp;
770 }
771
772 /* Clamp to the framebuffer size as a last check */
773
774 minx = MIN2(ctx->pipe_framebuffer.width, minx);
775 maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
776
777 miny = MIN2(ctx->pipe_framebuffer.height, miny);
778 maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
779
780 /* Upload */
781
782 mvp->viewport0[0] = minx;
783 mvp->viewport1[0] = MALI_POSITIVE(maxx);
784
785 mvp->viewport0[1] = miny;
786 mvp->viewport1[1] = MALI_POSITIVE(maxy);
787
788 mvp->clip_minz = minz;
789 mvp->clip_maxz = maxz;
790 }
791
792 void
793 panfrost_emit_viewport(struct panfrost_batch *batch,
794 struct midgard_payload_vertex_tiler *tp)
795 {
796 struct panfrost_context *ctx = batch->ctx;
797 struct mali_viewport mvp;
798
799 panfrost_mali_viewport_init(batch->ctx, &mvp);
800
801 /* Update the job, unless we're doing wallpapering (whose lack of
802 * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
803 * just... be faster :) */
804
805 if (!ctx->wallpaper_batch)
806 panfrost_batch_union_scissor(batch, mvp.viewport0[0],
807 mvp.viewport0[1],
808 mvp.viewport1[0] + 1,
809 mvp.viewport1[1] + 1);
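/* (viewport1 stores the maximum as MALI_POSITIVE, i.e. max - 1, so the + 1
 * recovers the original bound for the union) */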
810
811 tp->postfix.viewport = panfrost_upload_transient(batch, &mvp,
812 sizeof(mvp));
813 }
814
815 static mali_ptr
816 panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
817 enum pipe_shader_type st,
818 struct panfrost_constant_buffer *buf,
819 unsigned index)
820 {
821 struct pipe_constant_buffer *cb = &buf->cb[index];
822 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
823
824 if (rsrc) {
825 panfrost_batch_add_bo(batch, rsrc->bo,
826 PAN_BO_ACCESS_SHARED |
827 PAN_BO_ACCESS_READ |
828 panfrost_bo_access_for_stage(st));
829
830 /* Alignment guaranteed by
831 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
832 return rsrc->bo->gpu + cb->buffer_offset;
833 } else if (cb->user_buffer) {
834 return panfrost_upload_transient(batch,
835 cb->user_buffer +
836 cb->buffer_offset,
837 cb->buffer_size);
838 } else {
839 unreachable("No constant buffer");
840 }
841 }
842
843 struct sysval_uniform {
844 union {
845 float f[4];
846 int32_t i[4];
847 uint32_t u[4];
848 uint64_t du[2];
849 };
850 };
851
852 static void
853 panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
854 struct sysval_uniform *uniform)
855 {
856 struct panfrost_context *ctx = batch->ctx;
857 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
858
859 uniform->f[0] = vp->scale[0];
860 uniform->f[1] = vp->scale[1];
861 uniform->f[2] = vp->scale[2];
862 }
863
864 static void
865 panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
866 struct sysval_uniform *uniform)
867 {
868 struct panfrost_context *ctx = batch->ctx;
869 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
870
871 uniform->f[0] = vp->translate[0];
872 uniform->f[1] = vp->translate[1];
873 uniform->f[2] = vp->translate[2];
874 }
875
876 static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
877 enum pipe_shader_type st,
878 unsigned int sysvalid,
879 struct sysval_uniform *uniform)
880 {
881 struct panfrost_context *ctx = batch->ctx;
882 unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
883 unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
884 bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
885 struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
886
887 assert(dim);
888 uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
889
890 if (dim > 1)
891 uniform->i[1] = u_minify(tex->texture->height0,
892 tex->u.tex.first_level);
893
894 if (dim > 2)
895 uniform->i[2] = u_minify(tex->texture->depth0,
896 tex->u.tex.first_level);
897
898 if (is_array)
899 uniform->i[dim] = tex->texture->array_size;
900 }
901
902 static void
903 panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
904 enum pipe_shader_type st,
905 unsigned ssbo_id,
906 struct sysval_uniform *uniform)
907 {
908 struct panfrost_context *ctx = batch->ctx;
909
910 assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
911 struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
912
913 /* Compute address */
914 struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
915
916 panfrost_batch_add_bo(batch, bo,
917 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
918 panfrost_bo_access_for_stage(st));
919
920 /* Upload address and size as sysval */
921 uniform->du[0] = bo->gpu + sb.buffer_offset;
922 uniform->u[2] = sb.buffer_size;
923 }
924
925 static void
926 panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
927 enum pipe_shader_type st,
928 unsigned samp_idx,
929 struct sysval_uniform *uniform)
930 {
931 struct panfrost_context *ctx = batch->ctx;
932 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
933
934 uniform->f[0] = sampl->min_lod;
935 uniform->f[1] = sampl->max_lod;
936 uniform->f[2] = sampl->lod_bias;
937
938 /* Even without any errata, Midgard represents "no mipmapping" as
939 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
940 * panfrost_create_sampler_state which also explains our choice of
941 * epsilon value (again to keep behaviour consistent) */
942
943 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
944 uniform->f[1] = uniform->f[0] + (1.0/256.0);
945 }
946
947 static void
948 panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
949 struct sysval_uniform *uniform)
950 {
951 struct panfrost_context *ctx = batch->ctx;
952
953 uniform->u[0] = ctx->compute_grid->grid[0];
954 uniform->u[1] = ctx->compute_grid->grid[1];
955 uniform->u[2] = ctx->compute_grid->grid[2];
956 }
957
958 static void
959 panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
960 struct panfrost_shader_state *ss,
961 enum pipe_shader_type st)
962 {
963 struct sysval_uniform *uniforms = (void *)buf;
964
965 for (unsigned i = 0; i < ss->sysval_count; ++i) {
966 int sysval = ss->sysval[i];
967
968 switch (PAN_SYSVAL_TYPE(sysval)) {
969 case PAN_SYSVAL_VIEWPORT_SCALE:
970 panfrost_upload_viewport_scale_sysval(batch,
971 &uniforms[i]);
972 break;
973 case PAN_SYSVAL_VIEWPORT_OFFSET:
974 panfrost_upload_viewport_offset_sysval(batch,
975 &uniforms[i]);
976 break;
977 case PAN_SYSVAL_TEXTURE_SIZE:
978 panfrost_upload_txs_sysval(batch, st,
979 PAN_SYSVAL_ID(sysval),
980 &uniforms[i]);
981 break;
982 case PAN_SYSVAL_SSBO:
983 panfrost_upload_ssbo_sysval(batch, st,
984 PAN_SYSVAL_ID(sysval),
985 &uniforms[i]);
986 break;
987 case PAN_SYSVAL_NUM_WORK_GROUPS:
988 panfrost_upload_num_work_groups_sysval(batch,
989 &uniforms[i]);
990 break;
991 case PAN_SYSVAL_SAMPLER:
992 panfrost_upload_sampler_sysval(batch, st,
993 PAN_SYSVAL_ID(sysval),
994 &uniforms[i]);
995 break;
996 default:
997 assert(0);
998 }
999 }
1000 }
1001
1002 static const void *
1003 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
1004 unsigned index)
1005 {
1006 struct pipe_constant_buffer *cb = &buf->cb[index];
1007 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
1008
1009 if (rsrc)
1010 return rsrc->bo->cpu;
1011 else if (cb->user_buffer)
1012 return cb->user_buffer;
1013 else
1014 unreachable("No constant buffer");
1015 }
1016
1017 void
1018 panfrost_emit_const_buf(struct panfrost_batch *batch,
1019 enum pipe_shader_type stage,
1020 struct midgard_payload_vertex_tiler *vtp)
1021 {
1022 struct panfrost_context *ctx = batch->ctx;
1023 struct panfrost_shader_variants *all = ctx->shader[stage];
1024
1025 if (!all)
1026 return;
1027
1028 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];
1029
1030 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1031
1032 /* Uniforms are implicitly UBO #0 */
1033 bool has_uniforms = buf->enabled_mask & (1 << 0);
1034
1035 /* Allocate room for the sysval and the uniforms */
1036 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1037 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1038 size_t size = sys_size + uniform_size;
1039 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1040 size);
1041
1042 /* Upload sysvals requested by the shader */
1043 panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);
1044
1045 /* Upload uniforms */
1046 if (has_uniforms && uniform_size) {
1047 const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1048 memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1049 }
1050
1051 struct mali_vertex_tiler_postfix *postfix = &vtp->postfix;
1052
1053 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1054 * uploaded */
1055
1056 unsigned ubo_count = panfrost_ubo_count(ctx, stage);
1057 assert(ubo_count >= 1);
1058
1059 size_t sz = sizeof(uint64_t) * ubo_count;
1060 uint64_t ubos[PAN_MAX_CONST_BUFFERS];
1061 int uniform_count = ss->uniform_count;
1062
1063 /* Upload uniforms as a UBO */
1064 ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
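/* (MALI_MAKE_UBO pairs a size in 16-byte fields with the GPU address,
 * matching the aligned / bytes_per_field entries built for the real UBOs
 * below) */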
1065
1066 /* The rest are honest-to-goodness UBOs */
1067
1068 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1069 size_t usz = buf->cb[ubo].buffer_size;
1070 bool enabled = buf->enabled_mask & (1 << ubo);
1071 bool empty = usz == 0;
1072
1073 if (!enabled || empty) {
1074 /* Stub out disabled UBOs to catch accesses */
1075 ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
1076 continue;
1077 }
1078
1079 mali_ptr gpu = panfrost_map_constant_buffer_gpu(batch, stage,
1080 buf, ubo);
1081
1082 unsigned bytes_per_field = 16;
1083 unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1084 ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
1085 }
1086
1087 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1088 postfix->uniforms = transfer.gpu;
1089 postfix->uniform_buffers = ubufs;
1090
1091 buf->dirty_mask = 0;
1092 }
1093
1094 void
1095 panfrost_emit_shared_memory(struct panfrost_batch *batch,
1096 const struct pipe_grid_info *info,
1097 struct midgard_payload_vertex_tiler *vtp)
1098 {
1099 struct panfrost_context *ctx = batch->ctx;
1100 struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
1101 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1102 unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
1103 128));
1104 unsigned shared_size = single_size * info->grid[0] * info->grid[1] *
1105 info->grid[2] * 4;
1106 struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
1107 shared_size,
1108 1);
1109
1110 struct mali_shared_memory shared = {
1111 .shared_memory = bo->gpu,
1112 .shared_workgroup_count =
1113 util_logbase2_ceil(info->grid[0]) +
1114 util_logbase2_ceil(info->grid[1]) +
1115 util_logbase2_ceil(info->grid[2]),
1116 .shared_unk1 = 0x2,
1117 .shared_shift = util_logbase2(single_size) - 1
1118 };
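/* (shared_shift appears to encode log2 of the per-workgroup slice;
 * single_size was rounded up to a power of two of at least 128 bytes above) */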
1119
1120 vtp->postfix.shared_memory = panfrost_upload_transient(batch, &shared,
1121 sizeof(shared));
1122 }
1123
1124 static mali_ptr
1125 panfrost_get_tex_desc(struct panfrost_batch *batch,
1126 enum pipe_shader_type st,
1127 struct panfrost_sampler_view *view)
1128 {
1129 if (!view)
1130 return (mali_ptr) 0;
1131
1132 struct pipe_sampler_view *pview = &view->base;
1133 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1134
1135 /* Add the BO to the job so it's retained until the job is done. */
1136
1137 panfrost_batch_add_bo(batch, rsrc->bo,
1138 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1139 panfrost_bo_access_for_stage(st));
1140
1141 panfrost_batch_add_bo(batch, view->bo,
1142 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1143 panfrost_bo_access_for_stage(st));
1144
1145 return view->bo->gpu;
1146 }
1147
1148 void
1149 panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
1150 enum pipe_shader_type stage,
1151 struct midgard_payload_vertex_tiler *vtp)
1152 {
1153 struct panfrost_context *ctx = batch->ctx;
1154
1155 if (!ctx->sampler_view_count[stage])
1156 return;
1157
1158 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
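/* Each "trampoline" is just the GPU address of a texture descriptor; the
 * hardware indexes this table of pointers by texture unit */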
1159
1160 for (int i = 0; i < ctx->sampler_view_count[stage]; ++i)
1161 trampolines[i] = panfrost_get_tex_desc(batch, stage,
1162 ctx->sampler_views[stage][i]);
1163
1164 vtp->postfix.texture_trampoline = panfrost_upload_transient(batch,
1165 trampolines,
1166 sizeof(uint64_t) *
1167 ctx->sampler_view_count[stage]);
1168 }
1169
1170 void
1171 panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
1172 enum pipe_shader_type stage,
1173 struct midgard_payload_vertex_tiler *vtp)
1174 {
1175 struct panfrost_context *ctx = batch->ctx;
1176
1177 if (!ctx->sampler_count[stage])
1178 return;
1179
1180 size_t desc_size = sizeof(struct mali_sampler_descriptor);
1181 size_t transfer_size = desc_size * ctx->sampler_count[stage];
1182 struct panfrost_transfer transfer = panfrost_allocate_transient(batch,
1183 transfer_size);
1184 struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *)transfer.cpu;
1185
1186 for (int i = 0; i < ctx->sampler_count[stage]; ++i)
1187 desc[i] = ctx->samplers[stage][i]->hw;
1188
1189 vtp->postfix.sampler_descriptor = transfer.gpu;
1190 }
1191
1192 void
1193 panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
1194 struct midgard_payload_vertex_tiler *vp)
1195 {
1196 struct panfrost_context *ctx = batch->ctx;
1197
1198 if (!ctx->vertex)
1199 return;
1200
1201 struct panfrost_vertex_state *so = ctx->vertex;
1202
1203 panfrost_vertex_state_upd_attr_offs(ctx, vp);
1204 vp->postfix.attribute_meta = panfrost_upload_transient(batch, so->hw,
1205 sizeof(*so->hw) *
1206 PAN_MAX_ATTRIBUTE);
1207 }
1208
1209 void
1210 panfrost_emit_vertex_data(struct panfrost_batch *batch,
1211 struct midgard_payload_vertex_tiler *vp)
1212 {
1213 struct panfrost_context *ctx = batch->ctx;
1214 struct panfrost_vertex_state *so = ctx->vertex;
1215
1216 /* Staged mali_attr, and index into them. i =/= k, depending on the
1217 * vertex buffer mask and instancing. Twice as much room is allocated,
1218 * for a worst case of NPOT_DIVIDEs, which take up an extra slot each */
1219 union mali_attr attrs[PIPE_MAX_ATTRIBS * 2];
1220 unsigned k = 0;
1221
1222 for (unsigned i = 0; i < so->num_elements; ++i) {
1223 /* We map a mali_attr to be 1:1 with the mali_attr_meta, which
1224 * means duplicating some vertex buffers (who cares? aside from
1225 * maybe some caching implications but I somehow doubt that
1226 * matters) */
1227
1228 struct pipe_vertex_element *elem = &so->pipe[i];
1229 unsigned vbi = elem->vertex_buffer_index;
1230
1231 /* The exception to 1:1 mapping is that we can have multiple
1232 * entries (NPOT divisors), so we fix up anyway */
1233
1234 so->hw[i].index = k;
1235
1236 if (!(ctx->vb_mask & (1 << vbi)))
1237 continue;
1238
1239 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
1240 struct panfrost_resource *rsrc;
1241
1242 rsrc = pan_resource(buf->buffer.resource);
1243 if (!rsrc)
1244 continue;
1245
1246 /* Align to 64 bytes by masking off the lower bits. This
1247 * will be adjusted back when we fixup the src_offset in
1248 * mali_attr_meta */
1249
1250 mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
1251 mali_ptr addr = raw_addr & ~63;
1252 unsigned chopped_addr = raw_addr - addr;
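/* e.g. for a 64-byte-aligned BO with buffer_offset = 100, addr keeps
 * base + 64 and chopped_addr = 36; the difference is added back to the size
 * below and folded into src_offset in the attribute meta */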
1253
1254 /* Add a dependency of the batch on the vertex buffer */
1255 panfrost_batch_add_bo(batch, rsrc->bo,
1256 PAN_BO_ACCESS_SHARED |
1257 PAN_BO_ACCESS_READ |
1258 PAN_BO_ACCESS_VERTEX_TILER);
1259
1260 /* Set common fields */
1261 attrs[k].elements = addr;
1262 attrs[k].stride = buf->stride;
1263
1264 /* Since we advanced the base pointer, we shrink the buffer
1265 * size */
1266 attrs[k].size = rsrc->base.width0 - buf->buffer_offset;
1267
1268 /* We need to add the extra size we masked off (for
1269 * correctness) so the data doesn't get clamped away */
1270 attrs[k].size += chopped_addr;
1271
1272 /* For non-instancing make sure we initialize */
1273 attrs[k].shift = attrs[k].extra_flags = 0;
1274
1275 /* Instancing uses a dramatically different code path than
1276 * linear, so dispatch for the actual emission now that the
1277 * common code is finished */
1278
1279 unsigned divisor = elem->instance_divisor;
1280
1281 if (divisor && ctx->instance_count == 1) {
1282 /* Silly corner case where there's a divisor(=1) but
1283 * there's no legitimate instancing. So we want *every*
1284 * attribute to be the same. So set stride to zero so
1285 * we don't go anywhere. */
1286
1287 attrs[k].size = attrs[k].stride + chopped_addr;
1288 attrs[k].stride = 0;
1289 attrs[k++].elements |= MALI_ATTR_LINEAR;
1290 } else if (ctx->instance_count <= 1) {
1291 /* Normal, non-instanced attributes */
1292 attrs[k++].elements |= MALI_ATTR_LINEAR;
1293 } else {
1294 unsigned instance_shift = vp->instance_shift;
1295 unsigned instance_odd = vp->instance_odd;
1296
1297 k += panfrost_vertex_instanced(ctx->padded_count,
1298 instance_shift,
1299 instance_odd,
1300 divisor, &attrs[k]);
1301 }
1302 }
1303
1304 /* Add special gl_VertexID/gl_InstanceID buffers */
1305
1306 panfrost_vertex_id(ctx->padded_count, &attrs[k]);
1307 so->hw[PAN_VERTEX_ID].index = k++;
1308 panfrost_instance_id(ctx->padded_count, &attrs[k]);
1309 so->hw[PAN_INSTANCE_ID].index = k++;
1310
1311 /* Upload whatever we emitted and go */
1312
1313 vp->postfix.attributes = panfrost_upload_transient(batch, attrs,
1314 k * sizeof(*attrs));
1315 }
1316
1317 void
1318 panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
1319 struct midgard_payload_vertex_tiler *vp,
1320 struct midgard_payload_vertex_tiler *tp)
1321 {
1322 struct panfrost_context *ctx = batch->ctx;
1323 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
1324
1325 if (wallpapering) {
1326 /* Inject in reverse order, with "predicted" job indices.
1327 * THIS IS A HACK XXX */
1328 panfrost_new_job(batch, JOB_TYPE_TILER, false,
1329 batch->job_index + 2, tp, sizeof(*tp), true);
1330 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1331 vp, sizeof(*vp), true);
1332 return;
1333 }
1334
1335 /* If rasterizer discard is enabled, only submit the vertex job */
1336
1337 bool rasterizer_discard = ctx->rasterizer &&
1338 ctx->rasterizer->base.rasterizer_discard;
1339
1340 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0,
1341 vp, sizeof(*vp), false);
1342
1343 if (rasterizer_discard)
1344 return;
1345
1346 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tp, sizeof(*tp),
1347 false);
1348 }