src/gallium/drivers/panfrost/pan_context.c

   1 /*
   2  * © Copyright 2018 Alyssa Rosenzweig
   3  * Copyright © 2014-2017 Broadcom
   4  * Copyright (C) 2017 Intel Corporation
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23  * SOFTWARE.
  24  *
  25  */
  26
  27 #include <sys/poll.h>
  28 #include <errno.h>
  29
  30 #include "pan_context.h"
  31 #include "pan_format.h"
  32
  33 #include "util/macros.h"
  34 #include "util/u_format.h"
  35 #include "util/u_inlines.h"
  36 #include "util/u_upload_mgr.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_vbuf.h"
  39 #include "util/half_float.h"
  40 #include "util/u_helpers.h"
  41 #include "util/u_format.h"
  42 #include "util/u_prim.h"
  43 #include "util/u_prim_restart.h"
  44 #include "indices/u_primconvert.h"
  45 #include "tgsi/tgsi_parse.h"
  46 #include "tgsi/tgsi_from_mesa.h"
  47 #include "util/u_math.h"
  48
  49 #include "pan_screen.h"
  50 #include "pan_blending.h"
  51 #include "pan_blend_shaders.h"
  52 #include "pan_util.h"
  53
  54 /* Framebuffer descriptor */
  55
  56 static struct midgard_tiler_descriptor
  57 panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
  58 {
  59         struct panfrost_context *ctx = batch->ctx;
  60         struct midgard_tiler_descriptor t = {};
  61         unsigned height = batch->key.height;
  62         unsigned width = batch->key.width;
  63
  64         t.hierarchy_mask =
  65                 panfrost_choose_hierarchy_mask(width, height, vertex_count);
  66
  67         /* Compute the polygon header size and use that to offset the body */
  68
  69         unsigned header_size = panfrost_tiler_header_size(
  70                                        width, height, t.hierarchy_mask);
  71
  72         t.polygon_list_size = panfrost_tiler_full_size(
  73                                      width, height, t.hierarchy_mask);
  74
  75         /* Sanity check */
  76
  77         if (t.hierarchy_mask) {
  78                 t.polygon_list = panfrost_batch_get_polygon_list(batch,
  79                                                                  header_size +
  80                                                                  t.polygon_list_size);
  81
  82
  83                 /* Allow the entire tiler heap */
  84                 t.heap_start = ctx->tiler_heap->gpu;
  85                 t.heap_end = ctx->tiler_heap->gpu + ctx->tiler_heap->size;
  86         } else {
  87                 /* The tiler is disabled, so don't allow the tiler heap */
  88                 t.heap_start = ctx->tiler_heap->gpu;
  89                 t.heap_end = t.heap_start;
  90
  91                 /* Use a dummy polygon list */
  92                 t.polygon_list = ctx->tiler_dummy->gpu;
  93
  94                 /* Disable the tiler */
  95                 t.hierarchy_mask |= MALI_TILER_DISABLED;
  96         }
  97
  98         t.polygon_list_body =
  99                 t.polygon_list + header_size;
 100
 101         return t;
 102 }
 103
 104 struct mali_single_framebuffer
 105 panfrost_emit_sfbd(struct panfrost_batch *batch, unsigned vertex_count)
 106 {
 107         struct panfrost_context *ctx = batch->ctx;
 108         unsigned width = batch->key.width;
 109         unsigned height = batch->key.height;
 110
 111         struct mali_single_framebuffer framebuffer = {
 112                 .width = MALI_POSITIVE(width),
 113                 .height = MALI_POSITIVE(height),
 114                 .unknown2 = 0x1f,
 115                 .format = 0x30000000,
 116                 .clear_flags = 0x1000,
 117                 .unknown_address_0 = ctx->scratchpad->gpu,
 118                 .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
 119         };
 120
 121         return framebuffer;
 122 }
 123
 124 struct bifrost_framebuffer
 125 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 126 {
 127         struct panfrost_context *ctx = batch->ctx;
 128         unsigned width = batch->key.width;
 129         unsigned height = batch->key.height;
 130
 131         struct bifrost_framebuffer framebuffer = {
 132                 .unk0 = 0x1e5, /* 1e4 if no spill */
 133                 .width1 = MALI_POSITIVE(width),
 134                 .height1 = MALI_POSITIVE(height),
 135                 .width2 = MALI_POSITIVE(width),
 136                 .height2 = MALI_POSITIVE(height),
 137
 138                 .unk1 = 0x1080,
 139
 140                 .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
 141                 .rt_count_2 = 4,
 142
 143                 .unknown2 = 0x1f,
 144
 145                 .scratchpad = ctx->scratchpad->gpu,
 146                 .tiler = panfrost_emit_midg_tiler(batch, vertex_count)
 147         };
 148
 149         return framebuffer;
 150 }
 151
 152 static void
 153 panfrost_clear(
 154         struct pipe_context *pipe,
 155         unsigned buffers,
 156         const union pipe_color_union *color,
 157         double depth, unsigned stencil)
 158 {
 159         struct panfrost_context *ctx = pan_context(pipe);
 160         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 161
 162         panfrost_batch_clear(batch, buffers, color, depth, stencil);
 163 }
 164
 165 static mali_ptr
 166 panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
 167 {
 168         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 169         struct bifrost_framebuffer mfbd = panfrost_emit_mfbd(batch, ~0);
 170
 171         return panfrost_upload_transient(batch, &mfbd, sizeof(mfbd)) | MALI_MFBD;
 172 }
 173
 174 static mali_ptr
 175 panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
 176 {
 177         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 178         struct mali_single_framebuffer sfbd = panfrost_emit_sfbd(batch, ~0);
 179
 180         return panfrost_upload_transient(batch, &sfbd, sizeof(sfbd)) | MALI_SFBD;
 181 }
 182
 183 static void
 184 panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
 185 {
 186         /* Skip the attach if we can */
 187
 188         if (ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer) {
 189                 assert(ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
 190                 return;
 191         }
 192
 193         struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 194         mali_ptr framebuffer = screen->require_sfbd ?
 195                                panfrost_attach_vt_sfbd(ctx) :
 196                                panfrost_attach_vt_mfbd(ctx);
 197
 198         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
 199                 ctx->payloads[i].postfix.framebuffer = framebuffer;
 200 }
 201
 202 /* Reset per-frame context, called on context initialisation as well as after
 203  * flushing a frame */
 204
 205 static void
 206 panfrost_invalidate_frame(struct panfrost_context *ctx)
 207 {
 208         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
 209                 ctx->payloads[i].postfix.framebuffer = 0;
 210
 211         if (ctx->rasterizer)
 212                 ctx->dirty |= PAN_DIRTY_RASTERIZER;
 213
 214         /* XXX */
 215         ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
 216
 217         /* TODO: When does this need to be handled? */
 218         ctx->active_queries = true;
 219 }
 220
 221 /* In practice, every field of these payloads should be configurable
 222  * arbitrarily, which means these functions are basically catch-all's for
 223  * as-of-yet unwavering unknowns */
 224
 225 static void
 226 panfrost_emit_vertex_payload(struct panfrost_context *ctx)
 227 {
 228         /* 0x2 bit clear on 32-bit T6XX */
 229
 230         struct midgard_payload_vertex_tiler payload = {
 231                 .gl_enables = 0x4 | 0x2,
 232         };
 233
 234         /* Vertex and compute are closely coupled, so share a payload */
 235
 236         memcpy(&ctx->payloads[PIPE_SHADER_VERTEX], &payload, sizeof(payload));
 237         memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload));
 238 }
 239
 240 static void
 241 panfrost_emit_tiler_payload(struct panfrost_context *ctx)
 242 {
 243         struct midgard_payload_vertex_tiler payload = {
 244                 .prefix = {
 245                         .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */
 246                 },
 247         };
 248
 249         memcpy(&ctx->payloads[PIPE_SHADER_FRAGMENT], &payload, sizeof(payload));
 250 }
 251
 252 static unsigned
 253 translate_tex_wrap(enum pipe_tex_wrap w)
 254 {
 255         switch (w) {
 256         case PIPE_TEX_WRAP_REPEAT:
 257                 return MALI_WRAP_REPEAT;
 258
 259                 /* TODO: lower GL_CLAMP? */
 260         case PIPE_TEX_WRAP_CLAMP:
 261         case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 262                 return MALI_WRAP_CLAMP_TO_EDGE;
 263
 264         case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 265                 return MALI_WRAP_CLAMP_TO_BORDER;
 266
 267         case PIPE_TEX_WRAP_MIRROR_REPEAT:
 268                 return MALI_WRAP_MIRRORED_REPEAT;
 269
 270         default:
 271                 unreachable("Invalid wrap");
 272         }
 273 }
 274
 275 static unsigned
 276 panfrost_translate_compare_func(enum pipe_compare_func in)
 277 {
 278         switch (in) {
 279         case PIPE_FUNC_NEVER:
 280                 return MALI_FUNC_NEVER;
 281
 282         case PIPE_FUNC_LESS:
 283                 return MALI_FUNC_LESS;
 284
 285         case PIPE_FUNC_EQUAL:
 286                 return MALI_FUNC_EQUAL;
 287
 288         case PIPE_FUNC_LEQUAL:
 289                 return MALI_FUNC_LEQUAL;
 290
 291         case PIPE_FUNC_GREATER:
 292                 return MALI_FUNC_GREATER;
 293
 294         case PIPE_FUNC_NOTEQUAL:
 295                 return MALI_FUNC_NOTEQUAL;
 296
 297         case PIPE_FUNC_GEQUAL:
 298                 return MALI_FUNC_GEQUAL;
 299
 300         case PIPE_FUNC_ALWAYS:
 301                 return MALI_FUNC_ALWAYS;
 302
 303         default:
 304                 unreachable("Invalid func");
 305         }
 306 }
 307
 308 static unsigned
 309 panfrost_translate_alt_compare_func(enum pipe_compare_func in)
 310 {
 311         switch (in) {
 312         case PIPE_FUNC_NEVER:
 313                 return MALI_ALT_FUNC_NEVER;
 314
 315         case PIPE_FUNC_LESS:
 316                 return MALI_ALT_FUNC_LESS;
 317
 318         case PIPE_FUNC_EQUAL:
 319                 return MALI_ALT_FUNC_EQUAL;
 320
 321         case PIPE_FUNC_LEQUAL:
 322                 return MALI_ALT_FUNC_LEQUAL;
 323
 324         case PIPE_FUNC_GREATER:
 325                 return MALI_ALT_FUNC_GREATER;
 326
 327         case PIPE_FUNC_NOTEQUAL:
 328                 return MALI_ALT_FUNC_NOTEQUAL;
 329
 330         case PIPE_FUNC_GEQUAL:
 331                 return MALI_ALT_FUNC_GEQUAL;
 332
 333         case PIPE_FUNC_ALWAYS:
 334                 return MALI_ALT_FUNC_ALWAYS;
 335
 336         default:
 337                 unreachable("Invalid alt func");
 338         }
 339 }
 340
 341 static unsigned
 342 panfrost_translate_stencil_op(enum pipe_stencil_op in)
 343 {
 344         switch (in) {
 345         case PIPE_STENCIL_OP_KEEP:
 346                 return MALI_STENCIL_KEEP;
 347
 348         case PIPE_STENCIL_OP_ZERO:
 349                 return MALI_STENCIL_ZERO;
 350
 351         case PIPE_STENCIL_OP_REPLACE:
 352                 return MALI_STENCIL_REPLACE;
 353
 354         case PIPE_STENCIL_OP_INCR:
 355                 return MALI_STENCIL_INCR;
 356
 357         case PIPE_STENCIL_OP_DECR:
 358                 return MALI_STENCIL_DECR;
 359
 360         case PIPE_STENCIL_OP_INCR_WRAP:
 361                 return MALI_STENCIL_INCR_WRAP;
 362
 363         case PIPE_STENCIL_OP_DECR_WRAP:
 364                 return MALI_STENCIL_DECR_WRAP;
 365
 366         case PIPE_STENCIL_OP_INVERT:
 367                 return MALI_STENCIL_INVERT;
 368
 369         default:
 370                 unreachable("Invalid stencil op");
 371         }
 372 }
 373
 374 static void
 375 panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
 376 {
 377         out->ref = 0; /* Gallium gets it from elsewhere */
 378
 379         out->mask = in->valuemask;
 380         out->func = panfrost_translate_compare_func(in->func);
 381         out->sfail = panfrost_translate_stencil_op(in->fail_op);
 382         out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
 383         out->dppass = panfrost_translate_stencil_op(in->zpass_op);
 384 }
 385
 386 static void
 387 panfrost_default_shader_backend(struct panfrost_context *ctx)
 388 {
 389         struct mali_shader_meta shader = {
 390                 .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
 391
 392                 .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
 393                 .unknown2_4 = MALI_NO_MSAA | 0x4e0,
 394         };
 395
 396         /* unknown2_4 has 0x10 bit set on T6XX. We don't know why this is
 397          * required (independent of 32-bit/64-bit descriptors), or why it's not
 398          * used on later GPU revisions. Otherwise, all shader jobs fault on
 399          * these earlier chips (perhaps this is a chicken bit of some kind).
 400          * More investigation is needed. */
 401
 402         if (ctx->is_t6xx) {
 403                 shader.unknown2_4 |= 0x10;
 404         }
 405
 406         struct pipe_stencil_state default_stencil = {
 407                 .enabled = 0,
 408                 .func = PIPE_FUNC_ALWAYS,
 409                 .fail_op = MALI_STENCIL_KEEP,
 410                 .zfail_op = MALI_STENCIL_KEEP,
 411                 .zpass_op = MALI_STENCIL_KEEP,
 412                 .writemask = 0xFF,
 413                 .valuemask = 0xFF
 414         };
 415
 416         panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
 417         shader.stencil_mask_front = default_stencil.writemask;
 418
 419         panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
 420         shader.stencil_mask_back = default_stencil.writemask;
 421
 422         if (default_stencil.enabled)
 423                 shader.unknown2_4 |= MALI_STENCIL_TEST;
 424
 425         memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
 426 }
 427
 428 /* Generates a vertex/tiler job. This is, in some sense, the heart of the
 * graphics command stream. It should be called once per draw, according to
 430  * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
 431  * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
 432  * vertex jobs. */
 433
 434 struct panfrost_transfer
 435 panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
 436 {
 437         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 438         struct mali_job_descriptor_header job = {
 439                 .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
 440                 .job_descriptor_size = 1,
 441         };
 442
 443         struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX];
 444
 445         struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
 446         memcpy(transfer.cpu, &job, sizeof(job));
 447         memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
 448         return transfer;
 449 }
 450
 451 mali_ptr
 452 panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
 453 {
 454         struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
 455         struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
 456
 457         return rsrc->bo->gpu + buf->buffer_offset;
 458 }
 459
 460 static bool
 461 panfrost_writes_point_size(struct panfrost_context *ctx)
 462 {
 463         assert(ctx->shader[PIPE_SHADER_VERTEX]);
 464         struct panfrost_shader_state *vs = &ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
 465
 466         return vs->writes_point_size && ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS;
 467 }
 468
 469 /* Stage the attribute descriptors so we can adjust src_offset
 470  * to let BOs align nicely */
 471
 472 static void
 473 panfrost_stage_attributes(struct panfrost_context *ctx)
 474 {
 475         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 476         struct panfrost_vertex_state *so = ctx->vertex;
 477
 478         size_t sz = sizeof(struct mali_attr_meta) * so->num_elements;
 479         struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz);
 480         struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
 481
 482         /* Copy as-is for the first pass */
 483         memcpy(target, so->hw, sz);
 484
 485         /* Fixup offsets for the second pass. Recall that the hardware
 486          * calculates attribute addresses as:
 487          *
 488          *      addr = base + (stride * vtx) + src_offset;
 489          *
 490          * However, on Mali, base must be aligned to 64-bytes, so we
 491          * instead let:
 492          *
 493          *      base' = base & ~63 = base - (base & 63)
 494          *
 495          * To compensate when using base' (see emit_vertex_data), we have
 496          * to adjust src_offset by the masked off piece:
 497          *
 498          *      addr' = base' + (stride * vtx) + (src_offset + (base & 63))
 499          *            = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
 500          *            = base + (stride * vtx) + src_offset
 501          *            = addr;
 502          *
 503          * QED.
 504          */
 505
 506         unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
 507
 508         for (unsigned i = 0; i < so->num_elements; ++i) {
 509                 unsigned vbi = so->pipe[i].vertex_buffer_index;
 510                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
 511                 mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);
 512
 513                 /* Adjust by the masked off bits of the offset */
 514                 target[i].src_offset += (addr & 63);
 515
 516                 /* Also, somewhat obscurely per-instance data needs to be
 517                  * offset in response to a delayed start in an indexed draw */
 518
 519                 if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start) {
 520                         target[i].src_offset -= buf->stride * start;
 521                 }
 522
 523
 524         }
 525
 526         ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
 527 }
 528
 529 static void
 530 panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
 531 {
 532         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 533         size_t desc_size = sizeof(struct mali_sampler_descriptor);
 534
 535         for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
 536                 mali_ptr upload = 0;
 537
 538                 if (ctx->sampler_count[t] && ctx->sampler_view_count[t]) {
 539                         size_t transfer_size = desc_size * ctx->sampler_count[t];
 540
 541                         struct panfrost_transfer transfer =
 542                                 panfrost_allocate_transient(batch, transfer_size);
 543
 544                         struct mali_sampler_descriptor *desc =
 545                                 (struct mali_sampler_descriptor *) transfer.cpu;
 546
 547                         for (int i = 0; i < ctx->sampler_count[t]; ++i)
 548                                 desc[i] = ctx->samplers[t][i]->hw;
 549
 550                         upload = transfer.gpu;
 551                 }
 552
 553                 ctx->payloads[t].postfix.sampler_descriptor = upload;
 554         }
 555 }
 556
 557 static enum mali_texture_layout
 558 panfrost_layout_for_texture(struct panfrost_resource *rsrc)
 559 {
 560         /* TODO: other linear depth textures */
 561         bool is_depth = rsrc->base.format == PIPE_FORMAT_Z32_UNORM;
 562
 563         switch (rsrc->layout) {
 564         case PAN_AFBC:
 565                 return MALI_TEXTURE_AFBC;
 566         case PAN_TILED:
 567                 assert(!is_depth);
 568                 return MALI_TEXTURE_TILED;
 569         case PAN_LINEAR:
 570                 return is_depth ? MALI_TEXTURE_TILED : MALI_TEXTURE_LINEAR;
 571         default:
 572                 unreachable("Invalid texture layout");
 573         }
 574 }
 575
/* Finalise and upload the texture descriptor for a sampler view.
 *
 * Patches the view's cached hardware descriptor (view->hw) in place with the
 * current layout, manual-stride flag and per-level/per-layer addresses, adds
 * the backing BO to the current batch, and uploads the descriptor as
 * transient memory.
 *
 * Returns the GPU address of the uploaded descriptor, or 0 if view is NULL. */

static mali_ptr
panfrost_upload_tex(
        struct panfrost_context *ctx,
        struct panfrost_sampler_view *view)
{
        /* Unbound slot: nothing to upload */
        if (!view)
                return (mali_ptr) 0;

        struct pipe_sampler_view *pview = &view->base;
        struct panfrost_resource *rsrc = pan_resource(pview->texture);

        /* Do we interleave an explicit stride with every element? */

        bool has_manual_stride = view->manual_stride;

        /* For easy access */

        /* Buffer views do not use the u.tex range, so treat them as a single
         * level and a single layer */
        bool is_buffer = pview->target == PIPE_BUFFER;
        unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
        unsigned last_level  = is_buffer ? 0 : pview->u.tex.last_level;
        unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
        unsigned last_layer  = is_buffer ? 0 : pview->u.tex.last_layer;

        /* Lower-bit is set when sampling from colour AFBC */
        bool is_afbc = rsrc->layout == PAN_AFBC;
        bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
        unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;

        /* Add the BO to the job so it's retained until the job is done. */
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_add_bo(batch, rsrc->bo);

        /* Add the usage flags in, since they can change across the CSO
         * lifetime due to layout switches */

        view->hw.format.layout = panfrost_layout_for_texture(rsrc);
        view->hw.format.manual_stride = has_manual_stride;

        /* Inject the addresses in, interleaving mip levels, cube faces, and
         * strides in that order */

        /* Next free slot in view->hw.payload */
        unsigned idx = 0;

        for (unsigned l = first_level; l <= last_level; ++l) {
                for (unsigned f = first_layer; f <= last_layer; ++f) {

                        /* Address of this level/layer, with the AFBC bit
                         * added to the address when applicable */
                        view->hw.payload[idx++] =
                                panfrost_get_texture_address(rsrc, l, f) + afbc_bit;

                        /* With manual strides, each address is followed by
                         * the stride of its mip level */
                        if (has_manual_stride) {
                                view->hw.payload[idx++] =
                                        rsrc->slices[l].stride;
                        }
                }
        }

        /* Upload a snapshot of the patched descriptor for this batch */
        return panfrost_upload_transient(batch, &view->hw,
                                         sizeof(struct mali_texture_descriptor));
}
 635
 636 static void
 637 panfrost_upload_texture_descriptors(struct panfrost_context *ctx)
 638 {
 639         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 640
 641         for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
 642                 mali_ptr trampoline = 0;
 643
 644                 if (ctx->sampler_view_count[t]) {
 645                         uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 646
 647                         for (int i = 0; i < ctx->sampler_view_count[t]; ++i)
 648                                 trampolines[i] =
 649                                         panfrost_upload_tex(ctx, ctx->sampler_views[t][i]);
 650
 651                         trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
 652                 }
 653
 654                 ctx->payloads[t].postfix.texture_trampoline = trampoline;
 655         }
 656 }
 657
 658 struct sysval_uniform {
 659         union {
 660                 float f[4];
 661                 int32_t i[4];
 662                 uint32_t u[4];
 663                 uint64_t du[2];
 664         };
 665 };
 666
 667 static void panfrost_upload_viewport_scale_sysval(struct panfrost_context *ctx,
 668                 struct sysval_uniform *uniform)
 669 {
 670         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 671
 672         uniform->f[0] = vp->scale[0];
 673         uniform->f[1] = vp->scale[1];
 674         uniform->f[2] = vp->scale[2];
 675 }
 676
 677 static void panfrost_upload_viewport_offset_sysval(struct panfrost_context *ctx,
 678                 struct sysval_uniform *uniform)
 679 {
 680         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 681
 682         uniform->f[0] = vp->translate[0];
 683         uniform->f[1] = vp->translate[1];
 684         uniform->f[2] = vp->translate[2];
 685 }
 686
 687 static void panfrost_upload_txs_sysval(struct panfrost_context *ctx,
 688                                        enum pipe_shader_type st,
 689                                        unsigned int sysvalid,
 690                                        struct sysval_uniform *uniform)
 691 {
 692         unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
 693         unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
 694         bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
 695         struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;
 696
 697         assert(dim);
 698         uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);
 699
 700         if (dim > 1)
 701                 uniform->i[1] = u_minify(tex->texture->height0,
 702                                          tex->u.tex.first_level);
 703
 704         if (dim > 2)
 705                 uniform->i[2] = u_minify(tex->texture->depth0,
 706                                          tex->u.tex.first_level);
 707
 708         if (is_array)
 709                 uniform->i[dim] = tex->texture->array_size;
 710 }
 711
 712 static void panfrost_upload_ssbo_sysval(
 713         struct panfrost_context *ctx,
 714         enum pipe_shader_type st,
 715         unsigned ssbo_id,
 716         struct sysval_uniform *uniform)
 717 {
 718         assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
 719         struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];
 720
 721         /* Compute address */
 722         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 723         struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
 724
 725         panfrost_batch_add_bo(batch, bo);
 726
 727         /* Upload address and size as sysval */
 728         uniform->du[0] = bo->gpu + sb.buffer_offset;
 729         uniform->u[2] = sb.buffer_size;
 730 }
 731
 732 static void panfrost_upload_num_work_groups_sysval(struct panfrost_context *ctx,
 733                 struct sysval_uniform *uniform)
 734 {
 735         uniform->u[0] = ctx->compute_grid->grid[0];
 736         uniform->u[1] = ctx->compute_grid->grid[1];
 737         uniform->u[2] = ctx->compute_grid->grid[2];
 738 }
 739
 740 static void panfrost_upload_sysvals(struct panfrost_context *ctx, void *buf,
 741                                     struct panfrost_shader_state *ss,
 742                                     enum pipe_shader_type st)
 743 {
 744         struct sysval_uniform *uniforms = (void *)buf;
 745
 746         for (unsigned i = 0; i < ss->sysval_count; ++i) {
 747                 int sysval = ss->sysval[i];
 748
 749                 switch (PAN_SYSVAL_TYPE(sysval)) {
 750                 case PAN_SYSVAL_VIEWPORT_SCALE:
 751                         panfrost_upload_viewport_scale_sysval(ctx, &uniforms[i]);
 752                         break;
 753                 case PAN_SYSVAL_VIEWPORT_OFFSET:
 754                         panfrost_upload_viewport_offset_sysval(ctx, &uniforms[i]);
 755                         break;
 756                 case PAN_SYSVAL_TEXTURE_SIZE:
 757                         panfrost_upload_txs_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
 758                                                    &uniforms[i]);
 759                         break;
 760                 case PAN_SYSVAL_SSBO:
 761                         panfrost_upload_ssbo_sysval(ctx, st, PAN_SYSVAL_ID(sysval),
 762                                                     &uniforms[i]);
 763                         break;
 764                 case PAN_SYSVAL_NUM_WORK_GROUPS:
 765                         panfrost_upload_num_work_groups_sysval(ctx, &uniforms[i]);
 766                         break;
 767
 768                 default:
 769                         assert(0);
 770                 }
 771         }
 772 }
 773
 774 static const void *
 775 panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned index)
 776 {
 777         struct pipe_constant_buffer *cb = &buf->cb[index];
 778         struct panfrost_resource *rsrc = pan_resource(cb->buffer);
 779
 780         if (rsrc)
 781                 return rsrc->bo->cpu;
 782         else if (cb->user_buffer)
 783                 return cb->user_buffer;
 784         else
 785                 unreachable("No constant buffer");
 786 }
 787
 788 static mali_ptr
 789 panfrost_map_constant_buffer_gpu(
 790         struct panfrost_context *ctx,
 791         struct panfrost_constant_buffer *buf,
 792         unsigned index)
 793 {
 794         struct pipe_constant_buffer *cb = &buf->cb[index];
 795         struct panfrost_resource *rsrc = pan_resource(cb->buffer);
 796
 797         if (rsrc) {
 798                 return rsrc->bo->gpu;
 799         } else if (cb->user_buffer) {
 800                 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 801                 return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size);
 802         } else {
 803                 unreachable("No constant buffer");
 804         }
 805 }
 806
 807 /* Compute number of UBOs active (more specifically, compute the highest UBO
 808  * number addressable -- if there are gaps, include them in the count anyway).
 809  * We always include UBO #0 in the count, since we *need* uniforms enabled for
 810  * sysvals. */
 811
 812 static unsigned
 813 panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
 814 {
 815         unsigned mask = ctx->constant_buffer[stage].enabled_mask | 1;
 816         return 32 - __builtin_clz(mask);
 817 }
 818
 819 /* Fixes up a shader state with current state, returning a GPU address to the
 820  * patched shader */
 821
 822 static mali_ptr
 823 panfrost_patch_shader_state(
 824         struct panfrost_context *ctx,
 825         struct panfrost_shader_state *ss,
 826         enum pipe_shader_type stage,
 827         bool should_upload)
 828 {
 829         ss->tripipe->texture_count = ctx->sampler_view_count[stage];
 830         ss->tripipe->sampler_count = ctx->sampler_count[stage];
 831
 832         ss->tripipe->midgard1.flags = 0x220;
 833
 834         unsigned ubo_count = panfrost_ubo_count(ctx, stage);
 835         ss->tripipe->midgard1.uniform_buffer_count = ubo_count;
 836
 837         /* We can't reuse over frames; that's not safe. The descriptor must be
 838          * transient uploaded */
 839
 840         if (should_upload) {
 841                 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 842
 843                 return panfrost_upload_transient(batch, ss->tripipe,
 844                                                  sizeof(struct mali_shader_meta));
 845         }
 846
 847         /* If we don't need an upload, don't bother */
 848         return 0;
 849
 850 }
 851
 852 static void
 853 panfrost_patch_shader_state_compute(
 854         struct panfrost_context *ctx,
 855         enum pipe_shader_type stage,
 856         bool should_upload)
 857 {
 858         struct panfrost_shader_variants *all = ctx->shader[stage];
 859
 860         if (!all) {
 861                 ctx->payloads[stage].postfix._shader_upper = 0;
 862                 return;
 863         }
 864
 865         struct panfrost_shader_state *s = &all->variants[all->active_variant];
 866
 867         ctx->payloads[stage].postfix._shader_upper =
 868                 panfrost_patch_shader_state(ctx, s, stage, should_upload) >> 4;
 869 }
 870
 871 /* Go through dirty flags and actualise them in the cmdstream. */
 872
 873 void
 874 panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 875 {
 876         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 877         struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 878
 879         panfrost_attach_vt_framebuffer(ctx);
 880
 881         if (with_vertex_data) {
 882                 panfrost_emit_vertex_data(batch);
 883
 884                 /* Varyings emitted for -all- geometry */
 885                 unsigned total_count = ctx->padded_count * ctx->instance_count;
 886                 panfrost_emit_varying_descriptor(ctx, total_count);
 887         }
 888
 889         bool msaa = ctx->rasterizer->base.multisample;
 890
 891         if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
 892                 ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables;
 893
 894                 /* TODO: Sample size */
 895                 SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa);
 896                 SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !msaa);
 897         }
 898
 899         panfrost_batch_set_requirements(batch);
 900
 901         if (ctx->occlusion_query) {
 902                 ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
 903                 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
 904         }
 905
 906         panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_VERTEX, true);
 907         panfrost_patch_shader_state_compute(ctx, PIPE_SHADER_COMPUTE, true);
 908
 909         if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
 910                 /* Check if we need to link the gl_PointSize varying */
 911                 if (!panfrost_writes_point_size(ctx)) {
 912                         /* If the size is constant, write it out. Otherwise,
 913                          * don't touch primitive_size (since we would clobber
 914                          * the pointer there) */
 915
 916                         ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.constant = ctx->rasterizer->base.line_width;
 917                 }
 918         }
 919
 920         /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
 921         if (ctx->shader[PIPE_SHADER_FRAGMENT])
 922                 ctx->dirty |= PAN_DIRTY_FS;
 923
 924         if (ctx->dirty & PAN_DIRTY_FS) {
 925                 assert(ctx->shader[PIPE_SHADER_FRAGMENT]);
 926                 struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
 927
 928                 panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
 929
 930                 panfrost_batch_add_bo(batch, variant->bo);
 931
 932 #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
 933
 934                 COPY(shader);
 935                 COPY(attribute_count);
 936                 COPY(varying_count);
 937                 COPY(texture_count);
 938                 COPY(sampler_count);
 939                 COPY(midgard1.uniform_count);
 940                 COPY(midgard1.uniform_buffer_count);
 941                 COPY(midgard1.work_count);
 942                 COPY(midgard1.flags);
 943                 COPY(midgard1.unknown2);
 944
 945 #undef COPY
 946
 947                 /* Get blending setup */
 948                 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
 949
 950                 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
 951
 952                 for (unsigned c = 0; c < rt_count; ++c)
 953                         blend[c] = panfrost_get_blend_for_context(ctx, c);
 954
 955                 /* If there is a blend shader, work registers are shared. XXX: opt */
 956
 957                 for (unsigned c = 0; c < rt_count; ++c) {
 958                         if (blend[c].is_shader)
 959                                 ctx->fragment_shader_core.midgard1.work_count = 16;
 960                 }
 961
 962                 /* Set late due to depending on render state */
 963                 unsigned flags = ctx->fragment_shader_core.midgard1.flags;
 964
 965                 /* Depending on whether it's legal to in the given shader, we
 966                  * try to enable early-z testing (or forward-pixel kill?) */
 967
 968                 if (!variant->can_discard)
 969                         flags |= MALI_EARLY_Z;
 970
 971                 /* Any time texturing is used, derivatives are implicitly
 972                  * calculated, so we need to enable helper invocations */
 973
 974                 if (variant->helper_invocations)
 975                         flags |= MALI_HELPER_INVOCATIONS;
 976
 977                 ctx->fragment_shader_core.midgard1.flags = flags;
 978
 979                 /* Assign the stencil refs late */
 980
 981                 unsigned front_ref = ctx->stencil_ref.ref_value[0];
 982                 unsigned back_ref = ctx->stencil_ref.ref_value[1];
 983                 bool back_enab = ctx->depth_stencil->stencil[1].enabled;
 984
 985                 ctx->fragment_shader_core.stencil_front.ref = front_ref;
 986                 ctx->fragment_shader_core.stencil_back.ref = back_enab ? back_ref : front_ref;
 987
 988                 /* CAN_DISCARD should be set if the fragment shader possibly
 989                  * contains a 'discard' instruction. It is likely this is
 990                  * related to optimizations related to forward-pixel kill, as
 991                  * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
 992                  * thing?" by Peter Harris
 993                  */
 994
 995                 if (variant->can_discard) {
 996                         ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
 997                         ctx->fragment_shader_core.midgard1.flags |= 0x400;
 998                 }
 999
1000                 /* Even on MFBD, the shader descriptor gets blend shaders. It's
1001                  * *also* copied to the blend_meta appended (by convention),
1002                  * but this is the field actually read by the hardware. (Or
1003                  * maybe both are read...?) */
1004
1005                 if (blend[0].is_shader) {
1006                         ctx->fragment_shader_core.blend.shader =
1007                                 blend[0].shader.bo->gpu | blend[0].shader.first_tag;
1008                 } else {
1009                         ctx->fragment_shader_core.blend.shader = 0;
1010                 }
1011
1012                 if (screen->require_sfbd) {
1013                         /* When only a single render target platform is used, the blend
1014                          * information is inside the shader meta itself. We
1015                          * additionally need to signal CAN_DISCARD for nontrivial blend
1016                          * modes (so we're able to read back the destination buffer) */
1017
1018                         if (!blend[0].is_shader) {
1019                                 ctx->fragment_shader_core.blend.equation =
1020                                         *blend[0].equation.equation;
1021                                 ctx->fragment_shader_core.blend.constant =
1022                                         blend[0].equation.constant;
1023                         }
1024
1025                         if (!blend[0].no_blending) {
1026                                 ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
1027                         }
1028                 }
1029
1030                 size_t size = sizeof(struct mali_shader_meta) + (sizeof(struct midgard_blend_rt) * rt_count);
1031                 struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
1032                 memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
1033
1034                 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix._shader_upper = (transfer.gpu) >> 4;
1035
1036                 if (!screen->require_sfbd) {
1037                         /* Additional blend descriptor tacked on for jobs using MFBD */
1038
1039                         struct midgard_blend_rt rts[4];
1040
1041                         for (unsigned i = 0; i < rt_count; ++i) {
1042                                 unsigned blend_count = 0x200;
1043
1044                                 if (blend[i].is_shader) {
1045                                         /* For a blend shader, the bottom nibble corresponds to
1046                                          * the number of work registers used, which signals the
1047                                          * -existence- of a blend shader */
1048
1049                                         assert(blend[i].shader.work_count >= 2);
1050                                         blend_count |= MIN2(blend[i].shader.work_count, 3);
1051                                 } else {
1052                                         /* Otherwise, the bottom bit simply specifies if
1053                                          * blending (anything other than REPLACE) is enabled */
1054
1055                                         if (!blend[i].no_blending)
1056                                                 blend_count |= 0x1;
1057                                 }
1058
1059
1060                                 bool is_srgb =
1061                                         (ctx->pipe_framebuffer.nr_cbufs > i) &&
1062                                         (ctx->pipe_framebuffer.cbufs[i]) &&
1063                                         util_format_is_srgb(ctx->pipe_framebuffer.cbufs[i]->format);
1064
1065                                 rts[i].flags = blend_count;
1066
1067                                 if (is_srgb)
1068                                         rts[i].flags |= MALI_BLEND_SRGB;
1069
1070                                 if (!ctx->blend->base.dither)
1071                                         rts[i].flags |= MALI_BLEND_NO_DITHER;
1072
1073                                 /* TODO: sRGB in blend shaders is currently
1074                                  * unimplemented. Contact me (Alyssa) if you're
1075                                  * interested in working on this. We have
1076                                  * native Midgard ops for helping here, but
1077                                  * they're not well-understood yet. */
1078
1079                                 assert(!(is_srgb && blend[i].is_shader));
1080
1081                                 if (blend[i].is_shader) {
1082                                         rts[i].blend.shader = blend[i].shader.bo->gpu | blend[i].shader.first_tag;
1083                                 } else {
1084                                         rts[i].blend.equation = *blend[i].equation.equation;
1085                                         rts[i].blend.constant = blend[i].equation.constant;
1086                                 }
1087                         }
1088
1089                         memcpy(transfer.cpu + sizeof(struct mali_shader_meta), rts, sizeof(rts[0]) * rt_count);
1090                 }
1091         }
1092
1093         /* We stage to transient, so always dirty.. */
1094         if (ctx->vertex)
1095                 panfrost_stage_attributes(ctx);
1096
1097         if (ctx->dirty & PAN_DIRTY_SAMPLERS)
1098                 panfrost_upload_sampler_descriptors(ctx);
1099
1100         if (ctx->dirty & PAN_DIRTY_TEXTURES)
1101                 panfrost_upload_texture_descriptors(ctx);
1102
1103         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1104
1105         for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
1106                 struct panfrost_shader_variants *all = ctx->shader[i];
1107
1108                 if (!all)
1109                         continue;
1110
1111                 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
1112
1113                 struct panfrost_shader_state *ss = &all->variants[all->active_variant];
1114
1115                 /* Uniforms are implicitly UBO #0 */
1116                 bool has_uniforms = buf->enabled_mask & (1 << 0);
1117
1118                 /* Allocate room for the sysval and the uniforms */
1119                 size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
1120                 size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
1121                 size_t size = sys_size + uniform_size;
1122                 struct panfrost_transfer transfer = panfrost_allocate_transient(batch, size);
1123
1124                 /* Upload sysvals requested by the shader */
1125                 panfrost_upload_sysvals(ctx, transfer.cpu, ss, i);
1126
1127                 /* Upload uniforms */
1128                 if (has_uniforms) {
1129                         const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
1130                         memcpy(transfer.cpu + sys_size, cpu, uniform_size);
1131                 }
1132
1133                 int uniform_count =
1134                         ctx->shader[i]->variants[ctx->shader[i]->active_variant].uniform_count;
1135
1136                 struct mali_vertex_tiler_postfix *postfix =
1137                         &ctx->payloads[i].postfix;
1138
1139                 /* Next up, attach UBOs. UBO #0 is the uniforms we just
1140                  * uploaded */
1141
1142                 unsigned ubo_count = panfrost_ubo_count(ctx, i);
1143                 assert(ubo_count >= 1);
1144
1145                 size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count;
1146                 struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS];
1147
1148                 /* Upload uniforms as a UBO */
1149                 ubos[0].size = MALI_POSITIVE((2 + uniform_count));
1150                 ubos[0].ptr = transfer.gpu >> 2;
1151
1152                 /* The rest are honest-to-goodness UBOs */
1153
1154                 for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
1155                         size_t usz = buf->cb[ubo].buffer_size;
1156
1157                         bool enabled = buf->enabled_mask & (1 << ubo);
1158                         bool empty = usz == 0;
1159
1160                         if (!enabled || empty) {
1161                                 /* Stub out disabled UBOs to catch accesses */
1162
1163                                 ubos[ubo].size = 0;
1164                                 ubos[ubo].ptr = 0xDEAD0000;
1165                                 continue;
1166                         }
1167
1168                         mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo);
1169
1170                         unsigned bytes_per_field = 16;
1171                         unsigned aligned = ALIGN_POT(usz, bytes_per_field);
1172                         unsigned fields = aligned / bytes_per_field;
1173
1174                         ubos[ubo].size = MALI_POSITIVE(fields);
1175                         ubos[ubo].ptr = gpu >> 2;
1176                 }
1177
1178                 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
1179                 postfix->uniforms = transfer.gpu;
1180                 postfix->uniform_buffers = ubufs;
1181
1182                 buf->dirty_mask = 0;
1183         }
1184
1185         /* TODO: Upload the viewport somewhere more appropriate */
1186
1187         /* Clip bounds are encoded as floats. The viewport itself is encoded as
1188          * (somewhat) asymmetric ints. */
1189         const struct pipe_scissor_state *ss = &ctx->scissor;
1190
1191         struct mali_viewport view = {
1192                 /* By default, do no viewport clipping, i.e. clip to (-inf,
1193                  * inf) in each direction. Clipping to the viewport in theory
1194                  * should work, but in practice causes issues when we're not
1195                  * explicitly trying to scissor */
1196
1197                 .clip_minx = -INFINITY,
1198                 .clip_miny = -INFINITY,
1199                 .clip_maxx = INFINITY,
1200                 .clip_maxy = INFINITY,
1201         };
1202
1203         /* Always scissor to the viewport by default. */
1204         float vp_minx = (int) (vp->translate[0] - fabsf(vp->scale[0]));
1205         float vp_maxx = (int) (vp->translate[0] + fabsf(vp->scale[0]));
1206
1207         float vp_miny = (int) (vp->translate[1] - fabsf(vp->scale[1]));
1208         float vp_maxy = (int) (vp->translate[1] + fabsf(vp->scale[1]));
1209
1210         float minz = (vp->translate[2] - fabsf(vp->scale[2]));
1211         float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
1212
1213         /* Apply the scissor test */
1214
1215         unsigned minx, miny, maxx, maxy;
1216
1217         if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor) {
1218                 minx = MAX2(ss->minx, vp_minx);
1219                 miny = MAX2(ss->miny, vp_miny);
1220                 maxx = MIN2(ss->maxx, vp_maxx);
1221                 maxy = MIN2(ss->maxy, vp_maxy);
1222         } else {
1223                 minx = vp_minx;
1224                 miny = vp_miny;
1225                 maxx = vp_maxx;
1226                 maxy = vp_maxy;
1227         }
1228
1229         /* Hardware needs the min/max to be strictly ordered, so flip if we
1230          * need to. The viewport transformation in the vertex shader will
1231          * handle the negatives if we don't */
1232
1233         if (miny > maxy) {
1234                 unsigned temp = miny;
1235                 miny = maxy;
1236                 maxy = temp;
1237         }
1238
1239         if (minx > maxx) {
1240                 unsigned temp = minx;
1241                 minx = maxx;
1242                 maxx = temp;
1243         }
1244
1245         if (minz > maxz) {
1246                 float temp = minz;
1247                 minz = maxz;
1248                 maxz = temp;
1249         }
1250
1251         /* Clamp to the framebuffer size as a last check */
1252
1253         minx = MIN2(ctx->pipe_framebuffer.width, minx);
1254         maxx = MIN2(ctx->pipe_framebuffer.width, maxx);
1255
1256         miny = MIN2(ctx->pipe_framebuffer.height, miny);
1257         maxy = MIN2(ctx->pipe_framebuffer.height, maxy);
1258
1259         /* Update the job, unless we're doing wallpapering (whose lack of
1260          * scissor we can ignore, since if we "miss" a tile of wallpaper, it'll
1261          * just... be faster :) */
1262
1263         if (!ctx->wallpaper_batch)
1264                 panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
1265
1266         /* Upload */
1267
1268         view.viewport0[0] = minx;
1269         view.viewport1[0] = MALI_POSITIVE(maxx);
1270
1271         view.viewport0[1] = miny;
1272         view.viewport1[1] = MALI_POSITIVE(maxy);
1273
1274         view.clip_minz = minz;
1275         view.clip_maxz = maxz;
1276
1277         ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.viewport =
1278                 panfrost_upload_transient(batch,
1279                                           &view,
1280                                           sizeof(struct mali_viewport));
1281
1282         ctx->dirty = 0;
1283 }
1284
1285 /* Corresponds to exactly one draw, but does not submit anything */
1286
1287 static void
1288 panfrost_queue_draw(struct panfrost_context *ctx)
1289 {
1290         /* Handle dirty flags now */
1291         panfrost_emit_for_draw(ctx, true);
1292
1293         /* If rasterizer discard is enable, only submit the vertex */
1294
1295         bool rasterizer_discard = ctx->rasterizer
1296                                   && ctx->rasterizer->base.rasterizer_discard;
1297
1298         struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false);
1299         struct panfrost_transfer tiler;
1300
1301         if (!rasterizer_discard)
1302                 tiler = panfrost_vertex_tiler_job(ctx, true);
1303
1304         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
1305
1306         if (rasterizer_discard)
1307                 panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE);
1308         else if (ctx->wallpaper_batch)
1309                 panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler);
1310         else
1311                 panfrost_scoreboard_queue_fused_job(batch, vertex, tiler);
1312 }
1313
1314 /* The entire frame is in memory -- send it off to the kernel! */
1315
1316 static void
1317 panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate,
1318                       struct pipe_fence_handle **fence,
1319                       struct panfrost_batch *batch)
1320 {
1321         panfrost_batch_submit(batch);
1322
1323         /* If visual, we can stall a frame */
1324
1325         if (!flush_immediate)
1326                 panfrost_drm_force_flush_fragment(ctx, fence);
1327
1328         ctx->last_fragment_flushed = false;
1329         ctx->last_batch = batch;
1330
1331         /* If readback, flush now (hurts the pipelined performance) */
1332         if (flush_immediate)
1333                 panfrost_drm_force_flush_fragment(ctx, fence);
1334 }
1335
1336 static void
1337 panfrost_draw_wallpaper(struct pipe_context *pipe)
1338 {
1339         struct panfrost_context *ctx = pan_context(pipe);
1340
1341         /* Nothing to reload? TODO: MRT wallpapers */
1342         if (ctx->pipe_framebuffer.cbufs[0] == NULL)
1343                 return;
1344
1345         /* Check if the buffer has any content on it worth preserving */
1346
1347         struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
1348         struct panfrost_resource *rsrc = pan_resource(surf->texture);
1349         unsigned level = surf->u.tex.level;
1350
1351         if (!rsrc->slices[level].initialized)
1352                 return;
1353
1354         /* Save the batch */
1355         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
1356
1357         ctx->wallpaper_batch = batch;
1358
1359         /* Clamp the rendering area to the damage extent. The
1360          * KHR_partial_update() spec states that trying to render outside of
1361          * the damage region is "undefined behavior", so we should be safe.
1362          */
1363         unsigned damage_width = (rsrc->damage.extent.maxx - rsrc->damage.extent.minx);
1364         unsigned damage_height = (rsrc->damage.extent.maxy - rsrc->damage.extent.miny);
1365
1366         if (damage_width && damage_height) {
1367                 panfrost_batch_intersection_scissor(batch,
1368                                                     rsrc->damage.extent.minx,
1369                                                     rsrc->damage.extent.miny,
1370                                                     rsrc->damage.extent.maxx,
1371                                                     rsrc->damage.extent.maxy);
1372         }
1373
1374         /* FIXME: Looks like aligning on a tile is not enough, but
1375          * aligning on twice the tile size seems to works. We don't
1376          * know exactly what happens here but this deserves extra
1377          * investigation to figure it out.
1378          */
1379         batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
1380         batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
1381         batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
1382                            rsrc->base.width0);
1383         batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
1384                            rsrc->base.height0);
1385
1386         struct pipe_scissor_state damage;
1387         struct pipe_box rects[4];
1388
1389         /* Clamp the damage box to the rendering area. */
1390         damage.minx = MAX2(batch->minx, rsrc->damage.biggest_rect.x);
1391         damage.miny = MAX2(batch->miny, rsrc->damage.biggest_rect.y);
1392         damage.maxx = MIN2(batch->maxx,
1393                            rsrc->damage.biggest_rect.x +
1394                            rsrc->damage.biggest_rect.width);
1395         damage.maxy = MIN2(batch->maxy,
1396                            rsrc->damage.biggest_rect.y +
1397                            rsrc->damage.biggest_rect.height);
1398
1399         /* One damage rectangle means we can end up with at most 4 reload
1400          * regions:
1401          * 1: left region, only exists if damage.x > 0
1402          * 2: right region, only exists if damage.x + damage.width < fb->width
1403          * 3: top region, only exists if damage.y > 0. The intersection with
1404          *    the left and right regions are dropped
1405          * 4: bottom region, only exists if damage.y + damage.height < fb->height.
1406          *    The intersection with the left and right regions are dropped
1407          *
1408          *                    ____________________________
1409          *                    |       |     3     |      |
1410          *                    |       |___________|      |
1411          *                    |       |   damage  |      |
1412          *                    |   1   |    rect   |   2  |
1413          *                    |       |___________|      |
1414          *                    |       |     4     |      |
1415          *                    |_______|___________|______|
1416          */
1417         u_box_2d(batch->minx, batch->miny, damage.minx - batch->minx,
1418                  batch->maxy - batch->miny, &rects[0]);
1419         u_box_2d(damage.maxx, batch->miny, batch->maxx - damage.maxx,
1420                  batch->maxy - batch->miny, &rects[1]);
1421         u_box_2d(damage.minx, batch->miny, damage.maxx - damage.minx,
1422                  damage.miny - batch->miny, &rects[2]);
1423         u_box_2d(damage.minx, damage.maxy, damage.maxx - damage.minx,
1424                  batch->maxy - damage.maxy, &rects[3]);
1425
1426         for (unsigned i = 0; i < 4; i++) {
1427                 /* Width and height are always >= 0 even if width is declared as a
1428                  * signed integer: u_box_2d() helper takes unsigned args and
1429                  * panfrost_set_damage_region() is taking care of clamping
1430                  * negative values.
1431                  */
1432                 if (!rects[i].width || !rects[i].height)
1433                         continue;
1434
1435                 /* Blit the wallpaper in */
1436                 panfrost_blit_wallpaper(ctx, &rects[i]);
1437         }
1438         ctx->wallpaper_batch = NULL;
1439 }
1440
1441 void
1442 panfrost_flush(
1443         struct pipe_context *pipe,
1444         struct pipe_fence_handle **fence,
1445         unsigned flags)
1446 {
1447         struct panfrost_context *ctx = pan_context(pipe);
1448         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
1449
1450         /* Nothing to do! */
1451         if (!batch->last_job.gpu && !batch->clear) return;
1452
1453         if (!batch->clear && batch->last_tiler.gpu)
1454                 panfrost_draw_wallpaper(&ctx->base);
1455
1456         /* Whether to stall the pipeline for immediately correct results. Since
1457          * pipelined rendering is quite broken right now (to be fixed by the
1458          * panfrost_job refactor, just take the perf hit for correctness) */
1459         bool flush_immediate = /*flags & PIPE_FLUSH_END_OF_FRAME*/true;
1460
1461         /* Submit the frame itself */
1462         panfrost_submit_frame(ctx, flush_immediate, fence, batch);
1463
1464         /* Prepare for the next frame */
1465         panfrost_invalidate_frame(ctx);
1466 }
1467
1468 #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
1469
1470 static int
1471 g2m_draw_mode(enum pipe_prim_type mode)
1472 {
1473         switch (mode) {
1474                 DEFINE_CASE(POINTS);
1475                 DEFINE_CASE(LINES);
1476                 DEFINE_CASE(LINE_LOOP);
1477                 DEFINE_CASE(LINE_STRIP);
1478                 DEFINE_CASE(TRIANGLES);
1479                 DEFINE_CASE(TRIANGLE_STRIP);
1480                 DEFINE_CASE(TRIANGLE_FAN);
1481                 DEFINE_CASE(QUADS);
1482                 DEFINE_CASE(QUAD_STRIP);
1483                 DEFINE_CASE(POLYGON);
1484
1485         default:
1486                 unreachable("Invalid draw mode");
1487         }
1488 }
1489
1490 #undef DEFINE_CASE
1491
1492 static unsigned
1493 panfrost_translate_index_size(unsigned size)
1494 {
1495         switch (size) {
1496         case 1:
1497                 return MALI_DRAW_INDEXED_UINT8;
1498
1499         case 2:
1500                 return MALI_DRAW_INDEXED_UINT16;
1501
1502         case 4:
1503                 return MALI_DRAW_INDEXED_UINT32;
1504
1505         default:
1506                 unreachable("Invalid index size");
1507         }
1508 }
1509
1510 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
1511  * good for the duration of the draw (transient), could last longer */
1512
1513 static mali_ptr
1514 panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
1515 {
1516         struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
1517
1518         off_t offset = info->start * info->index_size;
1519         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
1520
1521         if (!info->has_user_indices) {
1522                 /* Only resources can be directly mapped */
1523                 panfrost_batch_add_bo(batch, rsrc->bo);
1524                 return rsrc->bo->gpu + offset;
1525         } else {
1526                 /* Otherwise, we need to upload to transient memory */
1527                 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
1528                 return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
1529         }
1530 }
1531
1532 static bool
1533 panfrost_scissor_culls_everything(struct panfrost_context *ctx)
1534 {
1535         const struct pipe_scissor_state *ss = &ctx->scissor;
1536
1537         /* Check if we're scissoring at all */
1538
1539         if (!(ctx->rasterizer && ctx->rasterizer->base.scissor))
1540                 return false;
1541
1542         return (ss->minx == ss->maxx) || (ss->miny == ss->maxy);
1543 }
1544
1545 /* Count generated primitives (when there is no geom/tess shaders) for
1546  * transform feedback */
1547
1548 static void
1549 panfrost_statistics_record(
1550                 struct panfrost_context *ctx,
1551                 const struct pipe_draw_info *info)
1552 {
1553         if (!ctx->active_queries)
1554                 return;
1555
1556         uint32_t prims = u_prims_for_vertices(info->mode, info->count);
1557         ctx->prims_generated += prims;
1558
1559         if (!ctx->streamout.num_targets)
1560                 return;
1561
1562         ctx->tf_prims_generated += prims;
1563 }
1564
1565 static void
1566 panfrost_draw_vbo(
1567         struct pipe_context *pipe,
1568         const struct pipe_draw_info *info)
1569 {
1570         struct panfrost_context *ctx = pan_context(pipe);
1571
1572         /* First of all, check the scissor to see if anything is drawn at all.
1573          * If it's not, we drop the draw (mostly a conformance issue;
1574          * well-behaved apps shouldn't hit this) */
1575
1576         if (panfrost_scissor_culls_everything(ctx))
1577                 return;
1578
1579         ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
1580         ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
1581
1582         int mode = info->mode;
1583
1584         /* Fallback unsupported restart index */
1585         unsigned primitive_index = (1 << (info->index_size * 8)) - 1;
1586
1587         if (info->primitive_restart && info->index_size
1588             && info->restart_index != primitive_index) {
1589                 util_draw_vbo_without_prim_restart(pipe, info);
1590                 return;
1591         }
1592
1593         /* Fallback for unsupported modes */
1594
1595         assert(ctx->rasterizer != NULL);
1596
1597         if (!(ctx->draw_modes & (1 << mode))) {
1598                 if (mode == PIPE_PRIM_QUADS && info->count == 4 && !ctx->rasterizer->base.flatshade) {
1599                         mode = PIPE_PRIM_TRIANGLE_FAN;
1600                 } else {
1601                         if (info->count < 4) {
1602                                 /* Degenerate case? */
1603                                 return;
1604                         }
1605
1606                         util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
1607                         util_primconvert_draw_vbo(ctx->primconvert, info);
1608                         return;
1609                 }
1610         }
1611
1612         /* Now that we have a guaranteed terminating path, find the job.
1613          * Assignment commented out to prevent unused warning */
1614
1615         /* struct panfrost_batch *batch = */ panfrost_get_batch_for_fbo(ctx);
1616
1617         ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
1618
1619         /* Take into account a negative bias */
1620         ctx->vertex_count = info->count + abs(info->index_bias);
1621         ctx->instance_count = info->instance_count;
1622         ctx->active_prim = info->mode;
1623
1624         /* For non-indexed draws, they're the same */
1625         unsigned vertex_count = ctx->vertex_count;
1626
1627         unsigned draw_flags = 0;
1628
1629         /* The draw flags interpret how primitive size is interpreted */
1630
1631         if (panfrost_writes_point_size(ctx))
1632                 draw_flags |= MALI_DRAW_VARYING_SIZE;
1633
1634         if (info->primitive_restart)
1635                 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
1636
1637         /* For higher amounts of vertices (greater than what fits in a 16-bit
1638          * short), the other value is needed, otherwise there will be bizarre
1639          * rendering artefacts. It's not clear what these values mean yet. This
1640          * change is also needed for instancing and sometimes points (perhaps
1641          * related to dynamically setting gl_PointSize) */
1642
1643         bool is_points = mode == PIPE_PRIM_POINTS;
1644         bool many_verts = ctx->vertex_count > 0xFFFF;
1645         bool instanced = ctx->instance_count > 1;
1646
1647         draw_flags |= (is_points || many_verts || instanced) ? 0x3000 : 0x18000;
1648
1649         /* This doesn't make much sense */
1650         if (mode == PIPE_PRIM_LINE_STRIP) {
1651                 draw_flags |= 0x800;
1652         }
1653
1654         panfrost_statistics_record(ctx, info);
1655
1656         if (info->index_size) {
1657                 /* Calculate the min/max index used so we can figure out how
1658                  * many times to invoke the vertex shader */
1659
1660                 /* Fetch / calculate index bounds */
1661                 unsigned min_index = 0, max_index = 0;
1662
1663                 if (info->max_index == ~0u) {
1664                         u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
1665                 } else {
1666                         min_index = info->min_index;
1667                         max_index = info->max_index;
1668                 }
1669
1670                 /* Use the corresponding values */
1671                 vertex_count = max_index - min_index + 1;
1672                 ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
1673                 ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
1674
1675                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
1676                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
1677
1678                 //assert(!info->restart_index); /* TODO: Research */
1679
1680                 draw_flags |= panfrost_translate_index_size(info->index_size);
1681                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
1682         } else {
1683                 /* Index count == vertex count, if no indexing is applied, as
1684                  * if it is internally indexed in the expected order */
1685
1686                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
1687                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
1688
1689                 /* Reverse index state */
1690                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (u64) NULL;
1691         }
1692
1693         /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
1694          * vertex_count, 1) */
1695
1696         panfrost_pack_work_groups_fused(
1697                 &ctx->payloads[PIPE_SHADER_VERTEX].prefix,
1698                 &ctx->payloads[PIPE_SHADER_FRAGMENT].prefix,
1699                 1, vertex_count, info->instance_count,
1700                 1, 1, 1);
1701
1702         ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.unknown_draw = draw_flags;
1703
1704         /* Encode the padded vertex count */
1705
1706         if (info->instance_count > 1) {
1707                 /* Triangles have non-even vertex counts so they change how
1708                  * padding works internally */
1709
1710                 bool is_triangle =
1711                         mode == PIPE_PRIM_TRIANGLES ||
1712                         mode == PIPE_PRIM_TRIANGLE_STRIP ||
1713                         mode == PIPE_PRIM_TRIANGLE_FAN;
1714
1715                 struct pan_shift_odd so =
1716                         panfrost_padded_vertex_count(vertex_count, !is_triangle);
1717
1718                 ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = so.shift;
1719                 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = so.shift;
1720
1721                 ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = so.odd;
1722                 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = so.odd;
1723
1724                 ctx->padded_count = pan_expand_shift_odd(so);
1725         } else {
1726                 ctx->padded_count = ctx->vertex_count;
1727
1728                 /* Reset instancing state */
1729                 ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
1730                 ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = 0;
1731                 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = 0;
1732                 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = 0;
1733         }
1734
1735         /* Fire off the draw itself */
1736         panfrost_queue_draw(ctx);
1737
1738         /* Increment transform feedback offsets */
1739
1740         for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
1741                 unsigned output_count = u_stream_outputs_for_vertices(
1742                                 ctx->active_prim, ctx->vertex_count);
1743
1744                 ctx->streamout.offsets[i] += output_count;
1745         }
1746 }
1747
1748 /* CSO state */
1749
/* Generic CSO destructor: releases the state object with free(). The context
 * argument is not used. */

static void
panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
{
        free(hwcso);
}
1755
1756 static void *
1757 panfrost_create_rasterizer_state(
1758         struct pipe_context *pctx,
1759         const struct pipe_rasterizer_state *cso)
1760 {
1761         struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
1762
1763         so->base = *cso;
1764
1765         /* Bitmask, unknown meaning of the start value. 0x105 on 32-bit T6XX */
1766         so->tiler_gl_enables = 0x7;
1767
1768         if (cso->front_ccw)
1769                 so->tiler_gl_enables |= MALI_FRONT_CCW_TOP;
1770
1771         if (cso->cull_face & PIPE_FACE_FRONT)
1772                 so->tiler_gl_enables |= MALI_CULL_FACE_FRONT;
1773
1774         if (cso->cull_face & PIPE_FACE_BACK)
1775                 so->tiler_gl_enables |= MALI_CULL_FACE_BACK;
1776
1777         return so;
1778 }
1779
1780 static void
1781 panfrost_bind_rasterizer_state(
1782         struct pipe_context *pctx,
1783         void *hwcso)
1784 {
1785         struct panfrost_context *ctx = pan_context(pctx);
1786
1787         /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
1788         if (!hwcso)
1789                 return;
1790
1791         ctx->rasterizer = hwcso;
1792         ctx->dirty |= PAN_DIRTY_RASTERIZER;
1793
1794         ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units;
1795         ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
1796
1797         /* Gauranteed with the core GL call, so don't expose ARB_polygon_offset */
1798         assert(ctx->rasterizer->base.offset_clamp == 0.0);
1799
1800         /* XXX: Which bit is which? Does this maybe allow offseting not-tri? */
1801
1802         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_A, ctx->rasterizer->base.offset_tri);
1803         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_DEPTH_RANGE_B, ctx->rasterizer->base.offset_tri);
1804
1805         /* Point sprites are emulated */
1806
1807         struct panfrost_shader_state *variant =
1808                         ctx->shader[PIPE_SHADER_FRAGMENT] ? &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant] : NULL;
1809
1810         if (ctx->rasterizer->base.sprite_coord_enable || (variant && variant->point_sprite_mask))
1811                 ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
1812 }
1813
1814 static void *
1815 panfrost_create_vertex_elements_state(
1816         struct pipe_context *pctx,
1817         unsigned num_elements,
1818         const struct pipe_vertex_element *elements)
1819 {
1820         struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
1821
1822         so->num_elements = num_elements;
1823         memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
1824
1825         for (int i = 0; i < num_elements; ++i) {
1826                 so->hw[i].index = i;
1827
1828                 enum pipe_format fmt = elements[i].src_format;
1829                 const struct util_format_description *desc = util_format_description(fmt);
1830                 so->hw[i].unknown1 = 0x2;
1831                 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
1832
1833                 so->hw[i].format = panfrost_find_format(desc);
1834
1835                 /* The field itself should probably be shifted over */
1836                 so->hw[i].src_offset = elements[i].src_offset;
1837         }
1838
1839         return so;
1840 }
1841
1842 static void
1843 panfrost_bind_vertex_elements_state(
1844         struct pipe_context *pctx,
1845         void *hwcso)
1846 {
1847         struct panfrost_context *ctx = pan_context(pctx);
1848
1849         ctx->vertex = hwcso;
1850         ctx->dirty |= PAN_DIRTY_VERTEX;
1851 }
1852
1853 static void *
1854 panfrost_create_shader_state(
1855         struct pipe_context *pctx,
1856         const struct pipe_shader_state *cso)
1857 {
1858         struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
1859         so->base = *cso;
1860
1861         /* Token deep copy to prevent memory corruption */
1862
1863         if (cso->type == PIPE_SHADER_IR_TGSI)
1864                 so->base.tokens = tgsi_dup_tokens(so->base.tokens);
1865
1866         return so;
1867 }
1868
1869 static void
1870 panfrost_delete_shader_state(
1871         struct pipe_context *pctx,
1872         void *so)
1873 {
1874         struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
1875
1876         if (cso->base.type == PIPE_SHADER_IR_TGSI) {
1877                 DBG("Deleting TGSI shader leaks duplicated tokens\n");
1878         }
1879
1880         for (unsigned i = 0; i < cso->variant_count; ++i) {
1881                 struct panfrost_shader_state *shader_state = &cso->variants[i];
1882                 panfrost_bo_unreference(pctx->screen, shader_state->bo);
1883                 shader_state->bo = NULL;
1884         }
1885
1886         free(so);
1887 }
1888
1889 static void *
1890 panfrost_create_sampler_state(
1891         struct pipe_context *pctx,
1892         const struct pipe_sampler_state *cso)
1893 {
1894         struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
1895         so->base = *cso;
1896
1897         /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
1898
1899         bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
1900         bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
1901         bool mip_linear  = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
1902
1903         unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
1904         unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
1905         unsigned mip_filter = mip_linear  ?
1906                 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
1907         unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
1908
1909         struct mali_sampler_descriptor sampler_descriptor = {
1910                 .filter_mode = min_filter | mag_filter | mip_filter | normalized,
1911                 .wrap_s = translate_tex_wrap(cso->wrap_s),
1912                 .wrap_t = translate_tex_wrap(cso->wrap_t),
1913                 .wrap_r = translate_tex_wrap(cso->wrap_r),
1914                 .compare_func = panfrost_translate_alt_compare_func(cso->compare_func),
1915                 .border_color = {
1916                         cso->border_color.f[0],
1917                         cso->border_color.f[1],
1918                         cso->border_color.f[2],
1919                         cso->border_color.f[3]
1920                 },
1921                 .min_lod = FIXED_16(cso->min_lod),
1922                 .max_lod = FIXED_16(cso->max_lod),
1923                 .seamless_cube_map = cso->seamless_cube_map,
1924         };
1925
1926         /* If necessary, we disable mipmapping in the sampler descriptor by
1927          * clamping the LOD as tight as possible (from 0 to epsilon,
1928          * essentially -- remember these are fixed point numbers, so
1929          * epsilon=1/256) */
1930
1931         if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
1932                 sampler_descriptor.max_lod = sampler_descriptor.min_lod;
1933
1934         /* Enforce that there is something in the middle by adding epsilon*/
1935
1936         if (sampler_descriptor.min_lod == sampler_descriptor.max_lod)
1937                 sampler_descriptor.max_lod++;
1938
1939         /* Sanity check */
1940         assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod);
1941
1942         so->hw = sampler_descriptor;
1943
1944         return so;
1945 }
1946
1947 static void
1948 panfrost_bind_sampler_states(
1949         struct pipe_context *pctx,
1950         enum pipe_shader_type shader,
1951         unsigned start_slot, unsigned num_sampler,
1952         void **sampler)
1953 {
1954         assert(start_slot == 0);
1955
1956         struct panfrost_context *ctx = pan_context(pctx);
1957
1958         /* XXX: Should upload, not just copy? */
1959         ctx->sampler_count[shader] = num_sampler;
1960         memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
1961
1962         ctx->dirty |= PAN_DIRTY_SAMPLERS;
1963 }
1964
1965 static bool
1966 panfrost_variant_matches(
1967         struct panfrost_context *ctx,
1968         struct panfrost_shader_state *variant,
1969         enum pipe_shader_type type)
1970 {
1971         struct pipe_rasterizer_state *rasterizer = &ctx->rasterizer->base;
1972         struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
1973
1974         bool is_fragment = (type == PIPE_SHADER_FRAGMENT);
1975
1976         if (is_fragment && (alpha->enabled || variant->alpha_state.enabled)) {
1977                 /* Make sure enable state is at least the same */
1978                 if (alpha->enabled != variant->alpha_state.enabled) {
1979                         return false;
1980                 }
1981
1982                 /* Check that the contents of the test are the same */
1983                 bool same_func = alpha->func == variant->alpha_state.func;
1984                 bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
1985
1986                 if (!(same_func && same_ref)) {
1987                         return false;
1988                 }
1989         }
1990
1991         if (is_fragment && rasterizer && (rasterizer->sprite_coord_enable |
1992                                           variant->point_sprite_mask)) {
1993                 /* Ensure the same varyings are turned to point sprites */
1994                 if (rasterizer->sprite_coord_enable != variant->point_sprite_mask)
1995                         return false;
1996
1997                 /* Ensure the orientation is correct */
1998                 bool upper_left =
1999                         rasterizer->sprite_coord_mode ==
2000                         PIPE_SPRITE_COORD_UPPER_LEFT;
2001
2002                 if (variant->point_sprite_upper_left != upper_left)
2003                         return false;
2004         }
2005
2006         /* Otherwise, we're good to go */
2007         return true;
2008 }
2009
2010 /**
2011  * Fix an uncompiled shader's stream output info, and produce a bitmask
2012  * of which VARYING_SLOT_* are captured for stream output.
2013  *
2014  * Core Gallium stores output->register_index as a "slot" number, where
2015  * slots are assigned consecutively to all outputs in info->outputs_written.
2016  * This naive packing of outputs doesn't work for us - we too have slots,
2017  * but the layout is defined by the VUE map, which we won't have until we
2018  * compile a specific shader variant.  So, we remap these and simply store
2019  * VARYING_SLOT_* in our copy's output->register_index fields.
2020  *
2021  * We then produce a bitmask of outputs which are used for SO.
2022  *
2023  * Implementation from iris.
2024  */
2025
2026 static uint64_t
2027 update_so_info(struct pipe_stream_output_info *so_info,
2028                uint64_t outputs_written)
2029 {
2030         uint64_t so_outputs = 0;
2031         uint8_t reverse_map[64] = {};
2032         unsigned slot = 0;
2033
2034         while (outputs_written)
2035                 reverse_map[slot++] = u_bit_scan64(&outputs_written);
2036
2037         for (unsigned i = 0; i < so_info->num_outputs; i++) {
2038                 struct pipe_stream_output *output = &so_info->output[i];
2039
2040                 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
2041                 output->register_index = reverse_map[output->register_index];
2042
2043                 so_outputs |= 1ull << output->register_index;
2044         }
2045
2046         return so_outputs;
2047 }
2048
2049 static void
2050 panfrost_bind_shader_state(
2051         struct pipe_context *pctx,
2052         void *hwcso,
2053         enum pipe_shader_type type)
2054 {
2055         struct panfrost_context *ctx = pan_context(pctx);
2056
2057         ctx->shader[type] = hwcso;
2058
2059         if (type == PIPE_SHADER_FRAGMENT)
2060                 ctx->dirty |= PAN_DIRTY_FS;
2061         else
2062                 ctx->dirty |= PAN_DIRTY_VS;
2063
2064         if (!hwcso) return;
2065
2066         /* Match the appropriate variant */
2067
2068         signed variant = -1;
2069         struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
2070
2071         for (unsigned i = 0; i < variants->variant_count; ++i) {
2072                 if (panfrost_variant_matches(ctx, &variants->variants[i], type)) {
2073                         variant = i;
2074                         break;
2075                 }
2076         }
2077
2078         if (variant == -1) {
2079                 /* No variant matched, so create a new one */
2080                 variant = variants->variant_count++;
2081                 assert(variants->variant_count < MAX_SHADER_VARIANTS);
2082
2083                 struct panfrost_shader_state *v =
2084                                 &variants->variants[variant];
2085
2086                 if (type == PIPE_SHADER_FRAGMENT) {
2087                         v->alpha_state = ctx->depth_stencil->alpha;
2088
2089                         if (ctx->rasterizer) {
2090                                 v->point_sprite_mask = ctx->rasterizer->base.sprite_coord_enable;
2091                                 v->point_sprite_upper_left =
2092                                         ctx->rasterizer->base.sprite_coord_mode ==
2093                                         PIPE_SPRITE_COORD_UPPER_LEFT;
2094                         }
2095                 }
2096
2097                 variants->variants[variant].tripipe = calloc(1, sizeof(struct mali_shader_meta));
2098
2099         }
2100
2101         /* Select this variant */
2102         variants->active_variant = variant;
2103
2104         struct panfrost_shader_state *shader_state = &variants->variants[variant];
2105         assert(panfrost_variant_matches(ctx, shader_state, type));
2106
2107         /* We finally have a variant, so compile it */
2108
2109         if (!shader_state->compiled) {
2110                 uint64_t outputs_written = 0;
2111
2112                 panfrost_shader_compile(ctx, shader_state->tripipe,
2113                               variants->base.type,
2114                               variants->base.type == PIPE_SHADER_IR_NIR ?
2115                                       variants->base.ir.nir :
2116                                       variants->base.tokens,
2117                                         tgsi_processor_to_shader_stage(type), shader_state,
2118                                         &outputs_written);
2119
2120                 shader_state->compiled = true;
2121
2122                 /* Fixup the stream out information, since what Gallium returns
2123                  * normally is mildly insane */
2124
2125                 shader_state->stream_output = variants->base.stream_output;
2126                 shader_state->so_mask =
2127                         update_so_info(&shader_state->stream_output, outputs_written);
2128         }
2129 }
2130
2131 static void
2132 panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
2133 {
2134         panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
2135 }
2136
2137 static void
2138 panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
2139 {
2140         panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
2141 }
2142
2143 static void
2144 panfrost_set_vertex_buffers(
2145         struct pipe_context *pctx,
2146         unsigned start_slot,
2147         unsigned num_buffers,
2148         const struct pipe_vertex_buffer *buffers)
2149 {
2150         struct panfrost_context *ctx = pan_context(pctx);
2151
2152         util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
2153 }
2154
2155 static void
2156 panfrost_set_constant_buffer(
2157         struct pipe_context *pctx,
2158         enum pipe_shader_type shader, uint index,
2159         const struct pipe_constant_buffer *buf)
2160 {
2161         struct panfrost_context *ctx = pan_context(pctx);
2162         struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
2163
2164         util_copy_constant_buffer(&pbuf->cb[index], buf);
2165
2166         unsigned mask = (1 << index);
2167
2168         if (unlikely(!buf)) {
2169                 pbuf->enabled_mask &= ~mask;
2170                 pbuf->dirty_mask &= ~mask;
2171                 return;
2172         }
2173
2174         pbuf->enabled_mask |= mask;
2175         pbuf->dirty_mask |= mask;
2176 }
2177
2178 static void
2179 panfrost_set_stencil_ref(
2180         struct pipe_context *pctx,
2181         const struct pipe_stencil_ref *ref)
2182 {
2183         struct panfrost_context *ctx = pan_context(pctx);
2184         ctx->stencil_ref = *ref;
2185
2186         /* Shader core dirty */
2187         ctx->dirty |= PAN_DIRTY_FS;
2188 }
2189
2190 static enum mali_texture_type
2191 panfrost_translate_texture_type(enum pipe_texture_target t) {
2192         switch (t)
2193         {
2194         case PIPE_BUFFER:
2195         case PIPE_TEXTURE_1D:
2196         case PIPE_TEXTURE_1D_ARRAY:
2197                 return MALI_TEX_1D;
2198
2199         case PIPE_TEXTURE_2D:
2200         case PIPE_TEXTURE_2D_ARRAY:
2201         case PIPE_TEXTURE_RECT:
2202                 return MALI_TEX_2D;
2203
2204         case PIPE_TEXTURE_3D:
2205                 return MALI_TEX_3D;
2206
2207         case PIPE_TEXTURE_CUBE:
2208         case PIPE_TEXTURE_CUBE_ARRAY:
2209                 return MALI_TEX_CUBE;
2210
2211         default:
2212                 unreachable("Unknown target");
2213         }
2214 }
2215
2216 static struct pipe_sampler_view *
2217 panfrost_create_sampler_view(
2218         struct pipe_context *pctx,
2219         struct pipe_resource *texture,
2220         const struct pipe_sampler_view *template)
2221 {
2222         struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
2223         int bytes_per_pixel = util_format_get_blocksize(texture->format);
2224
2225         pipe_reference(NULL, &texture->reference);
2226
2227         struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;
2228         assert(prsrc->bo);
2229
2230         so->base = *template;
2231         so->base.texture = texture;
2232         so->base.reference.count = 1;
2233         so->base.context = pctx;
2234
2235         /* sampler_views correspond to texture descriptors, minus the texture
2236          * (data) itself. So, we serialise the descriptor here and cache it for
2237          * later. */
2238
2239         const struct util_format_description *desc = util_format_description(prsrc->base.format);
2240
2241         unsigned char user_swizzle[4] = {
2242                 template->swizzle_r,
2243                 template->swizzle_g,
2244                 template->swizzle_b,
2245                 template->swizzle_a
2246         };
2247
2248         enum mali_format format = panfrost_find_format(desc);
2249
2250         /* Check if we need to set a custom stride by computing the "expected"
2251          * stride and comparing it to what the BO actually wants. Only applies
2252          * to linear textures, since tiled/compressed textures have strict
2253          * alignment requirements for their strides as it is */
2254
2255         unsigned first_level = template->u.tex.first_level;
2256         unsigned last_level = template->u.tex.last_level;
2257
2258         if (prsrc->layout == PAN_LINEAR) {
2259                 for (unsigned l = first_level; l <= last_level; ++l) {
2260                         unsigned actual_stride = prsrc->slices[l].stride;
2261                         unsigned width = u_minify(texture->width0, l);
2262                         unsigned comp_stride = width * bytes_per_pixel;
2263
2264                         if (comp_stride != actual_stride) {
2265                                 so->manual_stride = true;
2266                                 break;
2267                         }
2268                 }
2269         }
2270
2271         /* In the hardware, array_size refers specifically to array textures,
2272          * whereas in Gallium, it also covers cubemaps */
2273
2274         unsigned array_size = texture->array_size;
2275
2276         if (template->target == PIPE_TEXTURE_CUBE) {
2277                 /* TODO: Cubemap arrays */
2278                 assert(array_size == 6);
2279                 array_size /= 6;
2280         }
2281
2282         struct mali_texture_descriptor texture_descriptor = {
2283                 .width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
2284                 .height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
2285                 .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
2286                 .array_size = MALI_POSITIVE(array_size),
2287
2288                 .format = {
2289                         .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
2290                         .format = format,
2291                         .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
2292                         .type = panfrost_translate_texture_type(template->target),
2293                         .unknown2 = 0x1,
2294                 },
2295
2296                 .swizzle = panfrost_translate_swizzle_4(user_swizzle)
2297         };
2298
2299         texture_descriptor.levels = last_level - first_level;
2300
2301         so->hw = texture_descriptor;
2302
2303         return (struct pipe_sampler_view *) so;
2304 }
2305
2306 static void
2307 panfrost_set_sampler_views(
2308         struct pipe_context *pctx,
2309         enum pipe_shader_type shader,
2310         unsigned start_slot, unsigned num_views,
2311         struct pipe_sampler_view **views)
2312 {
2313         struct panfrost_context *ctx = pan_context(pctx);
2314
2315         assert(start_slot == 0);
2316
2317         unsigned new_nr = 0;
2318         for (unsigned i = 0; i < num_views; ++i) {
2319                 if (views[i])
2320                         new_nr = i + 1;
2321         }
2322
2323         ctx->sampler_view_count[shader] = new_nr;
2324         memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));
2325
2326         ctx->dirty |= PAN_DIRTY_TEXTURES;
2327 }
2328
2329 static void
2330 panfrost_sampler_view_destroy(
2331         struct pipe_context *pctx,
2332         struct pipe_sampler_view *view)
2333 {
2334         pipe_resource_reference(&view->texture, NULL);
2335         ralloc_free(view);
2336 }
2337
2338 static void
2339 panfrost_set_shader_buffers(
2340         struct pipe_context *pctx,
2341         enum pipe_shader_type shader,
2342         unsigned start, unsigned count,
2343         const struct pipe_shader_buffer *buffers,
2344         unsigned writable_bitmask)
2345 {
2346         struct panfrost_context *ctx = pan_context(pctx);
2347
2348         util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader],
2349                         buffers, start, count);
2350 }
2351
2352 /* Hints that a framebuffer should use AFBC where possible */
2353
2354 static void
2355 panfrost_hint_afbc(
2356                 struct panfrost_screen *screen,
2357                 const struct pipe_framebuffer_state *fb)
2358 {
2359         /* AFBC implemenation incomplete; hide it */
2360         if (!(pan_debug & PAN_DBG_AFBC)) return;
2361
2362         /* Hint AFBC to the resources bound to each color buffer */
2363
2364         for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
2365                 struct pipe_surface *surf = fb->cbufs[i];
2366                 struct panfrost_resource *rsrc = pan_resource(surf->texture);
2367                 panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
2368         }
2369
2370         /* Also hint it to the depth buffer */
2371
2372         if (fb->zsbuf) {
2373                 struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture);
2374                 panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
2375         }
2376 }
2377
2378 static void
2379 panfrost_set_framebuffer_state(struct pipe_context *pctx,
2380                                const struct pipe_framebuffer_state *fb)
2381 {
2382         struct panfrost_context *ctx = pan_context(pctx);
2383
2384         /* Flush when switching framebuffers, but not if the framebuffer
2385          * state is being restored by u_blitter
2386          */
2387
2388         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
2389         bool is_scanout = panfrost_batch_is_scanout(batch);
2390         bool has_draws = batch->last_job.gpu;
2391
2392         /* Bail out early when the current and new states are the same. */
2393         if (util_framebuffer_state_equal(&ctx->pipe_framebuffer, fb))
2394                 return;
2395
2396         /* The wallpaper logic sets a new FB state before doing the blit and
2397          * restore the old one when it's done. Those FB states are reported to
2398          * be different because the surface they are pointing to are different,
2399          * but those surfaces actually point to the same cbufs/zbufs. In that
2400          * case we definitely don't want new FB descs to be emitted/attached
2401          * since the job is expected to be flushed just after the blit is done,
2402          * so let's just copy the new state and return here.
2403          */
2404         if (ctx->wallpaper_batch) {
2405                 util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
2406                 return;
2407         }
2408
2409         if (!is_scanout || has_draws)
2410                 panfrost_flush(pctx, NULL, PIPE_FLUSH_END_OF_FRAME);
2411         else
2412                 assert(!ctx->payloads[PIPE_SHADER_VERTEX].postfix.framebuffer &&
2413                        !ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.framebuffer);
2414
2415         /* Invalidate the FBO job cache since we've just been assigned a new
2416          * FB state.
2417          */
2418         ctx->batch = NULL;
2419
2420         util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
2421
2422         /* Given that we're rendering, we'd love to have compression */
2423         struct panfrost_screen *screen = pan_screen(ctx->base.screen);
2424
2425         panfrost_hint_afbc(screen, &ctx->pipe_framebuffer);
2426         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
2427                 ctx->payloads[i].postfix.framebuffer = 0;
2428 }
2429
2430 static void *
2431 panfrost_create_depth_stencil_state(struct pipe_context *pipe,
2432                                     const struct pipe_depth_stencil_alpha_state *depth_stencil)
2433 {
2434         return mem_dup(depth_stencil, sizeof(*depth_stencil));
2435 }
2436
2437 static void
2438 panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
2439                                   void *cso)
2440 {
2441         struct panfrost_context *ctx = pan_context(pipe);
2442         struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
2443         ctx->depth_stencil = depth_stencil;
2444
2445         if (!depth_stencil)
2446                 return;
2447
2448         /* Alpha does not exist in the hardware (it's not in ES3), so it's
2449          * emulated in the fragment shader */
2450
2451         if (depth_stencil->alpha.enabled) {
2452                 /* We need to trigger a new shader (maybe) */
2453                 ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
2454         }
2455
2456         /* Stencil state */
2457         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled);
2458
2459         panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
2460         ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
2461
2462         /* If back-stencil is not enabled, use the front values */
2463         bool back_enab = ctx->depth_stencil->stencil[1].enabled;
2464         unsigned back_index = back_enab ? 1 : 0;
2465
2466         panfrost_make_stencil_state(&depth_stencil->stencil[back_index], &ctx->fragment_shader_core.stencil_back);
2467         ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[back_index].writemask;
2468
2469         /* Depth state (TODO: Refactor) */
2470         SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);
2471
2472         int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
2473
2474         ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
2475         ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
2476
2477         /* Bounds test not implemented */
2478         assert(!depth_stencil->depth.bounds_test);
2479
2480         ctx->dirty |= PAN_DIRTY_FS;
2481 }
2482
/* Deletes a depth/stencil/alpha CSO by freeing its heap allocation. */
static void
panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
{
        free(depth);
}
2488
/* Sample mask hook: intentionally a no-op, the mask is ignored. */
static void
panfrost_set_sample_mask(struct pipe_context *pipe,
                         unsigned sample_mask)
{
        (void) pipe;
        (void) sample_mask;
}
2494
/* Clip state hook: intentionally a no-op, the clip state is ignored. */
static void
panfrost_set_clip_state(struct pipe_context *pipe,
                        const struct pipe_clip_state *clip)
{
        (void) pipe;
        (void) clip;
}
2501
2502 static void
2503 panfrost_set_viewport_states(struct pipe_context *pipe,
2504                              unsigned start_slot,
2505                              unsigned num_viewports,
2506                              const struct pipe_viewport_state *viewports)
2507 {
2508         struct panfrost_context *ctx = pan_context(pipe);
2509
2510         assert(start_slot == 0);
2511         assert(num_viewports == 1);
2512
2513         ctx->pipe_viewport = *viewports;
2514 }
2515
2516 static void
2517 panfrost_set_scissor_states(struct pipe_context *pipe,
2518                             unsigned start_slot,
2519                             unsigned num_scissors,
2520                             const struct pipe_scissor_state *scissors)
2521 {
2522         struct panfrost_context *ctx = pan_context(pipe);
2523
2524         assert(start_slot == 0);
2525         assert(num_scissors == 1);
2526
2527         ctx->scissor = *scissors;
2528 }
2529
/* Polygon stipple hook: intentionally a no-op, the stipple pattern is
 * ignored. */
static void
panfrost_set_polygon_stipple(struct pipe_context *pipe,
                             const struct pipe_poly_stipple *stipple)
{
        (void) pipe;
        (void) stipple;
}
2536
2537 static void
2538 panfrost_set_active_query_state(struct pipe_context *pipe,
2539                                 bool enable)
2540 {
2541         struct panfrost_context *ctx = pan_context(pipe);
2542         ctx->active_queries = enable;
2543 }
2544
2545 static void
2546 panfrost_destroy(struct pipe_context *pipe)
2547 {
2548         struct panfrost_context *panfrost = pan_context(pipe);
2549         struct panfrost_screen *screen = pan_screen(pipe->screen);
2550
2551         if (panfrost->blitter)
2552                 util_blitter_destroy(panfrost->blitter);
2553
2554         if (panfrost->blitter_wallpaper)
2555                 util_blitter_destroy(panfrost->blitter_wallpaper);
2556
2557         panfrost_drm_release_bo(screen, panfrost->scratchpad, false);
2558         panfrost_drm_release_bo(screen, panfrost->tiler_heap, false);
2559         panfrost_drm_release_bo(screen, panfrost->tiler_dummy, false);
2560
2561         ralloc_free(pipe);
2562 }
2563
2564 static struct pipe_query *
2565 panfrost_create_query(struct pipe_context *pipe,
2566                       unsigned type,
2567                       unsigned index)
2568 {
2569         struct panfrost_query *q = rzalloc(pipe, struct panfrost_query);
2570
2571         q->type = type;
2572         q->index = index;
2573
2574         return (struct pipe_query *) q;
2575 }
2576
/* Destroys a query object by releasing its ralloc allocation. */
static void
panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
        (void) pipe;

        ralloc_free(q);
}
2582
2583 static bool
2584 panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q)
2585 {
2586         struct panfrost_context *ctx = pan_context(pipe);
2587         struct panfrost_query *query = (struct panfrost_query *) q;
2588         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
2589
2590         switch (query->type) {
2591         case PIPE_QUERY_OCCLUSION_COUNTER:
2592         case PIPE_QUERY_OCCLUSION_PREDICATE:
2593         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2594                 /* Allocate a word for the query results to be stored */
2595                 query->transfer = panfrost_allocate_transient(batch, sizeof(unsigned));
2596                 ctx->occlusion_query = query;
2597                 break;
2598
2599         /* Geometry statistics are computed in the driver. XXX: geom/tess
2600          * shaders.. */
2601
2602         case PIPE_QUERY_PRIMITIVES_GENERATED:
2603                 query->start = ctx->prims_generated;
2604                 break;
2605         case PIPE_QUERY_PRIMITIVES_EMITTED:
2606                 query->start = ctx->tf_prims_generated;
2607                 break;
2608
2609         default:
2610                 fprintf(stderr, "Skipping query %u\n", query->type);
2611                 break;
2612         }
2613
2614         return true;
2615 }
2616
2617 static bool
2618 panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
2619 {
2620         struct panfrost_context *ctx = pan_context(pipe);
2621         struct panfrost_query *query = (struct panfrost_query *) q;
2622
2623         switch (query->type) {
2624         case PIPE_QUERY_OCCLUSION_COUNTER:
2625         case PIPE_QUERY_OCCLUSION_PREDICATE:
2626         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2627                 ctx->occlusion_query = NULL;
2628                 break;
2629         case PIPE_QUERY_PRIMITIVES_GENERATED:
2630                 query->end = ctx->prims_generated;
2631                 break;
2632         case PIPE_QUERY_PRIMITIVES_EMITTED:
2633                 query->end = ctx->tf_prims_generated;
2634                 break;
2635         }
2636
2637         return true;
2638 }
2639
2640 static bool
2641 panfrost_get_query_result(struct pipe_context *pipe,
2642                           struct pipe_query *q,
2643                           bool wait,
2644                           union pipe_query_result *vresult)
2645 {
2646         struct panfrost_query *query = (struct panfrost_query *) q;
2647
2648
2649         switch (query->type) {
2650         case PIPE_QUERY_OCCLUSION_COUNTER:
2651         case PIPE_QUERY_OCCLUSION_PREDICATE:
2652         case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2653                 /* Flush first */
2654                 panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
2655
2656                 /* Read back the query results */
2657                 unsigned *result = (unsigned *) query->transfer.cpu;
2658                 unsigned passed = *result;
2659
2660                 if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
2661                         vresult->u64 = passed;
2662                 } else {
2663                         vresult->b = !!passed;
2664                 }
2665
2666                 break;
2667
2668         case PIPE_QUERY_PRIMITIVES_GENERATED:
2669         case PIPE_QUERY_PRIMITIVES_EMITTED:
2670                 panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
2671                 vresult->u64 = query->end - query->start;
2672                 break;
2673
2674         default:
2675                 DBG("Skipped query get %u\n", query->type);
2676                 break;
2677         }
2678
2679         return true;
2680 }
2681
2682 static struct pipe_stream_output_target *
2683 panfrost_create_stream_output_target(struct pipe_context *pctx,
2684                                      struct pipe_resource *prsc,
2685                                      unsigned buffer_offset,
2686                                      unsigned buffer_size)
2687 {
2688         struct pipe_stream_output_target *target;
2689
2690         target = rzalloc(pctx, struct pipe_stream_output_target);
2691
2692         if (!target)
2693                 return NULL;
2694
2695         pipe_reference_init(&target->reference, 1);
2696         pipe_resource_reference(&target->buffer, prsc);
2697
2698         target->context = pctx;
2699         target->buffer_offset = buffer_offset;
2700         target->buffer_size = buffer_size;
2701
2702         return target;
2703 }
2704
2705 static void
2706 panfrost_stream_output_target_destroy(struct pipe_context *pctx,
2707                                       struct pipe_stream_output_target *target)
2708 {
2709         pipe_resource_reference(&target->buffer, NULL);
2710         ralloc_free(target);
2711 }
2712
2713 static void
2714 panfrost_set_stream_output_targets(struct pipe_context *pctx,
2715                                    unsigned num_targets,
2716                                    struct pipe_stream_output_target **targets,
2717                                    const unsigned *offsets)
2718 {
2719         struct panfrost_context *ctx = pan_context(pctx);
2720         struct panfrost_streamout *so = &ctx->streamout;
2721
2722         assert(num_targets <= ARRAY_SIZE(so->targets));
2723
2724         for (unsigned i = 0; i < num_targets; i++) {
2725                 if (offsets[i] != -1)
2726                         so->offsets[i] = offsets[i];
2727
2728                 pipe_so_target_reference(&so->targets[i], targets[i]);
2729         }
2730
2731         for (unsigned i = 0; i < so->num_targets; i++)
2732                 pipe_so_target_reference(&so->targets[i], NULL);
2733
2734         so->num_targets = num_targets;
2735 }
2736
2737 static void
2738 panfrost_setup_hardware(struct panfrost_context *ctx)
2739 {
2740         struct pipe_context *gallium = (struct pipe_context *) ctx;
2741         struct panfrost_screen *screen = pan_screen(gallium->screen);
2742
2743         ctx->scratchpad = panfrost_drm_create_bo(screen, 64 * 4 * 4096, 0);
2744         ctx->tiler_heap = panfrost_drm_create_bo(screen, 4096 * 4096,
2745                                                  PAN_ALLOCATE_INVISIBLE |
2746                                                  PAN_ALLOCATE_GROWABLE);
2747         ctx->tiler_dummy = panfrost_drm_create_bo(screen, 4096,
2748                                                   PAN_ALLOCATE_INVISIBLE);
2749         assert(ctx->scratchpad && ctx->tiler_heap && ctx->tiler_dummy);
2750 }
2751
2752 /* New context creation, which also does hardware initialisation since I don't
2753  * know the better way to structure this :smirk: */
2754
2755 struct pipe_context *
2756 panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
2757 {
2758         struct panfrost_context *ctx = rzalloc(screen, struct panfrost_context);
2759         struct panfrost_screen *pscreen = pan_screen(screen);
2760         memset(ctx, 0, sizeof(*ctx));
2761         struct pipe_context *gallium = (struct pipe_context *) ctx;
2762
2763         ctx->is_t6xx = pscreen->gpu_id < 0x0700; /* Literally, "earlier than T700" */
2764
2765         gallium->screen = screen;
2766
2767         gallium->destroy = panfrost_destroy;
2768
2769         gallium->set_framebuffer_state = panfrost_set_framebuffer_state;
2770
2771         gallium->flush = panfrost_flush;
2772         gallium->clear = panfrost_clear;
2773         gallium->draw_vbo = panfrost_draw_vbo;
2774
2775         gallium->set_vertex_buffers = panfrost_set_vertex_buffers;
2776         gallium->set_constant_buffer = panfrost_set_constant_buffer;
2777         gallium->set_shader_buffers = panfrost_set_shader_buffers;
2778
2779         gallium->set_stencil_ref = panfrost_set_stencil_ref;
2780
2781         gallium->create_sampler_view = panfrost_create_sampler_view;
2782         gallium->set_sampler_views = panfrost_set_sampler_views;
2783         gallium->sampler_view_destroy = panfrost_sampler_view_destroy;
2784
2785         gallium->create_rasterizer_state = panfrost_create_rasterizer_state;
2786         gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state;
2787         gallium->delete_rasterizer_state = panfrost_generic_cso_delete;
2788
2789         gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state;
2790         gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state;
2791         gallium->delete_vertex_elements_state = panfrost_generic_cso_delete;
2792
2793         gallium->create_fs_state = panfrost_create_shader_state;
2794         gallium->delete_fs_state = panfrost_delete_shader_state;
2795         gallium->bind_fs_state = panfrost_bind_fs_state;
2796
2797         gallium->create_vs_state = panfrost_create_shader_state;
2798         gallium->delete_vs_state = panfrost_delete_shader_state;
2799         gallium->bind_vs_state = panfrost_bind_vs_state;
2800
2801         gallium->create_sampler_state = panfrost_create_sampler_state;
2802         gallium->delete_sampler_state = panfrost_generic_cso_delete;
2803         gallium->bind_sampler_states = panfrost_bind_sampler_states;
2804
2805         gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
2806         gallium->bind_depth_stencil_alpha_state   = panfrost_bind_depth_stencil_state;
2807         gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state;
2808
2809         gallium->set_sample_mask = panfrost_set_sample_mask;
2810
2811         gallium->set_clip_state = panfrost_set_clip_state;
2812         gallium->set_viewport_states = panfrost_set_viewport_states;
2813         gallium->set_scissor_states = panfrost_set_scissor_states;
2814         gallium->set_polygon_stipple = panfrost_set_polygon_stipple;
2815         gallium->set_active_query_state = panfrost_set_active_query_state;
2816
2817         gallium->create_query = panfrost_create_query;
2818         gallium->destroy_query = panfrost_destroy_query;
2819         gallium->begin_query = panfrost_begin_query;
2820         gallium->end_query = panfrost_end_query;
2821         gallium->get_query_result = panfrost_get_query_result;
2822
2823         gallium->create_stream_output_target = panfrost_create_stream_output_target;
2824         gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy;
2825         gallium->set_stream_output_targets = panfrost_set_stream_output_targets;
2826
2827         panfrost_resource_context_init(gallium);
2828         panfrost_blend_context_init(gallium);
2829         panfrost_compute_context_init(gallium);
2830
2831         panfrost_drm_init_context(ctx);
2832
2833         panfrost_setup_hardware(ctx);
2834
2835         /* XXX: leaks */
2836         gallium->stream_uploader = u_upload_create_default(gallium);
2837         gallium->const_uploader = gallium->stream_uploader;
2838         assert(gallium->stream_uploader);
2839
2840         /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */
2841         ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1;
2842
2843         ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes);
2844
2845         ctx->blitter = util_blitter_create(gallium);
2846         ctx->blitter_wallpaper = util_blitter_create(gallium);
2847
2848         assert(ctx->blitter);
2849         assert(ctx->blitter_wallpaper);
2850
2851         ctx->last_fragment_flushed = true;
2852         ctx->last_batch = NULL;
2853
2854         /* Prepare for render! */
2855
2856         panfrost_batch_init(ctx);
2857         panfrost_emit_vertex_payload(ctx);
2858         panfrost_emit_tiler_payload(ctx);
2859         panfrost_invalidate_frame(ctx);
2860         panfrost_default_shader_backend(ctx);
2861
2862         return gallium;
2863 }