src/gallium/drivers/panfrost/pan_context.c

   1 /*
   2  * © Copyright 2018 Alyssa Rosenzweig
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  */
  24
  25 #include <sys/poll.h>
  26 #include <errno.h>
  27
  28 #include "pan_context.h"
  29 #include "pan_swizzle.h"
  30 #include "pan_format.h"
  31
  32 #include "util/macros.h"
  33 #include "util/u_format.h"
  34 #include "util/u_inlines.h"
  35 #include "util/u_upload_mgr.h"
  36 #include "util/u_memory.h"
  37 #include "util/half_float.h"
  38 #include "indices/u_primconvert.h"
  39 #include "tgsi/tgsi_parse.h"
  40
  41 #include "pan_screen.h"
  42 #include "pan_blending.h"
  43 #include "pan_blend_shaders.h"
  44 #include "pan_wallpaper.h"
  45
  46 #ifdef DUMP_PERFORMANCE_COUNTERS
  47 static int performance_counter_number = 0;
  48 #endif
  49
  50 /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
  51 //#define DRY_RUN
  52
  53 #define SET_BIT(lval, bit, cond) \
  54         if (cond) \
  55                 lval |= (bit); \
  56         else \
  57                 lval &= ~(bit);
  58
  59 /* TODO: Sample size, etc */
  60
  61 /* True for t6XX, false for t8xx. TODO: Run-time settable for automatic
  62  * hardware configuration. */
  63
  64 static bool is_t6xx = false;
  65
  66 /* If set, we'll require the use of single render-target framebuffer
  67  * descriptors (SFBD), for older hardware -- specifically, <T760 hardware. If
  68  * false, we'll use the MFBD no matter what. New hardware -does- retain support
  69  * for SFBD, and in theory we could flip between them on a per-RT basis, but
  70  * there's no real advantage to doing so */
  71
  72 static bool require_sfbd = false;
  73
  74 static void
  75 panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
  76 {
  77         SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
  78         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);
  79
  80         if (require_sfbd) {
  81                 SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
  82         } else {
  83                 SET_BIT(ctx->fragment_rts[0].format, MALI_MFBD_FORMAT_MSAA, enabled);
  84
  85                 SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled);
  86
  87                 /* XXX */
  88                 ctx->fragment_mfbd.rt_count_2 = enabled ? 4 : 1;
  89         }
  90 }
  91
  92 /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
  93  * indepdent between color buffers and depth/stencil). To enable, we allocate
  94  * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
  95  * edit the fragment job here. This routine should be called ONCE per
  96  * AFBC-compressed buffer, rather than on every frame. */
  97
  98 static void
  99 panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
 100 {
 101         if (require_sfbd) {
 102                 printf("AFBC not supported yet on SFBD\n");
 103                 assert(0);
 104         }
 105
 106         struct pipe_context *gallium = (struct pipe_context *) ctx;
 107         struct panfrost_screen *screen = pan_screen(gallium->screen);
 108        /* AFBC metadata is 16 bytes per tile */
 109         int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
 110         int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
 111         int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
 112         int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */
 113
 114         stride *= 2;  /* TODO: Should this be carried over? */
 115         int main_size = stride * rsrc->base.height0;
 116         rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16;
 117
 118         /* Allocate the AFBC slab itself, large enough to hold the above */
 119         screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
 120                                (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096,
 121                                true, 0, 0, 0);
 122
 123         rsrc->bo->has_afbc = true;
 124
 125         /* Compressed textured reads use a tagged pointer to the metadata */
 126
 127         rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
 128         rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu;
 129 }
 130
 131 static void
 132 panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc)
 133 {
 134         struct pipe_context *gallium = (struct pipe_context *) ctx;
 135         struct panfrost_screen *screen = pan_screen(gallium->screen);
 136         int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
 137         int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
 138
 139         /* 8 byte checksum per tile */
 140         rsrc->bo->checksum_stride = tile_w * 8;
 141         int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
 142         screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0);
 143
 144         rsrc->bo->has_checksum = true;
 145 }
 146
 147 /* ..by contrast, this routine runs for every FRAGMENT job, but does no
 148  * allocation. AFBC is enabled on a per-surface basis */
 149
 150 static void
 151 panfrost_set_fragment_afbc(struct panfrost_context *ctx)
 152 {
 153         for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
 154                 struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[cb]->texture;
 155
 156                 /* Non-AFBC is the default */
 157                 if (!rsrc->bo->has_afbc)
 158                         continue;
 159
 160                 if (require_sfbd) {
 161                         fprintf(stderr, "Color AFBC not supported on SFBD\n");
 162                         assert(0);
 163                 }
 164
 165                 /* Enable AFBC for the render target */
 166                 ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu;
 167                 ctx->fragment_rts[0].afbc.stride = 0;
 168                 ctx->fragment_rts[0].afbc.unk = 0x30009;
 169
 170                 ctx->fragment_rts[0].format |= MALI_MFBD_FORMAT_AFBC;
 171
 172                 /* Point rendering to our special framebuffer */
 173                 ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
 174
 175                 /* WAT? Stride is diff from the scanout case */
 176                 ctx->fragment_rts[0].framebuffer_stride = ctx->pipe_framebuffer.width * 2 * 4;
 177         }
 178
 179         /* Enable depth/stencil AFBC for the framebuffer (not the render target) */
 180         if (ctx->pipe_framebuffer.zsbuf) {
 181                 struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture;
 182
 183                 if (rsrc->bo->has_afbc) {
 184                         if (require_sfbd) {
 185                                 fprintf(stderr, "Depth AFBC not supported on SFBD\n");
 186                                 assert(0);
 187                         }
 188
 189                         ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
 190
 191                         ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu;
 192                         ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0;
 193
 194                         ctx->fragment_extra.ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
 195
 196                         ctx->fragment_extra.ds_afbc.zero1 = 0x10009;
 197                         ctx->fragment_extra.ds_afbc.padding = 0x1000;
 198
 199                         ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */
 200
 201                         ctx->fragment_mfbd.unk3 |= 0x400;
 202                 }
 203         }
 204
 205         /* For the special case of a depth-only FBO, we need to attach a dummy render target */
 206
 207         if (ctx->pipe_framebuffer.nr_cbufs == 0) {
 208                 if (require_sfbd) {
 209                         fprintf(stderr, "Depth-only FBO not supported on SFBD\n");
 210                         assert(0);
 211                 }
 212
 213                 ctx->fragment_rts[0].format = 0x80008000;
 214                 ctx->fragment_rts[0].framebuffer = 0;
 215                 ctx->fragment_rts[0].framebuffer_stride = 0;
 216         }
 217 }
 218
 219 /* Framebuffer descriptor */
 220
 221 static void
 222 panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
 223 {
 224         fb->width = MALI_POSITIVE(w);
 225         fb->height = MALI_POSITIVE(h);
 226
 227         /* No idea why this is needed, but it's how resolution_check is
 228          * calculated.  It's not clear to us yet why the hardware wants this.
 229          * The formula itself was discovered mostly by manual bruteforce and
 230          * aggressive algebraic simplification. */
 231
 232         fb->resolution_check = ((w + h) / 3) << 4;
 233 }
 234
 235 static struct mali_single_framebuffer
 236 panfrost_emit_sfbd(struct panfrost_context *ctx)
 237 {
 238         struct mali_single_framebuffer framebuffer = {
 239                 .unknown2 = 0x1f,
 240                 .format = 0x30000000,
 241                 .clear_flags = 0x1000,
 242                 .unknown_address_0 = ctx->scratchpad.gpu,
 243                 .unknown_address_1 = ctx->misc_0.gpu,
 244                 .unknown_address_2 = ctx->misc_0.gpu + 40960,
 245                 .tiler_flags = 0xf0,
 246                 .tiler_heap_free = ctx->tiler_heap.gpu,
 247                 .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
 248         };
 249
 250         panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
 251
 252         return framebuffer;
 253 }
 254
 255 static struct bifrost_framebuffer
 256 panfrost_emit_mfbd(struct panfrost_context *ctx)
 257 {
 258         struct bifrost_framebuffer framebuffer = {
 259                 /* It is not yet clear what tiler_meta means or how it's
 260                  * calculated, but we can tell the lower 32-bits are a
 261                  * (monotonically increasing?) function of tile count and
 262                  * geometry complexity; I suspect it defines a memory size of
 263                  * some kind? for the tiler. It's really unclear at the
 264                  * moment... but to add to the confusion, the hardware is happy
 265                  * enough to accept a zero in this field, so we don't even have
 266                  * to worry about it right now.
 267                  *
 268                  * The byte (just after the 32-bit mark) is much more
 269                  * interesting. The higher nibble I've only ever seen as 0xF,
 270                  * but the lower one I've seen as 0x0 or 0xF, and it's not
 271                  * obvious what the difference is. But what -is- obvious is
 272                  * that when the lower nibble is zero, performance is severely
 273                  * degraded compared to when the lower nibble is set.
 274                  * Evidently, that nibble enables some sort of fast path,
 275                  * perhaps relating to caching or tile flush? Regardless, at
 276                  * this point there's no clear reason not to set it, aside from
 277                  * substantially increased memory requirements (of the misc_0
 278                  * buffer) */
 279
 280                 .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,
 281
 282                 .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
 283                 .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
 284                 .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
 285                 .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
 286
 287                 .unk1 = 0x1080,
 288
 289                 /* TODO: MRT */
 290                 .rt_count_1 = MALI_POSITIVE(1),
 291                 .rt_count_2 = 4,
 292
 293                 .unknown2 = 0x1f,
 294
 295                 /* Corresponds to unknown_address_X of SFBD */
 296                 .scratchpad = ctx->scratchpad.gpu,
 297                 .tiler_scratch_start  = ctx->misc_0.gpu,
 298
 299                 /* The constant added here is, like the lower word of
 300                  * tiler_meta, (loosely) another product of framebuffer size
 301                  * and geometry complexity. It must be sufficiently large for
 302                  * the tiler_meta fast path to work; if it's too small, there
 303                  * will be DATA_INVALID_FAULTs. Conversely, it must be less
 304                  * than the total size of misc_0, or else there's no room. It's
 305                  * possible this constant configures a partition between two
 306                  * parts of misc_0? We haven't investigated the functionality,
 307                  * as these buffers are internally used by the hardware
 308                  * (presumably by the tiler) but not seemingly touched by the driver
 309                  */
 310
 311                 .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,
 312
 313                 .tiler_heap_start = ctx->tiler_heap.gpu,
 314                 .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
 315         };
 316
 317         return framebuffer;
 318 }
 319
 320 /* Are we currently rendering to the screen (rather than an FBO)? */
 321
 322 static bool
 323 panfrost_is_scanout(struct panfrost_context *ctx)
 324 {
 325         /* If there is no color buffer, it's an FBO */
 326         if (!ctx->pipe_framebuffer.nr_cbufs)
 327                 return false;
 328
 329         /* If we're too early that no framebuffer was sent, it's scanout */
 330         if (!ctx->pipe_framebuffer.cbufs[0])
 331                 return true;
 332
 333         return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
 334                ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
 335                ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
 336 }
 337
 338 /* The above function is for generalised fbd emission, used in both fragment as
 339  * well as vertex/tiler payloads. This payload is specific to fragment
 340  * payloads. */
 341
 342 static void
 343 panfrost_new_frag_framebuffer(struct panfrost_context *ctx)
 344 {
 345         mali_ptr framebuffer;
 346         int stride;
 347
 348         if (ctx->pipe_framebuffer.nr_cbufs > 0) {
 349                 framebuffer = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->bo->gpu[0];
 350                 stride = util_format_get_stride(ctx->pipe_framebuffer.cbufs[0]->format, ctx->pipe_framebuffer.width);
 351         } else {
 352                 /* Depth-only framebuffer -> dummy RT */
 353                 framebuffer = 0;
 354                 stride = 0;
 355         }
 356
 357         /* The default is upside down from OpenGL's perspective. */
 358         if (panfrost_is_scanout(ctx)) {
 359                 framebuffer += stride * (ctx->pipe_framebuffer.height - 1);
 360                 stride = -stride;
 361         }
 362
 363         if (require_sfbd) {
 364                 struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx);
 365
 366                 fb.framebuffer = framebuffer;
 367                 fb.stride = stride;
 368
 369                 fb.format = 0xb84e0281; /* RGB32, no MSAA */
 370                 memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb));
 371         } else {
 372                 struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);
 373
 374                 /* XXX: MRT case */
 375                 fb.rt_count_2 = 1;
 376                 fb.unk3 = 0x100;
 377
 378                 struct bifrost_render_target rt = {
 379                         .unk1 = 0x4000000,
 380                         .format = 0x860a8899, /* RGBA32, no MSAA */
 381                         .framebuffer = framebuffer,
 382                         .framebuffer_stride = (stride / 16) & 0xfffffff,
 383                 };
 384
 385                 memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt));
 386
 387                 memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra));
 388                 memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb));
 389         }
 390 }
 391
 392 /* Maps float 0.0-1.0 to int 0x00-0xFF */
 393 static uint8_t
 394 normalised_float_to_u8(float f)
 395 {
 396         return (uint8_t) (int) (f * 255.0f);
 397 }
 398
 399 static void
 400 panfrost_clear_sfbd(struct panfrost_context *ctx,
 401                 bool clear_color,
 402                 bool clear_depth,
 403                 bool clear_stencil,
 404                 uint32_t packed_color,
 405                 double depth, unsigned stencil
 406                 )
 407 {
 408         struct mali_single_framebuffer *sfbd = &ctx->fragment_sfbd;
 409
 410         if (clear_color) {
 411                 sfbd->clear_color_1 = packed_color;
 412                 sfbd->clear_color_2 = packed_color;
 413                 sfbd->clear_color_3 = packed_color;
 414                 sfbd->clear_color_4 = packed_color;
 415         }
 416
 417         if (clear_depth) {
 418                 sfbd->clear_depth_1 = depth;
 419                 sfbd->clear_depth_2 = depth;
 420                 sfbd->clear_depth_3 = depth;
 421                 sfbd->clear_depth_4 = depth;
 422         }
 423
 424         if (clear_stencil) {
 425                 sfbd->clear_stencil = stencil;
 426         }
 427
 428         /* Setup buffers */
 429
 430         if (clear_depth) {
 431                 sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu;
 432                 sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
 433         }
 434
 435         if (clear_stencil) {
 436                 sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu;
 437                 sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
 438         }
 439
 440         /* Set flags based on what has been cleared, for the SFBD case */
 441         /* XXX: What do these flags mean? */
 442         int clear_flags = 0x101100;
 443
 444         if (clear_color && clear_depth && clear_stencil) {
 445                 /* On a tiler like this, it's fastest to clear all three buffers at once */
 446
 447                 clear_flags |= MALI_CLEAR_FAST;
 448         } else {
 449                 clear_flags |= MALI_CLEAR_SLOW;
 450
 451                 if (clear_stencil)
 452                         clear_flags |= MALI_CLEAR_SLOW_STENCIL;
 453         }
 454
 455         sfbd->clear_flags = clear_flags;
 456 }
 457
 458 static void
 459 panfrost_clear_mfbd(struct panfrost_context *ctx,
 460                 bool clear_color,
 461                 bool clear_depth,
 462                 bool clear_stencil,
 463                 uint32_t packed_color,
 464                 double depth, unsigned stencil
 465                 )
 466 {
 467         struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0];
 468         struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd;
 469
 470         if (clear_color) {
 471                 buffer_color->clear_color_1 = packed_color;
 472                 buffer_color->clear_color_2 = packed_color;
 473                 buffer_color->clear_color_3 = packed_color;
 474                 buffer_color->clear_color_4 = packed_color;
 475         }
 476
 477         if (clear_depth) {
 478                 buffer_ds->clear_depth = depth;
 479         }
 480
 481         if (clear_stencil) {
 482                 buffer_ds->clear_stencil = stencil;
 483         }
 484
 485         if (clear_depth || clear_stencil) {
 486                 /* Setup combined 24/8 depth/stencil */
 487                 ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
 488                 //ctx->fragment_extra.unk = /*0x405*/0x404;
 489                 ctx->fragment_extra.unk = 0x405;
 490                 ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu;
 491                 ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4;
 492         }
 493 }
 494
 495 static void
 496 panfrost_clear(
 497         struct pipe_context *pipe,
 498         unsigned buffers,
 499         const union pipe_color_union *color,
 500         double depth, unsigned stencil)
 501 {
 502         struct panfrost_context *ctx = pan_context(pipe);
 503
 504         if (!color) {
 505                 printf("Warning: clear color null?\n");
 506                 return;
 507         }
 508
 509         /* Save settings for FBO switch */
 510         ctx->last_clear.buffers = buffers;
 511         ctx->last_clear.color = color;
 512         ctx->last_clear.depth = depth;
 513         ctx->last_clear.depth = depth;
 514
 515         bool clear_color = buffers & PIPE_CLEAR_COLOR;
 516         bool clear_depth = buffers & PIPE_CLEAR_DEPTH;
 517         bool clear_stencil = buffers & PIPE_CLEAR_STENCIL;
 518
 519         /* Remember that we've done something */
 520         ctx->frame_cleared = true;
 521
 522         /* Alpha clear only meaningful without alpha channel */
 523         bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format);
 524         float clear_alpha = has_alpha ? color->f[3] : 1.0f;
 525
 526         uint32_t packed_color =
 527                 (normalised_float_to_u8(clear_alpha) << 24) |
 528                 (normalised_float_to_u8(color->f[2]) << 16) |
 529                 (normalised_float_to_u8(color->f[1]) <<  8) |
 530                 (normalised_float_to_u8(color->f[0]) <<  0);
 531
 532         if (require_sfbd) {
 533                 panfrost_clear_sfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
 534         } else {
 535                 panfrost_clear_mfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
 536         }
 537 }
 538
 539 static mali_ptr
 540 panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
 541 {
 542         /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */
 543         struct bifrost_render_target rts_list[] = {
 544                 {
 545                         .chunknown = {
 546                                 .unk = 0x30005,
 547                         },
 548                         .framebuffer = ctx->misc_0.gpu,
 549                         .zero2 = 0x3,
 550                 },
 551         };
 552
 553         /* Allocate memory for the three components */
 554         int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
 555         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
 556
 557         /* Opaque 1024-block */
 558         rts_list[0].chunknown.pointer = transfer.gpu;
 559
 560         memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
 561         memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));
 562
 563         return (transfer.gpu + 1024) | MALI_MFBD;
 564 }
 565
 566 static mali_ptr
 567 panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
 568 {
 569         return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
 570 }
 571
 572 static void
 573 panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
 574 {
 575         mali_ptr framebuffer = require_sfbd ?
 576                 panfrost_attach_vt_sfbd(ctx) :
 577                 panfrost_attach_vt_mfbd(ctx);
 578
 579         ctx->payload_vertex.postfix.framebuffer = framebuffer;
 580         ctx->payload_tiler.postfix.framebuffer = framebuffer;
 581 }
 582
 583 static void
 584 panfrost_viewport(struct panfrost_context *ctx,
 585                   float depth_clip_near,
 586                   float depth_clip_far,
 587                   int viewport_x0, int viewport_y0,
 588                   int viewport_x1, int viewport_y1)
 589 {
 590         /* Clip bounds are encoded as floats. The viewport itself is encoded as
 591          * (somewhat) asymmetric ints. */
 592
 593         struct mali_viewport ret = {
 594                 /* By default, do no viewport clipping, i.e. clip to (-inf,
 595                  * inf) in each direction. Clipping to the viewport in theory
 596                  * should work, but in practice causes issues when we're not
 597                  * explicitly trying to scissor */
 598
 599                 .clip_minx = -inff,
 600                 .clip_miny = -inff,
 601                 .clip_maxx = inff,
 602                 .clip_maxy = inff,
 603
 604                 /* We always perform depth clipping (TODO: Can this be disabled?) */
 605
 606                 .clip_minz = depth_clip_near,
 607                 .clip_maxz = depth_clip_far,
 608
 609                 .viewport0 = { viewport_x0, viewport_y0 },
 610                 .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) },
 611         };
 612
 613         memcpy(ctx->viewport, &ret, sizeof(ret));
 614 }
 615
 616 /* Reset per-frame context, called on context initialisation as well as after
 617  * flushing a frame */
 618
 619 static void
 620 panfrost_invalidate_frame(struct panfrost_context *ctx)
 621 {
 622         unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset;
 623         printf("Uploaded transient %d bytes\n", transient_count);
 624
 625         /* Rotate cmdstream */
 626         if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0])))
 627                 ctx->cmdstream_i = 0;
 628
 629         if (require_sfbd)
 630                 ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
 631         else
 632                 ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
 633
 634         panfrost_new_frag_framebuffer(ctx);
 635
 636         /* Reset varyings allocated */
 637         ctx->varying_height = 0;
 638
 639         /* The transient cmdstream is dirty every frame; the only bits worth preserving
 640          * (textures, shaders, etc) are in other buffers anyways */
 641
 642         ctx->transient_pools[ctx->cmdstream_i].entry_index = 0;
 643         ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0;
 644
 645         /* Regenerate payloads */
 646         panfrost_attach_vt_framebuffer(ctx);
 647
 648         if (ctx->rasterizer)
 649                 ctx->dirty |= PAN_DIRTY_RASTERIZER;
 650
 651         /* XXX */
 652         ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
 653 }
 654
 655 /* In practice, every field of these payloads should be configurable
 656  * arbitrarily, which means these functions are basically catch-all's for
 657  * as-of-yet unwavering unknowns */
 658
 659 static void
 660 panfrost_emit_vertex_payload(struct panfrost_context *ctx)
 661 {
 662         struct midgard_payload_vertex_tiler payload = {
 663                 .prefix = {
 664                         .workgroups_z_shift = 32,
 665                         .workgroups_x_shift_2 = 0x2,
 666                         .workgroups_x_shift_3 = 0x5,
 667                 },
 668                 .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2),
 669         };
 670
 671         memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
 672 }
 673
 674 static void
 675 panfrost_emit_tiler_payload(struct panfrost_context *ctx)
 676 {
 677         struct midgard_payload_vertex_tiler payload = {
 678                 .prefix = {
 679                         .workgroups_z_shift = 32,
 680                         .workgroups_x_shift_2 = 0x2,
 681                         .workgroups_x_shift_3 = 0x6,
 682
 683                         .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */
 684                 },
 685         };
 686
 687         /* Reserve the viewport */
 688         struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR);
 689         ctx->viewport = (struct mali_viewport *) t.cpu;
 690         payload.postfix.viewport = t.gpu;
 691
 692         memcpy(&ctx->payload_tiler, &payload, sizeof(payload));
 693 }
 694
 695 static unsigned
 696 translate_tex_wrap(enum pipe_tex_wrap w)
 697 {
 698         switch (w) {
 699         case PIPE_TEX_WRAP_REPEAT:
 700                 return MALI_WRAP_REPEAT;
 701
 702         case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
 703                 return MALI_WRAP_CLAMP_TO_EDGE;
 704
 705         case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
 706                 return MALI_WRAP_CLAMP_TO_BORDER;
 707
 708         case PIPE_TEX_WRAP_MIRROR_REPEAT:
 709                 return MALI_WRAP_MIRRORED_REPEAT;
 710
 711         default:
 712                 assert(0);
 713                 return 0;
 714         }
 715 }
 716
 717 static unsigned
 718 translate_tex_filter(enum pipe_tex_filter f)
 719 {
 720         switch (f) {
 721         case PIPE_TEX_FILTER_NEAREST:
 722                 return MALI_NEAREST;
 723
 724         case PIPE_TEX_FILTER_LINEAR:
 725                 return MALI_LINEAR;
 726
 727         default:
 728                 assert(0);
 729                 return 0;
 730         }
 731 }
 732
 733 static unsigned
 734 translate_mip_filter(enum pipe_tex_mipfilter f)
 735 {
 736         return (f == PIPE_TEX_MIPFILTER_LINEAR) ? MALI_MIP_LINEAR : 0;
 737 }
 738
 739 static unsigned
 740 panfrost_translate_compare_func(enum pipe_compare_func in)
 741 {
 742         switch (in) {
 743         case PIPE_FUNC_NEVER:
 744                 return MALI_FUNC_NEVER;
 745
 746         case PIPE_FUNC_LESS:
 747                 return MALI_FUNC_LESS;
 748
 749         case PIPE_FUNC_EQUAL:
 750                 return MALI_FUNC_EQUAL;
 751
 752         case PIPE_FUNC_LEQUAL:
 753                 return MALI_FUNC_LEQUAL;
 754
 755         case PIPE_FUNC_GREATER:
 756                 return MALI_FUNC_GREATER;
 757
 758         case PIPE_FUNC_NOTEQUAL:
 759                 return MALI_FUNC_NOTEQUAL;
 760
 761         case PIPE_FUNC_GEQUAL:
 762                 return MALI_FUNC_GEQUAL;
 763
 764         case PIPE_FUNC_ALWAYS:
 765                 return MALI_FUNC_ALWAYS;
 766         }
 767
 768         assert (0);
 769         return 0; /* Unreachable */
 770 }
 771
 772 static unsigned
 773 panfrost_translate_alt_compare_func(enum pipe_compare_func in)
 774 {
 775         switch (in) {
 776         case PIPE_FUNC_NEVER:
 777                 return MALI_ALT_FUNC_NEVER;
 778
 779         case PIPE_FUNC_LESS:
 780                 return MALI_ALT_FUNC_LESS;
 781
 782         case PIPE_FUNC_EQUAL:
 783                 return MALI_ALT_FUNC_EQUAL;
 784
 785         case PIPE_FUNC_LEQUAL:
 786                 return MALI_ALT_FUNC_LEQUAL;
 787
 788         case PIPE_FUNC_GREATER:
 789                 return MALI_ALT_FUNC_GREATER;
 790
 791         case PIPE_FUNC_NOTEQUAL:
 792                 return MALI_ALT_FUNC_NOTEQUAL;
 793
 794         case PIPE_FUNC_GEQUAL:
 795                 return MALI_ALT_FUNC_GEQUAL;
 796
 797         case PIPE_FUNC_ALWAYS:
 798                 return MALI_ALT_FUNC_ALWAYS;
 799         }
 800
 801         assert (0);
 802         return 0; /* Unreachable */
 803 }
 804
 805 static unsigned
 806 panfrost_translate_stencil_op(enum pipe_stencil_op in)
 807 {
 808         switch (in) {
 809         case PIPE_STENCIL_OP_KEEP:
 810                 return MALI_STENCIL_KEEP;
 811
 812         case PIPE_STENCIL_OP_ZERO:
 813                 return MALI_STENCIL_ZERO;
 814
 815         case PIPE_STENCIL_OP_REPLACE:
 816                 return MALI_STENCIL_REPLACE;
 817
 818         case PIPE_STENCIL_OP_INCR:
 819                 return MALI_STENCIL_INCR;
 820
 821         case PIPE_STENCIL_OP_DECR:
 822                 return MALI_STENCIL_DECR;
 823
 824         case PIPE_STENCIL_OP_INCR_WRAP:
 825                 return MALI_STENCIL_INCR_WRAP;
 826
 827         case PIPE_STENCIL_OP_DECR_WRAP:
 828                 return MALI_STENCIL_DECR_WRAP;
 829
 830         case PIPE_STENCIL_OP_INVERT:
 831                 return MALI_STENCIL_INVERT;
 832         }
 833
 834         assert (0);
 835         return 0; /* Unreachable */
 836 }
 837
 838 static void
 839 panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
 840 {
 841         out->ref = 0; /* Gallium gets it from elsewhere */
 842
 843         out->mask = in->valuemask;
 844         out->func = panfrost_translate_compare_func(in->func);
 845         out->sfail = panfrost_translate_stencil_op(in->fail_op);
 846         out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
 847         out->dppass = panfrost_translate_stencil_op(in->zpass_op);
 848 }
 849
 850 static void
 851 panfrost_default_shader_backend(struct panfrost_context *ctx)
 852 {
 853         struct mali_shader_meta shader = {
 854                 .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),
 855
 856                 .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
 857                 .unknown2_4 = MALI_NO_MSAA | 0x4e0,
 858         };
 859
 860         if (is_t6xx) {
 861                 shader.unknown2_4 |= 0x10;
 862         }
 863
 864         struct pipe_stencil_state default_stencil = {
 865                 .enabled = 0,
 866                 .func = PIPE_FUNC_ALWAYS,
 867                 .fail_op = MALI_STENCIL_KEEP,
 868                 .zfail_op = MALI_STENCIL_KEEP,
 869                 .zpass_op = MALI_STENCIL_KEEP,
 870                 .writemask = 0xFF,
 871                 .valuemask = 0xFF
 872         };
 873
 874         panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
 875         shader.stencil_mask_front = default_stencil.writemask;
 876
 877         panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
 878         shader.stencil_mask_back = default_stencil.writemask;
 879
 880         if (default_stencil.enabled)
 881                 shader.unknown2_4 |= MALI_STENCIL_TEST;
 882
 883         memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
 884 }
 885
 886 /* Generates a vertex/tiler job. This is, in some sense, the heart of the
 887  * graphics command stream. It should be called once per draw, accordding to
 888  * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
 889  * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
 890  * vertex jobs. */
 891
 892 struct panfrost_transfer
 893 panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
 894 {
 895         /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
 896         int draw_job_index = 1 + (2 * ctx->draw_count);
 897
 898         struct mali_job_descriptor_header job = {
 899                 .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
 900                 .job_index = draw_job_index + (is_tiler ? 1 : 0),
 901 #ifdef __LP64__
 902                 .job_descriptor_size = 1,
 903 #endif
 904         };
 905
 906         /* Only non-elided tiler jobs have dependencies which are known at this point */
 907
 908         if (is_tiler && !is_elided_tiler) {
 909                 /* Tiler jobs depend on vertex jobs */
 910
 911                 job.job_dependency_index_1 = draw_job_index;
 912
 913                 /* Tiler jobs also depend on the previous tiler job */
 914
 915                 if (ctx->draw_count)
 916                         job.job_dependency_index_2 = draw_job_index - 1;
 917         }
 918
 919         struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;
 920
 921         /* There's some padding hacks on 32-bit */
 922
 923 #ifdef __LP64__
 924         int offset = 0;
 925 #else
 926         int offset = 4;
 927 #endif
 928         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
 929         memcpy(transfer.cpu, &job, sizeof(job));
 930         memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
 931         return transfer;
 932 }
 933
 934 /* Generates a set value job. It's unclear what exactly this does, why it's
 935  * necessary, and when to call it. */
 936
 937 static void
 938 panfrost_set_value_job(struct panfrost_context *ctx)
 939 {
 940         struct mali_job_descriptor_header job = {
 941                 .job_type = JOB_TYPE_SET_VALUE,
 942                 .job_descriptor_size = 1,
 943                 .job_index = 1 + (2 * ctx->draw_count),
 944         };
 945
 946         struct mali_payload_set_value payload = {
 947                 .out = ctx->misc_0.gpu,
 948                 .unknown = 0x3,
 949         };
 950
 951         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
 952         memcpy(transfer.cpu, &job, sizeof(job));
 953         memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));
 954
 955         ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
 956         ctx->set_value_job = transfer.gpu;
 957 }
 958
 959 /* Generate a fragment job. This should be called once per frame. (According to
 960  * presentations, this is supposed to correspond to eglSwapBuffers) */
 961
 962 mali_ptr
 963 panfrost_fragment_job(struct panfrost_context *ctx)
 964 {
 965         /* Update fragment FBD */
 966         panfrost_set_fragment_afbc(ctx);
 967
 968         if (ctx->pipe_framebuffer.nr_cbufs == 1) {
 969                 struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
 970
 971                 if (rsrc->bo->has_checksum) {
 972                         if (require_sfbd) {
 973                                 fprintf(stderr, "Checksumming not supported on SFBD\n");
 974                                 assert(0);
 975                         }
 976
 977                         int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);
 978
 979                         ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
 980                         ctx->fragment_extra.unk |= 0x420;
 981                         ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride;
 982                         ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0;
 983                 }
 984         }
 985
 986         /* The frame is complete and therefore the framebuffer descriptor is
 987          * ready for linkage and upload */
 988
 989         size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1);
 990         struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz);
 991         off_t offset = 0;
 992
 993         if (require_sfbd) {
 994                 /* Upload just the SFBD all at once */
 995                 memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd));
 996                 offset += sizeof(ctx->fragment_sfbd);
 997         } else {
 998                 /* Upload the MFBD header */
 999                 memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd));
1000                 offset += sizeof(ctx->fragment_mfbd);
1001
1002                 /* Upload extra framebuffer info if necessary */
1003                 if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
1004                         memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra));
1005                         offset += sizeof(struct bifrost_fb_extra);
1006                 }
1007
1008                 /* Upload (single) render target */
1009                 memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1);
1010         }
1011
1012         /* Generate the fragment (frame) job */
1013
1014         struct mali_job_descriptor_header header = {
1015                 .job_type = JOB_TYPE_FRAGMENT,
1016                 .job_index = 1,
1017 #ifdef __LP64__
1018                 .job_descriptor_size = 1
1019 #endif
1020         };
1021
1022         struct mali_payload_fragment payload = {
1023                 .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0),
1024                 .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height),
1025                 .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD),
1026         };
1027
1028         if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
1029                 /* Signal that there is an extra portion of the framebuffer
1030                  * descriptor */
1031
1032                 payload.framebuffer |= 2;
1033         }
1034
1035         /* Normally, there should be no padding. However, fragment jobs are
1036          * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes
1037          * of zero padding in between. */
1038
1039         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload));
1040         memcpy(transfer.cpu, &header, sizeof(header));
1041         memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload));
1042         return transfer.gpu;
1043 }
1044
1045 /* Emits attributes and varying descriptors, which should be called every draw,
1046  * excepting some obscure circumstances */
1047
1048 static void
1049 panfrost_emit_vertex_data(struct panfrost_context *ctx)
1050 {
1051         /* TODO: Only update the dirtied buffers */
1052         union mali_attr attrs[PIPE_MAX_ATTRIBS];
1053         union mali_attr varyings[PIPE_MAX_ATTRIBS];
1054
1055         unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);
1056
1057         for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
1058                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
1059                 struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
1060
1061                 /* Let's figure out the layout of the attributes in memory so
1062                  * we can be smart about size computation. The idea is to
1063                  * figure out the maximum src_offset, which tells us the latest
1064                  * spot a vertex could start. Meanwhile, we figure out the size
1065                  * of the attribute memory (assuming interleaved
1066                  * representation) and tack on the max src_offset for a
1067                  * reasonably good upper bound on the size.
1068                  *
1069                  * Proving correctness is left as an exercise to the reader.
1070                  */
1071
1072                 unsigned max_src_offset = 0;
1073
1074                 for (unsigned j = 0; j < ctx->vertex->num_elements; ++j) {
1075                         if (ctx->vertex->pipe[j].vertex_buffer_index != i) continue;
1076                         max_src_offset = MAX2(max_src_offset, ctx->vertex->pipe[j].src_offset);
1077                 }
1078
1079                 /* Offset vertex count by draw_start to make sure we upload enough */
1080                 attrs[i].stride = buf->stride;
1081                 attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset;
1082
1083                 /* Vertex elements are -already- GPU-visible, at
1084                  * rsrc->gpu. However, attribute buffers must be 64 aligned. If
1085                  * it is not, for now we have to duplicate the buffer. */
1086
1087                 mali_ptr effective_address = (rsrc->bo->gpu[0] + buf->buffer_offset);
1088
1089                 if (effective_address & 0x3F) {
1090                         attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu[0] + buf->buffer_offset, attrs[i].size) | 1;
1091                 } else {
1092                         attrs[i].elements = effective_address | 1;
1093                 }
1094         }
1095
1096         struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings;
1097
1098         for (int i = 0; i < vars->varying_buffer_count; ++i) {
1099                 mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;
1100
1101                 varyings[i].elements = varying_address | 1;
1102                 varyings[i].stride = vars->varyings_stride[i];
1103                 varyings[i].size = vars->varyings_stride[i] * invocation_count;
1104
1105                 /* If this varying has to be linked somewhere, do it now. See
1106                  * pan_assemble.c for the indices. TODO: Use a more generic
1107                  * linking interface */
1108
1109                 if (i == 1) {
1110                         /* gl_Position */
1111                         ctx->payload_tiler.postfix.position_varying = varying_address;
1112                 } else if (i == 2) {
1113                         /* gl_PointSize */
1114                         ctx->payload_tiler.primitive_size.pointer = varying_address;
1115                 }
1116
1117                 /* Varyings appear to need 64-byte alignment */
1118                 ctx->varying_height += ALIGN(varyings[i].size, 64);
1119
1120                 /* Ensure that we fit */
1121                 assert(ctx->varying_height < ctx->varying_mem.size);
1122         }
1123
1124         ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr));
1125
1126         mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr));
1127         ctx->payload_vertex.postfix.varyings = varyings_p;
1128         ctx->payload_tiler.postfix.varyings = varyings_p;
1129 }
1130
1131 /* Go through dirty flags and actualise them in the cmdstream. */
1132
1133 void
1134 panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
1135 {
1136         if (with_vertex_data) {
1137                 panfrost_emit_vertex_data(ctx);
1138         }
1139
1140         if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
1141                 ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
1142                 panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
1143         }
1144
1145         if (ctx->occlusion_query) {
1146                 ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
1147                 ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
1148         }
1149
1150         if (ctx->dirty & PAN_DIRTY_VS) {
1151                 assert(ctx->vs);
1152
1153                 struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
1154
1155                 /* Late shader descriptor assignments */
1156                 vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
1157                 vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];
1158
1159                 /* Who knows */
1160                 vs->tripipe->midgard1.unknown1 = 0x2201;
1161
1162                 ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;
1163
1164                 /* Varying descriptor is tied to the vertex shader. Also the
1165                  * fragment shader, I suppose, but it's generated with the
1166                  * vertex shader so */
1167
1168                 struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings;
1169
1170                 ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor;
1171                 ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment;
1172         }
1173
1174         if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
1175                 /* Check if we need to link the gl_PointSize varying */
1176                 assert(ctx->vs);
1177                 struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];
1178
1179                 bool needs_gl_point_size = vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;
1180
1181                 if (!needs_gl_point_size) {
1182                         /* If the size is constant, write it out. Otherwise,
1183                          * don't touch primitive_size (since we would clobber
1184                          * the pointer there) */
1185
1186                         ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width;
1187                 }
1188
1189                 /* Set the flag for varying (pointer) point size if the shader needs that */
1190                 SET_BIT(ctx->payload_tiler.prefix.unknown_draw, MALI_DRAW_VARYING_SIZE, needs_gl_point_size);
1191         }
1192
1193         /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
1194         if (ctx->fs)
1195                 ctx->dirty |= PAN_DIRTY_FS;
1196
1197         if (ctx->dirty & PAN_DIRTY_FS) {
1198                 assert(ctx->fs);
1199                 struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant];
1200
1201 #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
1202
1203                 COPY(shader);
1204                 COPY(attribute_count);
1205                 COPY(varying_count);
1206                 COPY(midgard1.uniform_count);
1207                 COPY(midgard1.work_count);
1208                 COPY(midgard1.unknown2);
1209
1210 #undef COPY
1211                 /* If there is a blend shader, work registers are shared */
1212
1213                 if (ctx->blend->has_blend_shader)
1214                         ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16;
1215
1216                 /* Set late due to depending on render state */
1217                 /* The one at the end seems to mean "1 UBO" */
1218                 ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201;
1219
1220                 /* Assign texture/sample count right before upload */
1221                 ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT];
1222                 ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT];
1223
1224                 /* Assign the stencil refs late */
1225                 ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0];
1226                 ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1];
1227
1228                 /* CAN_DISCARD should be set if the fragment shader possibly
1229                  * contains a 'discard' instruction. It is likely this is
1230                  * related to optimizations related to forward-pixel kill, as
1231                  * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good
1232                  * thing?" by Peter Harris
1233                  */
1234
1235                 if (variant->can_discard) {
1236                         ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
1237                         ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE;
1238                         ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000;
1239                         ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
1240                 }
1241
1242                 /* Check if we're using the default blend descriptor (fast path) */
1243
1244                 bool no_blending =
1245                         !ctx->blend->has_blend_shader &&
1246                         (ctx->blend->equation.rgb_mode == 0x122) &&
1247                         (ctx->blend->equation.alpha_mode == 0x122) &&
1248                         (ctx->blend->equation.color_mask == 0xf);
1249
1250                 if (require_sfbd) {
1251                         /* When only a single render target platform is used, the blend
1252                          * information is inside the shader meta itself. We
1253                          * additionally need to signal CAN_DISCARD for nontrivial blend
1254                          * modes (so we're able to read back the destination buffer) */
1255
1256                         if (ctx->blend->has_blend_shader) {
1257                                 ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader;
1258                         } else {
1259                                 memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation));
1260                         }
1261
1262                         if (!no_blending) {
1263                                 ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD;
1264                         }
1265                 }
1266
1267                 size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta);
1268                 struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
1269                 memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta));
1270
1271                 ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4;
1272
1273                 if (!require_sfbd) {
1274                         /* Additional blend descriptor tacked on for jobs using MFBD */
1275
1276                         unsigned blend_count = 0;
1277
1278                         if (ctx->blend->has_blend_shader) {
1279                                 /* For a blend shader, the bottom nibble corresponds to
1280                                  * the number of work registers used, which signals the
1281                                  * -existence- of a blend shader */
1282
1283                                 assert(ctx->blend->blend_work_count >= 2);
1284                                 blend_count |= MIN2(ctx->blend->blend_work_count, 3);
1285                         } else {
1286                                 /* Otherwise, the bottom bit simply specifies if
1287                                  * blending (anything other than REPLACE) is enabled */
1288
1289
1290                                 if (!no_blending)
1291                                         blend_count |= 0x1;
1292                         }
1293
1294                         /* Second blend equation is always a simple replace */
1295
1296                         uint64_t replace_magic = 0xf0122122;
1297                         struct mali_blend_equation replace_mode;
1298                         memcpy(&replace_mode, &replace_magic, sizeof(replace_mode));
1299
1300                         struct mali_blend_meta blend_meta[] = {
1301                                 {
1302                                         .unk1 = 0x200 | blend_count,
1303                                         .blend_equation_1 = ctx->blend->equation,
1304                                         .blend_equation_2 = replace_mode
1305                                 },
1306                         };
1307
1308                         if (ctx->blend->has_blend_shader)
1309                                 memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader));
1310
1311                         memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta));
1312                 }
1313         }
1314
1315         if (ctx->dirty & PAN_DIRTY_VERTEX) {
1316                 ctx->payload_vertex.postfix.attribute_meta = ctx->vertex->descriptor_ptr;
1317         }
1318
1319         if (ctx->dirty & PAN_DIRTY_SAMPLERS) {
1320                 /* Upload samplers back to back, no padding */
1321
1322                 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
1323                         if (!ctx->sampler_count[t]) continue;
1324
1325                         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
1326                         struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;
1327
1328                         for (int i = 0; i < ctx->sampler_count[t]; ++i) {
1329                                 desc[i] = ctx->samplers[t][i]->hw;
1330                         }
1331
1332                         if (t == PIPE_SHADER_FRAGMENT)
1333                                 ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu;
1334                         else if (t == PIPE_SHADER_VERTEX)
1335                                 ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu;
1336                         else
1337                                 assert(0);
1338                 }
1339         }
1340
1341         if (ctx->dirty & PAN_DIRTY_TEXTURES) {
1342                 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
1343                         /* Shortcircuit */
1344                         if (!ctx->sampler_view_count[t]) continue;
1345
1346                         uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
1347
1348                         for (int i = 0; i < ctx->sampler_view_count[t]; ++i) {
1349                                 if (!ctx->sampler_views[t][i])
1350                                         continue;
1351
1352                                 struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture;
1353                                 struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;
1354
1355                                 /* Inject the address in. */
1356                                 for (int l = 0; l < (tex_rsrc->last_level + 1); ++l)
1357                                         ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] = rsrc->bo->gpu[l];
1358
1359                                 /* Workaround maybe-errata (?) with non-mipmaps */
1360                                 int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels;
1361
1362                                 if (!rsrc->bo->is_mipmap) {
1363                                         if (is_t6xx) {
1364                                                 /* HW ERRATA, not needed after t6XX */
1365                                                 ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0];
1366
1367                                                 ctx->sampler_views[t][i]->hw.unknown3A = 1;
1368                                         }
1369
1370                                         ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0;
1371                                 }
1372
1373                                 trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
1374
1375                                 /* Restore */
1376                                 ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s;
1377
1378                                 if (is_t6xx) {
1379                                         ctx->sampler_views[t][i]->hw.unknown3A = 0;
1380                                 }
1381                         }
1382
1383                         mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
1384
1385                         if (t == PIPE_SHADER_FRAGMENT)
1386                                 ctx->payload_tiler.postfix.texture_trampoline = trampoline;
1387                         else if (t == PIPE_SHADER_VERTEX)
1388                                 ctx->payload_vertex.postfix.texture_trampoline = trampoline;
1389                         else
1390                                 assert(0);
1391                 }
1392         }
1393
1394         /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */
1395         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
1396
1397         float viewport_vec4[] = {
1398                 vp->scale[0],
1399                 fabsf(vp->scale[1]),
1400
1401                 vp->translate[0],
1402                 /* -1.0 * vp->translate[1] */ fabs(1.0 * vp->scale[1]) /* XXX */
1403         };
1404
1405         for (int i = 0; i < PIPE_SHADER_TYPES; ++i) {
1406                 struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i];
1407
1408                 if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) {
1409                         /* It doesn't matter if we don't use all the memory;
1410                          * we'd need a dummy UBO anyway. Compute the max */
1411
1412                         size_t size = sizeof(viewport_vec4) + buf->size;
1413                         struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);
1414
1415                         /* Keep track how much we've uploaded */
1416                         off_t offset = 0;
1417
1418                         if (i == PIPE_SHADER_VERTEX) {
1419                                 /* Upload viewport */
1420                                 memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4));
1421                                 offset += sizeof(viewport_vec4);
1422                         }
1423
1424                         /* Upload uniforms */
1425                         memcpy(transfer.cpu + offset, buf->buffer, buf->size);
1426
1427                         int uniform_count = 0;
1428
1429                         struct mali_vertex_tiler_postfix *postfix;
1430
1431                         switch (i) {
1432                         case PIPE_SHADER_VERTEX:
1433                                 uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count;
1434                                 postfix = &ctx->payload_vertex.postfix;
1435                                 break;
1436
1437                         case PIPE_SHADER_FRAGMENT:
1438                                 uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count;
1439                                 postfix = &ctx->payload_tiler.postfix;
1440                                 break;
1441
1442                         default:
1443                                 printf("Unknown shader stage %d in uniform upload\n", i);
1444                                 assert(0);
1445                         }
1446
1447                         /* Also attach the same buffer as a UBO for extended access */
1448
1449                         struct mali_uniform_buffer_meta uniform_buffers[] = {
1450                                 {
1451                                         .size = MALI_POSITIVE((2 + uniform_count)),
1452                                         .ptr = transfer.gpu >> 2,
1453                                 },
1454                         };
1455
1456                         mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers));
1457                         postfix->uniforms = transfer.gpu;
1458                         postfix->uniform_buffers = ubufs;
1459
1460                         buf->dirty = 0;
1461                 }
1462         }
1463
1464         ctx->dirty = 0;
1465 }
1466
1467 /* Corresponds to exactly one draw, but does not submit anything */
1468
1469 static void
1470 panfrost_queue_draw(struct panfrost_context *ctx)
1471 {
1472         /* TODO: Expand the array? */
1473         if (ctx->draw_count >= MAX_DRAW_CALLS) {
1474                 printf("Job buffer overflow, ignoring draw\n");
1475                 assert(0);
1476         }
1477
1478         /* Handle dirty flags now */
1479         panfrost_emit_for_draw(ctx, true);
1480
1481         struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false);
1482         struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false);
1483
1484         ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu;
1485         ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu;
1486
1487         ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu;
1488         ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu;
1489
1490         ctx->draw_count++;
1491 }
1492
1493 /* At the end of the frame, the vertex and tiler jobs are linked together and
1494  * then the fragment job is plonked at the end. Set value job is first for
1495  * unknown reasons. */
1496
1497 static void
1498 panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next)
1499 {
1500         if (first->job_descriptor_size)
1501                 first->next_job_64 = (u64) (uintptr_t) next;
1502         else
1503                 first->next_job_32 = (u32) (uintptr_t) next;
1504 }
1505
1506 static void
1507 panfrost_link_jobs(struct panfrost_context *ctx)
1508 {
1509         if (ctx->draw_count) {
1510                 /* Generate the set_value_job */
1511                 panfrost_set_value_job(ctx);
1512
1513                 /* Have the first vertex job depend on the set value job */
1514                 ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index;
1515
1516                 /* SV -> V */
1517                 panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]);
1518         }
1519
1520         /* V -> V/T ; T -> T/null */
1521         for (int i = 0; i < ctx->vertex_job_count; ++i) {
1522                 bool isLast = (i + 1) == ctx->vertex_job_count;
1523
1524                 panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]);
1525         }
1526
1527         /* T -> T/null */
1528         for (int i = 0; i < ctx->tiler_job_count; ++i) {
1529                 bool isLast = (i + 1) == ctx->tiler_job_count;
1530                 panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]);
1531         }
1532 }
1533
1534 /* The entire frame is in memory -- send it off to the kernel! */
1535
1536 static void
1537 panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate)
1538 {
1539         struct pipe_context *gallium = (struct pipe_context *) ctx;
1540         struct panfrost_screen *screen = pan_screen(gallium->screen);
1541
1542         /* Edge case if screen is cleared and nothing else */
1543         bool has_draws = ctx->draw_count > 0;
1544
1545         /* Workaround a bizarre lockup (a hardware errata?) */
1546         if (!has_draws)
1547                 flush_immediate = true;
1548
1549         /* A number of jobs are batched -- this must be linked and cleared */
1550         panfrost_link_jobs(ctx);
1551
1552         ctx->draw_count = 0;
1553         ctx->vertex_job_count = 0;
1554         ctx->tiler_job_count = 0;
1555
1556 #ifndef DRY_RUN
1557
1558         bool is_scanout = panfrost_is_scanout(ctx);
1559         int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout);
1560
1561         /* If visual, we can stall a frame */
1562
1563         if (panfrost_is_scanout(ctx) && !flush_immediate)
1564                 screen->driver->force_flush_fragment(ctx);
1565
1566         screen->last_fragment_id = fragment_id;
1567         screen->last_fragment_flushed = false;
1568
1569         /* If readback, flush now (hurts the pipelined performance) */
1570         if (panfrost_is_scanout(ctx) && flush_immediate)
1571                 screen->driver->force_flush_fragment(ctx);
1572
1573 #ifdef DUMP_PERFORMANCE_COUNTERS
1574         char filename[128];
1575         snprintf(filename, sizeof(filename), "/dev/shm/frame%d.mdgprf", ++performance_counter_number);
1576         FILE *fp = fopen(filename, "wb");
1577         fwrite(screen->perf_counters.cpu,  4096, sizeof(uint32_t), fp);
1578         fclose(fp);
1579 #endif
1580
1581 #endif
1582 }
1583
1584 bool dont_scanout = false;
1585
1586 void
1587 panfrost_flush(
1588         struct pipe_context *pipe,
1589         struct pipe_fence_handle **fence,
1590         unsigned flags)
1591 {
1592         struct panfrost_context *ctx = pan_context(pipe);
1593
1594         /* If there is nothing drawn, skip the frame */
1595         if (!ctx->draw_count && !ctx->frame_cleared) return;
1596
1597         if (!ctx->frame_cleared) {
1598                 /* While there are draws, there was no clear. This is a partial
1599                  * update, which needs to be handled via the "wallpaper"
1600                  * method. We also need to fake a clear, just to get the
1601                  * FRAGMENT job correct. */
1602
1603                 panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil);
1604
1605                 panfrost_draw_wallpaper(pipe);
1606         }
1607
1608         /* Frame clear handled, reset */
1609         ctx->frame_cleared = false;
1610
1611         /* Whether to stall the pipeline for immediately correct results */
1612         bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME;
1613
1614         /* Submit the frame itself */
1615         panfrost_submit_frame(ctx, flush_immediate);
1616
1617         /* Prepare for the next frame */
1618         panfrost_invalidate_frame(ctx);
1619 }
1620
1621 #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
1622
1623 static int
1624 g2m_draw_mode(enum pipe_prim_type mode)
1625 {
1626         switch (mode) {
1627                 DEFINE_CASE(POINTS);
1628                 DEFINE_CASE(LINES);
1629                 DEFINE_CASE(LINE_LOOP);
1630                 DEFINE_CASE(LINE_STRIP);
1631                 DEFINE_CASE(TRIANGLES);
1632                 DEFINE_CASE(TRIANGLE_STRIP);
1633                 DEFINE_CASE(TRIANGLE_FAN);
1634                 DEFINE_CASE(QUADS);
1635                 DEFINE_CASE(QUAD_STRIP);
1636                 DEFINE_CASE(POLYGON);
1637
1638         default:
1639                 printf("Illegal draw mode %d\n", mode);
1640                 assert(0);
1641                 return MALI_LINE_LOOP;
1642         }
1643 }
1644
1645 #undef DEFINE_CASE
1646
1647 static unsigned
1648 panfrost_translate_index_size(unsigned size)
1649 {
1650         switch (size) {
1651         case 1:
1652                 return MALI_DRAW_INDEXED_UINT8;
1653
1654         case 2:
1655                 return MALI_DRAW_INDEXED_UINT16;
1656
1657         case 4:
1658                 return MALI_DRAW_INDEXED_UINT32;
1659
1660         default:
1661                 printf("Unknown index size %d\n", size);
1662                 assert(0);
1663                 return 0;
1664         }
1665 }
1666
1667 static const uint8_t *
1668 panfrost_get_index_buffer_raw(const struct pipe_draw_info *info)
1669 {
1670         if (info->has_user_indices) {
1671                 return (const uint8_t *) info->index.user;
1672         } else {
1673                 struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
1674                 return (const uint8_t *) rsrc->bo->cpu[0];
1675         }
1676 }
1677
1678 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
1679  * good for the duration of the draw (transient), could last longer */
1680
1681 static mali_ptr
1682 panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
1683 {
1684         struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
1685
1686         off_t offset = info->start * info->index_size;
1687
1688         if (!info->has_user_indices) {
1689                 /* Only resources can be directly mapped */
1690                 return rsrc->bo->gpu[0] + offset;
1691         } else {
1692                 /* Otherwise, we need to upload to transient memory */
1693                 const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info);
1694                 return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size);
1695         }
1696 }
1697
/* Forward declaration of the draw hook defined further down */
static void panfrost_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
1702
/* Scans buffer[start] .. buffer[start + count - 1], reading each element as
 * type T, and widens `min_index` / `max_index` to cover every value seen.
 *
 * Both `min_index` and `max_index` must be assignable variables in scope at
 * the point of use. `start` and `count` are evaluated on every iteration, so
 * pass expressions without side effects.
 *
 * Expands to a single statement, so it is safe under an unbraced if/else. */
#define CALCULATE_MIN_MAX_INDEX(T, buffer, start, count) \
        do { \
                for (unsigned _idx = (start); _idx < ((start) + (count)); ++_idx) { \
                        T idx = (buffer)[_idx]; \
                        if (idx > max_index) max_index = idx; \
                        if (idx < min_index) min_index = idx; \
                } \
        } while (0)
1709
1710 static void
1711 panfrost_draw_vbo(
1712         struct pipe_context *pipe,
1713         const struct pipe_draw_info *info)
1714 {
1715         struct panfrost_context *ctx = pan_context(pipe);
1716
1717         ctx->payload_vertex.draw_start = info->start;
1718         ctx->payload_tiler.draw_start = info->start;
1719
1720         int mode = info->mode;
1721
1722         /* Fallback for unsupported modes */
1723
1724         if (!(ctx->draw_modes & mode)) {
1725                 if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) {
1726                         mode = PIPE_PRIM_TRIANGLE_FAN;
1727                 } else {
1728                         if (info->count < 4) {
1729                                 /* Degenerate case? */
1730                                 return;
1731                         }
1732
1733                         util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
1734                         util_primconvert_draw_vbo(ctx->primconvert, info);
1735                         return;
1736                 }
1737         }
1738
1739         ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode);
1740
1741         ctx->vertex_count = info->count;
1742
1743         /* For non-indexed draws, they're the same */
1744         unsigned invocation_count = ctx->vertex_count;
1745
1746         /* For higher amounts of vertices (greater than what fits in a 16-bit
1747          * short), the other value is needed, otherwise there will be bizarre
1748          * rendering artefacts. It's not clear what these values mean yet. */
1749
1750         ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000);
1751         ctx->payload_tiler.prefix.unknown_draw |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000;
1752
1753         if (info->index_size) {
1754                 /* Calculate the min/max index used so we can figure out how
1755                  * many times to invoke the vertex shader */
1756
1757                 const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info);
1758
1759                 int min_index = INT_MAX;
1760                 int max_index = 0;
1761
1762                 if (info->index_size == 1) {
1763                         CALCULATE_MIN_MAX_INDEX(uint8_t, ibuf8, info->start, info->count);
1764                 } else if (info->index_size == 2) {
1765                         const uint16_t *ibuf16 = (const uint16_t *) ibuf8;
1766                         CALCULATE_MIN_MAX_INDEX(uint16_t, ibuf16, info->start, info->count);
1767                 } else if (info->index_size == 4) {
1768                         const uint32_t *ibuf32 = (const uint32_t *) ibuf8;
1769                         CALCULATE_MIN_MAX_INDEX(uint32_t, ibuf32, info->start, info->count);
1770                 } else {
1771                         assert(0);
1772                 }
1773
1774                 /* Make sure we didn't go crazy */
1775                 assert(min_index < INT_MAX);
1776                 assert(max_index > 0);
1777                 assert(max_index > min_index);
1778
1779                 /* Use the corresponding values */
1780                 invocation_count = max_index - min_index + 1;
1781                 ctx->payload_vertex.draw_start = min_index;
1782                 ctx->payload_tiler.draw_start = min_index;
1783
1784                 ctx->payload_tiler.prefix.negative_start = -min_index;
1785                 ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count);
1786
1787                 //assert(!info->restart_index); /* TODO: Research */
1788                 assert(!info->index_bias);
1789                 //assert(!info->min_index); /* TODO: Use value */
1790
1791                 ctx->payload_tiler.prefix.unknown_draw |= panfrost_translate_index_size(info->index_size);
1792                 ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
1793         } else {
1794                 /* Index count == vertex count, if no indexing is applied, as
1795                  * if it is internally indexed in the expected order */
1796
1797                 ctx->payload_tiler.prefix.negative_start = 0;
1798                 ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
1799
1800                 /* Reverse index state */
1801                 ctx->payload_tiler.prefix.unknown_draw &= ~MALI_DRAW_INDEXED_UINT32;
1802                 ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
1803         }
1804
1805         ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1806         ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count);
1807
1808         /* Fire off the draw itself */
1809         panfrost_queue_draw(ctx);
1810 }
1811
1812 /* CSO state */
1813
/* Deletes a CSO by handing `hwcso` straight to free(). `pctx` is unused. */
static void
panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
{
        (void) pctx;

        free(hwcso);
}
1819
1820 static void
1821 panfrost_set_scissor(struct panfrost_context *ctx)
1822 {
1823         const struct pipe_scissor_state *ss = &ctx->scissor;
1824
1825         if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) {
1826                 ctx->viewport->viewport0[0] = ss->minx;
1827                 ctx->viewport->viewport0[1] = ss->miny;
1828                 ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx);
1829                 ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy);
1830         } else {
1831                 ctx->viewport->viewport0[0] = 0;
1832                 ctx->viewport->viewport0[1] = 0;
1833                 ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width);
1834                 ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height);
1835         }
1836 }
1837
1838 static void *
1839 panfrost_create_rasterizer_state(
1840         struct pipe_context *pctx,
1841         const struct pipe_rasterizer_state *cso)
1842 {
1843         struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
1844
1845         so->base = *cso;
1846
1847         /* Bitmask, unknown meaning of the start value */
1848         so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7;
1849
1850         so->tiler_gl_enables |= MALI_FRONT_FACE(
1851                                         cso->front_ccw ? MALI_CCW : MALI_CW);
1852
1853         if (cso->cull_face & PIPE_FACE_FRONT)
1854                 so->tiler_gl_enables |= MALI_CULL_FACE_FRONT;
1855
1856         if (cso->cull_face & PIPE_FACE_BACK)
1857                 so->tiler_gl_enables |= MALI_CULL_FACE_BACK;
1858
1859         return so;
1860 }
1861
1862 static void
1863 panfrost_bind_rasterizer_state(
1864         struct pipe_context *pctx,
1865         void *hwcso)
1866 {
1867         struct panfrost_context *ctx = pan_context(pctx);
1868         struct pipe_rasterizer_state *cso = hwcso;
1869
1870         /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
1871         if (!hwcso)
1872                 return;
1873
1874         /* If scissor test has changed, we'll need to update that now */
1875         bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor;
1876
1877         ctx->rasterizer = hwcso;
1878
1879         /* Actualise late changes */
1880         if (update_scissor)
1881                 panfrost_set_scissor(ctx);
1882
1883         ctx->dirty |= PAN_DIRTY_RASTERIZER;
1884 }
1885
1886 static void *
1887 panfrost_create_vertex_elements_state(
1888         struct pipe_context *pctx,
1889         unsigned num_elements,
1890         const struct pipe_vertex_element *elements)
1891 {
1892         struct panfrost_context *ctx = pan_context(pctx);
1893         struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
1894
1895         so->num_elements = num_elements;
1896         memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
1897
1898         struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_attr_meta) * num_elements, HEAP_DESCRIPTOR);
1899         so->hw = (struct mali_attr_meta *) transfer.cpu;
1900         so->descriptor_ptr = transfer.gpu;
1901
1902         /* Allocate memory for the descriptor state */
1903
1904         for (int i = 0; i < num_elements; ++i) {
1905                 so->hw[i].index = elements[i].vertex_buffer_index;
1906
1907                 enum pipe_format fmt = elements[i].src_format;
1908                 const struct util_format_description *desc = util_format_description(fmt);
1909                 so->hw[i].unknown1 = 0x2;
1910                 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
1911
1912                 so->hw[i].format = panfrost_find_format(desc);
1913
1914                 /* The field itself should probably be shifted over */
1915                 so->hw[i].src_offset = elements[i].src_offset;
1916         }
1917
1918         return so;
1919 }
1920
1921 static void
1922 panfrost_bind_vertex_elements_state(
1923         struct pipe_context *pctx,
1924         void *hwcso)
1925 {
1926         struct panfrost_context *ctx = pan_context(pctx);
1927
1928         ctx->vertex = hwcso;
1929         ctx->dirty |= PAN_DIRTY_VERTEX;
1930 }
1931
1932 static void
1933 panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
1934 {
1935         struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso;
1936         unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements;
1937         printf("Vertex elements delete leaks descriptor (%d bytes)\n", bytes);
1938         free(hwcso);
1939 }
1940
1941 static void *
1942 panfrost_create_shader_state(
1943         struct pipe_context *pctx,
1944         const struct pipe_shader_state *cso)
1945 {
1946         struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
1947         so->base = *cso;
1948
1949         /* Token deep copy to prevent memory corruption */
1950
1951         if (cso->type == PIPE_SHADER_IR_TGSI)
1952                 so->base.tokens = tgsi_dup_tokens(so->base.tokens);
1953
1954         return so;
1955 }
1956
1957 static void
1958 panfrost_delete_shader_state(
1959         struct pipe_context *pctx,
1960         void *so)
1961 {
1962         struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
1963
1964         if (cso->base.type == PIPE_SHADER_IR_TGSI) {
1965                 printf("Deleting TGSI shader leaks duplicated tokens\n");
1966         }
1967
1968         unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta);
1969         printf("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak);
1970
1971         free(so);
1972 }
1973
1974 static void *
1975 panfrost_create_sampler_state(
1976         struct pipe_context *pctx,
1977         const struct pipe_sampler_state *cso)
1978 {
1979         struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
1980         so->base = *cso;
1981
1982         /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
1983
1984         struct mali_sampler_descriptor sampler_descriptor = {
1985                 .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter))
1986                 | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter))
1987                 | translate_mip_filter(cso->min_mip_filter)
1988                 | 0x20,
1989
1990                 .wrap_s = translate_tex_wrap(cso->wrap_s),
1991                 .wrap_t = translate_tex_wrap(cso->wrap_t),
1992                 .wrap_r = translate_tex_wrap(cso->wrap_r),
1993                 .compare_func = panfrost_translate_alt_compare_func(cso->compare_func),
1994                 .border_color = {
1995                         cso->border_color.f[0],
1996                         cso->border_color.f[1],
1997                         cso->border_color.f[2],
1998                         cso->border_color.f[3]
1999                 },
2000                 .min_lod = FIXED_16(0.0),
2001                 .max_lod = FIXED_16(31.0),
2002                 .unknown2 = 1,
2003         };
2004
2005         so->hw = sampler_descriptor;
2006
2007         return so;
2008 }
2009
2010 static void
2011 panfrost_bind_sampler_states(
2012         struct pipe_context *pctx,
2013         enum pipe_shader_type shader,
2014         unsigned start_slot, unsigned num_sampler,
2015         void **sampler)
2016 {
2017         assert(start_slot == 0);
2018
2019         struct panfrost_context *ctx = pan_context(pctx);
2020
2021         /* XXX: Should upload, not just copy? */
2022         ctx->sampler_count[shader] = num_sampler;
2023         memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
2024
2025         ctx->dirty |= PAN_DIRTY_SAMPLERS;
2026 }
2027
2028 static bool
2029 panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant)
2030 {
2031         struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
2032
2033         if (alpha->enabled || variant->alpha_state.enabled) {
2034                 /* Make sure enable state is at least the same */
2035                 if (alpha->enabled != variant->alpha_state.enabled) {
2036                         return false;
2037                 }
2038
2039                 /* Check that the contents of the test are the same */
2040                 bool same_func = alpha->func == variant->alpha_state.func;
2041                 bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
2042
2043                 if (!(same_func && same_ref)) {
2044                         return false;
2045                 }
2046         }
2047         /* Otherwise, we're good to go */
2048         return true;
2049 }
2050
2051 static void
2052 panfrost_bind_fs_state(
2053         struct pipe_context *pctx,
2054         void *hwcso)
2055 {
2056         struct panfrost_context *ctx = pan_context(pctx);
2057
2058         ctx->fs = hwcso;
2059
2060         if (hwcso) {
2061                 /* Match the appropriate variant */
2062
2063                 signed variant = -1;
2064
2065                 struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
2066
2067                 for (unsigned i = 0; i < variants->variant_count; ++i) {
2068                         if (panfrost_variant_matches(ctx, &variants->variants[i])) {
2069                                 variant = i;
2070                                 break;
2071                         }
2072                 }
2073
2074                 if (variant == -1) {
2075                         /* No variant matched, so create a new one */
2076                         variant = variants->variant_count++;
2077                         assert(variants->variant_count < MAX_SHADER_VARIANTS);
2078
2079                         variants->variants[variant].base = hwcso;
2080                         variants->variants[variant].alpha_state = ctx->depth_stencil->alpha;
2081
2082                         /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */
2083                         struct panfrost_context *ctx = pan_context(pctx);
2084                         struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
2085
2086                         variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu;
2087                         variants->variants[variant].tripipe_gpu = transfer.gpu;
2088
2089                 }
2090
2091                 /* Select this variant */
2092                 variants->active_variant = variant;
2093
2094                 struct panfrost_shader_state *shader_state = &variants->variants[variant];
2095                 assert(panfrost_variant_matches(ctx, shader_state));
2096
2097                 /* Now we have a variant selected, so compile and go */
2098
2099                 if (!shader_state->compiled) {
2100                         panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state);
2101                         shader_state->compiled = true;
2102                 }
2103         }
2104
2105         ctx->dirty |= PAN_DIRTY_FS;
2106 }
2107
2108 static void
2109 panfrost_bind_vs_state(
2110         struct pipe_context *pctx,
2111         void *hwcso)
2112 {
2113         struct panfrost_context *ctx = pan_context(pctx);
2114
2115         ctx->vs = hwcso;
2116
2117         if (hwcso) {
2118                 if (!ctx->vs->variants[0].compiled) {
2119                         ctx->vs->variants[0].base = hwcso;
2120
2121                         /* TODO DRY from above */
2122                         struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);
2123                         ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu;
2124                         ctx->vs->variants[0].tripipe_gpu = transfer.gpu;
2125
2126                         panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]);
2127                         ctx->vs->variants[0].compiled = true;
2128                 }
2129         }
2130
2131         ctx->dirty |= PAN_DIRTY_VS;
2132 }
2133
2134 static void
2135 panfrost_set_vertex_buffers(
2136         struct pipe_context *pctx,
2137         unsigned start_slot,
2138         unsigned num_buffers,
2139         const struct pipe_vertex_buffer *buffers)
2140 {
2141         struct panfrost_context *ctx = pan_context(pctx);
2142         assert(num_buffers <= PIPE_MAX_ATTRIBS);
2143
2144         /* XXX: Dirty tracking? etc */
2145         if (buffers) {
2146                 size_t sz = sizeof(buffers[0]) * num_buffers;
2147                 ctx->vertex_buffers = malloc(sz);
2148                 ctx->vertex_buffer_count = num_buffers;
2149                 memcpy(ctx->vertex_buffers, buffers, sz);
2150         } else {
2151                 if (ctx->vertex_buffers) {
2152                         free(ctx->vertex_buffers);
2153                         ctx->vertex_buffers = NULL;
2154                 }
2155
2156                 ctx->vertex_buffer_count = 0;
2157         }
2158 }
2159
2160 static void
2161 panfrost_set_constant_buffer(
2162         struct pipe_context *pctx,
2163         enum pipe_shader_type shader, uint index,
2164         const struct pipe_constant_buffer *buf)
2165 {
2166         struct panfrost_context *ctx = pan_context(pctx);
2167         struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
2168
2169         size_t sz = buf ? buf->buffer_size : 0;
2170
2171         /* Free previous buffer */
2172
2173         pbuf->dirty = true;
2174         pbuf->size = sz;
2175
2176         if (pbuf->buffer) {
2177                 free(pbuf->buffer);
2178                 pbuf->buffer = NULL;
2179         }
2180
2181         /* If unbinding, we're done */
2182
2183         if (!buf)
2184                 return;
2185
2186         /* Multiple constant buffers not yet supported */
2187         assert(index == 0);
2188
2189         const uint8_t *cpu;
2190
2191         struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer);
2192
2193         if (rsrc) {
2194                 cpu = rsrc->bo->cpu[0];
2195         } else if (buf->user_buffer) {
2196                 cpu = buf->user_buffer;
2197         } else {
2198                 printf("No constant buffer?\n");
2199                 return;
2200         }
2201
2202         /* Copy the constant buffer into the driver context for later upload */
2203
2204         pbuf->buffer = malloc(sz);
2205         memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz);
2206 }
2207
2208 static void
2209 panfrost_set_stencil_ref(
2210         struct pipe_context *pctx,
2211         const struct pipe_stencil_ref *ref)
2212 {
2213         struct panfrost_context *ctx = pan_context(pctx);
2214         ctx->stencil_ref = *ref;
2215
2216         /* Shader core dirty */
2217         ctx->dirty |= PAN_DIRTY_FS;
2218 }
2219
2220 static struct pipe_sampler_view *
2221 panfrost_create_sampler_view(
2222         struct pipe_context *pctx,
2223         struct pipe_resource *texture,
2224         const struct pipe_sampler_view *template)
2225 {
2226         struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view);
2227         int bytes_per_pixel = util_format_get_blocksize(texture->format);
2228
2229         pipe_reference(NULL, &texture->reference);
2230
2231         struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;
2232
2233         so->base = *template;
2234         so->base.texture = texture;
2235         so->base.reference.count = 1;
2236         so->base.context = pctx;
2237
2238         /* sampler_views correspond to texture descriptors, minus the texture
2239          * (data) itself. So, we serialise the descriptor here and cache it for
2240          * later. */
2241
2242         /* TODO: Other types of textures */
2243         assert(template->target == PIPE_TEXTURE_2D);
2244
2245         /* Make sure it's something with which we're familiar */
2246         assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4);
2247
2248         /* TODO: Detect from format better */
2249         const struct util_format_description *desc = util_format_description(prsrc->base.format);
2250
2251         unsigned char user_swizzle[4] = {
2252                 template->swizzle_r,
2253                 template->swizzle_g,
2254                 template->swizzle_b,
2255                 template->swizzle_a
2256         };
2257
2258         enum mali_format format = panfrost_find_format(desc);
2259
2260         struct mali_texture_descriptor texture_descriptor = {
2261                 .width = MALI_POSITIVE(texture->width0),
2262                 .height = MALI_POSITIVE(texture->height0),
2263                 .depth = MALI_POSITIVE(texture->depth0),
2264
2265                 /* TODO: Decode */
2266                 .format = {
2267                         .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
2268                         .format = format,
2269
2270                         .usage1 = 0x0,
2271                         .is_not_cubemap = 1,
2272
2273                         /* 0x11 - regular texture 2d, uncompressed tiled */
2274                         /* 0x12 - regular texture 2d, uncompressed linear */
2275                         /* 0x1c - AFBC compressed (internally tiled, probably) texture 2D */
2276
2277                         .usage2 = prsrc->bo->has_afbc ? 0x1c : (prsrc->bo->tiled ? 0x11 : 0x12),
2278                 },
2279
2280                 .swizzle = panfrost_translate_swizzle_4(user_swizzle)
2281         };
2282
2283         /* TODO: Other base levels require adjusting dimensions / level numbers / etc */
2284         assert (template->u.tex.first_level == 0);
2285
2286         texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;
2287
2288         so->hw = texture_descriptor;
2289
2290         return (struct pipe_sampler_view *) so;
2291 }
2292
2293 static void
2294 panfrost_set_sampler_views(
2295         struct pipe_context *pctx,
2296         enum pipe_shader_type shader,
2297         unsigned start_slot, unsigned num_views,
2298         struct pipe_sampler_view **views)
2299 {
2300         struct panfrost_context *ctx = pan_context(pctx);
2301
2302         assert(start_slot == 0);
2303
2304         ctx->sampler_view_count[shader] = num_views;
2305         memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *));
2306
2307         ctx->dirty |= PAN_DIRTY_TEXTURES;
2308 }
2309
2310 static void
2311 panfrost_sampler_view_destroy(
2312         struct pipe_context *pctx,
2313         struct pipe_sampler_view *views)
2314 {
2315         //struct panfrost_context *ctx = pan_context(pctx);
2316
2317         /* TODO */
2318
2319         free(views);
2320 }
2321
2322 static void
2323 panfrost_set_framebuffer_state(struct pipe_context *pctx,
2324                                const struct pipe_framebuffer_state *fb)
2325 {
2326         struct panfrost_context *ctx = pan_context(pctx);
2327
2328         /* Flush when switching away from an FBO */
2329
2330         if (!panfrost_is_scanout(ctx)) {
2331                 panfrost_flush(pctx, NULL, 0);
2332         }
2333
2334         ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
2335         ctx->pipe_framebuffer.samples = fb->samples;
2336         ctx->pipe_framebuffer.layers = fb->layers;
2337         ctx->pipe_framebuffer.width = fb->width;
2338         ctx->pipe_framebuffer.height = fb->height;
2339
2340         for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2341                 struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
2342
2343                 /* check if changing cbuf */
2344                 if (ctx->pipe_framebuffer.cbufs[i] == cb) continue;
2345
2346                 if (cb && (i != 0)) {
2347                         printf("XXX: Multiple render targets not supported before t7xx!\n");
2348                         assert(0);
2349                 }
2350
2351                 /* assign new */
2352                 pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb);
2353
2354                 if (!cb)
2355                         continue;
2356
2357                 if (require_sfbd)
2358                         ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
2359                 else
2360                         ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
2361
2362                 panfrost_attach_vt_framebuffer(ctx);
2363                 panfrost_new_frag_framebuffer(ctx);
2364                 panfrost_set_scissor(ctx);
2365
2366                 struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture);
2367                 bool is_scanout = panfrost_is_scanout(ctx);
2368
2369                 if (!is_scanout && !tex->bo->has_afbc) {
2370                         /* The blob is aggressive about enabling AFBC. As such,
2371                          * it's pretty much necessary to use it here, since we
2372                          * have no traces of non-compressed FBO. */
2373
2374                         panfrost_enable_afbc(ctx, tex, false);
2375                 }
2376
2377                 if (!is_scanout && !tex->bo->has_checksum) {
2378                         /* Enable transaction elimination if we can */
2379                         panfrost_enable_checksum(ctx, tex);
2380                 }
2381         }
2382
2383         {
2384                 struct pipe_surface *zb = fb->zsbuf;
2385
2386                 if (ctx->pipe_framebuffer.zsbuf != zb) {
2387                         pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb);
2388
2389                         if (zb) {
2390                                 /* FBO has depth */
2391
2392                                 if (require_sfbd)
2393                                         ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
2394                                 else
2395                                         ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);
2396
2397                                 panfrost_attach_vt_framebuffer(ctx);
2398                                 panfrost_new_frag_framebuffer(ctx);
2399                                 panfrost_set_scissor(ctx);
2400
2401                                 struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture);
2402
2403                                 if (!tex->bo->has_afbc && !panfrost_is_scanout(ctx))
2404                                         panfrost_enable_afbc(ctx, tex, true);
2405                         }
2406                 }
2407         }
2408
2409         /* Force a clear XXX wrong? */
2410         if (ctx->last_clear.color)
2411                 panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil);
2412 }
2413
2414 static void *
2415 panfrost_create_blend_state(struct pipe_context *pipe,
2416                             const struct pipe_blend_state *blend)
2417 {
2418         struct panfrost_context *ctx = pan_context(pipe);
2419         struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state);
2420         so->base = *blend;
2421
2422         /* TODO: The following features are not yet implemented */
2423         assert(!blend->logicop_enable);
2424         assert(!blend->alpha_to_coverage);
2425         assert(!blend->alpha_to_one);
2426
2427         /* Compile the blend state, first as fixed-function if we can */
2428
2429         if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color))
2430                 return so;
2431
2432         /* If we can't, compile a blend shader instead */
2433
2434         panfrost_make_blend_shader(ctx, so, &ctx->blend_color);
2435
2436         return so;
2437 }
2438
2439 static void
2440 panfrost_bind_blend_state(struct pipe_context *pipe,
2441                           void *cso)
2442 {
2443         struct panfrost_context *ctx = pan_context(pipe);
2444         struct pipe_blend_state *blend = (struct pipe_blend_state *) cso;
2445         struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso;
2446         ctx->blend = pblend;
2447
2448         if (!blend)
2449                 return;
2450
2451         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither);
2452
2453         /* TODO: Attach color */
2454
2455         /* Shader itself is not dirty, but the shader core is */
2456         ctx->dirty |= PAN_DIRTY_FS;
2457 }
2458
2459 static void
2460 panfrost_delete_blend_state(struct pipe_context *pipe,
2461                             void *blend)
2462 {
2463         struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend;
2464
2465         if (so->has_blend_shader) {
2466                 printf("Deleting blend state leak blend shaders bytecode\n");
2467         }
2468
2469         free(blend);
2470 }
2471
2472 static void
2473 panfrost_set_blend_color(struct pipe_context *pipe,
2474                          const struct pipe_blend_color *blend_color)
2475 {
2476         struct panfrost_context *ctx = pan_context(pipe);
2477
2478         /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */
2479
2480         if (blend_color) {
2481                 ctx->blend_color = *blend_color;
2482
2483                 /* The blend mode depends on the blend constant color, due to the
2484                  * fixed/programmable split. So, we're forced to regenerate the blend
2485                  * equation */
2486
2487                 /* TODO: Attach color */
2488         }
2489 }
2490
2491 static void *
2492 panfrost_create_depth_stencil_state(struct pipe_context *pipe,
2493                                     const struct pipe_depth_stencil_alpha_state *depth_stencil)
2494 {
2495         return mem_dup(depth_stencil, sizeof(*depth_stencil));
2496 }
2497
2498 static void
2499 panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
2500                                   void *cso)
2501 {
2502         struct panfrost_context *ctx = pan_context(pipe);
2503         struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
2504         ctx->depth_stencil = depth_stencil;
2505
2506         if (!depth_stencil)
2507                 return;
2508
2509         /* Alpha does not exist in the hardware (it's not in ES3), so it's
2510          * emulated in the fragment shader */
2511
2512         if (depth_stencil->alpha.enabled) {
2513                 /* We need to trigger a new shader (maybe) */
2514                 ctx->base.bind_fs_state(&ctx->base, ctx->fs);
2515         }
2516
2517         /* Stencil state */
2518         SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */
2519
2520         panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
2521         ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;
2522
2523         panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back);
2524         ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask;
2525
2526         /* Depth state (TODO: Refactor) */
2527         SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);
2528
2529         int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;
2530
2531         ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
2532         ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));
2533
2534         /* Bounds test not implemented */
2535         assert(!depth_stencil->depth.bounds_test);
2536
2537         ctx->dirty |= PAN_DIRTY_FS;
2538 }
2539
/* Release a depth/stencil/alpha state object. */

static void
panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
{
        free(depth);
}
2545
/* Stub: the sample mask is accepted but ignored. */

static void
panfrost_set_sample_mask(struct pipe_context *pipe,
                         unsigned sample_mask)
{
        (void) pipe;
        (void) sample_mask;
}
2551
/* Stub: the clip state is accepted but ignored. */

static void
panfrost_set_clip_state(struct pipe_context *pipe,
                        const struct pipe_clip_state *clip)
{
        (void) pipe;
        (void) clip;
}
2558
2559 static void
2560 panfrost_set_viewport_states(struct pipe_context *pipe,
2561                              unsigned start_slot,
2562                              unsigned num_viewports,
2563                              const struct pipe_viewport_state *viewports)
2564 {
2565         struct panfrost_context *ctx = pan_context(pipe);
2566
2567         assert(start_slot == 0);
2568         assert(num_viewports == 1);
2569
2570         ctx->pipe_viewport = *viewports;
2571
2572 #if 0
2573         /* TODO: What if not centered? */
2574         float w = abs(viewports->scale[0]) * 2.0;
2575         float h = abs(viewports->scale[1]) * 2.0;
2576
2577         ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w);
2578         ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h);
2579 #endif
2580 }
2581
2582 static void
2583 panfrost_set_scissor_states(struct pipe_context *pipe,
2584                             unsigned start_slot,
2585                             unsigned num_scissors,
2586                             const struct pipe_scissor_state *scissors)
2587 {
2588         struct panfrost_context *ctx = pan_context(pipe);
2589
2590         assert(start_slot == 0);
2591         assert(num_scissors == 1);
2592
2593         ctx->scissor = *scissors;
2594
2595         panfrost_set_scissor(ctx);
2596 }
2597
/* Stub: the polygon stipple pattern is accepted but ignored. */

static void
panfrost_set_polygon_stipple(struct pipe_context *pipe,
                             const struct pipe_poly_stipple *stipple)
{
        (void) pipe;
        (void) stipple;
}
2604
2605 static void
2606 panfrost_set_active_query_state(struct pipe_context *pipe,
2607                                 boolean enable)
2608 {
2609         //struct panfrost_context *panfrost = pan_context(pipe);
2610 }
2611
2612 static void
2613 panfrost_destroy(struct pipe_context *pipe)
2614 {
2615         struct panfrost_context *panfrost = pan_context(pipe);
2616
2617         if (panfrost->blitter)
2618                 util_blitter_destroy(panfrost->blitter);
2619 }
2620
2621 static struct pipe_query *
2622 panfrost_create_query(struct pipe_context *pipe,
2623                       unsigned type,
2624                       unsigned index)
2625 {
2626         struct panfrost_query *q = CALLOC_STRUCT(panfrost_query);
2627
2628         q->type = type;
2629         q->index = index;
2630
2631         return (struct pipe_query *) q;
2632 }
2633
/* Release a query object. */

static void
panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
        (void) pipe;

        FREE(q);
}
2639
2640 static boolean
2641 panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q)
2642 {
2643         struct panfrost_context *ctx = pan_context(pipe);
2644         struct panfrost_query *query = (struct panfrost_query *) q;
2645
2646         switch (query->type) {
2647                 case PIPE_QUERY_OCCLUSION_COUNTER:
2648                 case PIPE_QUERY_OCCLUSION_PREDICATE:
2649                 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
2650                 {
2651                         /* Allocate a word for the query results to be stored */
2652                         query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR);
2653
2654                         ctx->occlusion_query = query;
2655
2656                         break;
2657                 }
2658
2659                 default:
2660                         fprintf(stderr, "Skipping query %d\n", query->type);
2661                         break;
2662         }
2663
2664         return true;
2665 }
2666
2667 static bool
2668 panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
2669 {
2670         struct panfrost_context *ctx = pan_context(pipe);
2671         ctx->occlusion_query = NULL;
2672         return true;
2673 }
2674
2675 static boolean
2676 panfrost_get_query_result(struct pipe_context *pipe,
2677                           struct pipe_query *q,
2678                           boolean wait,
2679                           union pipe_query_result *vresult)
2680 {
2681         /* STUB */
2682         struct panfrost_query *query = (struct panfrost_query *) q;
2683
2684         /* We need to flush out the jobs to actually run the counter, TODO
2685          * check wait, TODO wallpaper after if needed */
2686
2687         panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);
2688
2689         switch (query->type) {
2690                 case PIPE_QUERY_OCCLUSION_COUNTER:
2691                 case PIPE_QUERY_OCCLUSION_PREDICATE:
2692                 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
2693                         /* Read back the query results */
2694                         unsigned *result = (unsigned *) query->transfer.cpu;
2695                         unsigned passed = *result;
2696
2697                         if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
2698                                 vresult->u64 = passed;
2699                         } else {
2700                                 vresult->b = !!passed;
2701                         }
2702
2703                         break;
2704                 }
2705                 default:
2706                         fprintf(stderr, "Skipped query get %d\n", query->type);
2707                         break;
2708         }
2709
2710         return true;
2711 }
2712
2713 static void
2714 panfrost_setup_hardware(struct panfrost_context *ctx)
2715 {
2716         struct pipe_context *gallium = (struct pipe_context *) ctx;
2717         struct panfrost_screen *screen = pan_screen(gallium->screen);
2718
2719         for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) {
2720                 /* Allocate the beginning of the transient pool */
2721                 int entry_size = (1 << 22); /* 4MB */
2722
2723                 ctx->transient_pools[i].entry_size = entry_size;
2724                 ctx->transient_pools[i].entry_count = 1;
2725
2726                 ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT);
2727         }
2728
2729         screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
2730         screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, 0, 0, 0);
2731         screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
2732         screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_GROWABLE, 1, 128);
2733         screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_GROWABLE, 1, 128);
2734
2735 }
2736
2737 /* New context creation, which also does hardware initialisation since I don't
2738  * know the better way to structure this :smirk: */
2739
2740 struct pipe_context *
2741 panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
2742 {
2743         struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context);
2744         memset(ctx, 0, sizeof(*ctx));
2745         struct pipe_context *gallium = (struct pipe_context *) ctx;
2746
2747         gallium->screen = screen;
2748
2749         gallium->destroy = panfrost_destroy;
2750
2751         gallium->set_framebuffer_state = panfrost_set_framebuffer_state;
2752
2753         gallium->flush = panfrost_flush;
2754         gallium->clear = panfrost_clear;
2755         gallium->draw_vbo = panfrost_draw_vbo;
2756
2757         gallium->set_vertex_buffers = panfrost_set_vertex_buffers;
2758         gallium->set_constant_buffer = panfrost_set_constant_buffer;
2759
2760         gallium->set_stencil_ref = panfrost_set_stencil_ref;
2761
2762         gallium->create_sampler_view = panfrost_create_sampler_view;
2763         gallium->set_sampler_views = panfrost_set_sampler_views;
2764         gallium->sampler_view_destroy = panfrost_sampler_view_destroy;
2765
2766         gallium->create_rasterizer_state = panfrost_create_rasterizer_state;
2767         gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state;
2768         gallium->delete_rasterizer_state = panfrost_generic_cso_delete;
2769
2770         gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state;
2771         gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state;
2772         gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state;
2773
2774         gallium->create_fs_state = panfrost_create_shader_state;
2775         gallium->delete_fs_state = panfrost_delete_shader_state;
2776         gallium->bind_fs_state = panfrost_bind_fs_state;
2777
2778         gallium->create_vs_state = panfrost_create_shader_state;
2779         gallium->delete_vs_state = panfrost_delete_shader_state;
2780         gallium->bind_vs_state = panfrost_bind_vs_state;
2781
2782         gallium->create_sampler_state = panfrost_create_sampler_state;
2783         gallium->delete_sampler_state = panfrost_generic_cso_delete;
2784         gallium->bind_sampler_states = panfrost_bind_sampler_states;
2785
2786         gallium->create_blend_state = panfrost_create_blend_state;
2787         gallium->bind_blend_state   = panfrost_bind_blend_state;
2788         gallium->delete_blend_state = panfrost_delete_blend_state;
2789
2790         gallium->set_blend_color = panfrost_set_blend_color;
2791
2792         gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
2793         gallium->bind_depth_stencil_alpha_state   = panfrost_bind_depth_stencil_state;
2794         gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state;
2795
2796         gallium->set_sample_mask = panfrost_set_sample_mask;
2797
2798         gallium->set_clip_state = panfrost_set_clip_state;
2799         gallium->set_viewport_states = panfrost_set_viewport_states;
2800         gallium->set_scissor_states = panfrost_set_scissor_states;
2801         gallium->set_polygon_stipple = panfrost_set_polygon_stipple;
2802         gallium->set_active_query_state = panfrost_set_active_query_state;
2803
2804         gallium->create_query = panfrost_create_query;
2805         gallium->destroy_query = panfrost_destroy_query;
2806         gallium->begin_query = panfrost_begin_query;
2807         gallium->end_query = panfrost_end_query;
2808         gallium->get_query_result = panfrost_get_query_result;
2809
2810         panfrost_resource_context_init(gallium);
2811
2812         panfrost_setup_hardware(ctx);
2813
2814         /* XXX: leaks */
2815         gallium->stream_uploader = u_upload_create_default(gallium);
2816         gallium->const_uploader = gallium->stream_uploader;
2817         assert(gallium->stream_uploader);
2818
2819         /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */
2820         ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1;
2821
2822         ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes);
2823
2824         ctx->blitter = util_blitter_create(gallium);
2825         assert(ctx->blitter);
2826
2827         /* Prepare for render! */
2828
2829         panfrost_emit_vertex_payload(ctx);
2830         panfrost_emit_tiler_payload(ctx);
2831         panfrost_invalidate_frame(ctx);
2832         panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);
2833         panfrost_default_shader_backend(ctx);
2834         panfrost_generate_space_filler_indices();
2835
2836         return gallium;
2837 }