i965: drop brw->is_broxton
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
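
/* Memory Object Control State (MOCS) settings for texture and render
 * target surfaces, indexed by hardware generation.  Texturing uses a
 * write-back cacheable entry, while gen8+ render targets use the PTE
 * entry so that the caching mode the kernel chose for the page (e.g. for
 * scanout buffers) is respected.
 */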
uint32_t tex_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

uint32_t rb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};
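
/* Start from the miptree's isl_surf; when the layout implied by the GL
 * target differs from the miptree's actual layout in memory, rewrite the
 * surface and view so that they address only the requested level/layer
 * through a tile offset (see the comment in the body below).
 */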
static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory.  This can only work if you intend to access a
    * single level and slice of the texture, and the hardware supports the
    * tile offset feature to allow non-tile-aligned base offsets, since
    * we'll have to point the hardware at the first texel of the level
    * instead of relying on the usual base level/layer controls.
    */
   assert(brw->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}
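
/* Emit a SURFACE_STATE for the given miptree using isl, including the
 * relocation for the main surface and, when aux_usage says one is in use,
 * the auxiliary (MCS/CCS/HiZ) surface.
 */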
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t mocs, uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   /* Use the surface adjusted by get_isl_surf(), not mt->surf directly. */
   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_emit_reloc(&brw->batch,
                                                 *surf_offset + brw->isl_dev.ss.addr_offset,
                                                 mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_emit_reloc(&brw->batch,
                                 *surf_offset +
                                 brw->isl_dev.ss.aux_addr_offset,
                                 aux_bo, *aux_addr,
                                 reloc_flags);
   }
}
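
/* Build the SURFACE_STATE for a color renderbuffer on gen6+ and return the
 * offset of the state within the batch's state space.
 */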
static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   enum isl_aux_usage aux_usage =
      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
      intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
                                     ctx->Color.BlendEnabled & (1 << unit));

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   struct isl_view view = {
      .format = brw->mesa_to_isl_render_format[rb_format],
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          rb_mocs[devinfo->gen],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}
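
/* Gen6 (the only pre-gen7 hardware with multisampling support) only does
 * 4x MSAA, so any multisampled surface is encoded as 4x here.
 */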
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,    SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}
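
/* Check whether auxiliary (compression/fast-clear) access to this miptree
 * has been disabled because it is currently bound as a color draw buffer.
 */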
static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}

void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             tex_mocs[devinfo->gen],
                             surf_offset, surf_index,
                             0);
   }
}
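
/* Fill a buffer SURFACE_STATE using isl.  When bo is NULL, no relocation is
 * emitted and the surface address is programmed directly with buffer_offset.
 */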
void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_emit_reloc(&brw->batch,
                                                   *out_offset + brw->isl_dev.ss.addr_offset,
                                                   bo, buffer_offset,
                                                   reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = tex_mocs[devinfo->gen]);
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            struct brw_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 size, 1, 0);
}
/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          struct brw_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_RAW,
                                 size, 1, RELOC_WRITE);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
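   /* The buffer size is handed to the hardware by splitting
    * buffer_size_minus_1 across the Width (bits 6:0), Height (bits 19:7)
    * and Depth (bits 26:20) surface state fields below.  For example, a
    * value of 4095 (0xfff) yields width = 127, height = 31, depth = 0.
    */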
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_emit_reloc(&brw->batch,
                            *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp = (struct brw_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_emit_reloc(&brw->batch, *out_offset + 4,
                            brw->wm.multisampled_null_render_target_bo,
                            0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
                            mt->offset +
                            intel_renderbuffer_get_tile_offsets(irb,
                                                                &tile_x,
                                                                &tile_y),
                            RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};
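
/* Emit SURFACE_STATE for every sampler the given program uses, writing the
 * resulting offsets into the stage's binding table region (regular, gather,
 * or per-plane, depending on the arguments).
 */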
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (vs && vs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_FAST_CLEAR_COLOR,
   },
   .emit = brw_update_cs_texture_surfaces,
};


void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, false);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, true);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   stage_state->push_constants_dirty = true;

   if (prog->info.num_ubos || prog->info.num_ssbos)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        const struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (prog->info.num_abos) {
      for (unsigned i = 0; i < prog->info.num_abos; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
                                   intel_bo->Base.Size - binding->Offset,
                                   true);

         brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                       binding->Offset, ISL_FORMAT_RAW,
                                       bo->size - binding->Offset, 1,
                                       RELOC_WRITE);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};
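
/* Pick the surface format used to back a shader image: write-only access
 * can use the requested format directly, reads must be lowered to one of
 * the typed formats the hardware can actually sample, and anything else
 * falls back to raw untyped access.
 */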
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t) u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}
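
/* Set up the surface state and brw_image_param metadata for one image unit,
 * handling buffer textures, raw fallback formats and regular miptrees.
 */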
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata, which depends on the
       * context image unit state and is passed to the program as uniforms,
       * so make sure that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;
      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *) brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};