i965: Stop using wm_prog_data->binding_table.render_target_start.
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
uint32_t tex_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

uint32_t rb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};

static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory, this can only possibly work if you only intended
    * to access a single level and slice of the texture, and the hardware
    * supports the tile offset feature in order to allow non-tile-aligned
    * base offsets, since we'll have to point the hardware to the first
    * texel of the level instead of relying on the usual base level/layer
    * controls.
    */
   assert(brw->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}

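/* Fill out a SURFACE_STATE (plus its auxiliary surface information, if any)
 * for the given miptree, emitting relocations for the graphics addresses it
 * references.
 */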
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t mocs, uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo = NULL;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   /* Use the adjusted copy computed by get_isl_surf(), not mt->surf, so that
    * any dim-layout override above actually takes effect.
    */
   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_emit_reloc(&brw->batch,
                                                 *surf_offset + brw->isl_dev.ss.addr_offset,
                                                 mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = mocs, .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_emit_reloc(&brw->batch,
                                 *surf_offset +
                                 brw->isl_dev.ss.aux_addr_offset,
                                 aux_bo, *aux_addr,
                                 reloc_flags);
   }
}

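/* Emit a SURFACE_STATE for the given renderbuffer on gen6 and later, and
 * return the offset of the state within the batch.
 */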
static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   enum isl_aux_usage aux_usage =
      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
      intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
                                     ctx->Color.BlendEnabled & (1 << unit));

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   struct isl_view view = {
      .format = brw->mesa_to_isl_render_format[rb_format],
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          rb_mocs[brw->gen],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

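/* The gen4-6 SURFACE_STATE "Multisampled Surface Storage Format" field can
 * only express 1x and 4x sample counts; gen6 hardware supports at most 4
 * samples, so any multisampled surface on this path is 4x.
 */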
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
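 *
 * For example, SWIZZLE_W = 3 maps to (3 + 4) & 7 = 7 = HSW_SCS_ALPHA, and
 * SWIZZLE_ZERO = 4 maps to (4 + 4) & 7 = 0 = HSW_SCS_ZERO.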
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

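/* Return true if aux use of this miptree has been disabled because it is
 * bound as one of the current color draw buffers whose aux buffer was
 * turned off for this draw (brw->draw_aux_buffer_disabled).
 */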
static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}

void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                               format == ISL_FORMAT_R32G32_SINT ||
                               format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = brw->is_haswell;
         } else if (brw->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (brw->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             tex_mocs[brw->gen],
                             surf_offset, surf_index,
                             0);
   }
}

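/* Emit a SURFACE_STATE describing a linear buffer.  If bo is NULL, the
 * buffer_offset is programmed as the surface address directly, with no
 * relocation.
 */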
void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_emit_reloc(&brw->batch,
                                                   *out_offset + brw->isl_dev.ss.addr_offset,
                                                   bo, buffer_offset,
                                                   reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = tex_mocs[brw->gen]);
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_object specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            struct brw_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 size, 1, 0);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          struct brw_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
                                 ISL_FORMAT_RAW,
                                 size, 1, RELOC_WRITE);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
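
   /* Split the entry count across SURFACE_STATE's 7-bit width (bits 6:0),
    * 13-bit height (bits 19:7) and 7-bit depth (bits 26:20) fields, as
    * required for SURFTYPE_BUFFER surfaces.
    */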
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_emit_reloc(&brw->batch,
                            *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp = (struct brw_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (brw->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
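    *
    * For example, a 1920x1080 framebuffer needs (120 + 68 - 1) * 4096 bytes,
    * i.e. roughly 748 KiB for the workaround buffer.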
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_emit_reloc(&brw->batch, *out_offset + 4,
                            brw->wm.multisampled_null_render_target_bo,
                            0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *   If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
                            mt->offset +
                            intel_renderbuffer_get_tile_offsets(irb,
                                                                &tile_x,
                                                                &tile_y),
                            RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that these fields can't hold the low bits of the tile offsets
    * (only tile_x/4 and tile_y/2 are stored), so there's the possibility of
    * getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
                  BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = brw->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   tex_mocs[brw->gen],
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

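/* Upload SURFACE_STATE for every texture used by the given shader stage,
 * writing the resulting offsets into the binding table section selected by
 * for_gather/plane.
 */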
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (cs && cs->nir->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_FAST_CLEAR_COLOR,
   },
   .emit = brw_update_cs_texture_surfaces,
};

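/* Upload SURFACE_STATE for the UBOs and SSBOs bound to this stage.  Bindings
 * backed by the dummy NullBufferObj get a null surface instead.
 */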
void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, false);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     size,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         emit_null_surface_state(brw, NULL, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
         if (!binding->AutomaticSize)
            size = MIN2(size, binding->Size);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   size, true);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   size,
                                   &ssbo_surf_offsets[i]);
      }
   }

   stage_state->push_constants_dirty = true;

   if (prog->info.num_ubos || prog->info.num_ssbos)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

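/* Upload RAW buffer surfaces for the atomic counter buffers bound to this
 * stage, so the shader can access them with untyped atomic messages.
 */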
void
brw_upload_abo_surfaces(struct brw_context *brw,
                        const struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (prog->info.num_abos) {
      for (unsigned i = 0; i < prog->info.num_abos; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         struct brw_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
                                   intel_bo->Base.Size - binding->Offset,
                                   true);

         brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                       binding->Offset, ISL_FORMAT_RAW,
                                       bo->size - binding->Offset, 1,
                                       RELOC_WRITE);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->compute_program;

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

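/* Choose the SURFACE_STATE format for a shader image.  Write-only access can
 * use the requested format directly; reads must use a format the data port
 * can actually handle, falling back to RAW (untyped messages) when no
 * compatible typed format exists.
 */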
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

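/* Emit the SURFACE_STATE and fill out the brw_image_param metadata for a
 * single image unit, handling buffer images, RAW fallbacks, and invalid
 * (unbound) units.
 */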
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE, tex_mocs[brw->gen],
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms; make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->fragment_program;

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FAST_CLEAR_COLOR |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

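/* Emit a surface for the gl_NumWorkGroups buffer.  For direct dispatches the
 * three GLuints are uploaded into a fresh BO here; for indirect dispatches
 * the surface points at the application's indirect parameter buffer.
 */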
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};