i965: Devirtualize update_renderbuffer_surface.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 uint32_t tex_mocs[] = {
59 [7] = GEN7_MOCS_L3,
60 [8] = BDW_MOCS_WB,
61 [9] = SKL_MOCS_WB,
62 [10] = CNL_MOCS_WB,
63 };
64
65 uint32_t rb_mocs[] = {
66 [7] = GEN7_MOCS_L3,
67 [8] = BDW_MOCS_PTE,
68 [9] = SKL_MOCS_PTE,
69 [10] = CNL_MOCS_PTE,
70 };
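/* Both tables are indexed by gen; as an illustration, on Broadwell a
 * texture read uses tex_mocs[8] == BDW_MOCS_WB (always write-back cached),
 * while a render target uses rb_mocs[8] == BDW_MOCS_PTE, which defers the
 * cacheability decision to the page-table entry so the kernel's per-buffer
 * choice (e.g. uncached scanout) is respected.
 */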
71
72 static void
73 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
74 GLenum target, struct isl_view *view,
75 uint32_t *tile_x, uint32_t *tile_y,
76 uint32_t *offset, struct isl_surf *surf)
77 {
78 *surf = mt->surf;
79
80 const enum isl_dim_layout dim_layout =
81 get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);
82
83 if (surf->dim_layout == dim_layout)
84 return;
85
86 /* The layout of the specified texture target is not compatible with the
87 * actual layout of the miptree structure in memory -- you're entering
88 * dangerous territory. This can only work if you intend to access a
89 * single level and slice of the texture, and the hardware supports the
90 * tile offset feature in order to allow non-tile-aligned base offsets,
91 * since we'll have to point the hardware to the first texel of the
92 * level instead of relying on the usual base level/layer
93 * controls.
94 */
95 assert(brw->has_surface_tile_offset);
96 assert(view->levels == 1 && view->array_len == 1);
97 assert(*tile_x == 0 && *tile_y == 0);
98
99 *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
100 view->base_array_layer,
101 tile_x, tile_y);
102
103 /* Minify the logical dimensions of the texture. */
104 const unsigned l = view->base_level - mt->first_level;
105 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
106 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
107 minify(surf->logical_level0_px.height, l);
108 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
109 minify(surf->logical_level0_px.depth, l);
110
111 /* Only the base level and layer can be addressed with the overridden
112 * layout.
113 */
114 surf->logical_level0_px.array_len = 1;
115 surf->levels = 1;
116 surf->dim_layout = dim_layout;
117
118 /* The requested slice of the texture is now at the base level and
119 * layer.
120 */
121 view->base_level = 0;
122 view->base_array_layer = 0;
123 }
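/* A worked example of the override above (illustrative only): viewing
 * level 2 of a 64x64 2D miptree through an incompatible target collapses
 * the view to a single-LOD surface whose base size is the minified one:
 *
 *    minify(64, 2) == MAX2(64 >> 2, 1) == 16
 *
 * so logical_level0_px becomes 16x16 with levels == 1, and the hardware is
 * pointed at that level's first texel via the tile x/y offsets rather than
 * the usual base level/layer controls.
 */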
124
125 static void
126 brw_emit_surface_state(struct brw_context *brw,
127 struct intel_mipmap_tree *mt,
128 GLenum target, struct isl_view view,
129 enum isl_aux_usage aux_usage,
130 uint32_t mocs, uint32_t *surf_offset, int surf_index,
131 unsigned reloc_flags)
132 {
133 uint32_t tile_x = mt->level[0].level_x;
134 uint32_t tile_y = mt->level[0].level_y;
135 uint32_t offset = mt->offset;
136
137 struct isl_surf surf;
138
139 get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
140
141 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
142
143 struct brw_bo *aux_bo;
144 struct isl_surf *aux_surf = NULL;
145 uint64_t aux_offset = 0;
146 switch (aux_usage) {
147 case ISL_AUX_USAGE_MCS:
148 case ISL_AUX_USAGE_CCS_D:
149 case ISL_AUX_USAGE_CCS_E:
150 aux_surf = &mt->mcs_buf->surf;
151 aux_bo = mt->mcs_buf->bo;
152 aux_offset = mt->mcs_buf->offset;
153 break;
154
155 case ISL_AUX_USAGE_HIZ:
156 aux_surf = &mt->hiz_buf->surf;
157 aux_bo = mt->hiz_buf->bo;
158 aux_offset = 0;
159 break;
160
161 case ISL_AUX_USAGE_NONE:
162 break;
163 }
164
165 if (aux_usage != ISL_AUX_USAGE_NONE) {
166 /* We only really need a clear color if we also have an auxiliary
167 * surface. Without one, it does nothing.
168 */
169 clear_color = mt->fast_clear_color;
170 }
171
172 void *state = brw_state_batch(brw,
173 brw->isl_dev.ss.size,
174 brw->isl_dev.ss.align,
175 surf_offset);
176
177 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
178 .address = brw_emit_reloc(&brw->batch,
179 *surf_offset + brw->isl_dev.ss.addr_offset,
180 mt->bo, offset, reloc_flags),
181 .aux_surf = aux_surf, .aux_usage = aux_usage,
182 .aux_address = aux_offset,
183 .mocs = mocs, .clear_color = clear_color,
184 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
185 if (aux_surf) {
186 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
187 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
188 * contain other control information. Since buffer addresses are always
189 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
190 * an ordinary reloc to do the necessary address translation.
191 *
192 * FIXME: move to the point of assignment.
193 */
194 assert((aux_offset & 0xfff) == 0);
195 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
196 *aux_addr = brw_emit_reloc(&brw->batch,
197 *surf_offset +
198 brw->isl_dev.ss.aux_addr_offset,
199 aux_bo, *aux_addr,
200 reloc_flags);
201 }
202 }
203
204 static uint32_t
205 gen6_update_renderbuffer_surface(struct brw_context *brw,
206 struct gl_renderbuffer *rb,
207 unsigned unit,
208 uint32_t surf_index)
209 {
210 struct gl_context *ctx = &brw->ctx;
211 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
212 struct intel_mipmap_tree *mt = irb->mt;
213
214 enum isl_aux_usage aux_usage =
215 brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
216 intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
217 ctx->Color.BlendEnabled & (1 << unit));
218
219 assert(brw_render_target_supported(brw, rb));
220
221 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
222 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
223 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
224 __func__, _mesa_get_format_name(rb_format));
225 }
226
227 struct isl_view view = {
228 .format = brw->mesa_to_isl_render_format[rb_format],
229 .base_level = irb->mt_level - irb->mt->first_level,
230 .levels = 1,
231 .base_array_layer = irb->mt_layer,
232 .array_len = MAX2(irb->layer_count, 1),
233 .swizzle = ISL_SWIZZLE_IDENTITY,
234 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
235 };
236
237 uint32_t offset;
238 brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
239 rb_mocs[brw->gen],
240 &offset, surf_index,
241 RELOC_WRITE);
242 return offset;
243 }
244
245 GLuint
246 translate_tex_target(GLenum target)
247 {
248 switch (target) {
249 case GL_TEXTURE_1D:
250 case GL_TEXTURE_1D_ARRAY_EXT:
251 return BRW_SURFACE_1D;
252
253 case GL_TEXTURE_RECTANGLE_NV:
254 return BRW_SURFACE_2D;
255
256 case GL_TEXTURE_2D:
257 case GL_TEXTURE_2D_ARRAY_EXT:
258 case GL_TEXTURE_EXTERNAL_OES:
259 case GL_TEXTURE_2D_MULTISAMPLE:
260 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
261 return BRW_SURFACE_2D;
262
263 case GL_TEXTURE_3D:
264 return BRW_SURFACE_3D;
265
266 case GL_TEXTURE_CUBE_MAP:
267 case GL_TEXTURE_CUBE_MAP_ARRAY:
268 return BRW_SURFACE_CUBE;
269
270 default:
271 unreachable("not reached");
272 }
273 }
274
275 uint32_t
276 brw_get_surface_tiling_bits(enum isl_tiling tiling)
277 {
278 switch (tiling) {
279 case ISL_TILING_X:
280 return BRW_SURFACE_TILED;
281 case ISL_TILING_Y0:
282 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
283 default:
284 return 0;
285 }
286 }
287
288
289 uint32_t
290 brw_get_surface_num_multisamples(unsigned num_samples)
291 {
292 if (num_samples > 1)
293 return BRW_SURFACE_MULTISAMPLECOUNT_4;
294 else
295 return BRW_SURFACE_MULTISAMPLECOUNT_1;
296 }
297
298 /**
299 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
300 * swizzling.
301 */
302 int
303 brw_get_texture_swizzle(const struct gl_context *ctx,
304 const struct gl_texture_object *t)
305 {
306 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
307
308 int swizzles[SWIZZLE_NIL + 1] = {
309 SWIZZLE_X,
310 SWIZZLE_Y,
311 SWIZZLE_Z,
312 SWIZZLE_W,
313 SWIZZLE_ZERO,
314 SWIZZLE_ONE,
315 SWIZZLE_NIL
316 };
317
318 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
319 img->_BaseFormat == GL_DEPTH_STENCIL) {
320 GLenum depth_mode = t->DepthMode;
321
322 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
323 * with depth component data specified with a sized internal format.
324 * Otherwise, it's left at the old default, GL_LUMINANCE.
325 */
326 if (_mesa_is_gles3(ctx) &&
327 img->InternalFormat != GL_DEPTH_COMPONENT &&
328 img->InternalFormat != GL_DEPTH_STENCIL) {
329 depth_mode = GL_RED;
330 }
331
332 switch (depth_mode) {
333 case GL_ALPHA:
334 swizzles[0] = SWIZZLE_ZERO;
335 swizzles[1] = SWIZZLE_ZERO;
336 swizzles[2] = SWIZZLE_ZERO;
337 swizzles[3] = SWIZZLE_X;
338 break;
339 case GL_LUMINANCE:
340 swizzles[0] = SWIZZLE_X;
341 swizzles[1] = SWIZZLE_X;
342 swizzles[2] = SWIZZLE_X;
343 swizzles[3] = SWIZZLE_ONE;
344 break;
345 case GL_INTENSITY:
346 swizzles[0] = SWIZZLE_X;
347 swizzles[1] = SWIZZLE_X;
348 swizzles[2] = SWIZZLE_X;
349 swizzles[3] = SWIZZLE_X;
350 break;
351 case GL_RED:
352 swizzles[0] = SWIZZLE_X;
353 swizzles[1] = SWIZZLE_ZERO;
354 swizzles[2] = SWIZZLE_ZERO;
355 swizzles[3] = SWIZZLE_ONE;
356 break;
357 }
358 }
359
360 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
361
362 /* If the texture's format is alpha-only, force R, G, and B to
363 * 0.0. Similarly, if the texture's format has no alpha channel,
364 * force the alpha value read to 1.0. This allows for the
365 * implementation to use an RGBA texture for any of these formats
366 * without leaking any unexpected values.
367 */
368 switch (img->_BaseFormat) {
369 case GL_ALPHA:
370 swizzles[0] = SWIZZLE_ZERO;
371 swizzles[1] = SWIZZLE_ZERO;
372 swizzles[2] = SWIZZLE_ZERO;
373 break;
374 case GL_LUMINANCE:
375 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
376 swizzles[0] = SWIZZLE_X;
377 swizzles[1] = SWIZZLE_X;
378 swizzles[2] = SWIZZLE_X;
379 swizzles[3] = SWIZZLE_ONE;
380 }
381 break;
382 case GL_LUMINANCE_ALPHA:
383 if (datatype == GL_SIGNED_NORMALIZED) {
384 swizzles[0] = SWIZZLE_X;
385 swizzles[1] = SWIZZLE_X;
386 swizzles[2] = SWIZZLE_X;
387 swizzles[3] = SWIZZLE_W;
388 }
389 break;
390 case GL_INTENSITY:
391 if (datatype == GL_SIGNED_NORMALIZED) {
392 swizzles[0] = SWIZZLE_X;
393 swizzles[1] = SWIZZLE_X;
394 swizzles[2] = SWIZZLE_X;
395 swizzles[3] = SWIZZLE_X;
396 }
397 break;
398 case GL_RED:
399 case GL_RG:
400 case GL_RGB:
401 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
402 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
403 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
404 swizzles[3] = SWIZZLE_ONE;
405 break;
406 }
407
408 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
409 swizzles[GET_SWZ(t->_Swizzle, 1)],
410 swizzles[GET_SWZ(t->_Swizzle, 2)],
411 swizzles[GET_SWZ(t->_Swizzle, 3)]);
412 }
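/* Example (illustrative): a GL_DEPTH_COMPONENT texture with DepthMode
 * GL_LUMINANCE and an identity EXT_texture_swizzle ends up with
 * swizzles[] = { X, X, X, ONE }, so this returns
 * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE) and a
 * shader read yields (depth, depth, depth, 1.0).
 */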
413
414 /**
415 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
416 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
417 *
418 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
419 * 0 1 2 3 4 5
420 * 4 5 6 7 0 1
421 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
422 *
423 * which is simply adding 4 then modding by 8 (or anding with 7).
424 *
425 * We then may need to apply workarounds for textureGather hardware bugs.
426 */
427 static unsigned
428 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
429 {
430 unsigned scs = (swizzle + 4) & 7;
431
432 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
433 }
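/* For instance, SWIZZLE_W == 3 maps to (3 + 4) & 7 == 7 == HSW_SCS_ALPHA,
 * while SWIZZLE_ZERO == 4 wraps around to (4 + 4) & 7 == 0 == HSW_SCS_ZERO.
 * With the gather workaround active, a green select (SWIZZLE_Y ->
 * HSW_SCS_GREEN) is redirected to HSW_SCS_BLUE instead.
 */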
434
435 static bool
436 brw_aux_surface_disabled(const struct brw_context *brw,
437 const struct intel_mipmap_tree *mt)
438 {
439 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
440
441 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
442 const struct intel_renderbuffer *irb =
443 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
444
445 if (irb && irb->mt == mt)
446 return brw->draw_aux_buffer_disabled[i];
447 }
448
449 return false;
450 }
451
452 void
453 brw_update_texture_surface(struct gl_context *ctx,
454 unsigned unit,
455 uint32_t *surf_offset,
456 bool for_gather,
457 uint32_t plane)
458 {
459 struct brw_context *brw = brw_context(ctx);
460 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
461
462 if (obj->Target == GL_TEXTURE_BUFFER) {
463 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
464
465 } else {
466 struct intel_texture_object *intel_obj = intel_texture_object(obj);
467 struct intel_mipmap_tree *mt = intel_obj->mt;
468
469 if (plane > 0) {
470 if (mt->plane[plane - 1] == NULL)
471 return;
472 mt = mt->plane[plane - 1];
473 }
474
475 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
476 /* If this is a view with restricted NumLayers, then our effective depth
477 * is not just the miptree depth.
478 */
479 unsigned view_num_layers;
480 if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
481 view_num_layers = obj->NumLayers;
482 } else {
483 view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
484 mt->surf.logical_level0_px.depth :
485 mt->surf.logical_level0_px.array_len;
486 }
487
488 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
489 * texturing functions that return a float, as our code generation always
490 * selects the .x channel (which would always be 0).
491 */
492 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
493 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
494 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
495 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
496 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
497 brw_get_texture_swizzle(&brw->ctx, obj));
498
499 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
500 enum isl_format format = translate_tex_format(brw, mesa_fmt,
501 sampler->sRGBDecode);
502
503 /* Implement gen6 and gen7 gather work-around */
504 bool need_green_to_blue = false;
505 if (for_gather) {
506 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
507 format == ISL_FORMAT_R32G32_SINT ||
508 format == ISL_FORMAT_R32G32_UINT)) {
509 format = ISL_FORMAT_R32G32_FLOAT_LD;
510 need_green_to_blue = brw->is_haswell;
511 } else if (brw->gen == 6) {
512 /* Sandybridge's gather4 message is broken for integer formats.
513 * To work around this, we pretend the surface is UNORM for
514 * 8 or 16-bit formats, and emit shader instructions to recover
515 * the real INT/UINT value. For 32-bit formats, we pretend
516 * the surface is FLOAT, and simply reinterpret the resulting
517 * bits.
518 */
519 switch (format) {
520 case ISL_FORMAT_R8_SINT:
521 case ISL_FORMAT_R8_UINT:
522 format = ISL_FORMAT_R8_UNORM;
523 break;
524
525 case ISL_FORMAT_R16_SINT:
526 case ISL_FORMAT_R16_UINT:
527 format = ISL_FORMAT_R16_UNORM;
528 break;
529
530 case ISL_FORMAT_R32_SINT:
531 case ISL_FORMAT_R32_UINT:
532 format = ISL_FORMAT_R32_FLOAT;
533 break;
534
535 default:
536 break;
537 }
538 }
539 }
540
541 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
542 if (brw->gen <= 7) {
543 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
544 mt = mt->r8stencil_mt;
545 } else {
546 mt = mt->stencil_mt;
547 }
548 format = ISL_FORMAT_R8_UINT;
549 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
550 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
551 mt = mt->r8stencil_mt;
552 format = ISL_FORMAT_R8_UINT;
553 }
554
555 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
556
557 struct isl_view view = {
558 .format = format,
559 .base_level = obj->MinLevel + obj->BaseLevel,
560 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
561 .base_array_layer = obj->MinLayer,
562 .array_len = view_num_layers,
563 .swizzle = {
564 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
565 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
566 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
567 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
568 },
569 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
570 };
571
572 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
573 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
574 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
575
576 enum isl_aux_usage aux_usage =
577 intel_miptree_texture_aux_usage(brw, mt, format);
578
579 if (brw_aux_surface_disabled(brw, mt))
580 aux_usage = ISL_AUX_USAGE_NONE;
581
582 brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
583 tex_mocs[brw->gen],
584 surf_offset, surf_index,
585 0);
586 }
587 }
588
589 void
590 brw_emit_buffer_surface_state(struct brw_context *brw,
591 uint32_t *out_offset,
592 struct brw_bo *bo,
593 unsigned buffer_offset,
594 unsigned surface_format,
595 unsigned buffer_size,
596 unsigned pitch,
597 unsigned reloc_flags)
598 {
599 uint32_t *dw = brw_state_batch(brw,
600 brw->isl_dev.ss.size,
601 brw->isl_dev.ss.align,
602 out_offset);
603
604 isl_buffer_fill_state(&brw->isl_dev, dw,
605 .address = !bo ? buffer_offset :
606 brw_emit_reloc(&brw->batch,
607 *out_offset + brw->isl_dev.ss.addr_offset,
608 bo, buffer_offset,
609 reloc_flags),
610 .size = buffer_size,
611 .format = surface_format,
612 .stride = pitch,
613 .mocs = tex_mocs[brw->gen]);
614 }
615
616 void
617 brw_update_buffer_texture_surface(struct gl_context *ctx,
618 unsigned unit,
619 uint32_t *surf_offset)
620 {
621 struct brw_context *brw = brw_context(ctx);
622 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
623 struct intel_buffer_object *intel_obj =
624 intel_buffer_object(tObj->BufferObject);
625 uint32_t size = tObj->BufferSize;
626 struct brw_bo *bo = NULL;
627 mesa_format format = tObj->_BufferObjectFormat;
628 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
629 int texel_size = _mesa_get_format_bytes(format);
630
631 if (intel_obj) {
632 size = MIN2(size, intel_obj->Base.Size);
633 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
634 false);
635 }
636
637 /* The ARB_texture_buffer_object specification says:
638 *
639 * "The number of texels in the buffer texture's texel array is given by
640 *
641 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
642 *
643 * where <buffer_size> is the size of the buffer object, in basic
644 * machine units and <components> and <base_type> are the element count
645 * and base data type for elements, as specified in Table X.1. The
646 * number of texels in the texel array is then clamped to the
647 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
648 *
649 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
650 * so that when ISL divides by stride to obtain the number of texels, that
651 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
652 */
653 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
654
655 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
656 _mesa_problem(NULL, "bad format %s for texture buffer\n",
657 _mesa_get_format_name(format));
658 }
659
660 brw_emit_buffer_surface_state(brw, surf_offset, bo,
661 tObj->BufferOffset,
662 isl_format,
663 size,
664 texel_size,
665 0);
666 }
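/* Clamp example (illustrative): binding a 1 GiB buffer of RGBA8 texels
 * (texel_size == 4) against a MaxTextureBufferSize of 2^27 caps size at
 * 2^27 * 4 bytes == 512 MiB, so ISL's size / stride division reports
 * exactly 2^27 texels, matching the spec language quoted above.
 */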
667
668 /**
669 * Create the constant buffer surface. Vertex/fragment shader constants will be
670 * read from this buffer with Data Port Read instructions/messages.
671 */
672 void
673 brw_create_constant_surface(struct brw_context *brw,
674 struct brw_bo *bo,
675 uint32_t offset,
676 uint32_t size,
677 uint32_t *out_offset)
678 {
679 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
680 ISL_FORMAT_R32G32B32A32_FLOAT,
681 size, 1, 0);
682 }
683
684 /**
685 * Create the buffer surface. Shader buffer variables will be
686 * read from / write to this buffer with Data Port Read/Write
687 * instructions/messages.
688 */
689 void
690 brw_create_buffer_surface(struct brw_context *brw,
691 struct brw_bo *bo,
692 uint32_t offset,
693 uint32_t size,
694 uint32_t *out_offset)
695 {
696 /* Use a raw surface so we can reuse existing untyped read/write/atomic
697 * messages. We need these specifically for the fragment shader since
698 * they include a pixel mask header; that header is what lets helper
699 * invocations, which must not write to the buffer, behave correctly.
700 */
701 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
702 ISL_FORMAT_RAW,
703 size, 1, RELOC_WRITE);
704 }
705
706 /**
707 * Set up a binding table entry for use by stream output logic (transform
708 * feedback).
709 *
710 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
711 */
712 void
713 brw_update_sol_surface(struct brw_context *brw,
714 struct gl_buffer_object *buffer_obj,
715 uint32_t *out_offset, unsigned num_vector_components,
716 unsigned stride_dwords, unsigned offset_dwords)
717 {
718 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
719 uint32_t offset_bytes = 4 * offset_dwords;
720 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
721 offset_bytes,
722 buffer_obj->Size - offset_bytes,
723 true);
724 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
725 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
726 size_t size_dwords = buffer_obj->Size / 4;
727 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
728
729 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
730 * too big to map using a single binding table entry?
731 */
732 assert((size_dwords - offset_dwords) / stride_dwords
733 <= BRW_MAX_NUM_BUFFER_ENTRIES);
734
735 if (size_dwords > offset_dwords + num_vector_components) {
736 /* There is room for at least 1 transform feedback output in the buffer.
737 * Compute the number of additional transform feedback outputs the
738 * buffer has room for.
739 */
740 buffer_size_minus_1 =
741 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
742 } else {
743 /* There isn't even room for a single transform feedback output in the
744 * buffer. We can't configure the binding table entry to prevent output
745 * entirely; we'll have to rely on the geometry shader to detect
746 * overflow. But to minimize the damage in case of a bug, set up the
747 * binding table entry to just allow a single output.
748 */
749 buffer_size_minus_1 = 0;
750 }
751 width = buffer_size_minus_1 & 0x7f;
752 height = (buffer_size_minus_1 & 0xfff80) >> 7;
753 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
754
755 switch (num_vector_components) {
756 case 1:
757 surface_format = ISL_FORMAT_R32_FLOAT;
758 break;
759 case 2:
760 surface_format = ISL_FORMAT_R32G32_FLOAT;
761 break;
762 case 3:
763 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
764 break;
765 case 4:
766 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
767 break;
768 default:
769 unreachable("Invalid vector size for transform feedback output");
770 }
771
772 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
773 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
774 surface_format << BRW_SURFACE_FORMAT_SHIFT |
775 BRW_SURFACE_RC_READ_WRITE;
776 surf[1] = brw_emit_reloc(&brw->batch,
777 *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
778 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
779 height << BRW_SURFACE_HEIGHT_SHIFT);
780 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
781 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
782 surf[4] = 0;
783 surf[5] = 0;
784 }
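/* Packing example (illustrative): a computed buffer_size_minus_1 of 1000
 * (0x3e8) splits across the 7/13/7-bit width/height/depth fields as
 *
 *    width  = 0x3e8 & 0x7f              == 104
 *    height = (0x3e8 & 0xfff80) >> 7    == 7
 *    depth  = (0x3e8 & 0x7f00000) >> 20 == 0
 *
 * and the hardware reassembles 104 + (7 << 7) + (0 << 20) == 1000.
 */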
785
786 /* Creates a new WM constant buffer reflecting the current fragment program's
787 * constants, if needed by the fragment program.
788 *
789 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
790 * state atom.
791 */
792 static void
793 brw_upload_wm_pull_constants(struct brw_context *brw)
794 {
795 struct brw_stage_state *stage_state = &brw->wm.base;
796 /* BRW_NEW_FRAGMENT_PROGRAM */
797 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
798 /* BRW_NEW_FS_PROG_DATA */
799 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
800
801 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
802 /* _NEW_PROGRAM_CONSTANTS */
803 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
804 stage_state, prog_data);
805 }
806
807 const struct brw_tracked_state brw_wm_pull_constants = {
808 .dirty = {
809 .mesa = _NEW_PROGRAM_CONSTANTS,
810 .brw = BRW_NEW_BATCH |
811 BRW_NEW_FRAGMENT_PROGRAM |
812 BRW_NEW_FS_PROG_DATA,
813 },
814 .emit = brw_upload_wm_pull_constants,
815 };
816
817 /**
818 * Creates a null renderbuffer surface.
819 *
820 * This is used when the shader doesn't write to any color output. An FB
821 * write to target 0 will still be emitted, because that's how the thread is
822 * terminated (and computed depth is returned), so we need to have the
824 * hardware discard the target 0 color output.
824 */
825 static void
826 emit_null_surface_state(struct brw_context *brw,
827 unsigned width,
828 unsigned height,
829 unsigned samples,
830 uint32_t *out_offset)
831 {
832 uint32_t *surf = brw_state_batch(brw,
833 brw->isl_dev.ss.size,
834 brw->isl_dev.ss.align,
835 out_offset);
836
837 if (brw->gen != 6 || samples <= 1) {
838 isl_null_fill_state(&brw->isl_dev, surf,
839 isl_extent3d(width, height, 1));
840 return;
841 }
842
843 /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
844 * So work around this problem by rendering into a dummy color buffer.
845 *
846 * To decrease the amount of memory needed by the workaround buffer, we
847 * set its pitch to 128 bytes (the width of a Y tile). This means that
848 * the amount of memory needed for the workaround buffer is
849 * (width_in_tiles + height_in_tiles - 1) tiles.
850 *
851 * Note that since the workaround buffer will be interpreted by the
852 * hardware as an interleaved multisampled buffer, we need to compute
853 * width_in_tiles and height_in_tiles by dividing the width and height
854 * by 16 rather than the normal Y-tile size of 32.
855 */
856 unsigned width_in_tiles = ALIGN(width, 16) / 16;
857 unsigned height_in_tiles = ALIGN(height, 16) / 16;
858 unsigned pitch_minus_1 = 127;
859 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
860 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
861 size_needed);
862
863 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
864 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
865 surf[1] = brw_emit_reloc(&brw->batch, *out_offset + 4,
866 brw->wm.multisampled_null_render_target_bo,
867 0, RELOC_WRITE);
868
869 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
870 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
871
872 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
873 * Notes):
874 *
875 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
876 */
877 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
878 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
879 surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
880 surf[5] = 0;
881 }
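/* Size example (illustrative): for a 1920x1080 multisampled framebuffer
 * the workaround buffer needs
 *
 *    width_in_tiles  = ALIGN(1920, 16) / 16 == 120
 *    height_in_tiles = ALIGN(1080, 16) / 16 == 68
 *    size_needed     = (120 + 68 - 1) * 4096 == 765952 bytes (~748 KiB)
 *
 * versus the tens of megabytes a real 4x color buffer of that size would
 * take.
 */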
882
883 /**
884 * Sets up a surface state structure to point at the given region.
885 * While it is only used for the front/back buffer currently, it should be
886 * usable for further buffers when doing ARB_draw_buffers support.
887 */
888 static uint32_t
889 gen4_update_renderbuffer_surface(struct brw_context *brw,
890 struct gl_renderbuffer *rb,
891 unsigned unit,
892 uint32_t surf_index)
893 {
894 struct gl_context *ctx = &brw->ctx;
895 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
896 struct intel_mipmap_tree *mt = irb->mt;
897 uint32_t *surf;
898 uint32_t tile_x, tile_y;
899 enum isl_format format;
900 uint32_t offset;
901 /* _NEW_BUFFERS */
902 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
903 /* BRW_NEW_FS_PROG_DATA */
904
905 if (rb->TexImage && !brw->has_surface_tile_offset) {
906 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
907
908 if (tile_x != 0 || tile_y != 0) {
909 /* Original gen4 hardware couldn't draw to a non-tile-aligned
910 * destination in a miptree unless you actually setup your renderbuffer
911 * as a miptree and used the fragile lod/array_index/etc. controls to
912 * select the image. So, instead, we just make a new single-level
913 * miptree and render into that.
914 */
915 intel_renderbuffer_move_to_temp(brw, irb, false);
916 assert(irb->align_wa_mt);
917 mt = irb->align_wa_mt;
918 }
919 }
920
921 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
922
923 format = brw->mesa_to_isl_render_format[rb_format];
924 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
925 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
926 __func__, _mesa_get_format_name(rb_format));
927 }
928
929 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
930 format << BRW_SURFACE_FORMAT_SHIFT);
931
932 /* reloc */
933 assert(mt->offset % mt->cpp == 0);
934 surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
935 mt->offset +
936 intel_renderbuffer_get_tile_offsets(irb,
937 &tile_x,
938 &tile_y),
939 RELOC_WRITE);
940
941 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
942 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
943
944 surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
945 (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
946
947 surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
948
949 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
950 /* Note that these fields cannot hold the low bits of the offsets
951 * (hence the /4 and /2 below), so unaligned offsets would misrender.
952 */
953 assert(tile_x % 4 == 0);
954 assert(tile_y % 2 == 0);
955 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
956 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
957 (mt->surf.image_alignment_el.height == 4 ?
958 BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
959
960 if (brw->gen < 6) {
961 /* _NEW_COLOR */
962 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
963 (ctx->Color.BlendEnabled & (1 << unit)))
964 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
965
966 if (!ctx->Color.ColorMask[unit][0])
967 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
968 if (!ctx->Color.ColorMask[unit][1])
969 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
970 if (!ctx->Color.ColorMask[unit][2])
971 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
972
973 /* Disable writes to the alpha component when the renderbuffer is
974 * XRGB (the visual has no alpha bits) or alpha writes are masked.
975 */
976 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
977 !ctx->Color.ColorMask[unit][3]) {
978 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
979 }
980 }
981
982 return offset;
983 }
984
985 /**
986 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
987 */
988 void
989 brw_update_renderbuffer_surfaces(struct brw_context *brw,
990 const struct gl_framebuffer *fb,
991 uint32_t render_target_start,
992 uint32_t *surf_offset)
993 {
994 GLuint i;
995 const unsigned int w = _mesa_geometric_width(fb);
996 const unsigned int h = _mesa_geometric_height(fb);
997 const unsigned int s = _mesa_geometric_samples(fb);
998
999 /* Update surfaces for drawing buffers */
1000 if (fb->_NumColorDrawBuffers >= 1) {
1001 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1002 const uint32_t surf_index = render_target_start + i;
1003 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1004
1005 if (intel_renderbuffer(rb)) {
1006 surf_offset[surf_index] = brw->gen >= 6 ?
1007 gen6_update_renderbuffer_surface(brw, rb, i, surf_index) :
1008 gen4_update_renderbuffer_surface(brw, rb, i, surf_index);
1009 } else {
1010 emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]);
1011 }
1012 }
1013 } else {
1014 const uint32_t surf_index = render_target_start;
1015 emit_null_surface_state(brw, w, h, s, &surf_offset[surf_index]);
1016 }
1017 }
1018
1019 static void
1020 update_renderbuffer_surfaces(struct brw_context *brw)
1021 {
1022 const struct gl_context *ctx = &brw->ctx;
1023
1024 /* BRW_NEW_FS_PROG_DATA */
1025 const struct brw_wm_prog_data *wm_prog_data =
1026 brw_wm_prog_data(brw->wm.base.prog_data);
1027
1028 /* _NEW_BUFFERS | _NEW_COLOR */
1029 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1030 brw_update_renderbuffer_surfaces(
1031 brw, fb,
1032 wm_prog_data->binding_table.render_target_start,
1033 brw->wm.base.surf_offset);
1034 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1035 }
1036
1037 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1038 .dirty = {
1039 .mesa = _NEW_BUFFERS |
1040 _NEW_COLOR,
1041 .brw = BRW_NEW_BATCH |
1042 BRW_NEW_FS_PROG_DATA,
1043 },
1044 .emit = update_renderbuffer_surfaces,
1045 };
1046
1047 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1048 .dirty = {
1049 .mesa = _NEW_BUFFERS,
1050 .brw = BRW_NEW_BATCH |
1051 BRW_NEW_FAST_CLEAR_COLOR,
1052 },
1053 .emit = update_renderbuffer_surfaces,
1054 };
1055
1056 static void
1057 update_renderbuffer_read_surfaces(struct brw_context *brw)
1058 {
1059 const struct gl_context *ctx = &brw->ctx;
1060
1061 /* BRW_NEW_FS_PROG_DATA */
1062 const struct brw_wm_prog_data *wm_prog_data =
1063 brw_wm_prog_data(brw->wm.base.prog_data);
1064
1065 /* BRW_NEW_FRAGMENT_PROGRAM */
1066 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1067 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1068 /* _NEW_BUFFERS */
1069 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1070
1071 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1072 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1073 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1074 const unsigned surf_index =
1075 wm_prog_data->binding_table.render_target_read_start + i;
1076 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1077
1078 if (irb) {
1079 const enum isl_format format = brw->mesa_to_isl_render_format[
1080 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1081 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1082 format));
1083
1084 /* Override the target of the texture if the render buffer is a
1085 * single slice of a 3D texture (since the minimum array element
1086 * field of the surface state structure is ignored by the sampler
1087 * unit for 3D textures on some hardware), or if the render buffer
1088 * is a 1D array (since shaders always provide the array index
1089 * coordinate at the Z component to avoid state-dependent
1090 * recompiles when changing the texture target of the
1091 * framebuffer).
1092 */
1093 const GLenum target =
1094 (irb->mt->target == GL_TEXTURE_3D &&
1095 irb->layer_count == 1) ? GL_TEXTURE_2D :
1096 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1097 irb->mt->target;
1098
1099 const struct isl_view view = {
1100 .format = format,
1101 .base_level = irb->mt_level - irb->mt->first_level,
1102 .levels = 1,
1103 .base_array_layer = irb->mt_layer,
1104 .array_len = irb->layer_count,
1105 .swizzle = ISL_SWIZZLE_IDENTITY,
1106 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1107 };
1108
1109 enum isl_aux_usage aux_usage =
1110 intel_miptree_texture_aux_usage(brw, irb->mt, format);
1111 if (brw->draw_aux_buffer_disabled[i])
1112 aux_usage = ISL_AUX_USAGE_NONE;
1113
1114 brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
1115 tex_mocs[brw->gen],
1116 surf_offset, surf_index,
1117 0);
1118
1119 } else {
1120 emit_null_surface_state(brw,
1121 _mesa_geometric_width(fb),
1122 _mesa_geometric_height(fb),
1123 _mesa_geometric_samples(fb),
1124 surf_offset);
1125 }
1126 }
1127
1128 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1129 }
1130 }
1131
1132 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1133 .dirty = {
1134 .mesa = _NEW_BUFFERS,
1135 .brw = BRW_NEW_BATCH |
1136 BRW_NEW_FAST_CLEAR_COLOR |
1137 BRW_NEW_FRAGMENT_PROGRAM |
1138 BRW_NEW_FS_PROG_DATA,
1139 },
1140 .emit = update_renderbuffer_read_surfaces,
1141 };
1142
1143 static void
1144 update_stage_texture_surfaces(struct brw_context *brw,
1145 const struct gl_program *prog,
1146 struct brw_stage_state *stage_state,
1147 bool for_gather, uint32_t plane)
1148 {
1149 if (!prog)
1150 return;
1151
1152 struct gl_context *ctx = &brw->ctx;
1153
1154 uint32_t *surf_offset = stage_state->surf_offset;
1155
1156 /* BRW_NEW_*_PROG_DATA */
1157 if (for_gather)
1158 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1159 else
1160 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1161
1162 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1163 for (unsigned s = 0; s < num_samplers; s++) {
1164 surf_offset[s] = 0;
1165
1166 if (prog->SamplersUsed & (1 << s)) {
1167 const unsigned unit = prog->SamplerUnits[s];
1168
1169 /* _NEW_TEXTURE */
1170 if (ctx->Texture.Unit[unit]._Current) {
1171 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1172 }
1173 }
1174 }
1175 }
1176
1177
1178 /**
1179 * Construct SURFACE_STATE objects for enabled textures.
1180 */
1181 static void
1182 brw_update_texture_surfaces(struct brw_context *brw)
1183 {
1184 /* BRW_NEW_VERTEX_PROGRAM */
1185 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1186
1187 /* BRW_NEW_TESS_PROGRAMS */
1188 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1189 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1190
1191 /* BRW_NEW_GEOMETRY_PROGRAM */
1192 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1193
1194 /* BRW_NEW_FRAGMENT_PROGRAM */
1195 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1196
1197 /* _NEW_TEXTURE */
1198 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1199 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1200 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1201 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1202 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1203
1204 /* Emit an alternate set of surface states for gather. This
1205 * allows the surface format to be overridden for only the
1206 * gather4 messages. */
1207 if (brw->gen < 8) {
1208 if (vs && vs->nir->info.uses_texture_gather)
1209 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1210 if (tcs && tcs->nir->info.uses_texture_gather)
1211 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1212 if (tes && tes->nir->info.uses_texture_gather)
1213 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1214 if (gs && gs->nir->info.uses_texture_gather)
1215 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1216 if (fs && fs->nir->info.uses_texture_gather)
1217 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1218 }
1219
1220 if (fs) {
1221 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1222 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1223 }
1224
1225 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1226 }
1227
1228 const struct brw_tracked_state brw_texture_surfaces = {
1229 .dirty = {
1230 .mesa = _NEW_TEXTURE,
1231 .brw = BRW_NEW_BATCH |
1232 BRW_NEW_FAST_CLEAR_COLOR |
1233 BRW_NEW_FRAGMENT_PROGRAM |
1234 BRW_NEW_FS_PROG_DATA |
1235 BRW_NEW_GEOMETRY_PROGRAM |
1236 BRW_NEW_GS_PROG_DATA |
1237 BRW_NEW_TESS_PROGRAMS |
1238 BRW_NEW_TCS_PROG_DATA |
1239 BRW_NEW_TES_PROG_DATA |
1240 BRW_NEW_TEXTURE_BUFFER |
1241 BRW_NEW_VERTEX_PROGRAM |
1242 BRW_NEW_VS_PROG_DATA,
1243 },
1244 .emit = brw_update_texture_surfaces,
1245 };
1246
1247 static void
1248 brw_update_cs_texture_surfaces(struct brw_context *brw)
1249 {
1250 /* BRW_NEW_COMPUTE_PROGRAM */
1251 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1252
1253 /* _NEW_TEXTURE */
1254 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1255
1256 /* Emit an alternate set of surface states for gather. This
1257 * allows the surface format to be overridden for only the
1258 * gather4 messages.
1259 */
1260 if (brw->gen < 8) {
1261 if (cs && cs->nir->info.uses_texture_gather)
1262 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1263 }
1264
1265 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1266 }
1267
1268 const struct brw_tracked_state brw_cs_texture_surfaces = {
1269 .dirty = {
1270 .mesa = _NEW_TEXTURE,
1271 .brw = BRW_NEW_BATCH |
1272 BRW_NEW_COMPUTE_PROGRAM |
1273 BRW_NEW_FAST_CLEAR_COLOR,
1274 },
1275 .emit = brw_update_cs_texture_surfaces,
1276 };
1277
1278
1279 void
1280 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1281 struct brw_stage_state *stage_state,
1282 struct brw_stage_prog_data *prog_data)
1283 {
1284 struct gl_context *ctx = &brw->ctx;
1285
1286 if (!prog)
1287 return;
1288
1289 uint32_t *ubo_surf_offsets =
1290 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1291
1292 for (int i = 0; i < prog->info.num_ubos; i++) {
1293 struct gl_uniform_buffer_binding *binding =
1294 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1295
1296 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1297 emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1298 } else {
1299 struct intel_buffer_object *intel_bo =
1300 intel_buffer_object(binding->BufferObject);
1301 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1302 if (!binding->AutomaticSize)
1303 size = MIN2(size, binding->Size);
1304 struct brw_bo *bo =
1305 intel_bufferobj_buffer(brw, intel_bo,
1306 binding->Offset,
1307 size, false);
1308 brw_create_constant_surface(brw, bo, binding->Offset,
1309 size,
1310 &ubo_surf_offsets[i]);
1311 }
1312 }
1313
1314 uint32_t *ssbo_surf_offsets =
1315 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1316
1317 for (int i = 0; i < prog->info.num_ssbos; i++) {
1318 struct gl_shader_storage_buffer_binding *binding =
1319 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1320
1321 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1322 emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1323 } else {
1324 struct intel_buffer_object *intel_bo =
1325 intel_buffer_object(binding->BufferObject);
1326 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1327 if (!binding->AutomaticSize)
1328 size = MIN2(size, binding->Size);
1329 struct brw_bo *bo =
1330 intel_bufferobj_buffer(brw, intel_bo,
1331 binding->Offset,
1332 size, true);
1333 brw_create_buffer_surface(brw, bo, binding->Offset,
1334 size,
1335 &ssbo_surf_offsets[i]);
1336 }
1337 }
1338
1339 stage_state->push_constants_dirty = true;
1340
1341 if (prog->info.num_ubos || prog->info.num_ssbos)
1342 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1343 }
1344
1345 static void
1346 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1347 {
1348 struct gl_context *ctx = &brw->ctx;
1349 /* _NEW_PROGRAM */
1350 struct gl_program *prog = ctx->FragmentProgram._Current;
1351
1352 /* BRW_NEW_FS_PROG_DATA */
1353 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1354 }
1355
1356 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1357 .dirty = {
1358 .mesa = _NEW_PROGRAM,
1359 .brw = BRW_NEW_BATCH |
1360 BRW_NEW_FS_PROG_DATA |
1361 BRW_NEW_UNIFORM_BUFFER,
1362 },
1363 .emit = brw_upload_wm_ubo_surfaces,
1364 };
1365
1366 static void
1367 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1368 {
1369 struct gl_context *ctx = &brw->ctx;
1370 /* _NEW_PROGRAM */
1371 struct gl_program *prog =
1372 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1373
1374 /* BRW_NEW_CS_PROG_DATA */
1375 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1376 }
1377
1378 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1379 .dirty = {
1380 .mesa = _NEW_PROGRAM,
1381 .brw = BRW_NEW_BATCH |
1382 BRW_NEW_CS_PROG_DATA |
1383 BRW_NEW_UNIFORM_BUFFER,
1384 },
1385 .emit = brw_upload_cs_ubo_surfaces,
1386 };
1387
1388 void
1389 brw_upload_abo_surfaces(struct brw_context *brw,
1390 const struct gl_program *prog,
1391 struct brw_stage_state *stage_state,
1392 struct brw_stage_prog_data *prog_data)
1393 {
1394 struct gl_context *ctx = &brw->ctx;
1395 uint32_t *surf_offsets =
1396 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1397
1398 if (prog->info.num_abos) {
1399 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1400 struct gl_atomic_buffer_binding *binding =
1401 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1402 struct intel_buffer_object *intel_bo =
1403 intel_buffer_object(binding->BufferObject);
1404 struct brw_bo *bo =
1405 intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1406 intel_bo->Base.Size - binding->Offset,
1407 true);
1408
1409 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1410 binding->Offset, ISL_FORMAT_RAW,
1411 bo->size - binding->Offset, 1,
1412 RELOC_WRITE);
1413 }
1414
1415 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1416 }
1417 }
1418
1419 static void
1420 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1421 {
1422 /* _NEW_PROGRAM */
1423 const struct gl_program *wm = brw->fragment_program;
1424
1425 if (wm) {
1426 /* BRW_NEW_FS_PROG_DATA */
1427 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1428 }
1429 }
1430
1431 const struct brw_tracked_state brw_wm_abo_surfaces = {
1432 .dirty = {
1433 .mesa = _NEW_PROGRAM,
1434 .brw = BRW_NEW_ATOMIC_BUFFER |
1435 BRW_NEW_BATCH |
1436 BRW_NEW_FS_PROG_DATA,
1437 },
1438 .emit = brw_upload_wm_abo_surfaces,
1439 };
1440
1441 static void
1442 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1443 {
1444 /* _NEW_PROGRAM */
1445 const struct gl_program *cp = brw->compute_program;
1446
1447 if (cp) {
1448 /* BRW_NEW_CS_PROG_DATA */
1449 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1450 }
1451 }
1452
1453 const struct brw_tracked_state brw_cs_abo_surfaces = {
1454 .dirty = {
1455 .mesa = _NEW_PROGRAM,
1456 .brw = BRW_NEW_ATOMIC_BUFFER |
1457 BRW_NEW_BATCH |
1458 BRW_NEW_CS_PROG_DATA,
1459 },
1460 .emit = brw_upload_cs_abo_surfaces,
1461 };
1462
1463 static void
1464 brw_upload_cs_image_surfaces(struct brw_context *brw)
1465 {
1466 /* _NEW_PROGRAM */
1467 const struct gl_program *cp = brw->compute_program;
1468
1469 if (cp) {
1470 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1471 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1472 brw->cs.base.prog_data);
1473 }
1474 }
1475
1476 const struct brw_tracked_state brw_cs_image_surfaces = {
1477 .dirty = {
1478 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1479 .brw = BRW_NEW_BATCH |
1480 BRW_NEW_CS_PROG_DATA |
1481 BRW_NEW_FAST_CLEAR_COLOR |
1482 BRW_NEW_IMAGE_UNITS
1483 },
1484 .emit = brw_upload_cs_image_surfaces,
1485 };
1486
1487 static uint32_t
1488 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1489 {
1490 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1491 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1492 if (access == GL_WRITE_ONLY) {
1493 return hw_format;
1494 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1495 /* Typed surface reads support a very limited subset of the shader
1496 * image formats. Translate it into the closest format the
1497 * hardware supports.
1498 */
1499 return isl_lower_storage_image_format(devinfo, hw_format);
1500 } else {
1501 /* The hardware doesn't actually support a typed format that we can use
1502 * so we have to fall back to untyped read/write messages.
1503 */
1504 return ISL_FORMAT_RAW;
1505 }
1506 }
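/* For example (illustrative): a GL_WRITE_ONLY rgba8 image keeps its typed
 * ISL_FORMAT_R8G8B8A8_UNORM; a GL_READ_WRITE rgba8 image is lowered to the
 * closest format typed reads can handle (on older hardware a single 32-bit
 * channel, with pack/unpack emitted in the shader); and a format with no
 * typed-read equivalent at all falls back to ISL_FORMAT_RAW and untyped
 * messages.
 */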
1507
1508 static void
1509 update_default_image_param(struct brw_context *brw,
1510 struct gl_image_unit *u,
1511 unsigned surface_idx,
1512 struct brw_image_param *param)
1513 {
1514 memset(param, 0, sizeof(*param));
1515 param->surface_idx = surface_idx;
1516 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1517 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1518 * detailed explanation of these parameters.
1519 */
1520 param->swizzling[0] = 0xff;
1521 param->swizzling[1] = 0xff;
1522 }
1523
1524 static void
1525 update_buffer_image_param(struct brw_context *brw,
1526 struct gl_image_unit *u,
1527 unsigned surface_idx,
1528 struct brw_image_param *param)
1529 {
1530 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1531 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1532 update_default_image_param(brw, u, surface_idx, param);
1533
1534 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1535 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1536 }
1537
1538 static unsigned
1539 get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
1540 unsigned level)
1541 {
1542 if (target == GL_TEXTURE_CUBE_MAP)
1543 return 6;
1544
1545 return target == GL_TEXTURE_3D ?
1546 minify(mt->surf.logical_level0_px.depth, level) :
1547 mt->surf.logical_level0_px.array_len;
1548 }
1549
1550 static void
1551 update_image_surface(struct brw_context *brw,
1552 struct gl_image_unit *u,
1553 GLenum access,
1554 unsigned surface_idx,
1555 uint32_t *surf_offset,
1556 struct brw_image_param *param)
1557 {
1558 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1559 struct gl_texture_object *obj = u->TexObj;
1560 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1561
1562 if (obj->Target == GL_TEXTURE_BUFFER) {
1563 struct intel_buffer_object *intel_obj =
1564 intel_buffer_object(obj->BufferObject);
1565 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1566 _mesa_get_format_bytes(u->_ActualFormat));
1567
1568 brw_emit_buffer_surface_state(
1569 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1570 format, intel_obj->Base.Size, texel_size,
1571 access != GL_READ_ONLY ? RELOC_WRITE : 0);
1572
1573 update_buffer_image_param(brw, u, surface_idx, param);
1574
1575 } else {
1576 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1577 struct intel_mipmap_tree *mt = intel_obj->mt;
1578 const unsigned num_layers = u->Layered ?
1579 get_image_num_layers(mt, obj->Target, u->Level) : 1;
1580
1581 struct isl_view view = {
1582 .format = format,
1583 .base_level = obj->MinLevel + u->Level,
1584 .levels = 1,
1585 .base_array_layer = obj->MinLayer + u->_Layer,
1586 .array_len = num_layers,
1587 .swizzle = ISL_SWIZZLE_IDENTITY,
1588 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1589 };
1590
1591 if (format == ISL_FORMAT_RAW) {
1592 brw_emit_buffer_surface_state(
1593 brw, surf_offset, mt->bo, mt->offset,
1594 format, mt->bo->size - mt->offset, 1 /* pitch */,
1595 access != GL_READ_ONLY ? RELOC_WRITE : 0);
1596
1597 } else {
1598 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1599 assert(!intel_miptree_has_color_unresolved(mt,
1600 view.base_level, 1,
1601 view.base_array_layer,
1602 view.array_len));
1603 brw_emit_surface_state(brw, mt, mt->target, view,
1604 ISL_AUX_USAGE_NONE, tex_mocs[brw->gen],
1605 surf_offset, surf_index,
1606 access == GL_READ_ONLY ? 0 : RELOC_WRITE);
1607 }
1608
1609 isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
1610 param->surface_idx = surface_idx;
1611 }
1612
1613 } else {
1614 emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1615 update_default_image_param(brw, u, surface_idx, param);
1616 }
1617 }
1618
1619 void
1620 brw_upload_image_surfaces(struct brw_context *brw,
1621 const struct gl_program *prog,
1622 struct brw_stage_state *stage_state,
1623 struct brw_stage_prog_data *prog_data)
1624 {
1625 assert(prog);
1626 struct gl_context *ctx = &brw->ctx;
1627
1628 if (prog->info.num_images) {
1629 for (unsigned i = 0; i < prog->info.num_images; i++) {
1630 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1631 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1632
1633 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1634 surf_idx,
1635 &stage_state->surf_offset[surf_idx],
1636 &prog_data->image_param[i]);
1637 }
1638
1639 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1640 /* This may have changed the image metadata that depends on the
1641 * context image unit state and is passed to the program as uniforms,
1642 * so make sure that push and pull constants are reuploaded.
1643 */
1644 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1645 }
1646 }
1647
1648 static void
1649 brw_upload_wm_image_surfaces(struct brw_context *brw)
1650 {
1651 /* BRW_NEW_FRAGMENT_PROGRAM */
1652 const struct gl_program *wm = brw->fragment_program;
1653
1654 if (wm) {
1655 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1656 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1657 brw->wm.base.prog_data);
1658 }
1659 }
1660
1661 const struct brw_tracked_state brw_wm_image_surfaces = {
1662 .dirty = {
1663 .mesa = _NEW_TEXTURE,
1664 .brw = BRW_NEW_BATCH |
1665 BRW_NEW_FAST_CLEAR_COLOR |
1666 BRW_NEW_FRAGMENT_PROGRAM |
1667 BRW_NEW_FS_PROG_DATA |
1668 BRW_NEW_IMAGE_UNITS
1669 },
1670 .emit = brw_upload_wm_image_surfaces,
1671 };
1672
1673 static void
1674 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1675 {
1676 struct gl_context *ctx = &brw->ctx;
1677 /* _NEW_PROGRAM */
1678 struct gl_program *prog =
1679 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1680 /* BRW_NEW_CS_PROG_DATA */
1681 const struct brw_cs_prog_data *cs_prog_data =
1682 brw_cs_prog_data(brw->cs.base.prog_data);
1683
1684 if (prog && cs_prog_data->uses_num_work_groups) {
1685 const unsigned surf_idx =
1686 cs_prog_data->binding_table.work_groups_start;
1687 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1688 struct brw_bo *bo;
1689 uint32_t bo_offset;
1690
1691 if (brw->compute.num_work_groups_bo == NULL) {
1692 bo = NULL;
1693 intel_upload_data(brw,
1694 (void *)brw->compute.num_work_groups,
1695 3 * sizeof(GLuint),
1696 sizeof(GLuint),
1697 &bo,
1698 &bo_offset);
1699 } else {
1700 bo = brw->compute.num_work_groups_bo;
1701 bo_offset = brw->compute.num_work_groups_offset;
1702 }
1703
1704 brw_emit_buffer_surface_state(brw, surf_offset,
1705 bo, bo_offset,
1706 ISL_FORMAT_RAW,
1707 3 * sizeof(GLuint), 1,
1708 RELOC_WRITE);
1709 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1710 }
1711 }
1712
1713 const struct brw_tracked_state brw_cs_work_groups_surface = {
1714 .dirty = {
1715 .brw = BRW_NEW_CS_PROG_DATA |
1716 BRW_NEW_CS_WORK_GROUPS
1717 },
1718 .emit = brw_upload_cs_work_groups_surface,
1719 };