i965: Force zero swizzles for unused components in GL_RED and GL_RG
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

static const uint32_t wb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
   [11] = ICL_MOCS_WB,
};

static const uint32_t pte_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
   [11] = ICL_MOCS_PTE,
};

uint32_t
brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo)
{
   return (bo && bo->external ? pte_mocs : wb_mocs)[devinfo->gen];
}

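/* Illustrative usage (a sketch, not driver code): on a gen9 part,
 * brw_get_bo_mocs() returns pte_mocs[9] == SKL_MOCS_PTE for an external
 * (e.g. scanout) BO, so caching follows the page tables, and
 * wb_mocs[9] == SKL_MOCS_WB for an ordinary write-back-cached BO:
 *
 *    uint32_t mocs1 = brw_get_bo_mocs(devinfo, scanout_bo);  // SKL_MOCS_PTE
 *    uint32_t mocs2 = brw_get_bo_mocs(devinfo, priv_bo);     // SKL_MOCS_WB
 *
 * (scanout_bo/priv_bo are hypothetical names for this example.)
 */
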
static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(devinfo, mt->surf.tiling, target);

   surf->dim = get_isl_surf_dim(target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory.  This can only possibly work if you only intended
    * to access a single level and slice of the texture, and the hardware
    * supports the tile offset feature in order to allow non-tile-aligned
    * base offsets, since we'll have to point the hardware to the first
    * texel of the level instead of relying on the usual base level/layer
    * controls.
    */
   assert(devinfo->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}

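/* Worked example (illustrative only): suppose a GL_TEXTURE_2D view selects
 * base_level == 2 of a miptree whose level 0 is 256x256 but whose dim
 * layout doesn't match the target.  get_isl_surf() rewrites the surface so
 * the requested level becomes level 0:
 *
 *    surf->logical_level0_px.width  = minify(256, 2);   // 64
 *    surf->logical_level0_px.height = minify(256, 2);   // 64
 *    surf->levels = 1;  surf->logical_level0_px.array_len = 1;
 *    view->base_level = 0;  view->base_array_layer = 0;
 *
 * while *offset/tile_x/tile_y are bumped to address the first texel of the
 * old level 2 directly.
 */
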
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo = NULL;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   struct brw_bo *clear_bo = NULL;
   uint32_t clear_offset = 0;

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      aux_surf = &mt->aux_buf->surf;
      aux_bo = mt->aux_buf->bo;
      aux_offset = mt->aux_buf->offset;

      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color =
         intel_miptree_get_clear_color(devinfo, mt, view.format,
                                       view.usage & ISL_SURF_USAGE_TEXTURE_BIT,
                                       &clear_bo, &clear_offset);
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = brw_get_bo_mocs(devinfo, mt->bo),
                       .clear_color = clear_color,
                       .use_clear_address = clear_bo != NULL,
                       .clear_address = clear_offset,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);

      if (devinfo->gen >= 8) {
         uint64_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
         *aux_addr = brw_state_reloc(&brw->batch,
                                     *surf_offset +
                                     brw->isl_dev.ss.aux_addr_offset,
                                     aux_bo, *aux_addr,
                                     reloc_flags);
      } else {
         uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
         *aux_addr = brw_state_reloc(&brw->batch,
                                     *surf_offset +
                                     brw->isl_dev.ss.aux_addr_offset,
                                     aux_bo, *aux_addr,
                                     reloc_flags);
      }
   }

   if (clear_bo != NULL) {
      /* Make sure the offset is aligned with a cacheline. */
      assert((clear_offset & 0x3f) == 0);
      uint64_t *clear_address =
         state + brw->isl_dev.ss.clear_color_state_offset;
      *clear_address = brw_state_reloc(&brw->batch,
                                       *surf_offset +
                                       brw->isl_dev.ss.clear_color_state_offset,
                                       clear_bo, *clear_address, reloc_flags);
   }
}

static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view,
                          brw->draw_aux_usage[unit],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  Unused components of GL_RED
    * and GL_RG textures are likewise forced to 0.0.  This allows for
    * the implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
      swizzles[1] = SWIZZLE_ZERO;
      /* fallthrough */
   case GL_RG:
      swizzles[2] = SWIZZLE_ZERO;
      /* fallthrough */
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

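/* Worked example (illustrative only): for a GL_RED texture whose hardware
 * format carries alpha bits (say, when an RGBA format backs it), the
 * GL_RED -> GL_RG -> GL_RGB fallthrough above produces
 *
 *    swizzles[] = { SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE }
 *
 * so shader reads return (r, 0, 0, 1) and never see stale green/blue/alpha
 * data.  With an identity t->_Swizzle, MAKE_SWIZZLE4() yields that same
 * vector; a user swizzle merely permutes it.
 */
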
/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,   SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

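/* Worked example (illustrative only), using the HSW_SCS_* values from the
 * table above:
 *
 *    swizzle_to_scs(SWIZZLE_X, false);     // (0 + 4) & 7 == 4 == HSW_SCS_RED
 *    swizzle_to_scs(SWIZZLE_ZERO, false);  // (4 + 4) & 7 == 0 == HSW_SCS_ZERO
 *    swizzle_to_scs(SWIZZLE_Y, true);      // 5 == GREEN, remapped to BLUE
 */
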
static void brw_update_texture_surface(struct gl_context *ctx,
                                       unsigned unit,
                                       uint32_t *surf_offset,
                                       bool for_gather,
                                       bool for_txf,
                                       uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt;
      if (firstImage->_BaseFormat == GL_DEPTH_STENCIL ||
          firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
         /* The format from intel_obj may be a combined depth stencil format
          * when we just want depth.  Pull it from the miptree instead.  This
          * is safe because texture views aren't allowed on depth/stencil.
          */
         mesa_fmt = mt->format;
      } else if (mt->etc_format != MESA_FORMAT_NONE) {
         mesa_fmt = mt->format;
      } else if (plane > 0) {
         mesa_fmt = mt->format;
      } else {
         mesa_fmt = intel_obj->_Format;
      }
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      /* On Ivy Bridge and earlier, we handle texture swizzle with shader
       * code.  The actual surface swizzle should be identity.
       */
      if (devinfo->gen <= 7 && !devinfo->is_haswell)
         view.swizzle = ISL_SWIZZLE_IDENTITY;

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format,
                                         brw->gen9_astc5x5_wa_tex_mask);

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             surf_offset, surf_index,
                             0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset,
                                                    reloc_flags),
                         .size_B = buffer_size,
                         .format = surface_format,
                         .stride_B = pitch,
                         .mocs = brw_get_bo_mocs(devinfo, bo));
}

static unsigned
buffer_texture_range_size(struct brw_context *brw,
                          struct gl_texture_object *obj)
{
   assert(obj->Target == GL_TEXTURE_BUFFER);
   const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat);
   const unsigned buffer_size = (!obj->BufferObject ? 0 :
                                 obj->BufferObject->Size);
   const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset);

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   return MIN3((unsigned)obj->BufferSize,
               buffer_size - buffer_offset,
               brw->ctx.Const.MaxTextureBufferSize * texel_size);
}

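/* Worked example (illustrative numbers): a 1000-byte buffer object bound
 * with BufferOffset == 0 and BufferSize == -1 (the whole buffer) as an
 * RGBA32F buffer texture (texel_size == 16), assuming an implementation
 * limit of 1 << 27 texels:
 *
 *    MIN3(0xffffffff, 1000 - 0, (1u << 27) * 16) == 1000
 *
 * ISL later divides by the 16-byte stride, so the shader sees
 * floor(1000 / 16) == 62 texels, matching the spec formula quoted above.
 */
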
void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   const unsigned size = buffer_texture_range_size(brw, tObj);
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj)
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

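/* Worked example (illustrative only): if the buffer has room for 1001
 * transform feedback outputs, buffer_size_minus_1 == 1000 == 0x3e8, and the
 * BRW_SURFACE_BUFFER packing above splits it across the size fields:
 *
 *    width  = 0x3e8 & 0x7f;               // 104 (bits  6:0)
 *    height = (0x3e8 & 0xfff80) >> 7;     // 7   (bits 19:7)
 *    depth  = (0x3e8 & 0x7f00000) >> 20;  // 0   (bits 26:20)
 *
 * The hardware recombines these as depth * 2^20 + height * 2^7 + width,
 * i.e. 7 * 128 + 104 == 1000.
 */
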
/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *   If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

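/* Worked example (illustrative numbers): for a 1920x1080 multisampled
 * framebuffer on gen6, the workaround buffer sized above needs
 *
 *    width_in_tiles  = ALIGN(1920, 16) / 16;   // 120
 *    height_in_tiles = ALIGN(1080, 16) / 16;   // 68
 *    size_needed     = (120 + 68 - 1) * 4096;  // 765952 bytes (~748 KiB)
 *
 * rather than the footprint of a full-size dummy render target.
 */
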
/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch_B - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2))
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

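/* Worked example (illustrative only): the X/Y offset fields in surf[5]
 * store tile_x / 4 and tile_y / 2, which is why the asserts above demand
 * 4- and 2-pixel alignment.  For tile_x == 8 and tile_y == 6:
 *
 *    surf[5] |= (8 / 4) << BRW_SURFACE_X_OFFSET_SHIFT;  // field value 2
 *    surf[5] |= (6 / 2) << BRW_SURFACE_Y_OFFSET_SHIFT;  // field value 3
 *
 * Offsets that aren't multiples of 4/2 simply can't be encoded, hence the
 * "low bits are missing" caveat above.
 */
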
static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   /* The PIPE_CONTROL command description says:
    *
    *   "Whenever a Binding Table Index (BTI) used by a Render Target Message
    *    points to a different RENDER_SURFACE_STATE, SW must issue a Render
    *    Target Cache Flush by enabling this bit.  When render target flush
    *    is set due to new association of BTI, PS Scoreboard Stall bit must
    *    be set in this packet."
    */
   if (devinfo->gen >= 11) {
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.EXT_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format,
                                               brw->gen9_astc5x5_wa_tex_mask);
            if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

static bool
is_depth_texture(struct intel_texture_object *iobj)
{
   GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
   return base_format == GL_DEPTH_COMPONENT ||
          (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
}

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);
         struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
         struct intel_texture_object *iobj = intel_texture_object(obj);

         /* _NEW_TEXTURE */
         if (!obj)
            continue;

         if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
            /* A programming note for the sample_c message says:
             *
             *    "The Surface Format of the associated surface must be
             *     indicated as supporting shadow mapping as indicated in the
             *     surface format table."
             *
             * Accessing non-depth textures via a sampler*Shadow type is
             * undefined.  GLSL 4.50 page 162 says:
             *
             *    "If a shadow texture call is made to a sampler that does not
             *     represent a depth texture, then results are undefined."
             *
             * We give them a null surface (zeros) for undefined.  We've seen
             * GPU hangs with color buffers and sample_c, so we try and avoid
             * those with this hack.
             */
            emit_null_surface_state(brw, NULL, surf_offset + s);
         } else {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (vs && vs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};

static void
upload_buffer_surface(struct brw_context *brw,
                      struct gl_buffer_binding *binding,
                      uint32_t *out_offset,
                      enum isl_format format,
                      unsigned reloc_flags)
{
   struct gl_context *ctx = &brw->ctx;

   if (binding->BufferObject == ctx->Shared->NullBufferObj) {
      emit_null_surface_state(brw, NULL, out_offset);
   } else {
      ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
      if (!binding->AutomaticSize)
         size = MIN2(size, binding->Size);

      if (size == 0) {
         emit_null_surface_state(brw, NULL, out_offset);
         return;
      }

      struct intel_buffer_object *iobj =
         intel_buffer_object(binding->BufferObject);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, iobj, binding->Offset, size,
                                (reloc_flags & RELOC_WRITE) != 0);

      brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
                                    format, size, 1, reloc_flags);
   }
}

void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog || (prog->info.num_ubos == 0 &&
                 prog->info.num_ssbos == 0 &&
                 prog->info.num_abos == 0))
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
      upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
                            ISL_FORMAT_R32G32B32A32_FLOAT, 0);
   }

   uint32_t *abo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
   uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos;

   for (int i = 0; i < prog->info.num_abos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
      upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   stage_state->push_constants_dirty = true;
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY || access == GL_NONE) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

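/* Illustrative sketch (not driver code; the exact lowering is isl's
 * choice): a write-only image keeps its real format, while a readable one
 * must be lowered to a typed-read-capable format or fall back to raw:
 *
 *    get_image_format(brw, fmt, GL_WRITE_ONLY);  // hw_format unchanged
 *    get_image_format(brw, fmt, GL_READ_WRITE);  // lowered typed format,
 *                                                // else ISL_FORMAT_RAW
 */
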
static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          struct brw_image_param *param)
{
   const unsigned size = buffer_texture_range_size(brw, u->TexObj);
   update_default_image_param(brw, u, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

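/* Worked example (illustrative numbers): a buffer image whose clamped
 * range is 4096 bytes with an RGBA32F _ActualFormat (16 bytes per texel)
 * ends up with
 *
 *    param->size[0]   = 4096 / 16;  // 256 texels
 *    param->stride[0] = 16;         // bytes between texels
 *
 * which the shader-side address calculation then consumes.
 */
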
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);
      const bool written = (access != GL_READ_ONLY && access != GL_NONE);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));
         const unsigned buffer_size = buffer_texture_range_size(brw, obj);
         struct brw_bo *const bo = !obj->BufferObject ? NULL :
            intel_bufferobj_buffer(brw, intel_buffer_object(obj->BufferObject),
                                   obj->BufferOffset, buffer_size, written);

         brw_emit_buffer_surface_state(
            brw, surf_offset, bo, obj->BufferOffset,
            format, buffer_size, texel_size,
            written ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;

         unsigned base_layer, num_layers;
         if (u->Layered) {
            if (obj->Target == GL_TEXTURE_3D) {
               base_layer = 0;
               num_layers = minify(mt->surf.logical_level0_px.depth, u->Level);
            } else {
               assert(obj->Immutable || obj->MinLayer == 0);
               base_layer = obj->MinLayer;
               num_layers = obj->Immutable ?
                               obj->NumLayers :
                               mt->surf.logical_level0_px.array_len;
            }
         } else {
            base_layer = obj->MinLayer + u->_Layer;
            num_layers = 1;
         }

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = base_layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               written ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE,
                                   surf_offset, surf_index,
                                   written ? RELOC_WRITE : 0);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata that depends on the context
       * image unit state and is passed to the program as uniforms; make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         brw_upload_data(&brw->upload,
                         (void *)brw->compute.num_work_groups,
                         3 * sizeof(GLuint),
                         sizeof(GLuint),
                         &bo,
                         &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

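/* Illustrative note (not driver code): for a direct glDispatchCompute(8, 4, 2),
 * num_work_groups_bo is NULL, so the three GLuints { 8, 4, 2 } are copied
 * into the upload buffer and bound as a 12-byte ISL_FORMAT_RAW surface;
 * gl_NumWorkGroups reads in the shader then hit this binding table slot.
 */
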
const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};