i965/state: Add generic surface update functions based on ISL
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57    unsigned num_dwords; /* Size of RENDER_SURFACE_STATE in dwords */
58    unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59    unsigned reloc_dw; /* Dword holding the surface base address (reloc target) */
60    unsigned aux_reloc_dw; /* Dword holding the auxiliary (MCS) surface address */
61    unsigned tex_mocs; /* MOCS value used for texture surfaces */
62    unsigned rb_mocs; /* MOCS value used for render target surfaces */
63 };
64
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
74 void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101       * surface. Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
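      /* Pass the packed control bits (dw & 0xfff) as the reloc delta so they
       * are preserved when the kernel patches in the final MCS address.
       */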
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 uint32_t
137 brw_update_renderbuffer_surface(struct brw_context *brw,
138 struct gl_renderbuffer *rb,
139 bool layered, unsigned unit /* unused */,
140 uint32_t surf_index)
141 {
142 struct gl_context *ctx = &brw->ctx;
143 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
144 struct intel_mipmap_tree *mt = irb->mt;
145
146 assert(brw_render_target_supported(brw, rb));
147 intel_miptree_used_for_rendering(mt);
148
149 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
150 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
151 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
152 __func__, _mesa_get_format_name(rb_format));
153 }
154
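   /* For UMS and CMS layouts the miptree stores each sample in its own array
    * slice, so irb->mt_layer counts sample slices rather than logical layers
    * and has to be divided back down here.
    */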
155 const unsigned layer_multiplier =
156 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
157 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
158 MAX2(irb->mt->num_samples, 1) : 1;
159
160 struct isl_view view = {
161 .format = brw->render_target_format[rb_format],
162 .base_level = irb->mt_level - irb->mt->first_level,
163 .levels = 1,
164 .base_array_layer = irb->mt_layer / layer_multiplier,
165 .array_len = MAX2(irb->layer_count, 1),
166 .channel_select = {
167 ISL_CHANNEL_SELECT_RED,
168 ISL_CHANNEL_SELECT_GREEN,
169 ISL_CHANNEL_SELECT_BLUE,
170 ISL_CHANNEL_SELECT_ALPHA,
171 },
172 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
173 };
174
175 uint32_t offset;
176 brw_emit_surface_state(brw, mt, &view,
177 surface_state_infos[brw->gen].rb_mocs, false,
178 &offset, surf_index,
179 I915_GEM_DOMAIN_RENDER,
180 I915_GEM_DOMAIN_RENDER);
181 return offset;
182 }
183
184 GLuint
185 translate_tex_target(GLenum target)
186 {
187 switch (target) {
188 case GL_TEXTURE_1D:
189 case GL_TEXTURE_1D_ARRAY_EXT:
190 return BRW_SURFACE_1D;
191
192 case GL_TEXTURE_RECTANGLE_NV:
193 return BRW_SURFACE_2D;
194
195 case GL_TEXTURE_2D:
196 case GL_TEXTURE_2D_ARRAY_EXT:
197 case GL_TEXTURE_EXTERNAL_OES:
198 case GL_TEXTURE_2D_MULTISAMPLE:
199 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
200 return BRW_SURFACE_2D;
201
202 case GL_TEXTURE_3D:
203 return BRW_SURFACE_3D;
204
205 case GL_TEXTURE_CUBE_MAP:
206 case GL_TEXTURE_CUBE_MAP_ARRAY:
207 return BRW_SURFACE_CUBE;
208
209 default:
210 unreachable("not reached");
211 }
212 }
213
214 uint32_t
215 brw_get_surface_tiling_bits(uint32_t tiling)
216 {
217 switch (tiling) {
218 case I915_TILING_X:
219 return BRW_SURFACE_TILED;
220 case I915_TILING_Y:
221 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
222 default:
223 return 0;
224 }
225 }
226
227
228 uint32_t
229 brw_get_surface_num_multisamples(unsigned num_samples)
230 {
231 if (num_samples > 1)
232 return BRW_SURFACE_MULTISAMPLECOUNT_4;
233 else
234 return BRW_SURFACE_MULTISAMPLECOUNT_1;
235 }
236
237 /**
238 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
239 * swizzling.
240 */
241 int
242 brw_get_texture_swizzle(const struct gl_context *ctx,
243 const struct gl_texture_object *t)
244 {
245 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
246
247 int swizzles[SWIZZLE_NIL + 1] = {
248 SWIZZLE_X,
249 SWIZZLE_Y,
250 SWIZZLE_Z,
251 SWIZZLE_W,
252 SWIZZLE_ZERO,
253 SWIZZLE_ONE,
254 SWIZZLE_NIL
255 };
256
257 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
258 img->_BaseFormat == GL_DEPTH_STENCIL) {
259 GLenum depth_mode = t->DepthMode;
260
261 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
262 * with depth component data specified with a sized internal format.
263 * Otherwise, it's left at the old default, GL_LUMINANCE.
264 */
265 if (_mesa_is_gles3(ctx) &&
266 img->InternalFormat != GL_DEPTH_COMPONENT &&
267 img->InternalFormat != GL_DEPTH_STENCIL) {
268 depth_mode = GL_RED;
269 }
270
271 switch (depth_mode) {
272 case GL_ALPHA:
273 swizzles[0] = SWIZZLE_ZERO;
274 swizzles[1] = SWIZZLE_ZERO;
275 swizzles[2] = SWIZZLE_ZERO;
276 swizzles[3] = SWIZZLE_X;
277 break;
278 case GL_LUMINANCE:
279 swizzles[0] = SWIZZLE_X;
280 swizzles[1] = SWIZZLE_X;
281 swizzles[2] = SWIZZLE_X;
282 swizzles[3] = SWIZZLE_ONE;
283 break;
284 case GL_INTENSITY:
285 swizzles[0] = SWIZZLE_X;
286 swizzles[1] = SWIZZLE_X;
287 swizzles[2] = SWIZZLE_X;
288 swizzles[3] = SWIZZLE_X;
289 break;
290 case GL_RED:
291 swizzles[0] = SWIZZLE_X;
292 swizzles[1] = SWIZZLE_ZERO;
293 swizzles[2] = SWIZZLE_ZERO;
294 swizzles[3] = SWIZZLE_ONE;
295 break;
296 }
297 }
298
299 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
300
301 /* If the texture's format is alpha-only, force R, G, and B to
302 * 0.0. Similarly, if the texture's format has no alpha channel,
303 * force the alpha value read to 1.0. This allows for the
304 * implementation to use an RGBA texture for any of these formats
305 * without leaking any unexpected values.
306 */
307 switch (img->_BaseFormat) {
308 case GL_ALPHA:
309 swizzles[0] = SWIZZLE_ZERO;
310 swizzles[1] = SWIZZLE_ZERO;
311 swizzles[2] = SWIZZLE_ZERO;
312 break;
313 case GL_LUMINANCE:
314 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
315 swizzles[0] = SWIZZLE_X;
316 swizzles[1] = SWIZZLE_X;
317 swizzles[2] = SWIZZLE_X;
318 swizzles[3] = SWIZZLE_ONE;
319 }
320 break;
321 case GL_LUMINANCE_ALPHA:
322 if (datatype == GL_SIGNED_NORMALIZED) {
323 swizzles[0] = SWIZZLE_X;
324 swizzles[1] = SWIZZLE_X;
325 swizzles[2] = SWIZZLE_X;
326 swizzles[3] = SWIZZLE_W;
327 }
328 break;
329 case GL_INTENSITY:
330 if (datatype == GL_SIGNED_NORMALIZED) {
331 swizzles[0] = SWIZZLE_X;
332 swizzles[1] = SWIZZLE_X;
333 swizzles[2] = SWIZZLE_X;
334 swizzles[3] = SWIZZLE_X;
335 }
336 break;
337 case GL_RED:
338 case GL_RG:
339 case GL_RGB:
340 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 }
344
345 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
346 swizzles[GET_SWZ(t->_Swizzle, 1)],
347 swizzles[GET_SWZ(t->_Swizzle, 2)],
348 swizzles[GET_SWZ(t->_Swizzle, 3)]);
349 }
350
351 /**
352  * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
353 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
354 *
355 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
356 * 0 1 2 3 4 5
357 * 4 5 6 7 0 1
358 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
359 *
360 * which is simply adding 4 then modding by 8 (or anding with 7).
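 *    For example, SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0 = SCS_ZERO, and
 *    SWIZZLE_X (0) maps to (0 + 4) & 7 = 4 = SCS_RED.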
361 *
362 * We then may need to apply workarounds for textureGather hardware bugs.
363 */
364 static unsigned
365 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
366 {
367 unsigned scs = (swizzle + 4) & 7;
368
369 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
370 }
371
372 void
373 brw_update_texture_surface(struct gl_context *ctx,
374 unsigned unit,
375 uint32_t *surf_offset,
376 bool for_gather,
377 uint32_t plane)
378 {
379 struct brw_context *brw = brw_context(ctx);
380 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
381
382 if (obj->Target == GL_TEXTURE_BUFFER) {
383 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
384
385 } else {
386 struct intel_texture_object *intel_obj = intel_texture_object(obj);
387 struct intel_mipmap_tree *mt = intel_obj->mt;
388 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
389 /* If this is a view with restricted NumLayers, then our effective depth
390 * is not just the miptree depth.
391 */
392 const unsigned mt_num_layers =
393 mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1);
394 const unsigned view_num_layers =
395 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
396 mt_num_layers;
397
398 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
399 * texturing functions that return a float, as our code generation always
400 * selects the .x channel (which would always be 0).
401 */
402 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
403 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
404 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
405 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
406 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
407 brw_get_texture_swizzle(&brw->ctx, obj));
408
409 unsigned format = translate_tex_format(
410 brw, intel_obj->_Format, sampler->sRGBDecode);
411
412 /* Implement gen6 and gen7 gather work-around */
413 bool need_green_to_blue = false;
414 if (for_gather) {
415 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
416 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
417 need_green_to_blue = brw->is_haswell;
418 } else if (brw->gen == 6) {
419 /* Sandybridge's gather4 message is broken for integer formats.
420 * To work around this, we pretend the surface is UNORM for
421 * 8 or 16-bit formats, and emit shader instructions to recover
422 * the real INT/UINT value. For 32-bit formats, we pretend
423 * the surface is FLOAT, and simply reinterpret the resulting
424 * bits.
425 */
426 switch (format) {
427 case BRW_SURFACEFORMAT_R8_SINT:
428 case BRW_SURFACEFORMAT_R8_UINT:
429 format = BRW_SURFACEFORMAT_R8_UNORM;
430 break;
431
432 case BRW_SURFACEFORMAT_R16_SINT:
433 case BRW_SURFACEFORMAT_R16_UINT:
434 format = BRW_SURFACEFORMAT_R16_UNORM;
435 break;
436
437 case BRW_SURFACEFORMAT_R32_SINT:
438 case BRW_SURFACEFORMAT_R32_UINT:
439 format = BRW_SURFACEFORMAT_R32_FLOAT;
440 break;
441
442 default:
443 break;
444 }
445 }
446 }
447
448 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
449 assert(brw->gen >= 8);
450 mt = mt->stencil_mt;
451 format = BRW_SURFACEFORMAT_R8_UINT;
452 } else if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
453 if (plane > 0)
454 mt = mt->plane[plane - 1];
455 if (mt == NULL)
456 return;
457 format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
458 }
459
460 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
461
462 struct isl_view view = {
463 .format = format,
464 .base_level = obj->MinLevel + obj->BaseLevel,
465 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
466 .base_array_layer = obj->MinLayer,
467 .array_len = view_num_layers,
468 .channel_select = {
469 swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
470 swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
471 swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
472 swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
473 },
474 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
475 };
476
477 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
478 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
479 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
480
481 brw_emit_surface_state(brw, mt, &view,
482 surface_state_infos[brw->gen].tex_mocs, for_gather,
483 surf_offset, surf_index,
484 I915_GEM_DOMAIN_SAMPLER, 0);
485 }
486 }
487
488 static void
489 gen4_emit_buffer_surface_state(struct brw_context *brw,
490 uint32_t *out_offset,
491 drm_intel_bo *bo,
492 unsigned buffer_offset,
493 unsigned surface_format,
494 unsigned buffer_size,
495 unsigned pitch,
496 bool rw)
497 {
498 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
499 6 * 4, 32, out_offset);
500 memset(surf, 0, 6 * 4);
501
502 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
503 surface_format << BRW_SURFACE_FORMAT_SHIFT |
504 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
505 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
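   /* The buffer size minus one is packed across the Width [6:0],
    * Height [19:7] and Depth [26:20] fields of the surface state.
    */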
506 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
507 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
508 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
509 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
510
511 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
512 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
513 * physical cache. It is mapped in hardware to the sampler cache."
514 */
515 if (bo) {
516 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
517 bo, buffer_offset,
518 I915_GEM_DOMAIN_SAMPLER,
519 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
520 }
521 }
522
523 void
524 brw_update_buffer_texture_surface(struct gl_context *ctx,
525 unsigned unit,
526 uint32_t *surf_offset)
527 {
528 struct brw_context *brw = brw_context(ctx);
529 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
530 struct intel_buffer_object *intel_obj =
531 intel_buffer_object(tObj->BufferObject);
532 uint32_t size = tObj->BufferSize;
533 drm_intel_bo *bo = NULL;
534 mesa_format format = tObj->_BufferObjectFormat;
535 uint32_t brw_format = brw_format_for_mesa_format(format);
536 int texel_size = _mesa_get_format_bytes(format);
537
538 if (intel_obj) {
539 size = MIN2(size, intel_obj->Base.Size);
540 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
541 }
542
543 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
544 _mesa_problem(NULL, "bad format %s for texture buffer\n",
545 _mesa_get_format_name(format));
546 }
547
548 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
549 tObj->BufferOffset,
550 brw_format,
551 size / texel_size,
552 texel_size,
553 false /* rw */);
554 }
555
556 static void
557 gen4_update_texture_surface(struct gl_context *ctx,
558 unsigned unit,
559 uint32_t *surf_offset,
560 bool for_gather,
561 uint32_t plane)
562 {
563 struct brw_context *brw = brw_context(ctx);
564 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
565 struct intel_texture_object *intelObj = intel_texture_object(tObj);
566 struct intel_mipmap_tree *mt = intelObj->mt;
567 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
568 uint32_t *surf;
569
570 /* BRW_NEW_TEXTURE_BUFFER */
571 if (tObj->Target == GL_TEXTURE_BUFFER) {
572 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
573 return;
574 }
575
576 if (plane > 0) {
577 if (mt->plane[plane - 1] == NULL)
578 return;
579 mt = mt->plane[plane - 1];
580 }
581
582 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
583 6 * 4, 32, surf_offset);
584
585 mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
586 uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
587 sampler->sRGBDecode);
588
589 if (for_gather) {
590 /* Sandybridge's gather4 message is broken for integer formats.
591 * To work around this, we pretend the surface is UNORM for
592 * 8 or 16-bit formats, and emit shader instructions to recover
593 * the real INT/UINT value. For 32-bit formats, we pretend
594 * the surface is FLOAT, and simply reinterpret the resulting
595 * bits.
596 */
597 switch (tex_format) {
598 case BRW_SURFACEFORMAT_R8_SINT:
599 case BRW_SURFACEFORMAT_R8_UINT:
600 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
601 break;
602
603 case BRW_SURFACEFORMAT_R16_SINT:
604 case BRW_SURFACEFORMAT_R16_UINT:
605 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
606 break;
607
608 case BRW_SURFACEFORMAT_R32_SINT:
609 case BRW_SURFACEFORMAT_R32_UINT:
610 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
611 break;
612
613 default:
614 break;
615 }
616 }
617
618 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
619 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
620 BRW_SURFACE_CUBEFACE_ENABLES |
621 tex_format << BRW_SURFACE_FORMAT_SHIFT);
622
623 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
624
625 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
626 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
627 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
628
629 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
630 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
631 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
632
633 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
634 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
635 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
636 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
637
638 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
639
640 /* Emit relocation to surface contents */
641 drm_intel_bo_emit_reloc(brw->batch.bo,
642 *surf_offset + 4,
643 mt->bo,
644 surf[1] - mt->bo->offset64,
645 I915_GEM_DOMAIN_SAMPLER, 0);
646 }
647
648 /**
649 * Create the constant buffer surface. Vertex/fragment shader constants will be
650 * read from this buffer with Data Port Read instructions/messages.
651 */
652 void
653 brw_create_constant_surface(struct brw_context *brw,
654 drm_intel_bo *bo,
655 uint32_t offset,
656 uint32_t size,
657 uint32_t *out_offset)
658 {
659 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
660 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
661 size, 1, false);
662 }
663
664 /**
665 * Create the buffer surface. Shader buffer variables will be
666 * read from / write to this buffer with Data Port Read/Write
667 * instructions/messages.
668 */
669 void
670 brw_create_buffer_surface(struct brw_context *brw,
671 drm_intel_bo *bo,
672 uint32_t offset,
673 uint32_t size,
674 uint32_t *out_offset)
675 {
676 /* Use a raw surface so we can reuse existing untyped read/write/atomic
677 * messages. We need these specifically for the fragment shader since they
678 * include a pixel mask header that we need to ensure correct behavior
679 * with helper invocations, which cannot write to the buffer.
680 */
681 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
682 BRW_SURFACEFORMAT_RAW,
683 size, 1, true);
684 }
685
686 /**
687 * Set up a binding table entry for use by stream output logic (transform
688 * feedback).
689 *
690 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
691 */
692 void
693 brw_update_sol_surface(struct brw_context *brw,
694 struct gl_buffer_object *buffer_obj,
695 uint32_t *out_offset, unsigned num_vector_components,
696 unsigned stride_dwords, unsigned offset_dwords)
697 {
698 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
699 uint32_t offset_bytes = 4 * offset_dwords;
700 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
701 offset_bytes,
702 buffer_obj->Size - offset_bytes);
703 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
704 out_offset);
705 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
706 size_t size_dwords = buffer_obj->Size / 4;
707 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
708
709 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
710 * too big to map using a single binding table entry?
711 */
712 assert((size_dwords - offset_dwords) / stride_dwords
713 <= BRW_MAX_NUM_BUFFER_ENTRIES);
714
715 if (size_dwords > offset_dwords + num_vector_components) {
716 /* There is room for at least 1 transform feedback output in the buffer.
717 * Compute the number of additional transform feedback outputs the
718 * buffer has room for.
719 */
720 buffer_size_minus_1 =
721 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
722 } else {
723 /* There isn't even room for a single transform feedback output in the
724 * buffer. We can't configure the binding table entry to prevent output
725 * entirely; we'll have to rely on the geometry shader to detect
726 * overflow. But to minimize the damage in case of a bug, set up the
727 * binding table entry to just allow a single output.
728 */
729 buffer_size_minus_1 = 0;
730 }
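   /* buffer_size_minus_1 is split across the Width [6:0], Height [19:7] and
    * Depth [26:20] fields below, just as for an ordinary buffer surface.
    */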
731 width = buffer_size_minus_1 & 0x7f;
732 height = (buffer_size_minus_1 & 0xfff80) >> 7;
733 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
734
735 switch (num_vector_components) {
736 case 1:
737 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
738 break;
739 case 2:
740 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
741 break;
742 case 3:
743 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
744 break;
745 case 4:
746 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
747 break;
748 default:
749 unreachable("Invalid vector size for transform feedback output");
750 }
751
752 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
753 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
754 surface_format << BRW_SURFACE_FORMAT_SHIFT |
755 BRW_SURFACE_RC_READ_WRITE;
756 surf[1] = bo->offset64 + offset_bytes; /* reloc */
757 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
758 height << BRW_SURFACE_HEIGHT_SHIFT);
759 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
760 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
761 surf[4] = 0;
762 surf[5] = 0;
763
764 /* Emit relocation to surface contents. */
765 drm_intel_bo_emit_reloc(brw->batch.bo,
766 *out_offset + 4,
767 bo, offset_bytes,
768 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
769 }
770
771 /* Creates a new WM constant buffer reflecting the current fragment program's
772 * constants, if needed by the fragment program.
773 *
774 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
775 * state atom.
776 */
777 static void
778 brw_upload_wm_pull_constants(struct brw_context *brw)
779 {
780 struct brw_stage_state *stage_state = &brw->wm.base;
781 /* BRW_NEW_FRAGMENT_PROGRAM */
782 struct brw_fragment_program *fp =
783 (struct brw_fragment_program *) brw->fragment_program;
784 /* BRW_NEW_FS_PROG_DATA */
785 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
786
787 /* _NEW_PROGRAM_CONSTANTS */
788 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
789 stage_state, prog_data);
790 }
791
792 const struct brw_tracked_state brw_wm_pull_constants = {
793 .dirty = {
794 .mesa = _NEW_PROGRAM_CONSTANTS,
795 .brw = BRW_NEW_BATCH |
796 BRW_NEW_BLORP |
797 BRW_NEW_FRAGMENT_PROGRAM |
798 BRW_NEW_FS_PROG_DATA,
799 },
800 .emit = brw_upload_wm_pull_constants,
801 };
802
803 /**
804 * Creates a null renderbuffer surface.
805 *
806 * This is used when the shader doesn't write to any color output. An FB
807 * write to target 0 will still be emitted, because that's how the thread is
808 * terminated (and computed depth is returned), so we need to have the
809  * hardware discard the target 0 color output.
810 */
811 static void
812 brw_emit_null_surface_state(struct brw_context *brw,
813 unsigned width,
814 unsigned height,
815 unsigned samples,
816 uint32_t *out_offset)
817 {
818 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
819 * Notes):
820 *
821 * A null surface will be used in instances where an actual surface is
822 * not bound. When a write message is generated to a null surface, no
823 * actual surface is written to. When a read message (including any
824 * sampling engine message) is generated to a null surface, the result
825 * is all zeros. Note that a null surface type is allowed to be used
826  *     with all messages, even if it is not specifically indicated as
827 * supported. All of the remaining fields in surface state are ignored
828 * for null surfaces, with the following exceptions:
829 *
830 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
831 * depth buffer’s corresponding state for all render target surfaces,
832 * including null.
833 *
834 * - Surface Format must be R8G8B8A8_UNORM.
835 */
836 unsigned surface_type = BRW_SURFACE_NULL;
837 drm_intel_bo *bo = NULL;
838 unsigned pitch_minus_1 = 0;
839 uint32_t multisampling_state = 0;
840 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
841 out_offset);
842
843 if (samples > 1) {
844 /* On Gen6, null render targets seem to cause GPU hangs when
845       * multisampling. So work around this problem by rendering into a dummy
846       * color buffer.
847 *
848 * To decrease the amount of memory needed by the workaround buffer, we
849 * set its pitch to 128 bytes (the width of a Y tile). This means that
850 * the amount of memory needed for the workaround buffer is
851 * (width_in_tiles + height_in_tiles - 1) tiles.
852 *
853 * Note that since the workaround buffer will be interpreted by the
854 * hardware as an interleaved multisampled buffer, we need to compute
855 * width_in_tiles and height_in_tiles by dividing the width and height
856 * by 16 rather than the normal Y-tile size of 32.
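       *
       * For example, a hypothetical 1920x1080 target needs
       * (120 + 68 - 1) * 4096 bytes, i.e. roughly 748 KiB.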
857 */
858 unsigned width_in_tiles = ALIGN(width, 16) / 16;
859 unsigned height_in_tiles = ALIGN(height, 16) / 16;
860 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
861 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
862 size_needed);
863 bo = brw->wm.multisampled_null_render_target_bo;
864 surface_type = BRW_SURFACE_2D;
865 pitch_minus_1 = 127;
866 multisampling_state = brw_get_surface_num_multisamples(samples);
867 }
868
869 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
870 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
871 if (brw->gen < 6) {
872 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
873 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
874 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
875 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
876 }
877 surf[1] = bo ? bo->offset64 : 0;
878 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
879 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
880
881 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
882 * Notes):
883 *
884 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
885 */
886 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
887 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
888 surf[4] = multisampling_state;
889 surf[5] = 0;
890
891 if (bo) {
892 drm_intel_bo_emit_reloc(brw->batch.bo,
893 *out_offset + 4,
894 bo, 0,
895 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
896 }
897 }
898
899 /**
900 * Sets up a surface state structure to point at the given region.
901 * While it is only used for the front/back buffer currently, it should be
902  * usable for further buffers when doing ARB_draw_buffers support.
903 */
904 static uint32_t
905 gen4_update_renderbuffer_surface(struct brw_context *brw,
906 struct gl_renderbuffer *rb,
907 bool layered, unsigned unit,
908 uint32_t surf_index)
909 {
910 struct gl_context *ctx = &brw->ctx;
911 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
912 struct intel_mipmap_tree *mt = irb->mt;
913 uint32_t *surf;
914 uint32_t tile_x, tile_y;
915 uint32_t format = 0;
916 uint32_t offset;
917 /* _NEW_BUFFERS */
918 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
919 /* BRW_NEW_FS_PROG_DATA */
920
921 assert(!layered);
922
923 if (rb->TexImage && !brw->has_surface_tile_offset) {
924 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
925
926 if (tile_x != 0 || tile_y != 0) {
927 /* Original gen4 hardware couldn't draw to a non-tile-aligned
928       * destination in a miptree unless you actually set up your renderbuffer
929 * as a miptree and used the fragile lod/array_index/etc. controls to
930 * select the image. So, instead, we just make a new single-level
931 * miptree and render into that.
932 */
933 intel_renderbuffer_move_to_temp(brw, irb, false);
934 mt = irb->mt;
935 }
936 }
937
938 intel_miptree_used_for_rendering(irb->mt);
939
940 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
941
942 format = brw->render_target_format[rb_format];
943 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
944 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
945 __func__, _mesa_get_format_name(rb_format));
946 }
947
948 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
949 format << BRW_SURFACE_FORMAT_SHIFT);
950
951 /* reloc */
952 assert(mt->offset % mt->cpp == 0);
953 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
954 mt->bo->offset64 + mt->offset);
955
956 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
957 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
958
959 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
960 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
961
962 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
963
964 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
965 /* Note that the low bits of these fields are missing, so
966 * there's the possibility of getting in trouble.
967 */
968 assert(tile_x % 4 == 0);
969 assert(tile_y % 2 == 0);
970 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
971 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
972 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
973
974 if (brw->gen < 6) {
975 /* _NEW_COLOR */
976 if (!ctx->Color.ColorLogicOpEnabled &&
977 (ctx->Color.BlendEnabled & (1 << unit)))
978 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
979
980 if (!ctx->Color.ColorMask[unit][0])
981 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
982 if (!ctx->Color.ColorMask[unit][1])
983 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
984 if (!ctx->Color.ColorMask[unit][2])
985 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
986
987 /* As mentioned above, disable writes to the alpha component when the
988 * renderbuffer is XRGB.
989 */
990 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
991 !ctx->Color.ColorMask[unit][3]) {
992 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
993 }
994 }
995
996 drm_intel_bo_emit_reloc(brw->batch.bo,
997 offset + 4,
998 mt->bo,
999 surf[1] - mt->bo->offset64,
1000 I915_GEM_DOMAIN_RENDER,
1001 I915_GEM_DOMAIN_RENDER);
1002
1003 return offset;
1004 }
1005
1006 /**
1007 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1008 */
1009 void
1010 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1011 const struct gl_framebuffer *fb,
1012 uint32_t render_target_start,
1013 uint32_t *surf_offset)
1014 {
1015 GLuint i;
1016 const unsigned int w = _mesa_geometric_width(fb);
1017 const unsigned int h = _mesa_geometric_height(fb);
1018 const unsigned int s = _mesa_geometric_samples(fb);
1019
1020 /* Update surfaces for drawing buffers */
1021 if (fb->_NumColorDrawBuffers >= 1) {
1022 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1023 const uint32_t surf_index = render_target_start + i;
1024
1025 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1026 surf_offset[surf_index] =
1027 brw->vtbl.update_renderbuffer_surface(
1028 brw, fb->_ColorDrawBuffers[i],
1029 _mesa_geometric_layers(fb) > 0, i, surf_index);
1030 } else {
1031 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1032 &surf_offset[surf_index]);
1033 }
1034 }
1035 } else {
1036 const uint32_t surf_index = render_target_start;
1037 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1038 &surf_offset[surf_index]);
1039 }
1040 }
1041
1042 static void
1043 update_renderbuffer_surfaces(struct brw_context *brw)
1044 {
1045 const struct gl_context *ctx = &brw->ctx;
1046
1047 /* _NEW_BUFFERS | _NEW_COLOR */
1048 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1049 brw_update_renderbuffer_surfaces(
1050 brw, fb,
1051 brw->wm.prog_data->binding_table.render_target_start,
1052 brw->wm.base.surf_offset);
1053 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1054 }
1055
1056 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1057 .dirty = {
1058 .mesa = _NEW_BUFFERS |
1059 _NEW_COLOR,
1060 .brw = BRW_NEW_BATCH |
1061 BRW_NEW_BLORP |
1062 BRW_NEW_FS_PROG_DATA,
1063 },
1064 .emit = update_renderbuffer_surfaces,
1065 };
1066
1067 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1068 .dirty = {
1069 .mesa = _NEW_BUFFERS,
1070 .brw = BRW_NEW_BATCH |
1071 BRW_NEW_BLORP,
1072 },
1073 .emit = update_renderbuffer_surfaces,
1074 };
1075
1076
1077 static void
1078 update_stage_texture_surfaces(struct brw_context *brw,
1079 const struct gl_program *prog,
1080 struct brw_stage_state *stage_state,
1081 bool for_gather, uint32_t plane)
1082 {
1083 if (!prog)
1084 return;
1085
1086 struct gl_context *ctx = &brw->ctx;
1087
1088 uint32_t *surf_offset = stage_state->surf_offset;
1089
1090 /* BRW_NEW_*_PROG_DATA */
1091 if (for_gather)
1092 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1093 else
1094 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1095
1096 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
1097 for (unsigned s = 0; s < num_samplers; s++) {
1098 surf_offset[s] = 0;
1099
1100 if (prog->SamplersUsed & (1 << s)) {
1101 const unsigned unit = prog->SamplerUnits[s];
1102
1103 /* _NEW_TEXTURE */
1104 if (ctx->Texture.Unit[unit]._Current) {
1105 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1106 }
1107 }
1108 }
1109 }
1110
1111
1112 /**
1113 * Construct SURFACE_STATE objects for enabled textures.
1114 */
1115 static void
1116 brw_update_texture_surfaces(struct brw_context *brw)
1117 {
1118 /* BRW_NEW_VERTEX_PROGRAM */
1119 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1120
1121 /* BRW_NEW_TESS_PROGRAMS */
1122 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1123 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1124
1125 /* BRW_NEW_GEOMETRY_PROGRAM */
1126 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1127
1128 /* BRW_NEW_FRAGMENT_PROGRAM */
1129 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1130
1131 /* _NEW_TEXTURE */
1132 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1133 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1134 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1135 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1136 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1137
1138    /* Emit an alternate set of surface states for gather. This
1139     * allows the surface format to be overridden for only the
1140     * gather4 messages. */
1141 if (brw->gen < 8) {
1142 if (vs && vs->UsesGather)
1143 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1144 if (tcs && tcs->UsesGather)
1145 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1146 if (tes && tes->UsesGather)
1147 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1148 if (gs && gs->UsesGather)
1149 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1150 if (fs && fs->UsesGather)
1151 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1152 }
1153
1154 if (fs) {
1155 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1156 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1157 }
1158
1159 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1160 }
1161
1162 const struct brw_tracked_state brw_texture_surfaces = {
1163 .dirty = {
1164 .mesa = _NEW_TEXTURE,
1165 .brw = BRW_NEW_BATCH |
1166 BRW_NEW_BLORP |
1167 BRW_NEW_FRAGMENT_PROGRAM |
1168 BRW_NEW_FS_PROG_DATA |
1169 BRW_NEW_GEOMETRY_PROGRAM |
1170 BRW_NEW_GS_PROG_DATA |
1171 BRW_NEW_TESS_PROGRAMS |
1172 BRW_NEW_TCS_PROG_DATA |
1173 BRW_NEW_TES_PROG_DATA |
1174 BRW_NEW_TEXTURE_BUFFER |
1175 BRW_NEW_VERTEX_PROGRAM |
1176 BRW_NEW_VS_PROG_DATA,
1177 },
1178 .emit = brw_update_texture_surfaces,
1179 };
1180
1181 static void
1182 brw_update_cs_texture_surfaces(struct brw_context *brw)
1183 {
1184 /* BRW_NEW_COMPUTE_PROGRAM */
1185 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1186
1187 /* _NEW_TEXTURE */
1188 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1189
1190    /* Emit an alternate set of surface states for gather. This
1191     * allows the surface format to be overridden for only the
1192     * gather4 messages.
1193 */
1194 if (brw->gen < 8) {
1195 if (cs && cs->UsesGather)
1196 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1197 }
1198
1199 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1200 }
1201
1202 const struct brw_tracked_state brw_cs_texture_surfaces = {
1203 .dirty = {
1204 .mesa = _NEW_TEXTURE,
1205 .brw = BRW_NEW_BATCH |
1206 BRW_NEW_BLORP |
1207 BRW_NEW_COMPUTE_PROGRAM,
1208 },
1209 .emit = brw_update_cs_texture_surfaces,
1210 };
1211
1212
1213 void
1214 brw_upload_ubo_surfaces(struct brw_context *brw,
1215 struct gl_linked_shader *shader,
1216 struct brw_stage_state *stage_state,
1217 struct brw_stage_prog_data *prog_data)
1218 {
1219 struct gl_context *ctx = &brw->ctx;
1220
1221 if (!shader)
1222 return;
1223
1224 uint32_t *ubo_surf_offsets =
1225 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1226
1227 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1228 struct gl_uniform_buffer_binding *binding =
1229 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1230
1231 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1232 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1233 } else {
1234 struct intel_buffer_object *intel_bo =
1235 intel_buffer_object(binding->BufferObject);
1236 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1237 if (!binding->AutomaticSize)
1238 size = MIN2(size, binding->Size);
1239 drm_intel_bo *bo =
1240 intel_bufferobj_buffer(brw, intel_bo,
1241 binding->Offset,
1242 size);
1243 brw_create_constant_surface(brw, bo, binding->Offset,
1244 size,
1245 &ubo_surf_offsets[i]);
1246 }
1247 }
1248
1249 uint32_t *ssbo_surf_offsets =
1250 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1251
1252 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1253 struct gl_shader_storage_buffer_binding *binding =
1254 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1255
1256 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1257 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1258 } else {
1259 struct intel_buffer_object *intel_bo =
1260 intel_buffer_object(binding->BufferObject);
1261 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1262 if (!binding->AutomaticSize)
1263 size = MIN2(size, binding->Size);
1264 drm_intel_bo *bo =
1265 intel_bufferobj_buffer(brw, intel_bo,
1266 binding->Offset,
1267 size);
1268 brw_create_buffer_surface(brw, bo, binding->Offset,
1269 size,
1270 &ssbo_surf_offsets[i]);
1271 }
1272 }
1273
1274 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1275 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1276 }
1277
1278 static void
1279 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1280 {
1281 struct gl_context *ctx = &brw->ctx;
1282 /* _NEW_PROGRAM */
1283 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1284
1285 if (!prog)
1286 return;
1287
1288 /* BRW_NEW_FS_PROG_DATA */
1289 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1290 &brw->wm.base, &brw->wm.prog_data->base);
1291 }
1292
1293 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1294 .dirty = {
1295 .mesa = _NEW_PROGRAM,
1296 .brw = BRW_NEW_BATCH |
1297 BRW_NEW_BLORP |
1298 BRW_NEW_FS_PROG_DATA |
1299 BRW_NEW_UNIFORM_BUFFER,
1300 },
1301 .emit = brw_upload_wm_ubo_surfaces,
1302 };
1303
1304 static void
1305 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1306 {
1307 struct gl_context *ctx = &brw->ctx;
1308 /* _NEW_PROGRAM */
1309 struct gl_shader_program *prog =
1310 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1311
1312 if (!prog)
1313 return;
1314
1315 /* BRW_NEW_CS_PROG_DATA */
1316 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1317 &brw->cs.base, &brw->cs.prog_data->base);
1318 }
1319
1320 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1321 .dirty = {
1322 .mesa = _NEW_PROGRAM,
1323 .brw = BRW_NEW_BATCH |
1324 BRW_NEW_BLORP |
1325 BRW_NEW_CS_PROG_DATA |
1326 BRW_NEW_UNIFORM_BUFFER,
1327 },
1328 .emit = brw_upload_cs_ubo_surfaces,
1329 };
1330
1331 void
1332 brw_upload_abo_surfaces(struct brw_context *brw,
1333 struct gl_linked_shader *shader,
1334 struct brw_stage_state *stage_state,
1335 struct brw_stage_prog_data *prog_data)
1336 {
1337 struct gl_context *ctx = &brw->ctx;
1338 uint32_t *surf_offsets =
1339 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1340
1341 if (shader && shader->NumAtomicBuffers) {
1342 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1343 struct gl_atomic_buffer_binding *binding =
1344 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1345 struct intel_buffer_object *intel_bo =
1346 intel_buffer_object(binding->BufferObject);
1347 drm_intel_bo *bo = intel_bufferobj_buffer(
1348 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1349
1350 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1351 binding->Offset, BRW_SURFACEFORMAT_RAW,
1352 bo->size - binding->Offset, 1, true);
1353 }
1354
1355 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1356 }
1357 }
1358
1359 static void
1360 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1361 {
1362 struct gl_context *ctx = &brw->ctx;
1363 /* _NEW_PROGRAM */
1364 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1365
1366 if (prog) {
1367 /* BRW_NEW_FS_PROG_DATA */
1368 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1369 &brw->wm.base, &brw->wm.prog_data->base);
1370 }
1371 }
1372
1373 const struct brw_tracked_state brw_wm_abo_surfaces = {
1374 .dirty = {
1375 .mesa = _NEW_PROGRAM,
1376 .brw = BRW_NEW_ATOMIC_BUFFER |
1377 BRW_NEW_BLORP |
1378 BRW_NEW_BATCH |
1379 BRW_NEW_FS_PROG_DATA,
1380 },
1381 .emit = brw_upload_wm_abo_surfaces,
1382 };
1383
1384 static void
1385 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1386 {
1387 struct gl_context *ctx = &brw->ctx;
1388 /* _NEW_PROGRAM */
1389 struct gl_shader_program *prog =
1390 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1391
1392 if (prog) {
1393 /* BRW_NEW_CS_PROG_DATA */
1394 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1395 &brw->cs.base, &brw->cs.prog_data->base);
1396 }
1397 }
1398
1399 const struct brw_tracked_state brw_cs_abo_surfaces = {
1400 .dirty = {
1401 .mesa = _NEW_PROGRAM,
1402 .brw = BRW_NEW_ATOMIC_BUFFER |
1403 BRW_NEW_BLORP |
1404 BRW_NEW_BATCH |
1405 BRW_NEW_CS_PROG_DATA,
1406 },
1407 .emit = brw_upload_cs_abo_surfaces,
1408 };
1409
1410 static void
1411 brw_upload_cs_image_surfaces(struct brw_context *brw)
1412 {
1413 struct gl_context *ctx = &brw->ctx;
1414 /* _NEW_PROGRAM */
1415 struct gl_shader_program *prog =
1416 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1417
1418 if (prog) {
1419 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1420 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1421 &brw->cs.base, &brw->cs.prog_data->base);
1422 }
1423 }
1424
1425 const struct brw_tracked_state brw_cs_image_surfaces = {
1426 .dirty = {
1427 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1428 .brw = BRW_NEW_BATCH |
1429 BRW_NEW_BLORP |
1430 BRW_NEW_CS_PROG_DATA |
1431 BRW_NEW_IMAGE_UNITS
1432 },
1433 .emit = brw_upload_cs_image_surfaces,
1434 };
1435
1436 static uint32_t
1437 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1438 {
1439 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1440 uint32_t hw_format = brw_format_for_mesa_format(format);
1441 if (access == GL_WRITE_ONLY) {
1442 return hw_format;
1443 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1444 /* Typed surface reads support a very limited subset of the shader
1445 * image formats. Translate it into the closest format the
1446 * hardware supports.
1447 */
1448 return isl_lower_storage_image_format(devinfo, hw_format);
1449 } else {
1450 /* The hardware doesn't actually support a typed format that we can use
1451 * so we have to fall back to untyped read/write messages.
1452 */
1453 return BRW_SURFACEFORMAT_RAW;
1454 }
1455 }
1456
1457 static void
1458 update_default_image_param(struct brw_context *brw,
1459 struct gl_image_unit *u,
1460 unsigned surface_idx,
1461 struct brw_image_param *param)
1462 {
1463 memset(param, 0, sizeof(*param));
1464 param->surface_idx = surface_idx;
1465 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1466 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1467 * detailed explanation of these parameters.
1468 */
1469 param->swizzling[0] = 0xff;
1470 param->swizzling[1] = 0xff;
1471 }
1472
1473 static void
1474 update_buffer_image_param(struct brw_context *brw,
1475 struct gl_image_unit *u,
1476 unsigned surface_idx,
1477 struct brw_image_param *param)
1478 {
1479 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1480
1481 update_default_image_param(brw, u, surface_idx, param);
1482
1483 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1484 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1485 }
1486
1487 static void
1488 update_texture_image_param(struct brw_context *brw,
1489 struct gl_image_unit *u,
1490 unsigned surface_idx,
1491 struct brw_image_param *param)
1492 {
1493 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1494
1495 update_default_image_param(brw, u, surface_idx, param);
1496
1497 param->size[0] = minify(mt->logical_width0, u->Level);
1498 param->size[1] = minify(mt->logical_height0, u->Level);
1499 param->size[2] = (!u->Layered ? 1 :
1500 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1501 u->TexObj->Target == GL_TEXTURE_3D ?
1502 minify(mt->logical_depth0, u->Level) :
1503 mt->logical_depth0);
1504
1505 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1506 &param->offset[0],
1507 &param->offset[1]);
1508
1509 param->stride[0] = mt->cpp;
1510 param->stride[1] = mt->pitch / mt->cpp;
1511 param->stride[2] =
1512 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1513 param->stride[3] =
1514 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1515
1516 if (mt->tiling == I915_TILING_X) {
1517 /* An X tile is a rectangular block of 512x8 bytes. */
1518 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1519 param->tiling[1] = _mesa_logbase2(8);
1520
1521 if (brw->has_swizzling) {
1522 /* Right shifts required to swizzle bits 9 and 10 of the memory
1523 * address with bit 6.
1524 */
1525 param->swizzling[0] = 3;
1526 param->swizzling[1] = 4;
1527 }
1528 } else if (mt->tiling == I915_TILING_Y) {
1529       /* The layout of a Y-tiled surface in memory isn't fundamentally
1530        * different from the layout of an X-tiled surface; we simply pretend
1531        * that the surface is broken up into a number of smaller 16Bx32 tiles,
1532        * each one arranged in X-major order just as for X-tiling.
1533 */
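      /* For a hypothetical 32-bit (cpp == 4) format this works out to
       * tiling[0] = log2(16 / 4) = 2 and tiling[1] = log2(32) = 5.
       */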
1534 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1535 param->tiling[1] = _mesa_logbase2(32);
1536
1537 if (brw->has_swizzling) {
1538 /* Right shift required to swizzle bit 9 of the memory address with
1539 * bit 6.
1540 */
1541 param->swizzling[0] = 3;
1542 }
1543 }
1544
1545 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1546 * address calculation algorithm (emit_address_calculation() in
1547 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1548 * modulus equal to the LOD.
1549 */
1550 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1551 0);
1552 }
1553
1554 static void
1555 update_image_surface(struct brw_context *brw,
1556 struct gl_image_unit *u,
1557 GLenum access,
1558 unsigned surface_idx,
1559 uint32_t *surf_offset,
1560 struct brw_image_param *param)
1561 {
1562 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1563 struct gl_texture_object *obj = u->TexObj;
1564 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1565
1566 if (obj->Target == GL_TEXTURE_BUFFER) {
1567 struct intel_buffer_object *intel_obj =
1568 intel_buffer_object(obj->BufferObject);
1569 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1570 _mesa_get_format_bytes(u->_ActualFormat));
1571
1572 brw->vtbl.emit_buffer_surface_state(
1573 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1574 format, intel_obj->Base.Size / texel_size, texel_size,
1575 access != GL_READ_ONLY);
1576
1577 update_buffer_image_param(brw, u, surface_idx, param);
1578
1579 } else {
1580 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1581 struct intel_mipmap_tree *mt = intel_obj->mt;
1582
1583 if (format == BRW_SURFACEFORMAT_RAW) {
1584 brw->vtbl.emit_buffer_surface_state(
1585 brw, surf_offset, mt->bo, mt->offset,
1586 format, mt->bo->size - mt->offset, 1 /* pitch */,
1587 access != GL_READ_ONLY);
1588
1589 } else {
1590 const unsigned num_layers = (!u->Layered ? 1 :
1591 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1592 mt->logical_depth0);
1593
1594 struct isl_view view = {
1595 .format = format,
1596 .base_level = obj->MinLevel + u->Level,
1597 .levels = 1,
1598 .base_array_layer = obj->MinLayer + u->_Layer,
1599 .array_len = num_layers,
1600 .channel_select = {
1601 ISL_CHANNEL_SELECT_RED,
1602 ISL_CHANNEL_SELECT_GREEN,
1603 ISL_CHANNEL_SELECT_BLUE,
1604 ISL_CHANNEL_SELECT_ALPHA,
1605 },
1606 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1607 };
1608
1609 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1610
1611 brw_emit_surface_state(brw, mt, &view,
1612 surface_state_infos[brw->gen].rb_mocs, false,
1613 surf_offset, surf_index,
1614 I915_GEM_DOMAIN_SAMPLER,
1615 access == GL_READ_ONLY ? 0 :
1616 I915_GEM_DOMAIN_SAMPLER);
1617 }
1618
1619 update_texture_image_param(brw, u, surface_idx, param);
1620 }
1621
1622 } else {
1623 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1624 update_default_image_param(brw, u, surface_idx, param);
1625 }
1626 }
1627
1628 void
1629 brw_upload_image_surfaces(struct brw_context *brw,
1630 struct gl_linked_shader *shader,
1631 struct brw_stage_state *stage_state,
1632 struct brw_stage_prog_data *prog_data)
1633 {
1634 struct gl_context *ctx = &brw->ctx;
1635
1636 if (shader && shader->NumImages) {
1637 for (unsigned i = 0; i < shader->NumImages; i++) {
1638 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1639 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1640
1641 update_image_surface(brw, u, shader->ImageAccess[i],
1642 surf_idx,
1643 &stage_state->surf_offset[surf_idx],
1644 &prog_data->image_param[i]);
1645 }
1646
1647 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1648       /* This may have changed the image metadata that depends on the context
1649        * image unit state and is passed to the program as uniforms, so make
1650        * sure that push and pull constants are re-uploaded.
1651 */
1652 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1653 }
1654 }
1655
1656 static void
1657 brw_upload_wm_image_surfaces(struct brw_context *brw)
1658 {
1659 struct gl_context *ctx = &brw->ctx;
1660 /* BRW_NEW_FRAGMENT_PROGRAM */
1661 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1662
1663 if (prog) {
1664 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1665 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1666 &brw->wm.base, &brw->wm.prog_data->base);
1667 }
1668 }
1669
1670 const struct brw_tracked_state brw_wm_image_surfaces = {
1671 .dirty = {
1672 .mesa = _NEW_TEXTURE,
1673 .brw = BRW_NEW_BATCH |
1674 BRW_NEW_BLORP |
1675 BRW_NEW_FRAGMENT_PROGRAM |
1676 BRW_NEW_FS_PROG_DATA |
1677 BRW_NEW_IMAGE_UNITS
1678 },
1679 .emit = brw_upload_wm_image_surfaces,
1680 };
1681
1682 void
1683 gen4_init_vtable_surface_functions(struct brw_context *brw)
1684 {
1685 brw->vtbl.update_texture_surface = gen4_update_texture_surface;
1686 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1687 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1688 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1689 }
1690
1691 static void
1692 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1693 {
1694 struct gl_context *ctx = &brw->ctx;
1695 /* _NEW_PROGRAM */
1696 struct gl_shader_program *prog =
1697 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1698
1699 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1700 const unsigned surf_idx =
1701 brw->cs.prog_data->binding_table.work_groups_start;
1702 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1703 drm_intel_bo *bo;
1704 uint32_t bo_offset;
1705
1706 if (brw->compute.num_work_groups_bo == NULL) {
1707 bo = NULL;
1708 intel_upload_data(brw,
1709 (void *)brw->compute.num_work_groups,
1710 3 * sizeof(GLuint),
1711 sizeof(GLuint),
1712 &bo,
1713 &bo_offset);
1714 } else {
1715 bo = brw->compute.num_work_groups_bo;
1716 bo_offset = brw->compute.num_work_groups_offset;
1717 }
1718
1719 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1720 bo, bo_offset,
1721 BRW_SURFACEFORMAT_RAW,
1722 3 * sizeof(GLuint), 1, true);
1723 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1724 }
1725 }
1726
1727 const struct brw_tracked_state brw_cs_work_groups_surface = {
1728 .dirty = {
1729 .brw = BRW_NEW_BLORP |
1730 BRW_NEW_CS_WORK_GROUPS
1731 },
1732 .emit = brw_upload_cs_work_groups_surface,
1733 };