2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Flag bits passed through the "flags" argument of the surface-state
 * emitters below.
 */
enum {
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,  /* emit a layered (array) view */
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,   /* suppress MCS/HiZ aux surface */
};
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 get_isl_surf(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
79 GLenum target
, struct isl_view
*view
,
80 uint32_t *tile_x
, uint32_t *tile_y
,
81 uint32_t *offset
, struct isl_surf
*surf
)
83 intel_miptree_get_isl_surf(brw
, mt
, surf
);
85 surf
->dim
= get_isl_surf_dim(target
);
87 const enum isl_dim_layout dim_layout
=
88 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->surf
.tiling
, target
,
91 if (surf
->dim_layout
== dim_layout
)
94 /* The layout of the specified texture target is not compatible with the
95 * actual layout of the miptree structure in memory -- You're entering
96 * dangerous territory, this can only possibly work if you only intended
97 * to access a single level and slice of the texture, and the hardware
98 * supports the tile offset feature in order to allow non-tile-aligned
99 * base offsets, since we'll have to point the hardware to the first
100 * texel of the level instead of relying on the usual base level/layer
103 assert(brw
->has_surface_tile_offset
);
104 assert(view
->levels
== 1 && view
->array_len
== 1);
105 assert(*tile_x
== 0 && *tile_y
== 0);
107 offset
+= intel_miptree_get_tile_offsets(mt
, view
->base_level
,
108 view
->base_array_layer
,
111 /* Minify the logical dimensions of the texture. */
112 const unsigned l
= view
->base_level
- mt
->first_level
;
113 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, l
);
114 surf
->logical_level0_px
.height
= surf
->dim
<= ISL_SURF_DIM_1D
? 1 :
115 minify(surf
->logical_level0_px
.height
, l
);
116 surf
->logical_level0_px
.depth
= surf
->dim
<= ISL_SURF_DIM_2D
? 1 :
117 minify(surf
->logical_level0_px
.depth
, l
);
119 /* Only the base level and layer can be addressed with the overridden
122 surf
->logical_level0_px
.array_len
= 1;
124 surf
->dim_layout
= dim_layout
;
126 /* The requested slice of the texture is now at the base level and
129 view
->base_level
= 0;
130 view
->base_array_layer
= 0;
134 brw_emit_surface_state(struct brw_context
*brw
,
135 struct intel_mipmap_tree
*mt
, uint32_t flags
,
136 GLenum target
, struct isl_view view
,
137 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
138 unsigned read_domains
, unsigned write_domains
)
140 uint32_t tile_x
= mt
->level
[0].level_x
;
141 uint32_t tile_y
= mt
->level
[0].level_y
;
142 uint32_t offset
= mt
->offset
;
144 struct isl_surf surf
;
146 get_isl_surf(brw
, mt
, target
, &view
, &tile_x
, &tile_y
, &offset
, &surf
);
148 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
150 struct brw_bo
*aux_bo
;
151 struct isl_surf
*aux_surf
= NULL
;
152 uint64_t aux_offset
= 0;
153 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
154 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
155 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
156 aux_usage
= intel_miptree_get_aux_isl_usage(brw
, mt
);
159 aux_surf
= &mt
->mcs_buf
->surf
;
161 aux_bo
= mt
->mcs_buf
->bo
;
162 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
164 aux_surf
= &mt
->hiz_buf
->surf
;
166 aux_bo
= mt
->hiz_buf
->bo
;
167 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
170 /* We only really need a clear color if we also have an auxiliary
171 * surface. Without one, it does nothing.
173 clear_color
= mt
->fast_clear_color
;
176 void *state
= brw_state_batch(brw
,
177 brw
->isl_dev
.ss
.size
,
178 brw
->isl_dev
.ss
.align
,
181 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
182 .address
= mt
->bo
->offset64
+ offset
,
183 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
184 .aux_address
= aux_offset
,
185 .mocs
= mocs
, .clear_color
= clear_color
,
186 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
188 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
189 mt
->bo
, offset
, read_domains
, write_domains
);
192 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
193 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
194 * contain other control information. Since buffer addresses are always
195 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
196 * an ordinary reloc to do the necessary address translation.
198 assert((aux_offset
& 0xfff) == 0);
199 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
200 brw_emit_reloc(&brw
->batch
,
201 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
202 aux_bo
, *aux_addr
- aux_bo
->offset64
,
203 read_domains
, write_domains
);
208 brw_update_renderbuffer_surface(struct brw_context
*brw
,
209 struct gl_renderbuffer
*rb
,
210 uint32_t flags
, unsigned unit
/* unused */,
213 struct gl_context
*ctx
= &brw
->ctx
;
214 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
215 struct intel_mipmap_tree
*mt
= irb
->mt
;
218 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
221 assert(brw_render_target_supported(brw
, rb
));
223 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
224 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
225 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
226 __func__
, _mesa_get_format_name(rb_format
));
229 struct isl_view view
= {
230 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
231 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
233 .base_array_layer
= irb
->mt_layer
,
234 .array_len
= MAX2(irb
->layer_count
, 1),
235 .swizzle
= ISL_SWIZZLE_IDENTITY
,
236 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
240 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
243 I915_GEM_DOMAIN_RENDER
,
244 I915_GEM_DOMAIN_RENDER
);
249 translate_tex_target(GLenum target
)
253 case GL_TEXTURE_1D_ARRAY_EXT
:
254 return BRW_SURFACE_1D
;
256 case GL_TEXTURE_RECTANGLE_NV
:
257 return BRW_SURFACE_2D
;
260 case GL_TEXTURE_2D_ARRAY_EXT
:
261 case GL_TEXTURE_EXTERNAL_OES
:
262 case GL_TEXTURE_2D_MULTISAMPLE
:
263 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
264 return BRW_SURFACE_2D
;
267 return BRW_SURFACE_3D
;
269 case GL_TEXTURE_CUBE_MAP
:
270 case GL_TEXTURE_CUBE_MAP_ARRAY
:
271 return BRW_SURFACE_CUBE
;
274 unreachable("not reached");
279 brw_get_surface_tiling_bits(enum isl_tiling tiling
)
283 return BRW_SURFACE_TILED
;
285 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
293 brw_get_surface_num_multisamples(unsigned num_samples
)
296 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
298 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
302 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
306 brw_get_texture_swizzle(const struct gl_context
*ctx
,
307 const struct gl_texture_object
*t
)
309 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
311 int swizzles
[SWIZZLE_NIL
+ 1] = {
321 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
322 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
323 GLenum depth_mode
= t
->DepthMode
;
325 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
326 * with depth component data specified with a sized internal format.
327 * Otherwise, it's left at the old default, GL_LUMINANCE.
329 if (_mesa_is_gles3(ctx
) &&
330 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
331 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
335 switch (depth_mode
) {
337 swizzles
[0] = SWIZZLE_ZERO
;
338 swizzles
[1] = SWIZZLE_ZERO
;
339 swizzles
[2] = SWIZZLE_ZERO
;
340 swizzles
[3] = SWIZZLE_X
;
343 swizzles
[0] = SWIZZLE_X
;
344 swizzles
[1] = SWIZZLE_X
;
345 swizzles
[2] = SWIZZLE_X
;
346 swizzles
[3] = SWIZZLE_ONE
;
349 swizzles
[0] = SWIZZLE_X
;
350 swizzles
[1] = SWIZZLE_X
;
351 swizzles
[2] = SWIZZLE_X
;
352 swizzles
[3] = SWIZZLE_X
;
355 swizzles
[0] = SWIZZLE_X
;
356 swizzles
[1] = SWIZZLE_ZERO
;
357 swizzles
[2] = SWIZZLE_ZERO
;
358 swizzles
[3] = SWIZZLE_ONE
;
363 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
365 /* If the texture's format is alpha-only, force R, G, and B to
366 * 0.0. Similarly, if the texture's format has no alpha channel,
367 * force the alpha value read to 1.0. This allows for the
368 * implementation to use an RGBA texture for any of these formats
369 * without leaking any unexpected values.
371 switch (img
->_BaseFormat
) {
373 swizzles
[0] = SWIZZLE_ZERO
;
374 swizzles
[1] = SWIZZLE_ZERO
;
375 swizzles
[2] = SWIZZLE_ZERO
;
378 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
379 swizzles
[0] = SWIZZLE_X
;
380 swizzles
[1] = SWIZZLE_X
;
381 swizzles
[2] = SWIZZLE_X
;
382 swizzles
[3] = SWIZZLE_ONE
;
385 case GL_LUMINANCE_ALPHA
:
386 if (datatype
== GL_SIGNED_NORMALIZED
) {
387 swizzles
[0] = SWIZZLE_X
;
388 swizzles
[1] = SWIZZLE_X
;
389 swizzles
[2] = SWIZZLE_X
;
390 swizzles
[3] = SWIZZLE_W
;
394 if (datatype
== GL_SIGNED_NORMALIZED
) {
395 swizzles
[0] = SWIZZLE_X
;
396 swizzles
[1] = SWIZZLE_X
;
397 swizzles
[2] = SWIZZLE_X
;
398 swizzles
[3] = SWIZZLE_X
;
404 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
405 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
406 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
407 swizzles
[3] = SWIZZLE_ONE
;
411 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
412 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
413 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
414 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
418 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
419 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
421 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
424 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
426 * which is simply adding 4 then modding by 8 (or anding with 7).
428 * We then may need to apply workarounds for textureGather hardware bugs.
431 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
433 unsigned scs
= (swizzle
+ 4) & 7;
435 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
439 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
440 const struct intel_mipmap_tree
*mt
)
442 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
443 const struct intel_renderbuffer
*irb
=
444 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
446 if (irb
&& irb
->mt
== mt
)
450 return fb
->_NumColorDrawBuffers
;
454 brw_texture_view_sane(const struct brw_context
*brw
,
455 const struct intel_mipmap_tree
*mt
,
456 const struct isl_view
*view
)
458 /* There are special cases only for lossless compression. */
459 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
462 if (isl_format_supports_ccs_e(&brw
->screen
->devinfo
, view
->format
))
465 /* Logic elsewhere needs to take care to resolve the color buffer prior
466 * to sampling it as non-compressed.
468 if (intel_miptree_has_color_unresolved(mt
, view
->base_level
, view
->levels
,
469 view
->base_array_layer
,
473 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
474 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
476 if (rb_index
== fb
->_NumColorDrawBuffers
)
479 /* Underlying surface is compressed but it is sampled using a format that
480 * the sampling engine doesn't support as compressed. Compression must be
481 * disabled for both sampling engine and data port in case the same surface
482 * is used also as render target.
484 return brw
->draw_aux_buffer_disabled
[rb_index
];
488 brw_disable_aux_surface(const struct brw_context
*brw
,
489 const struct intel_mipmap_tree
*mt
,
490 const struct isl_view
*view
)
492 /* Nothing to disable. */
496 const bool is_unresolved
= intel_miptree_has_color_unresolved(
497 mt
, view
->base_level
, view
->levels
,
498 view
->base_array_layer
, view
->array_len
);
500 /* There are special cases only for lossless compression. */
501 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
502 return !is_unresolved
;
504 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
505 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
507 /* If we are drawing into this with compression enabled, then we must also
508 * enable compression when texturing from it regardless of
509 * fast_clear_state. If we don't then, after the first draw call with
510 * this setup, there will be data in the CCS which won't get picked up by
511 * subsequent texturing operations as required by ARB_texture_barrier.
512 * Since we don't want to re-emit the binding table or do a resolve
513 * operation every draw call, the easiest thing to do is just enable
514 * compression on the texturing side. This is completely safe to do
515 * since, if compressed texturing weren't allowed, we would have disabled
516 * compression of render targets in whatever_that_function_is_called().
518 if (rb_index
< fb
->_NumColorDrawBuffers
) {
519 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
520 assert(!is_unresolved
);
523 return brw
->draw_aux_buffer_disabled
[rb_index
];
526 return !is_unresolved
;
530 brw_update_texture_surface(struct gl_context
*ctx
,
532 uint32_t *surf_offset
,
536 struct brw_context
*brw
= brw_context(ctx
);
537 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
539 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
540 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
543 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
544 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
547 if (mt
->plane
[plane
- 1] == NULL
)
549 mt
= mt
->plane
[plane
- 1];
552 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
553 /* If this is a view with restricted NumLayers, then our effective depth
554 * is not just the miptree depth.
556 const unsigned view_num_layers
=
557 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
560 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
561 * texturing functions that return a float, as our code generation always
562 * selects the .x channel (which would always be 0).
564 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
565 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
566 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
567 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
568 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
569 brw_get_texture_swizzle(&brw
->ctx
, obj
));
571 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
572 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
573 sampler
->sRGBDecode
);
575 /* Implement gen6 and gen7 gather work-around */
576 bool need_green_to_blue
= false;
578 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
579 format
== ISL_FORMAT_R32G32_SINT
||
580 format
== ISL_FORMAT_R32G32_UINT
)) {
581 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
582 need_green_to_blue
= brw
->is_haswell
;
583 } else if (brw
->gen
== 6) {
584 /* Sandybridge's gather4 message is broken for integer formats.
585 * To work around this, we pretend the surface is UNORM for
586 * 8 or 16-bit formats, and emit shader instructions to recover
587 * the real INT/UINT value. For 32-bit formats, we pretend
588 * the surface is FLOAT, and simply reinterpret the resulting
592 case ISL_FORMAT_R8_SINT
:
593 case ISL_FORMAT_R8_UINT
:
594 format
= ISL_FORMAT_R8_UNORM
;
597 case ISL_FORMAT_R16_SINT
:
598 case ISL_FORMAT_R16_UINT
:
599 format
= ISL_FORMAT_R16_UNORM
;
602 case ISL_FORMAT_R32_SINT
:
603 case ISL_FORMAT_R32_UINT
:
604 format
= ISL_FORMAT_R32_FLOAT
;
613 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
615 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
616 mt
= mt
->r8stencil_mt
;
620 format
= ISL_FORMAT_R8_UINT
;
621 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
622 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
623 mt
= mt
->r8stencil_mt
;
624 format
= ISL_FORMAT_R8_UINT
;
627 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
629 struct isl_view view
= {
631 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
632 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
633 .base_array_layer
= obj
->MinLayer
,
634 .array_len
= view_num_layers
,
636 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
637 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
638 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
639 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
641 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
644 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
645 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
646 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
648 assert(brw_texture_view_sane(brw
, mt
, &view
));
650 const int flags
= brw_disable_aux_surface(brw
, mt
, &view
) ?
651 INTEL_AUX_BUFFER_DISABLED
: 0;
652 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
654 surf_offset
, surf_index
,
655 I915_GEM_DOMAIN_SAMPLER
, 0);
660 brw_emit_buffer_surface_state(struct brw_context
*brw
,
661 uint32_t *out_offset
,
663 unsigned buffer_offset
,
664 unsigned surface_format
,
665 unsigned buffer_size
,
669 uint32_t *dw
= brw_state_batch(brw
,
670 brw
->isl_dev
.ss
.size
,
671 brw
->isl_dev
.ss
.align
,
674 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
675 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
677 .format
= surface_format
,
679 .mocs
= tex_mocs
[brw
->gen
]);
682 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
684 I915_GEM_DOMAIN_SAMPLER
,
685 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
690 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
692 uint32_t *surf_offset
)
694 struct brw_context
*brw
= brw_context(ctx
);
695 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
696 struct intel_buffer_object
*intel_obj
=
697 intel_buffer_object(tObj
->BufferObject
);
698 uint32_t size
= tObj
->BufferSize
;
699 struct brw_bo
*bo
= NULL
;
700 mesa_format format
= tObj
->_BufferObjectFormat
;
701 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
702 int texel_size
= _mesa_get_format_bytes(format
);
705 size
= MIN2(size
, intel_obj
->Base
.Size
);
706 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
710 /* The ARB_texture_buffer_specification says:
712 * "The number of texels in the buffer texture's texel array is given by
714 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
716 * where <buffer_size> is the size of the buffer object, in basic
717 * machine units and <components> and <base_type> are the element count
718 * and base data type for elements, as specified in Table X.1. The
719 * number of texels in the texel array is then clamped to the
720 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
722 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
723 * so that when ISL divides by stride to obtain the number of texels, that
724 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
726 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
728 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
729 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
730 _mesa_get_format_name(format
));
733 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
742 * Create the constant buffer surface. Vertex/fragment shader constants will be
743 * read from this buffer with Data Port Read instructions/messages.
746 brw_create_constant_surface(struct brw_context
*brw
,
750 uint32_t *out_offset
)
752 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
753 ISL_FORMAT_R32G32B32A32_FLOAT
,
758 * Create the buffer surface. Shader buffer variables will be
759 * read from / write to this buffer with Data Port Read/Write
760 * instructions/messages.
763 brw_create_buffer_surface(struct brw_context
*brw
,
767 uint32_t *out_offset
)
769 /* Use a raw surface so we can reuse existing untyped read/write/atomic
770 * messages. We need these specifically for the fragment shader since they
771 * include a pixel mask header that we need to ensure correct behavior
772 * with helper invocations, which cannot write to the buffer.
774 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
780 * Set up a binding table entry for use by stream output logic (transform
783 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
786 brw_update_sol_surface(struct brw_context
*brw
,
787 struct gl_buffer_object
*buffer_obj
,
788 uint32_t *out_offset
, unsigned num_vector_components
,
789 unsigned stride_dwords
, unsigned offset_dwords
)
791 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
792 uint32_t offset_bytes
= 4 * offset_dwords
;
793 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
795 buffer_obj
->Size
- offset_bytes
,
797 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
798 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
799 size_t size_dwords
= buffer_obj
->Size
/ 4;
800 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
802 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
803 * too big to map using a single binding table entry?
805 assert((size_dwords
- offset_dwords
) / stride_dwords
806 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
808 if (size_dwords
> offset_dwords
+ num_vector_components
) {
809 /* There is room for at least 1 transform feedback output in the buffer.
810 * Compute the number of additional transform feedback outputs the
811 * buffer has room for.
813 buffer_size_minus_1
=
814 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
816 /* There isn't even room for a single transform feedback output in the
817 * buffer. We can't configure the binding table entry to prevent output
818 * entirely; we'll have to rely on the geometry shader to detect
819 * overflow. But to minimize the damage in case of a bug, set up the
820 * binding table entry to just allow a single output.
822 buffer_size_minus_1
= 0;
824 width
= buffer_size_minus_1
& 0x7f;
825 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
826 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
828 switch (num_vector_components
) {
830 surface_format
= ISL_FORMAT_R32_FLOAT
;
833 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
836 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
839 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
842 unreachable("Invalid vector size for transform feedback output");
845 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
846 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
847 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
848 BRW_SURFACE_RC_READ_WRITE
;
849 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
850 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
851 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
852 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
853 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
857 /* Emit relocation to surface contents. */
858 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
859 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
862 /* Creates a new WM constant buffer reflecting the current fragment program's
863 * constants, if needed by the fragment program.
865 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
869 brw_upload_wm_pull_constants(struct brw_context
*brw
)
871 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
874 /* BRW_NEW_FS_PROG_DATA */
875 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
877 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
878 /* _NEW_PROGRAM_CONSTANTS */
879 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
880 stage_state
, prog_data
);
883 const struct brw_tracked_state brw_wm_pull_constants
= {
885 .mesa
= _NEW_PROGRAM_CONSTANTS
,
886 .brw
= BRW_NEW_BATCH
|
888 BRW_NEW_FRAGMENT_PROGRAM
|
889 BRW_NEW_FS_PROG_DATA
,
891 .emit
= brw_upload_wm_pull_constants
,
895 * Creates a null renderbuffer surface.
897 * This is used when the shader doesn't write to any color output. An FB
898 * write to target 0 will still be emitted, because that's how the thread is
899 * terminated (and computed depth is returned), so we need to have the
900 * hardware discard the target 0 color output..
903 brw_emit_null_surface_state(struct brw_context
*brw
,
907 uint32_t *out_offset
)
909 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
912 * A null surface will be used in instances where an actual surface is
913 * not bound. When a write message is generated to a null surface, no
914 * actual surface is written to. When a read message (including any
915 * sampling engine message) is generated to a null surface, the result
916 * is all zeros. Note that a null surface type is allowed to be used
917 * with all messages, even if it is not specificially indicated as
918 * supported. All of the remaining fields in surface state are ignored
919 * for null surfaces, with the following exceptions:
921 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
922 * depth buffer’s corresponding state for all render target surfaces,
925 * - Surface Format must be R8G8B8A8_UNORM.
927 unsigned surface_type
= BRW_SURFACE_NULL
;
928 struct brw_bo
*bo
= NULL
;
929 unsigned pitch_minus_1
= 0;
930 uint32_t multisampling_state
= 0;
931 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
934 /* On Gen6, null render targets seem to cause GPU hangs when
935 * multisampling. So work around this problem by rendering into dummy
938 * To decrease the amount of memory needed by the workaround buffer, we
939 * set its pitch to 128 bytes (the width of a Y tile). This means that
940 * the amount of memory needed for the workaround buffer is
941 * (width_in_tiles + height_in_tiles - 1) tiles.
943 * Note that since the workaround buffer will be interpreted by the
944 * hardware as an interleaved multisampled buffer, we need to compute
945 * width_in_tiles and height_in_tiles by dividing the width and height
946 * by 16 rather than the normal Y-tile size of 32.
948 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
949 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
950 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
951 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
953 bo
= brw
->wm
.multisampled_null_render_target_bo
;
954 surface_type
= BRW_SURFACE_2D
;
956 multisampling_state
= brw_get_surface_num_multisamples(samples
);
959 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
960 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
962 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
963 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
964 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
965 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
967 surf
[1] = bo
? bo
->offset64
: 0;
968 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
969 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
971 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
974 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
976 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
977 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
978 surf
[4] = multisampling_state
;
982 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
983 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
988 * Sets up a surface state structure to point at the given region.
989 * While it is only used for the front/back buffer currently, it should be
990 * usable for further buffers when doing ARB_draw_buffer support.
993 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
994 struct gl_renderbuffer
*rb
,
995 uint32_t flags
, unsigned unit
,
998 struct gl_context
*ctx
= &brw
->ctx
;
999 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1000 struct intel_mipmap_tree
*mt
= irb
->mt
;
1002 uint32_t tile_x
, tile_y
;
1003 enum isl_format format
;
1006 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
1007 /* BRW_NEW_FS_PROG_DATA */
1009 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
1010 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
1012 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
1013 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
1015 if (tile_x
!= 0 || tile_y
!= 0) {
1016 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1017 * destination in a miptree unless you actually setup your renderbuffer
1018 * as a miptree and used the fragile lod/array_index/etc. controls to
1019 * select the image. So, instead, we just make a new single-level
1020 * miptree and render into that.
1022 intel_renderbuffer_move_to_temp(brw
, irb
, false);
1023 assert(irb
->align_wa_mt
);
1024 mt
= irb
->align_wa_mt
;
1028 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
1030 format
= brw
->mesa_to_isl_render_format
[rb_format
];
1031 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
1032 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1033 __func__
, _mesa_get_format_name(rb_format
));
1036 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1037 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1040 assert(mt
->offset
% mt
->cpp
== 0);
1041 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1042 mt
->bo
->offset64
+ mt
->offset
);
1044 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1045 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1047 surf
[3] = (brw_get_surface_tiling_bits(mt
->surf
.tiling
) |
1048 (mt
->surf
.row_pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1050 surf
[4] = brw_get_surface_num_multisamples(mt
->surf
.samples
);
1052 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1053 /* Note that the low bits of these fields are missing, so
1054 * there's the possibility of getting in trouble.
1056 assert(tile_x
% 4 == 0);
1057 assert(tile_y
% 2 == 0);
1058 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1059 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1060 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1064 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1065 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1066 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1068 if (!ctx
->Color
.ColorMask
[unit
][0])
1069 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1070 if (!ctx
->Color
.ColorMask
[unit
][1])
1071 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1072 if (!ctx
->Color
.ColorMask
[unit
][2])
1073 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1075 /* As mentioned above, disable writes to the alpha component when the
1076 * renderbuffer is XRGB.
1078 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1079 !ctx
->Color
.ColorMask
[unit
][3]) {
1080 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1084 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1085 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1091 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1094 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1095 const struct gl_framebuffer
*fb
,
1096 uint32_t render_target_start
,
1097 uint32_t *surf_offset
)
1100 const unsigned int w
= _mesa_geometric_width(fb
);
1101 const unsigned int h
= _mesa_geometric_height(fb
);
1102 const unsigned int s
= _mesa_geometric_samples(fb
);
1104 /* Update surfaces for drawing buffers */
1105 if (fb
->_NumColorDrawBuffers
>= 1) {
1106 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1107 const uint32_t surf_index
= render_target_start
+ i
;
1108 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1109 INTEL_RENDERBUFFER_LAYERED
: 0) |
1110 (brw
->draw_aux_buffer_disabled
[i
] ?
1111 INTEL_AUX_BUFFER_DISABLED
: 0);
1113 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1114 surf_offset
[surf_index
] =
1115 brw
->vtbl
.update_renderbuffer_surface(
1116 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1118 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1119 &surf_offset
[surf_index
]);
1123 const uint32_t surf_index
= render_target_start
;
1124 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1125 &surf_offset
[surf_index
]);
1130 update_renderbuffer_surfaces(struct brw_context
*brw
)
1132 const struct gl_context
*ctx
= &brw
->ctx
;
1134 /* BRW_NEW_FS_PROG_DATA */
1135 const struct brw_wm_prog_data
*wm_prog_data
=
1136 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1138 /* _NEW_BUFFERS | _NEW_COLOR */
1139 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1140 brw_update_renderbuffer_surfaces(
1142 wm_prog_data
->binding_table
.render_target_start
,
1143 brw
->wm
.base
.surf_offset
);
1144 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1147 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1149 .mesa
= _NEW_BUFFERS
|
1151 .brw
= BRW_NEW_BATCH
|
1153 BRW_NEW_FS_PROG_DATA
,
1155 .emit
= update_renderbuffer_surfaces
,
1158 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1160 .mesa
= _NEW_BUFFERS
,
1161 .brw
= BRW_NEW_BATCH
|
1164 .emit
= update_renderbuffer_surfaces
,
1168 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1170 const struct gl_context
*ctx
= &brw
->ctx
;
1172 /* BRW_NEW_FS_PROG_DATA */
1173 const struct brw_wm_prog_data
*wm_prog_data
=
1174 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1176 /* BRW_NEW_FRAGMENT_PROGRAM */
1177 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1178 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1180 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1182 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1183 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1184 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1185 const unsigned surf_index
=
1186 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1187 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1190 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1191 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1192 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1195 /* Override the target of the texture if the render buffer is a
1196 * single slice of a 3D texture (since the minimum array element
1197 * field of the surface state structure is ignored by the sampler
1198 * unit for 3D textures on some hardware), or if the render buffer
1199 * is a 1D array (since shaders always provide the array index
1200 * coordinate at the Z component to avoid state-dependent
1201 * recompiles when changing the texture target of the
1204 const GLenum target
=
1205 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1206 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1207 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1210 const struct isl_view view
= {
1212 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1214 .base_array_layer
= irb
->mt_layer
,
1215 .array_len
= irb
->layer_count
,
1216 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1217 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1220 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1221 INTEL_AUX_BUFFER_DISABLED
: 0;
1222 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1224 surf_offset
, surf_index
,
1225 I915_GEM_DOMAIN_SAMPLER
, 0);
1228 brw
->vtbl
.emit_null_surface_state(
1229 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1230 _mesa_geometric_samples(fb
), surf_offset
);
1234 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1238 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1240 .mesa
= _NEW_BUFFERS
,
1241 .brw
= BRW_NEW_BATCH
|
1242 BRW_NEW_FRAGMENT_PROGRAM
|
1243 BRW_NEW_FS_PROG_DATA
,
1245 .emit
= update_renderbuffer_read_surfaces
,
1249 update_stage_texture_surfaces(struct brw_context
*brw
,
1250 const struct gl_program
*prog
,
1251 struct brw_stage_state
*stage_state
,
1252 bool for_gather
, uint32_t plane
)
1257 struct gl_context
*ctx
= &brw
->ctx
;
1259 uint32_t *surf_offset
= stage_state
->surf_offset
;
1261 /* BRW_NEW_*_PROG_DATA */
1263 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1265 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1267 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1268 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1271 if (prog
->SamplersUsed
& (1 << s
)) {
1272 const unsigned unit
= prog
->SamplerUnits
[s
];
1275 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1276 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1284 * Construct SURFACE_STATE objects for enabled textures.
1287 brw_update_texture_surfaces(struct brw_context
*brw
)
1289 /* BRW_NEW_VERTEX_PROGRAM */
1290 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1292 /* BRW_NEW_TESS_PROGRAMS */
1293 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1294 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1296 /* BRW_NEW_GEOMETRY_PROGRAM */
1297 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1299 /* BRW_NEW_FRAGMENT_PROGRAM */
1300 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1303 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1304 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1305 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1306 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1307 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1309 /* emit alternate set of surface state for gather. this
1310 * allows the surface format to be overriden for only the
1311 * gather4 messages. */
1313 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1314 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1315 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1316 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1317 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1318 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1319 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1320 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1321 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1322 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1326 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1327 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1330 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1333 const struct brw_tracked_state brw_texture_surfaces
= {
1335 .mesa
= _NEW_TEXTURE
,
1336 .brw
= BRW_NEW_BATCH
|
1338 BRW_NEW_FRAGMENT_PROGRAM
|
1339 BRW_NEW_FS_PROG_DATA
|
1340 BRW_NEW_GEOMETRY_PROGRAM
|
1341 BRW_NEW_GS_PROG_DATA
|
1342 BRW_NEW_TESS_PROGRAMS
|
1343 BRW_NEW_TCS_PROG_DATA
|
1344 BRW_NEW_TES_PROG_DATA
|
1345 BRW_NEW_TEXTURE_BUFFER
|
1346 BRW_NEW_VERTEX_PROGRAM
|
1347 BRW_NEW_VS_PROG_DATA
,
1349 .emit
= brw_update_texture_surfaces
,
1353 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1355 /* BRW_NEW_COMPUTE_PROGRAM */
1356 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1359 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1361 /* emit alternate set of surface state for gather. this
1362 * allows the surface format to be overriden for only the
1366 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1367 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1370 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1373 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1375 .mesa
= _NEW_TEXTURE
,
1376 .brw
= BRW_NEW_BATCH
|
1378 BRW_NEW_COMPUTE_PROGRAM
,
1380 .emit
= brw_update_cs_texture_surfaces
,
1385 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1386 struct brw_stage_state
*stage_state
,
1387 struct brw_stage_prog_data
*prog_data
)
1389 struct gl_context
*ctx
= &brw
->ctx
;
1394 uint32_t *ubo_surf_offsets
=
1395 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1397 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1398 struct gl_uniform_buffer_binding
*binding
=
1399 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1401 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1402 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1404 struct intel_buffer_object
*intel_bo
=
1405 intel_buffer_object(binding
->BufferObject
);
1406 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1407 if (!binding
->AutomaticSize
)
1408 size
= MIN2(size
, binding
->Size
);
1410 intel_bufferobj_buffer(brw
, intel_bo
,
1413 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1415 &ubo_surf_offsets
[i
]);
1419 uint32_t *ssbo_surf_offsets
=
1420 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1422 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1423 struct gl_shader_storage_buffer_binding
*binding
=
1424 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1426 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1427 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1429 struct intel_buffer_object
*intel_bo
=
1430 intel_buffer_object(binding
->BufferObject
);
1431 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1432 if (!binding
->AutomaticSize
)
1433 size
= MIN2(size
, binding
->Size
);
1435 intel_bufferobj_buffer(brw
, intel_bo
,
1438 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1440 &ssbo_surf_offsets
[i
]);
1444 stage_state
->push_constants_dirty
= true;
1446 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1447 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1451 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1453 struct gl_context
*ctx
= &brw
->ctx
;
1455 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1457 /* BRW_NEW_FS_PROG_DATA */
1458 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1461 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1463 .mesa
= _NEW_PROGRAM
,
1464 .brw
= BRW_NEW_BATCH
|
1466 BRW_NEW_FS_PROG_DATA
|
1467 BRW_NEW_UNIFORM_BUFFER
,
1469 .emit
= brw_upload_wm_ubo_surfaces
,
1473 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1475 struct gl_context
*ctx
= &brw
->ctx
;
1477 struct gl_program
*prog
=
1478 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1480 /* BRW_NEW_CS_PROG_DATA */
1481 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1484 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1486 .mesa
= _NEW_PROGRAM
,
1487 .brw
= BRW_NEW_BATCH
|
1489 BRW_NEW_CS_PROG_DATA
|
1490 BRW_NEW_UNIFORM_BUFFER
,
1492 .emit
= brw_upload_cs_ubo_surfaces
,
1496 brw_upload_abo_surfaces(struct brw_context
*brw
,
1497 const struct gl_program
*prog
,
1498 struct brw_stage_state
*stage_state
,
1499 struct brw_stage_prog_data
*prog_data
)
1501 struct gl_context
*ctx
= &brw
->ctx
;
1502 uint32_t *surf_offsets
=
1503 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1505 if (prog
->info
.num_abos
) {
1506 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1507 struct gl_atomic_buffer_binding
*binding
=
1508 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1509 struct intel_buffer_object
*intel_bo
=
1510 intel_buffer_object(binding
->BufferObject
);
1512 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1513 intel_bo
->Base
.Size
- binding
->Offset
,
1516 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1517 binding
->Offset
, ISL_FORMAT_RAW
,
1518 bo
->size
- binding
->Offset
, 1, true);
1521 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1526 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1529 const struct gl_program
*wm
= brw
->fragment_program
;
1532 /* BRW_NEW_FS_PROG_DATA */
1533 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1537 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1539 .mesa
= _NEW_PROGRAM
,
1540 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1543 BRW_NEW_FS_PROG_DATA
,
1545 .emit
= brw_upload_wm_abo_surfaces
,
1549 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1552 const struct gl_program
*cp
= brw
->compute_program
;
1555 /* BRW_NEW_CS_PROG_DATA */
1556 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1560 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1562 .mesa
= _NEW_PROGRAM
,
1563 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1566 BRW_NEW_CS_PROG_DATA
,
1568 .emit
= brw_upload_cs_abo_surfaces
,
1572 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1575 const struct gl_program
*cp
= brw
->compute_program
;
1578 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1579 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1580 brw
->cs
.base
.prog_data
);
1584 const struct brw_tracked_state brw_cs_image_surfaces
= {
1586 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1587 .brw
= BRW_NEW_BATCH
|
1589 BRW_NEW_CS_PROG_DATA
|
1592 .emit
= brw_upload_cs_image_surfaces
,
1596 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1598 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1599 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1600 if (access
== GL_WRITE_ONLY
) {
1602 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1603 /* Typed surface reads support a very limited subset of the shader
1604 * image formats. Translate it into the closest format the
1605 * hardware supports.
1607 return isl_lower_storage_image_format(devinfo
, hw_format
);
1609 /* The hardware doesn't actually support a typed format that we can use
1610 * so we have to fall back to untyped read/write messages.
1612 return ISL_FORMAT_RAW
;
1617 update_default_image_param(struct brw_context
*brw
,
1618 struct gl_image_unit
*u
,
1619 unsigned surface_idx
,
1620 struct brw_image_param
*param
)
1622 memset(param
, 0, sizeof(*param
));
1623 param
->surface_idx
= surface_idx
;
1624 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1625 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1626 * detailed explanation of these parameters.
1628 param
->swizzling
[0] = 0xff;
1629 param
->swizzling
[1] = 0xff;
1633 update_buffer_image_param(struct brw_context
*brw
,
1634 struct gl_image_unit
*u
,
1635 unsigned surface_idx
,
1636 struct brw_image_param
*param
)
1638 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1639 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1640 update_default_image_param(brw
, u
, surface_idx
, param
);
1642 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1643 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1647 get_image_num_layers(const struct intel_mipmap_tree
*mt
, GLenum target
,
1650 if (target
== GL_TEXTURE_CUBE_MAP
)
1653 if (mt
->surf
.size
> 0) {
1654 return target
== GL_TEXTURE_3D
?
1655 minify(mt
->surf
.logical_level0_px
.depth
, level
) :
1656 mt
->surf
.logical_level0_px
.array_len
;
1659 return target
== GL_TEXTURE_3D
?
1660 minify(mt
->logical_depth0
, level
) : mt
->logical_depth0
;
1664 update_image_surface(struct brw_context
*brw
,
1665 struct gl_image_unit
*u
,
1667 unsigned surface_idx
,
1668 uint32_t *surf_offset
,
1669 struct brw_image_param
*param
)
1671 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1672 struct gl_texture_object
*obj
= u
->TexObj
;
1673 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1675 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1676 struct intel_buffer_object
*intel_obj
=
1677 intel_buffer_object(obj
->BufferObject
);
1678 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1679 _mesa_get_format_bytes(u
->_ActualFormat
));
1681 brw_emit_buffer_surface_state(
1682 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1683 format
, intel_obj
->Base
.Size
, texel_size
,
1684 access
!= GL_READ_ONLY
);
1686 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1689 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1690 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1691 const unsigned num_layers
= u
->Layered
?
1692 get_image_num_layers(mt
, obj
->Target
, u
->Level
) : 1;
1694 struct isl_view view
= {
1696 .base_level
= obj
->MinLevel
+ u
->Level
,
1698 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1699 .array_len
= num_layers
,
1700 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1701 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1704 if (format
== ISL_FORMAT_RAW
) {
1705 brw_emit_buffer_surface_state(
1706 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1707 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1708 access
!= GL_READ_ONLY
);
1711 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1712 assert(!intel_miptree_has_color_unresolved(mt
,
1714 view
.base_array_layer
,
1716 brw_emit_surface_state(brw
, mt
, INTEL_AUX_BUFFER_DISABLED
,
1717 mt
->target
, view
, tex_mocs
[brw
->gen
],
1718 surf_offset
, surf_index
,
1719 I915_GEM_DOMAIN_SAMPLER
,
1720 access
== GL_READ_ONLY
? 0 :
1721 I915_GEM_DOMAIN_SAMPLER
);
1724 struct isl_surf surf
;
1725 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
1727 isl_surf_fill_image_param(&brw
->isl_dev
, param
, &surf
, &view
);
1728 param
->surface_idx
= surface_idx
;
1732 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1733 update_default_image_param(brw
, u
, surface_idx
, param
);
1738 brw_upload_image_surfaces(struct brw_context
*brw
,
1739 const struct gl_program
*prog
,
1740 struct brw_stage_state
*stage_state
,
1741 struct brw_stage_prog_data
*prog_data
)
1744 struct gl_context
*ctx
= &brw
->ctx
;
1746 if (prog
->info
.num_images
) {
1747 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1748 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1749 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1751 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1753 &stage_state
->surf_offset
[surf_idx
],
1754 &prog_data
->image_param
[i
]);
1757 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1758 /* This may have changed the image metadata dependent on the context
1759 * image unit state and passed to the program as uniforms, make sure
1760 * that push and pull constants are reuploaded.
1762 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1767 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1769 /* BRW_NEW_FRAGMENT_PROGRAM */
1770 const struct gl_program
*wm
= brw
->fragment_program
;
1773 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1774 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1775 brw
->wm
.base
.prog_data
);
1779 const struct brw_tracked_state brw_wm_image_surfaces
= {
1781 .mesa
= _NEW_TEXTURE
,
1782 .brw
= BRW_NEW_BATCH
|
1784 BRW_NEW_FRAGMENT_PROGRAM
|
1785 BRW_NEW_FS_PROG_DATA
|
1788 .emit
= brw_upload_wm_image_surfaces
,
1792 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1794 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1795 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1799 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1801 gen4_init_vtable_surface_functions(brw
);
1802 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1806 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1808 struct gl_context
*ctx
= &brw
->ctx
;
1810 struct gl_program
*prog
=
1811 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1812 /* BRW_NEW_CS_PROG_DATA */
1813 const struct brw_cs_prog_data
*cs_prog_data
=
1814 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1816 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1817 const unsigned surf_idx
=
1818 cs_prog_data
->binding_table
.work_groups_start
;
1819 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1823 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1825 intel_upload_data(brw
,
1826 (void *)brw
->compute
.num_work_groups
,
1832 bo
= brw
->compute
.num_work_groups_bo
;
1833 bo_offset
= brw
->compute
.num_work_groups_offset
;
1836 brw_emit_buffer_surface_state(brw
, surf_offset
,
1839 3 * sizeof(GLuint
), 1, true);
1840 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1844 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1846 .brw
= BRW_NEW_BLORP
|
1847 BRW_NEW_CS_PROG_DATA
|
1848 BRW_NEW_CS_WORK_GROUPS
1850 .emit
= brw_upload_cs_work_groups_surface
,