src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
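/* Memory Object Control State (MOCS) entries for texture and render-target
 * surfaces, indexed by hardware generation (brw->gen).  On gen8+ textures use
 * the write-back cacheable entries while render targets use the PTE-based
 * variants.
 */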
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
76
77 static void
78 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
79 GLenum target, struct isl_view *view,
80 uint32_t *tile_x, uint32_t *tile_y,
81 uint32_t *offset, struct isl_surf *surf)
82 {
83 *surf = mt->surf;
84
85 const enum isl_dim_layout dim_layout =
86 get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target);
87
88 if (surf->dim_layout == dim_layout)
89 return;
90
91 /* The layout of the specified texture target is not compatible with the
92 * actual layout of the miptree structure in memory -- You're entering
93 * dangerous territory. This can only possibly work if you only intended
94 * to access a single level and slice of the texture, and the hardware
95 * supports the tile offset feature in order to allow non-tile-aligned
96 * base offsets, since we'll have to point the hardware to the first
97 * texel of the level instead of relying on the usual base level/layer
98 * controls.
99 */
100 assert(brw->has_surface_tile_offset);
101 assert(view->levels == 1 && view->array_len == 1);
102 assert(*tile_x == 0 && *tile_y == 0);
103
104 *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
105 view->base_array_layer,
106 tile_x, tile_y);
107
108 /* Minify the logical dimensions of the texture. */
109 const unsigned l = view->base_level - mt->first_level;
110 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
111 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
112 minify(surf->logical_level0_px.height, l);
113 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
114 minify(surf->logical_level0_px.depth, l);
115
116 /* Only the base level and layer can be addressed with the overridden
117 * layout.
118 */
119 surf->logical_level0_px.array_len = 1;
120 surf->levels = 1;
121 surf->dim_layout = dim_layout;
122
123 /* The requested slice of the texture is now at the base level and
124 * layer.
125 */
126 view->base_level = 0;
127 view->base_array_layer = 0;
128 }
129
130 static void
131 brw_emit_surface_state(struct brw_context *brw,
132 struct intel_mipmap_tree *mt, uint32_t flags,
133 GLenum target, struct isl_view view,
134 uint32_t mocs, uint32_t *surf_offset, int surf_index,
135 unsigned read_domains, unsigned write_domains)
136 {
137 uint32_t tile_x = mt->level[0].level_x;
138 uint32_t tile_y = mt->level[0].level_y;
139 uint32_t offset = mt->offset;
140
141 struct isl_surf surf;
142
143 get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
144
145 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
146
147 struct brw_bo *aux_bo;
148 struct isl_surf *aux_surf = NULL;
149 uint64_t aux_offset = 0;
150 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
151 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
152 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
153 aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
154
155 if (mt->mcs_buf) {
156 aux_surf = &mt->mcs_buf->surf;
157
158 aux_bo = mt->mcs_buf->bo;
159 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
160 } else {
161 aux_surf = &mt->hiz_buf->surf;
162
163 aux_bo = mt->hiz_buf->bo;
164 aux_offset = mt->hiz_buf->bo->offset64;
165 }
166
167 /* We only really need a clear color if we also have an auxiliary
168 * surface. Without one, it does nothing.
169 */
170 clear_color = mt->fast_clear_color;
171 }
172
173 void *state = brw_state_batch(brw,
174 brw->isl_dev.ss.size,
175 brw->isl_dev.ss.align,
176 surf_offset);
177
178 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
179 .address = mt->bo->offset64 + offset,
180 .aux_surf = aux_surf, .aux_usage = aux_usage,
181 .aux_address = aux_offset,
182 .mocs = mocs, .clear_color = clear_color,
183 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
184
185 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
186 mt->bo, offset, read_domains, write_domains);
187
188 if (aux_surf) {
189 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
190 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
191 * contain other control information. Since buffer addresses are always
192 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
193 * an ordinary reloc to do the necessary address translation.
194 */
195 assert((aux_offset & 0xfff) == 0);
196 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
197 brw_emit_reloc(&brw->batch,
198 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
199 aux_bo, *aux_addr - aux_bo->offset64,
200 read_domains, write_domains);
201 }
202 }
203
204 uint32_t
205 brw_update_renderbuffer_surface(struct brw_context *brw,
206 struct gl_renderbuffer *rb,
207 uint32_t flags, unsigned unit /* unused */,
208 uint32_t surf_index)
209 {
210 struct gl_context *ctx = &brw->ctx;
211 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
212 struct intel_mipmap_tree *mt = irb->mt;
213
214 if (brw->gen < 9) {
215 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
216 }
217
218 assert(brw_render_target_supported(brw, rb));
219
220 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
221 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
222 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
223 __func__, _mesa_get_format_name(rb_format));
224 }
225
226 struct isl_view view = {
227 .format = brw->mesa_to_isl_render_format[rb_format],
228 .base_level = irb->mt_level - irb->mt->first_level,
229 .levels = 1,
230 .base_array_layer = irb->mt_layer,
231 .array_len = MAX2(irb->layer_count, 1),
232 .swizzle = ISL_SWIZZLE_IDENTITY,
233 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
234 };
235
236 uint32_t offset;
237 brw_emit_surface_state(brw, mt, flags, mt->target, view,
238 rb_mocs[brw->gen],
239 &offset, surf_index,
240 I915_GEM_DOMAIN_RENDER,
241 I915_GEM_DOMAIN_RENDER);
242 return offset;
243 }
244
245 GLuint
246 translate_tex_target(GLenum target)
247 {
248 switch (target) {
249 case GL_TEXTURE_1D:
250 case GL_TEXTURE_1D_ARRAY_EXT:
251 return BRW_SURFACE_1D;
252
253 case GL_TEXTURE_RECTANGLE_NV:
254 return BRW_SURFACE_2D;
255
256 case GL_TEXTURE_2D:
257 case GL_TEXTURE_2D_ARRAY_EXT:
258 case GL_TEXTURE_EXTERNAL_OES:
259 case GL_TEXTURE_2D_MULTISAMPLE:
260 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
261 return BRW_SURFACE_2D;
262
263 case GL_TEXTURE_3D:
264 return BRW_SURFACE_3D;
265
266 case GL_TEXTURE_CUBE_MAP:
267 case GL_TEXTURE_CUBE_MAP_ARRAY:
268 return BRW_SURFACE_CUBE;
269
270 default:
271 unreachable("not reached");
272 }
273 }
274
275 uint32_t
276 brw_get_surface_tiling_bits(enum isl_tiling tiling)
277 {
278 switch (tiling) {
279 case ISL_TILING_X:
280 return BRW_SURFACE_TILED;
281 case ISL_TILING_Y0:
282 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
283 default:
284 return 0;
285 }
286 }
287
288
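/* Used for the legacy (gen4-6) SURFACE_STATE layout, which only distinguishes
 * single-sampled surfaces from 4x multisampled ones.
 */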
289 uint32_t
290 brw_get_surface_num_multisamples(unsigned num_samples)
291 {
292 if (num_samples > 1)
293 return BRW_SURFACE_MULTISAMPLECOUNT_4;
294 else
295 return BRW_SURFACE_MULTISAMPLECOUNT_1;
296 }
297
298 /**
299 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
300 * swizzling.
301 */
302 int
303 brw_get_texture_swizzle(const struct gl_context *ctx,
304 const struct gl_texture_object *t)
305 {
306 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
307
308 int swizzles[SWIZZLE_NIL + 1] = {
309 SWIZZLE_X,
310 SWIZZLE_Y,
311 SWIZZLE_Z,
312 SWIZZLE_W,
313 SWIZZLE_ZERO,
314 SWIZZLE_ONE,
315 SWIZZLE_NIL
316 };
317
318 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
319 img->_BaseFormat == GL_DEPTH_STENCIL) {
320 GLenum depth_mode = t->DepthMode;
321
322 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
323 * with depth component data specified with a sized internal format.
324 * Otherwise, it's left at the old default, GL_LUMINANCE.
325 */
326 if (_mesa_is_gles3(ctx) &&
327 img->InternalFormat != GL_DEPTH_COMPONENT &&
328 img->InternalFormat != GL_DEPTH_STENCIL) {
329 depth_mode = GL_RED;
330 }
331
332 switch (depth_mode) {
333 case GL_ALPHA:
334 swizzles[0] = SWIZZLE_ZERO;
335 swizzles[1] = SWIZZLE_ZERO;
336 swizzles[2] = SWIZZLE_ZERO;
337 swizzles[3] = SWIZZLE_X;
338 break;
339 case GL_LUMINANCE:
340 swizzles[0] = SWIZZLE_X;
341 swizzles[1] = SWIZZLE_X;
342 swizzles[2] = SWIZZLE_X;
343 swizzles[3] = SWIZZLE_ONE;
344 break;
345 case GL_INTENSITY:
346 swizzles[0] = SWIZZLE_X;
347 swizzles[1] = SWIZZLE_X;
348 swizzles[2] = SWIZZLE_X;
349 swizzles[3] = SWIZZLE_X;
350 break;
351 case GL_RED:
352 swizzles[0] = SWIZZLE_X;
353 swizzles[1] = SWIZZLE_ZERO;
354 swizzles[2] = SWIZZLE_ZERO;
355 swizzles[3] = SWIZZLE_ONE;
356 break;
357 }
358 }
359
360 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
361
362 /* If the texture's format is alpha-only, force R, G, and B to
363 * 0.0. Similarly, if the texture's format has no alpha channel,
364 * force the alpha value read to 1.0. This allows for the
365 * implementation to use an RGBA texture for any of these formats
366 * without leaking any unexpected values.
367 */
368 switch (img->_BaseFormat) {
369 case GL_ALPHA:
370 swizzles[0] = SWIZZLE_ZERO;
371 swizzles[1] = SWIZZLE_ZERO;
372 swizzles[2] = SWIZZLE_ZERO;
373 break;
374 case GL_LUMINANCE:
375 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
376 swizzles[0] = SWIZZLE_X;
377 swizzles[1] = SWIZZLE_X;
378 swizzles[2] = SWIZZLE_X;
379 swizzles[3] = SWIZZLE_ONE;
380 }
381 break;
382 case GL_LUMINANCE_ALPHA:
383 if (datatype == GL_SIGNED_NORMALIZED) {
384 swizzles[0] = SWIZZLE_X;
385 swizzles[1] = SWIZZLE_X;
386 swizzles[2] = SWIZZLE_X;
387 swizzles[3] = SWIZZLE_W;
388 }
389 break;
390 case GL_INTENSITY:
391 if (datatype == GL_SIGNED_NORMALIZED) {
392 swizzles[0] = SWIZZLE_X;
393 swizzles[1] = SWIZZLE_X;
394 swizzles[2] = SWIZZLE_X;
395 swizzles[3] = SWIZZLE_X;
396 }
397 break;
398 case GL_RED:
399 case GL_RG:
400 case GL_RGB:
401 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
402 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
403 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
404 swizzles[3] = SWIZZLE_ONE;
405 break;
406 }
407
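/* Compose the application-specified swizzle (t->_Swizzle) with the
 * format-derived swizzle computed above: each channel of the application
 * swizzle selects an entry from swizzles[].
 */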
408 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
409 swizzles[GET_SWZ(t->_Swizzle, 1)],
410 swizzles[GET_SWZ(t->_Swizzle, 2)],
411 swizzles[GET_SWZ(t->_Swizzle, 3)]);
412 }
413
414 /**
415 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
416 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
417 *
418 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
419 * 0 1 2 3 4 5
420 * 4 5 6 7 0 1
421 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
422 *
423 * which is simply adding 4 then modding by 8 (or anding with 7).
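 * For example, SWIZZLE_X (0) maps to 4 (SCS_RED) and SWIZZLE_ZERO (4) maps
 * to 0 (SCS_ZERO).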
424 *
425 * We then may need to apply workarounds for textureGather hardware bugs.
426 */
427 static unsigned
428 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
429 {
430 unsigned scs = (swizzle + 4) & 7;
431
432 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
433 }
434
435 static unsigned
436 brw_find_matching_rb(const struct gl_framebuffer *fb,
437 const struct intel_mipmap_tree *mt)
438 {
439 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
440 const struct intel_renderbuffer *irb =
441 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
442
443 if (irb && irb->mt == mt)
444 return i;
445 }
446
447 return fb->_NumColorDrawBuffers;
448 }
449
450 static inline bool
451 brw_texture_view_sane(const struct brw_context *brw,
452 const struct intel_mipmap_tree *mt,
453 const struct isl_view *view)
454 {
455 /* There are special cases only for lossless compression. */
456 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
457 return true;
458
459 if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
460 return true;
461
462 /* Logic elsewhere needs to take care to resolve the color buffer prior
463 * to sampling it as non-compressed.
464 */
465 if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
466 view->base_array_layer,
467 view->array_len))
468 return false;
469
470 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
471 const unsigned rb_index = brw_find_matching_rb(fb, mt);
472
473 if (rb_index == fb->_NumColorDrawBuffers)
474 return true;
475
476 /* Underlying surface is compressed but it is sampled using a format that
477 * the sampling engine doesn't support as compressed. Compression must be
478 * disabled for both sampling engine and data port in case the same surface
479 * is also used as a render target.
480 */
481 return brw->draw_aux_buffer_disabled[rb_index];
482 }
483
484 static bool
485 brw_disable_aux_surface(const struct brw_context *brw,
486 const struct intel_mipmap_tree *mt,
487 const struct isl_view *view)
488 {
489 /* Nothing to disable. */
490 if (!mt->mcs_buf)
491 return false;
492
493 const bool is_unresolved = intel_miptree_has_color_unresolved(
494 mt, view->base_level, view->levels,
495 view->base_array_layer, view->array_len);
496
497 /* There are special cases only for lossless compression. */
498 if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
499 return !is_unresolved;
500
501 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
502 const unsigned rb_index = brw_find_matching_rb(fb, mt);
503
504 /* If we are drawing into this with compression enabled, then we must also
505 * enable compression when texturing from it regardless of
506 * fast_clear_state. If we don't, then after the first draw call with
507 * this setup, there will be data in the CCS which won't get picked up by
508 * subsequent texturing operations as required by ARB_texture_barrier.
509 * Since we don't want to re-emit the binding table or do a resolve
510 * operation every draw call, the easiest thing to do is just enable
511 * compression on the texturing side. This is completely safe to do
512 * since, if compressed texturing weren't allowed, we would have disabled
513 * compression of render targets in whatever_that_function_is_called().
514 */
515 if (rb_index < fb->_NumColorDrawBuffers) {
516 if (brw->draw_aux_buffer_disabled[rb_index]) {
517 assert(!is_unresolved);
518 }
519
520 return brw->draw_aux_buffer_disabled[rb_index];
521 }
522
523 return !is_unresolved;
524 }
525
526 void
527 brw_update_texture_surface(struct gl_context *ctx,
528 unsigned unit,
529 uint32_t *surf_offset,
530 bool for_gather,
531 uint32_t plane)
532 {
533 struct brw_context *brw = brw_context(ctx);
534 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
535
536 if (obj->Target == GL_TEXTURE_BUFFER) {
537 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
538
539 } else {
540 struct intel_texture_object *intel_obj = intel_texture_object(obj);
541 struct intel_mipmap_tree *mt = intel_obj->mt;
542
543 if (plane > 0) {
544 if (mt->plane[plane - 1] == NULL)
545 return;
546 mt = mt->plane[plane - 1];
547 }
548
549 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
550 /* If this is a view with restricted NumLayers, then our effective depth
551 * is not just the miptree depth.
552 */
553 unsigned view_num_layers;
554 if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
555 view_num_layers = obj->NumLayers;
556 } else {
557 view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
558 mt->surf.logical_level0_px.depth :
559 mt->surf.logical_level0_px.array_len;
560 }
561
562 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
563 * texturing functions that return a float, as our code generation always
564 * selects the .x channel (which would always be 0).
565 */
566 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
567 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
568 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
569 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
570 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
571 brw_get_texture_swizzle(&brw->ctx, obj));
572
573 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
574 enum isl_format format = translate_tex_format(brw, mesa_fmt,
575 sampler->sRGBDecode);
576
577 /* Implement gen6 and gen7 gather work-around */
578 bool need_green_to_blue = false;
579 if (for_gather) {
580 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
581 format == ISL_FORMAT_R32G32_SINT ||
582 format == ISL_FORMAT_R32G32_UINT)) {
583 format = ISL_FORMAT_R32G32_FLOAT_LD;
584 need_green_to_blue = brw->is_haswell;
585 } else if (brw->gen == 6) {
586 /* Sandybridge's gather4 message is broken for integer formats.
587 * To work around this, we pretend the surface is UNORM for
588 * 8 or 16-bit formats, and emit shader instructions to recover
589 * the real INT/UINT value. For 32-bit formats, we pretend
590 * the surface is FLOAT, and simply reinterpret the resulting
591 * bits.
592 */
593 switch (format) {
594 case ISL_FORMAT_R8_SINT:
595 case ISL_FORMAT_R8_UINT:
596 format = ISL_FORMAT_R8_UNORM;
597 break;
598
599 case ISL_FORMAT_R16_SINT:
600 case ISL_FORMAT_R16_UINT:
601 format = ISL_FORMAT_R16_UNORM;
602 break;
603
604 case ISL_FORMAT_R32_SINT:
605 case ISL_FORMAT_R32_UINT:
606 format = ISL_FORMAT_R32_FLOAT;
607 break;
608
609 default:
610 break;
611 }
612 }
613 }
614
615 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
616 if (brw->gen <= 7) {
617 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
618 mt = mt->r8stencil_mt;
619 } else {
620 mt = mt->stencil_mt;
621 }
622 format = ISL_FORMAT_R8_UINT;
623 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
624 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
625 mt = mt->r8stencil_mt;
626 format = ISL_FORMAT_R8_UINT;
627 }
628
629 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
630
631 struct isl_view view = {
632 .format = format,
633 .base_level = obj->MinLevel + obj->BaseLevel,
634 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
635 .base_array_layer = obj->MinLayer,
636 .array_len = view_num_layers,
637 .swizzle = {
638 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
639 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
640 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
641 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
642 },
643 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
644 };
645
646 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
647 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
648 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
649
650 assert(brw_texture_view_sane(brw, mt, &view));
651
652 const int flags = brw_disable_aux_surface(brw, mt, &view) ?
653 INTEL_AUX_BUFFER_DISABLED : 0;
654 brw_emit_surface_state(brw, mt, flags, mt->target, view,
655 tex_mocs[brw->gen],
656 surf_offset, surf_index,
657 I915_GEM_DOMAIN_SAMPLER, 0);
658 }
659 }
660
661 void
662 brw_emit_buffer_surface_state(struct brw_context *brw,
663 uint32_t *out_offset,
664 struct brw_bo *bo,
665 unsigned buffer_offset,
666 unsigned surface_format,
667 unsigned buffer_size,
668 unsigned pitch,
669 bool rw)
670 {
671 uint32_t *dw = brw_state_batch(brw,
672 brw->isl_dev.ss.size,
673 brw->isl_dev.ss.align,
674 out_offset);
675
676 isl_buffer_fill_state(&brw->isl_dev, dw,
677 .address = (bo ? bo->offset64 : 0) + buffer_offset,
678 .size = buffer_size,
679 .format = surface_format,
680 .stride = pitch,
681 .mocs = tex_mocs[brw->gen]);
682
683 if (bo) {
684 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
685 bo, buffer_offset,
686 I915_GEM_DOMAIN_SAMPLER,
687 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
688 }
689 }
690
691 void
692 brw_update_buffer_texture_surface(struct gl_context *ctx,
693 unsigned unit,
694 uint32_t *surf_offset)
695 {
696 struct brw_context *brw = brw_context(ctx);
697 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
698 struct intel_buffer_object *intel_obj =
699 intel_buffer_object(tObj->BufferObject);
700 uint32_t size = tObj->BufferSize;
701 struct brw_bo *bo = NULL;
702 mesa_format format = tObj->_BufferObjectFormat;
703 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
704 int texel_size = _mesa_get_format_bytes(format);
705
706 if (intel_obj) {
707 size = MIN2(size, intel_obj->Base.Size);
708 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
709 false);
710 }
711
712 /* The ARB_texture_buffer_object spec says:
713 *
714 * "The number of texels in the buffer texture's texel array is given by
715 *
716 * floor(<buffer_size> / (<components> * sizeof(<base_type>))),
717 *
718 * where <buffer_size> is the size of the buffer object, in basic
719 * machine units and <components> and <base_type> are the element count
720 * and base data type for elements, as specified in Table X.1. The
721 * number of texels in the texel array is then clamped to the
722 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
723 *
724 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
725 * so that when ISL divides by stride to obtain the number of texels, that
726 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
727 */
728 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
729
730 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
731 _mesa_problem(NULL, "bad format %s for texture buffer\n",
732 _mesa_get_format_name(format));
733 }
734
735 brw_emit_buffer_surface_state(brw, surf_offset, bo,
736 tObj->BufferOffset,
737 isl_format,
738 size,
739 texel_size,
740 false /* rw */);
741 }
742
743 /**
744 * Create the constant buffer surface. Vertex/fragment shader constants will be
745 * read from this buffer with Data Port Read instructions/messages.
746 */
747 void
748 brw_create_constant_surface(struct brw_context *brw,
749 struct brw_bo *bo,
750 uint32_t offset,
751 uint32_t size,
752 uint32_t *out_offset)
753 {
754 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
755 ISL_FORMAT_R32G32B32A32_FLOAT,
756 size, 1, false);
757 }
758
759 /**
760 * Create the buffer surface. Shader buffer variables will be
761 * read from / written to this buffer with Data Port Read/Write
762 * instructions/messages.
763 */
764 void
765 brw_create_buffer_surface(struct brw_context *brw,
766 struct brw_bo *bo,
767 uint32_t offset,
768 uint32_t size,
769 uint32_t *out_offset)
770 {
771 /* Use a raw surface so we can reuse existing untyped read/write/atomic
772 * messages. We need these specifically for the fragment shader since they
773 * include a pixel mask header that we need to ensure correct behavior
774 * with helper invocations, which cannot write to the buffer.
775 */
776 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
777 ISL_FORMAT_RAW,
778 size, 1, true);
779 }
780
781 /**
782 * Set up a binding table entry for use by stream output logic (transform
783 * feedback).
784 *
785 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
786 */
787 void
788 brw_update_sol_surface(struct brw_context *brw,
789 struct gl_buffer_object *buffer_obj,
790 uint32_t *out_offset, unsigned num_vector_components,
791 unsigned stride_dwords, unsigned offset_dwords)
792 {
793 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
794 uint32_t offset_bytes = 4 * offset_dwords;
795 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
796 offset_bytes,
797 buffer_obj->Size - offset_bytes,
798 true);
799 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
800 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
801 size_t size_dwords = buffer_obj->Size / 4;
802 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
803
804 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
805 * too big to map using a single binding table entry?
806 */
807 assert((size_dwords - offset_dwords) / stride_dwords
808 <= BRW_MAX_NUM_BUFFER_ENTRIES);
809
810 if (size_dwords > offset_dwords + num_vector_components) {
811 /* There is room for at least 1 transform feedback output in the buffer.
812 * Compute the number of additional transform feedback outputs the
813 * buffer has room for.
814 */
815 buffer_size_minus_1 =
816 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
817 } else {
818 /* There isn't even room for a single transform feedback output in the
819 * buffer. We can't configure the binding table entry to prevent output
820 * entirely; we'll have to rely on the geometry shader to detect
821 * overflow. But to minimize the damage in case of a bug, set up the
822 * binding table entry to just allow a single output.
823 */
824 buffer_size_minus_1 = 0;
825 }
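/* The entry count minus one is split across the surface Width, Height and
 * Depth fields: the low 7 bits go into Width, the next 13 into Height and
 * the top 7 into Depth.
 */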
826 width = buffer_size_minus_1 & 0x7f;
827 height = (buffer_size_minus_1 & 0xfff80) >> 7;
828 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
829
830 switch (num_vector_components) {
831 case 1:
832 surface_format = ISL_FORMAT_R32_FLOAT;
833 break;
834 case 2:
835 surface_format = ISL_FORMAT_R32G32_FLOAT;
836 break;
837 case 3:
838 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
839 break;
840 case 4:
841 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
842 break;
843 default:
844 unreachable("Invalid vector size for transform feedback output");
845 }
846
847 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
848 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
849 surface_format << BRW_SURFACE_FORMAT_SHIFT |
850 BRW_SURFACE_RC_READ_WRITE;
851 surf[1] = bo->offset64 + offset_bytes; /* reloc */
852 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
853 height << BRW_SURFACE_HEIGHT_SHIFT);
854 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
855 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
856 surf[4] = 0;
857 surf[5] = 0;
858
859 /* Emit relocation to surface contents. */
860 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
861 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
862 }
863
864 /* Creates a new WM constant buffer reflecting the current fragment program's
865 * constants, if needed by the fragment program.
866 *
867 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
868 * state atom.
869 */
870 static void
871 brw_upload_wm_pull_constants(struct brw_context *brw)
872 {
873 struct brw_stage_state *stage_state = &brw->wm.base;
874 /* BRW_NEW_FRAGMENT_PROGRAM */
875 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
876 /* BRW_NEW_FS_PROG_DATA */
877 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
878
879 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
880 /* _NEW_PROGRAM_CONSTANTS */
881 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
882 stage_state, prog_data);
883 }
884
885 const struct brw_tracked_state brw_wm_pull_constants = {
886 .dirty = {
887 .mesa = _NEW_PROGRAM_CONSTANTS,
888 .brw = BRW_NEW_BATCH |
889 BRW_NEW_BLORP |
890 BRW_NEW_FRAGMENT_PROGRAM |
891 BRW_NEW_FS_PROG_DATA,
892 },
893 .emit = brw_upload_wm_pull_constants,
894 };
895
896 /**
897 * Creates a null renderbuffer surface.
898 *
899 * This is used when the shader doesn't write to any color output. An FB
900 * write to target 0 will still be emitted, because that's how the thread is
901 * terminated (and computed depth is returned), so we need to have the
902 * hardware discard the target 0 color output.
903 */
904 static void
905 brw_emit_null_surface_state(struct brw_context *brw,
906 unsigned width,
907 unsigned height,
908 unsigned samples,
909 uint32_t *out_offset)
910 {
911 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
912 * Notes):
913 *
914 * A null surface will be used in instances where an actual surface is
915 * not bound. When a write message is generated to a null surface, no
916 * actual surface is written to. When a read message (including any
917 * sampling engine message) is generated to a null surface, the result
918 * is all zeros. Note that a null surface type is allowed to be used
919 * with all messages, even if it is not specifically indicated as
920 * supported. All of the remaining fields in surface state are ignored
921 * for null surfaces, with the following exceptions:
922 *
923 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
924 * depth buffer’s corresponding state for all render target surfaces,
925 * including null.
926 *
927 * - Surface Format must be R8G8B8A8_UNORM.
928 */
929 unsigned surface_type = BRW_SURFACE_NULL;
930 struct brw_bo *bo = NULL;
931 unsigned pitch_minus_1 = 0;
932 uint32_t multisampling_state = 0;
933 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
934
935 if (samples > 1) {
936 /* On Gen6, null render targets seem to cause GPU hangs when
937 * multisampling. So work around this problem by rendering into a dummy
938 * color buffer.
939 *
940 * To decrease the amount of memory needed by the workaround buffer, we
941 * set its pitch to 128 bytes (the width of a Y tile). This means that
942 * the amount of memory needed for the workaround buffer is
943 * (width_in_tiles + height_in_tiles - 1) tiles.
944 *
945 * Note that since the workaround buffer will be interpreted by the
946 * hardware as an interleaved multisampled buffer, we need to compute
947 * width_in_tiles and height_in_tiles by dividing the width and height
948 * by 16 rather than the normal Y-tile size of 32.
949 */
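/* For example, a 1920x1080 multisampled framebuffer needs only
 * (120 + 68 - 1) * 4096 bytes (roughly 748 KiB) for the workaround buffer.
 */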
950 unsigned width_in_tiles = ALIGN(width, 16) / 16;
951 unsigned height_in_tiles = ALIGN(height, 16) / 16;
952 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
953 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
954 size_needed);
955 bo = brw->wm.multisampled_null_render_target_bo;
956 surface_type = BRW_SURFACE_2D;
957 pitch_minus_1 = 127;
958 multisampling_state = brw_get_surface_num_multisamples(samples);
959 }
960
961 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
962 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
963 if (brw->gen < 6) {
964 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
965 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
966 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
967 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
968 }
969 surf[1] = bo ? bo->offset64 : 0;
970 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
971 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
972
973 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
974 * Notes):
975 *
976 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
977 */
978 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
979 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
980 surf[4] = multisampling_state;
981 surf[5] = 0;
982
983 if (bo) {
984 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
985 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
986 }
987 }
988
989 /**
990 * Sets up a surface state structure to point at the given region.
991 * While it is only used for the front/back buffer currently, it should be
992 * usable for further buffers when doing ARB_draw_buffer support.
993 */
994 static uint32_t
995 gen4_update_renderbuffer_surface(struct brw_context *brw,
996 struct gl_renderbuffer *rb,
997 uint32_t flags, unsigned unit,
998 uint32_t surf_index)
999 {
1000 struct gl_context *ctx = &brw->ctx;
1001 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1002 struct intel_mipmap_tree *mt = irb->mt;
1003 uint32_t *surf;
1004 uint32_t tile_x, tile_y;
1005 enum isl_format format;
1006 uint32_t offset;
1007 /* _NEW_BUFFERS */
1008 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1009 /* BRW_NEW_FS_PROG_DATA */
1010
1011 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1012 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1013
1014 if (rb->TexImage && !brw->has_surface_tile_offset) {
1015 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1016
1017 if (tile_x != 0 || tile_y != 0) {
1018 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1019 * destination in a miptree unless you actually setup your renderbuffer
1020 * as a miptree and used the fragile lod/array_index/etc. controls to
1021 * select the image. So, instead, we just make a new single-level
1022 * miptree and render into that.
1023 */
1024 intel_renderbuffer_move_to_temp(brw, irb, false);
1025 assert(irb->align_wa_mt);
1026 mt = irb->align_wa_mt;
1027 }
1028 }
1029
1030 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1031
1032 format = brw->mesa_to_isl_render_format[rb_format];
1033 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1034 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1035 __func__, _mesa_get_format_name(rb_format));
1036 }
1037
1038 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1039 format << BRW_SURFACE_FORMAT_SHIFT);
1040
1041 /* reloc */
1042 assert(mt->offset % mt->cpp == 0);
1043 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1044 mt->bo->offset64 + mt->offset);
1045
1046 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1047 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1048
1049 surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
1050 (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1051
1052 surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
1053
1054 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1055 /* Note that the low bits of these fields are missing, so
1056 * there's the possibility of getting in trouble.
1057 */
1058 assert(tile_x % 4 == 0);
1059 assert(tile_y % 2 == 0);
1060 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1061 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1062 (mt->surf.image_alignment_el.height == 4 ?
1063 BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1064
1065 if (brw->gen < 6) {
1066 /* _NEW_COLOR */
1067 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1068 (ctx->Color.BlendEnabled & (1 << unit)))
1069 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1070
1071 if (!ctx->Color.ColorMask[unit][0])
1072 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1073 if (!ctx->Color.ColorMask[unit][1])
1074 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1075 if (!ctx->Color.ColorMask[unit][2])
1076 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1077
1078 /* As mentioned above, disable writes to the alpha component when the
1079 * renderbuffer is XRGB.
1080 */
1081 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1082 !ctx->Color.ColorMask[unit][3]) {
1083 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1084 }
1085 }
1086
1087 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1088 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1089
1090 return offset;
1091 }
1092
1093 /**
1094 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1095 */
1096 void
1097 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1098 const struct gl_framebuffer *fb,
1099 uint32_t render_target_start,
1100 uint32_t *surf_offset)
1101 {
1102 GLuint i;
1103 const unsigned int w = _mesa_geometric_width(fb);
1104 const unsigned int h = _mesa_geometric_height(fb);
1105 const unsigned int s = _mesa_geometric_samples(fb);
1106
1107 /* Update surfaces for drawing buffers */
1108 if (fb->_NumColorDrawBuffers >= 1) {
1109 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1110 const uint32_t surf_index = render_target_start + i;
1111 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1112 INTEL_RENDERBUFFER_LAYERED : 0) |
1113 (brw->draw_aux_buffer_disabled[i] ?
1114 INTEL_AUX_BUFFER_DISABLED : 0);
1115
1116 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1117 surf_offset[surf_index] =
1118 brw->vtbl.update_renderbuffer_surface(
1119 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1120 } else {
1121 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1122 &surf_offset[surf_index]);
1123 }
1124 }
1125 } else {
1126 const uint32_t surf_index = render_target_start;
1127 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1128 &surf_offset[surf_index]);
1129 }
1130 }
1131
1132 static void
1133 update_renderbuffer_surfaces(struct brw_context *brw)
1134 {
1135 const struct gl_context *ctx = &brw->ctx;
1136
1137 /* BRW_NEW_FS_PROG_DATA */
1138 const struct brw_wm_prog_data *wm_prog_data =
1139 brw_wm_prog_data(brw->wm.base.prog_data);
1140
1141 /* _NEW_BUFFERS | _NEW_COLOR */
1142 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1143 brw_update_renderbuffer_surfaces(
1144 brw, fb,
1145 wm_prog_data->binding_table.render_target_start,
1146 brw->wm.base.surf_offset);
1147 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1148 }
1149
1150 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1151 .dirty = {
1152 .mesa = _NEW_BUFFERS |
1153 _NEW_COLOR,
1154 .brw = BRW_NEW_BATCH |
1155 BRW_NEW_BLORP |
1156 BRW_NEW_FS_PROG_DATA,
1157 },
1158 .emit = update_renderbuffer_surfaces,
1159 };
1160
1161 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1162 .dirty = {
1163 .mesa = _NEW_BUFFERS,
1164 .brw = BRW_NEW_BATCH |
1165 BRW_NEW_BLORP,
1166 },
1167 .emit = update_renderbuffer_surfaces,
1168 };
1169
1170 static void
1171 update_renderbuffer_read_surfaces(struct brw_context *brw)
1172 {
1173 const struct gl_context *ctx = &brw->ctx;
1174
1175 /* BRW_NEW_FS_PROG_DATA */
1176 const struct brw_wm_prog_data *wm_prog_data =
1177 brw_wm_prog_data(brw->wm.base.prog_data);
1178
1179 /* BRW_NEW_FRAGMENT_PROGRAM */
1180 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1181 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1182 /* _NEW_BUFFERS */
1183 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1184
1185 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1186 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1187 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1188 const unsigned surf_index =
1189 wm_prog_data->binding_table.render_target_read_start + i;
1190 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1191
1192 if (irb) {
1193 const enum isl_format format = brw->mesa_to_isl_render_format[
1194 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1195 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1196 format));
1197
1198 /* Override the target of the texture if the render buffer is a
1199 * single slice of a 3D texture (since the minimum array element
1200 * field of the surface state structure is ignored by the sampler
1201 * unit for 3D textures on some hardware), or if the render buffer
1202 * is a 1D array (since shaders always provide the array index
1203 * coordinate at the Z component to avoid state-dependent
1204 * recompiles when changing the texture target of the
1205 * framebuffer).
1206 */
1207 const GLenum target =
1208 (irb->mt->target == GL_TEXTURE_3D &&
1209 irb->layer_count == 1) ? GL_TEXTURE_2D :
1210 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1211 irb->mt->target;
1212
1213 const struct isl_view view = {
1214 .format = format,
1215 .base_level = irb->mt_level - irb->mt->first_level,
1216 .levels = 1,
1217 .base_array_layer = irb->mt_layer,
1218 .array_len = irb->layer_count,
1219 .swizzle = ISL_SWIZZLE_IDENTITY,
1220 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1221 };
1222
1223 const int flags = brw->draw_aux_buffer_disabled[i] ?
1224 INTEL_AUX_BUFFER_DISABLED : 0;
1225 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1226 tex_mocs[brw->gen],
1227 surf_offset, surf_index,
1228 I915_GEM_DOMAIN_SAMPLER, 0);
1229
1230 } else {
1231 brw->vtbl.emit_null_surface_state(
1232 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1233 _mesa_geometric_samples(fb), surf_offset);
1234 }
1235 }
1236
1237 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1238 }
1239 }
1240
1241 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1242 .dirty = {
1243 .mesa = _NEW_BUFFERS,
1244 .brw = BRW_NEW_BATCH |
1245 BRW_NEW_FRAGMENT_PROGRAM |
1246 BRW_NEW_FS_PROG_DATA,
1247 },
1248 .emit = update_renderbuffer_read_surfaces,
1249 };
1250
1251 static void
1252 update_stage_texture_surfaces(struct brw_context *brw,
1253 const struct gl_program *prog,
1254 struct brw_stage_state *stage_state,
1255 bool for_gather, uint32_t plane)
1256 {
1257 if (!prog)
1258 return;
1259
1260 struct gl_context *ctx = &brw->ctx;
1261
1262 uint32_t *surf_offset = stage_state->surf_offset;
1263
1264 /* BRW_NEW_*_PROG_DATA */
1265 if (for_gather)
1266 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1267 else
1268 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1269
1270 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1271 for (unsigned s = 0; s < num_samplers; s++) {
1272 surf_offset[s] = 0;
1273
1274 if (prog->SamplersUsed & (1 << s)) {
1275 const unsigned unit = prog->SamplerUnits[s];
1276
1277 /* _NEW_TEXTURE */
1278 if (ctx->Texture.Unit[unit]._Current) {
1279 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1280 }
1281 }
1282 }
1283 }
1284
1285
1286 /**
1287 * Construct SURFACE_STATE objects for enabled textures.
1288 */
1289 static void
1290 brw_update_texture_surfaces(struct brw_context *brw)
1291 {
1292 /* BRW_NEW_VERTEX_PROGRAM */
1293 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1294
1295 /* BRW_NEW_TESS_PROGRAMS */
1296 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1297 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1298
1299 /* BRW_NEW_GEOMETRY_PROGRAM */
1300 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1301
1302 /* BRW_NEW_FRAGMENT_PROGRAM */
1303 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1304
1305 /* _NEW_TEXTURE */
1306 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1307 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1308 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1309 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1310 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1311
1312 /* Emit an alternate set of surface state for gather. This
1313 * allows the surface format to be overridden for only the
1314 * gather4 messages. */
1315 if (brw->gen < 8) {
1316 if (vs && vs->nir->info.uses_texture_gather)
1317 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1318 if (tcs && tcs->nir->info.uses_texture_gather)
1319 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1320 if (tes && tes->nir->info.uses_texture_gather)
1321 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1322 if (gs && gs->nir->info.uses_texture_gather)
1323 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1324 if (fs && fs->nir->info.uses_texture_gather)
1325 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1326 }
1327
1328 if (fs) {
1329 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1330 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1331 }
1332
1333 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1334 }
1335
1336 const struct brw_tracked_state brw_texture_surfaces = {
1337 .dirty = {
1338 .mesa = _NEW_TEXTURE,
1339 .brw = BRW_NEW_BATCH |
1340 BRW_NEW_BLORP |
1341 BRW_NEW_FRAGMENT_PROGRAM |
1342 BRW_NEW_FS_PROG_DATA |
1343 BRW_NEW_GEOMETRY_PROGRAM |
1344 BRW_NEW_GS_PROG_DATA |
1345 BRW_NEW_TESS_PROGRAMS |
1346 BRW_NEW_TCS_PROG_DATA |
1347 BRW_NEW_TES_PROG_DATA |
1348 BRW_NEW_TEXTURE_BUFFER |
1349 BRW_NEW_VERTEX_PROGRAM |
1350 BRW_NEW_VS_PROG_DATA,
1351 },
1352 .emit = brw_update_texture_surfaces,
1353 };
1354
1355 static void
1356 brw_update_cs_texture_surfaces(struct brw_context *brw)
1357 {
1358 /* BRW_NEW_COMPUTE_PROGRAM */
1359 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1360
1361 /* _NEW_TEXTURE */
1362 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1363
1364 /* Emit an alternate set of surface state for gather. This
1365 * allows the surface format to be overridden for only the
1366 * gather4 messages.
1367 */
1368 if (brw->gen < 8) {
1369 if (cs && cs->nir->info.uses_texture_gather)
1370 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1371 }
1372
1373 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1374 }
1375
1376 const struct brw_tracked_state brw_cs_texture_surfaces = {
1377 .dirty = {
1378 .mesa = _NEW_TEXTURE,
1379 .brw = BRW_NEW_BATCH |
1380 BRW_NEW_BLORP |
1381 BRW_NEW_COMPUTE_PROGRAM,
1382 },
1383 .emit = brw_update_cs_texture_surfaces,
1384 };
1385
1386
1387 void
1388 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1389 struct brw_stage_state *stage_state,
1390 struct brw_stage_prog_data *prog_data)
1391 {
1392 struct gl_context *ctx = &brw->ctx;
1393
1394 if (!prog)
1395 return;
1396
1397 uint32_t *ubo_surf_offsets =
1398 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1399
1400 for (int i = 0; i < prog->info.num_ubos; i++) {
1401 struct gl_uniform_buffer_binding *binding =
1402 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1403
1404 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1405 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1406 } else {
1407 struct intel_buffer_object *intel_bo =
1408 intel_buffer_object(binding->BufferObject);
1409 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1410 if (!binding->AutomaticSize)
1411 size = MIN2(size, binding->Size);
1412 struct brw_bo *bo =
1413 intel_bufferobj_buffer(brw, intel_bo,
1414 binding->Offset,
1415 size, false);
1416 brw_create_constant_surface(brw, bo, binding->Offset,
1417 size,
1418 &ubo_surf_offsets[i]);
1419 }
1420 }
1421
1422 uint32_t *ssbo_surf_offsets =
1423 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1424
1425 for (int i = 0; i < prog->info.num_ssbos; i++) {
1426 struct gl_shader_storage_buffer_binding *binding =
1427 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1428
1429 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1430 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1431 } else {
1432 struct intel_buffer_object *intel_bo =
1433 intel_buffer_object(binding->BufferObject);
1434 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1435 if (!binding->AutomaticSize)
1436 size = MIN2(size, binding->Size);
1437 struct brw_bo *bo =
1438 intel_bufferobj_buffer(brw, intel_bo,
1439 binding->Offset,
1440 size, true);
1441 brw_create_buffer_surface(brw, bo, binding->Offset,
1442 size,
1443 &ssbo_surf_offsets[i]);
1444 }
1445 }
1446
1447 stage_state->push_constants_dirty = true;
1448
1449 if (prog->info.num_ubos || prog->info.num_ssbos)
1450 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1451 }
1452
1453 static void
1454 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1455 {
1456 struct gl_context *ctx = &brw->ctx;
1457 /* _NEW_PROGRAM */
1458 struct gl_program *prog = ctx->FragmentProgram._Current;
1459
1460 /* BRW_NEW_FS_PROG_DATA */
1461 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1462 }
1463
1464 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1465 .dirty = {
1466 .mesa = _NEW_PROGRAM,
1467 .brw = BRW_NEW_BATCH |
1468 BRW_NEW_BLORP |
1469 BRW_NEW_FS_PROG_DATA |
1470 BRW_NEW_UNIFORM_BUFFER,
1471 },
1472 .emit = brw_upload_wm_ubo_surfaces,
1473 };
1474
1475 static void
1476 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1477 {
1478 struct gl_context *ctx = &brw->ctx;
1479 /* _NEW_PROGRAM */
1480 struct gl_program *prog =
1481 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1482
1483 /* BRW_NEW_CS_PROG_DATA */
1484 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1485 }
1486
1487 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1488 .dirty = {
1489 .mesa = _NEW_PROGRAM,
1490 .brw = BRW_NEW_BATCH |
1491 BRW_NEW_BLORP |
1492 BRW_NEW_CS_PROG_DATA |
1493 BRW_NEW_UNIFORM_BUFFER,
1494 },
1495 .emit = brw_upload_cs_ubo_surfaces,
1496 };
1497
1498 void
1499 brw_upload_abo_surfaces(struct brw_context *brw,
1500 const struct gl_program *prog,
1501 struct brw_stage_state *stage_state,
1502 struct brw_stage_prog_data *prog_data)
1503 {
1504 struct gl_context *ctx = &brw->ctx;
1505 uint32_t *surf_offsets =
1506 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1507
1508 if (prog->info.num_abos) {
1509 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1510 struct gl_atomic_buffer_binding *binding =
1511 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1512 struct intel_buffer_object *intel_bo =
1513 intel_buffer_object(binding->BufferObject);
1514 struct brw_bo *bo =
1515 intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1516 intel_bo->Base.Size - binding->Offset,
1517 true);
1518
1519 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1520 binding->Offset, ISL_FORMAT_RAW,
1521 bo->size - binding->Offset, 1, true);
1522 }
1523
1524 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1525 }
1526 }
1527
1528 static void
1529 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1530 {
1531 /* _NEW_PROGRAM */
1532 const struct gl_program *wm = brw->fragment_program;
1533
1534 if (wm) {
1535 /* BRW_NEW_FS_PROG_DATA */
1536 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1537 }
1538 }
1539
1540 const struct brw_tracked_state brw_wm_abo_surfaces = {
1541 .dirty = {
1542 .mesa = _NEW_PROGRAM,
1543 .brw = BRW_NEW_ATOMIC_BUFFER |
1544 BRW_NEW_BLORP |
1545 BRW_NEW_BATCH |
1546 BRW_NEW_FS_PROG_DATA,
1547 },
1548 .emit = brw_upload_wm_abo_surfaces,
1549 };
1550
1551 static void
1552 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1553 {
1554 /* _NEW_PROGRAM */
1555 const struct gl_program *cp = brw->compute_program;
1556
1557 if (cp) {
1558 /* BRW_NEW_CS_PROG_DATA */
1559 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1560 }
1561 }
1562
1563 const struct brw_tracked_state brw_cs_abo_surfaces = {
1564 .dirty = {
1565 .mesa = _NEW_PROGRAM,
1566 .brw = BRW_NEW_ATOMIC_BUFFER |
1567 BRW_NEW_BLORP |
1568 BRW_NEW_BATCH |
1569 BRW_NEW_CS_PROG_DATA,
1570 },
1571 .emit = brw_upload_cs_abo_surfaces,
1572 };
1573
1574 static void
1575 brw_upload_cs_image_surfaces(struct brw_context *brw)
1576 {
1577 /* _NEW_PROGRAM */
1578 const struct gl_program *cp = brw->compute_program;
1579
1580 if (cp) {
1581 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1582 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1583 brw->cs.base.prog_data);
1584 }
1585 }
1586
1587 const struct brw_tracked_state brw_cs_image_surfaces = {
1588 .dirty = {
1589 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1590 .brw = BRW_NEW_BATCH |
1591 BRW_NEW_BLORP |
1592 BRW_NEW_CS_PROG_DATA |
1593 BRW_NEW_IMAGE_UNITS
1594 },
1595 .emit = brw_upload_cs_image_surfaces,
1596 };
1597
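/* Pick the hardware surface format used to access a shader image.  Write-only
 * access can use the requested format directly; reads may need to be lowered
 * to a compatible typed format or fall back to raw untyped access.
 */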
1598 static uint32_t
1599 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1600 {
1601 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1602 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1603 if (access == GL_WRITE_ONLY) {
1604 return hw_format;
1605 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1606 /* Typed surface reads support a very limited subset of the shader
1607 * image formats. Translate it into the closest format the
1608 * hardware supports.
1609 */
1610 return isl_lower_storage_image_format(devinfo, hw_format);
1611 } else {
1612 /* The hardware doesn't actually support a typed format that we can use
1613 * so we have to fall back to untyped read/write messages.
1614 */
1615 return ISL_FORMAT_RAW;
1616 }
1617 }
1618
1619 static void
1620 update_default_image_param(struct brw_context *brw,
1621 struct gl_image_unit *u,
1622 unsigned surface_idx,
1623 struct brw_image_param *param)
1624 {
1625 memset(param, 0, sizeof(*param));
1626 param->surface_idx = surface_idx;
1627 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1628 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1629 * detailed explanation of these parameters.
1630 */
1631 param->swizzling[0] = 0xff;
1632 param->swizzling[1] = 0xff;
1633 }
1634
1635 static void
1636 update_buffer_image_param(struct brw_context *brw,
1637 struct gl_image_unit *u,
1638 unsigned surface_idx,
1639 struct brw_image_param *param)
1640 {
1641 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1642 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1643 update_default_image_param(brw, u, surface_idx, param);
1644
1645 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1646 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1647 }
1648
1649 static unsigned
1650 get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
1651 unsigned level)
1652 {
1653 if (target == GL_TEXTURE_CUBE_MAP)
1654 return 6;
1655
1656 return target == GL_TEXTURE_3D ?
1657 minify(mt->surf.logical_level0_px.depth, level) :
1658 mt->surf.logical_level0_px.array_len;
1659 }
1660
1661 static void
1662 update_image_surface(struct brw_context *brw,
1663 struct gl_image_unit *u,
1664 GLenum access,
1665 unsigned surface_idx,
1666 uint32_t *surf_offset,
1667 struct brw_image_param *param)
1668 {
1669 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1670 struct gl_texture_object *obj = u->TexObj;
1671 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1672
1673 if (obj->Target == GL_TEXTURE_BUFFER) {
1674 struct intel_buffer_object *intel_obj =
1675 intel_buffer_object(obj->BufferObject);
1676 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1677 _mesa_get_format_bytes(u->_ActualFormat));
1678
1679 brw_emit_buffer_surface_state(
1680 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1681 format, intel_obj->Base.Size, texel_size,
1682 access != GL_READ_ONLY);
1683
1684 update_buffer_image_param(brw, u, surface_idx, param);
1685
1686 } else {
1687 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1688 struct intel_mipmap_tree *mt = intel_obj->mt;
1689 const unsigned num_layers = u->Layered ?
1690 get_image_num_layers(mt, obj->Target, u->Level) : 1;
1691
1692 struct isl_view view = {
1693 .format = format,
1694 .base_level = obj->MinLevel + u->Level,
1695 .levels = 1,
1696 .base_array_layer = obj->MinLayer + u->_Layer,
1697 .array_len = num_layers,
1698 .swizzle = ISL_SWIZZLE_IDENTITY,
1699 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1700 };
1701
1702 if (format == ISL_FORMAT_RAW) {
1703 brw_emit_buffer_surface_state(
1704 brw, surf_offset, mt->bo, mt->offset,
1705 format, mt->bo->size - mt->offset, 1 /* pitch */,
1706 access != GL_READ_ONLY);
1707
1708 } else {
1709 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1710 assert(!intel_miptree_has_color_unresolved(mt,
1711 view.base_level, 1,
1712 view.base_array_layer,
1713 view.array_len));
1714 brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1715 mt->target, view, tex_mocs[brw->gen],
1716 surf_offset, surf_index,
1717 I915_GEM_DOMAIN_SAMPLER,
1718 access == GL_READ_ONLY ? 0 :
1719 I915_GEM_DOMAIN_SAMPLER);
1720 }
1721
1722 isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
1723 param->surface_idx = surface_idx;
1724 }
1725
1726 } else {
1727 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1728 update_default_image_param(brw, u, surface_idx, param);
1729 }
1730 }
1731
1732 void
1733 brw_upload_image_surfaces(struct brw_context *brw,
1734 const struct gl_program *prog,
1735 struct brw_stage_state *stage_state,
1736 struct brw_stage_prog_data *prog_data)
1737 {
1738 assert(prog);
1739 struct gl_context *ctx = &brw->ctx;
1740
1741 if (prog->info.num_images) {
1742 for (unsigned i = 0; i < prog->info.num_images; i++) {
1743 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1744 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1745
1746 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1747 surf_idx,
1748 &stage_state->surf_offset[surf_idx],
1749 &prog_data->image_param[i]);
1750 }
1751
1752 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1753 /* This may have changed the image metadata dependent on the context
1754 * image unit state and passed to the program as uniforms, so make sure
1755 * that push and pull constants are reuploaded.
1756 */
1757 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1758 }
1759 }
1760
1761 static void
1762 brw_upload_wm_image_surfaces(struct brw_context *brw)
1763 {
1764 /* BRW_NEW_FRAGMENT_PROGRAM */
1765 const struct gl_program *wm = brw->fragment_program;
1766
1767 if (wm) {
1768 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1769 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1770 brw->wm.base.prog_data);
1771 }
1772 }
1773
1774 const struct brw_tracked_state brw_wm_image_surfaces = {
1775 .dirty = {
1776 .mesa = _NEW_TEXTURE,
1777 .brw = BRW_NEW_BATCH |
1778 BRW_NEW_BLORP |
1779 BRW_NEW_FRAGMENT_PROGRAM |
1780 BRW_NEW_FS_PROG_DATA |
1781 BRW_NEW_IMAGE_UNITS
1782 },
1783 .emit = brw_upload_wm_image_surfaces,
1784 };
1785
1786 void
1787 gen4_init_vtable_surface_functions(struct brw_context *brw)
1788 {
1789 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1790 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1791 }
1792
1793 void
1794 gen6_init_vtable_surface_functions(struct brw_context *brw)
1795 {
1796 gen4_init_vtable_surface_functions(brw);
1797 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1798 }
1799
1800 static void
1801 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1802 {
1803 struct gl_context *ctx = &brw->ctx;
1804 /* _NEW_PROGRAM */
1805 struct gl_program *prog =
1806 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1807 /* BRW_NEW_CS_PROG_DATA */
1808 const struct brw_cs_prog_data *cs_prog_data =
1809 brw_cs_prog_data(brw->cs.base.prog_data);
1810
1811 if (prog && cs_prog_data->uses_num_work_groups) {
1812 const unsigned surf_idx =
1813 cs_prog_data->binding_table.work_groups_start;
1814 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1815 struct brw_bo *bo;
1816 uint32_t bo_offset;
1817
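/* For a direct glDispatchCompute the work-group count comes from the CPU and
 * is uploaded here; for an indirect dispatch it already lives in a BO.
 */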
1818 if (brw->compute.num_work_groups_bo == NULL) {
1819 bo = NULL;
1820 intel_upload_data(brw,
1821 (void *)brw->compute.num_work_groups,
1822 3 * sizeof(GLuint),
1823 sizeof(GLuint),
1824 &bo,
1825 &bo_offset);
1826 } else {
1827 bo = brw->compute.num_work_groups_bo;
1828 bo_offset = brw->compute.num_work_groups_offset;
1829 }
1830
1831 brw_emit_buffer_surface_state(brw, surf_offset,
1832 bo, bo_offset,
1833 ISL_FORMAT_RAW,
1834 3 * sizeof(GLuint), 1, true);
1835 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1836 }
1837 }
1838
1839 const struct brw_tracked_state brw_cs_work_groups_surface = {
1840 .dirty = {
1841 .brw = BRW_NEW_BLORP |
1842 BRW_NEW_CS_PROG_DATA |
1843 BRW_NEW_CS_WORK_GROUPS
1844 },
1845 .emit = brw_upload_cs_work_groups_surface,
1846 };