src/mesa/drivers/dri/i965/brw_wm_surface_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   */
  31
  32
  33 #include "compiler/nir/nir.h"
  34 #include "main/context.h"
  35 #include "main/blend.h"
  36 #include "main/mtypes.h"
  37 #include "main/samplerobj.h"
  38 #include "main/shaderimage.h"
  39 #include "main/teximage.h"
  40 #include "program/prog_parameter.h"
  41 #include "program/prog_instruction.h"
  42 #include "main/framebuffer.h"
  43 #include "main/shaderapi.h"
  44
  45 #include "isl/isl.h"
  46
  47 #include "intel_mipmap_tree.h"
  48 #include "intel_batchbuffer.h"
  49 #include "intel_tex.h"
  50 #include "intel_fbo.h"
  51 #include "intel_buffer_objects.h"
  52
  53 #include "brw_context.h"
  54 #include "brw_state.h"
  55 #include "brw_defines.h"
  56 #include "brw_wm.h"
  57
  58 enum {
  59    INTEL_RENDERBUFFER_LAYERED = 1 << 0,
  60    INTEL_AUX_BUFFER_DISABLED = 1 << 1,
  61 };
  62
  63 uint32_t tex_mocs[] = {
  64    [7] = GEN7_MOCS_L3,
  65    [8] = BDW_MOCS_WB,
  66    [9] = SKL_MOCS_WB,
  67    [10] = CNL_MOCS_WB,
  68 };
  69
  70 uint32_t rb_mocs[] = {
  71    [7] = GEN7_MOCS_L3,
  72    [8] = BDW_MOCS_PTE,
  73    [9] = SKL_MOCS_PTE,
  74    [10] = CNL_MOCS_PTE,
  75 };
  76
  77 static void
  78 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
  79              GLenum target, struct isl_view *view,
  80              uint32_t *tile_x, uint32_t *tile_y,
  81              uint32_t *offset, struct isl_surf *surf)
  82 {
  83    if (mt->surf.size > 0) {
  84       *surf = mt->surf;
  85    } else {
  86       intel_miptree_get_isl_surf(brw, mt, surf);
  87
  88       surf->dim = get_isl_surf_dim(target);
  89    }
  90
  91    const enum isl_dim_layout dim_layout =
  92       get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target,
  93                          mt->array_layout);
  94
  95    if (surf->dim_layout == dim_layout)
  96       return;
  97
  98    /* The layout of the specified texture target is not compatible with the
  99     * actual layout of the miptree structure in memory -- You're entering
 100     * dangerous territory, this can only possibly work if you only intended
 101     * to access a single level and slice of the texture, and the hardware
 102     * supports the tile offset feature in order to allow non-tile-aligned
 103     * base offsets, since we'll have to point the hardware to the first
 104     * texel of the level instead of relying on the usual base level/layer
 105     * controls.
 106     */
 107    assert(brw->has_surface_tile_offset);
 108    assert(view->levels == 1 && view->array_len == 1);
 109    assert(*tile_x == 0 && *tile_y == 0);
 110
 111    offset += intel_miptree_get_tile_offsets(mt, view->base_level,
 112                                             view->base_array_layer,
 113                                             tile_x, tile_y);
 114
 115    /* Minify the logical dimensions of the texture. */
 116    const unsigned l = view->base_level - mt->first_level;
 117    surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
 118    surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
 119       minify(surf->logical_level0_px.height, l);
 120    surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
 121       minify(surf->logical_level0_px.depth, l);
 122
 123    /* Only the base level and layer can be addressed with the overridden
 124     * layout.
 125     */
 126    surf->logical_level0_px.array_len = 1;
 127    surf->levels = 1;
 128    surf->dim_layout = dim_layout;
 129
 130    /* The requested slice of the texture is now at the base level and
 131     * layer.
 132     */
 133    view->base_level = 0;
 134    view->base_array_layer = 0;
 135 }
 136
 137 static void
 138 brw_emit_surface_state(struct brw_context *brw,
 139                        struct intel_mipmap_tree *mt, uint32_t flags,
 140                        GLenum target, struct isl_view view,
 141                        uint32_t mocs, uint32_t *surf_offset, int surf_index,
 142                        unsigned read_domains, unsigned write_domains)
 143 {
 144    uint32_t tile_x = mt->level[0].level_x;
 145    uint32_t tile_y = mt->level[0].level_y;
 146    uint32_t offset = mt->offset;
 147
 148    struct isl_surf surf;
 149
 150    get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);
 151
 152    union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
 153
 154    struct brw_bo *aux_bo;
 155    struct isl_surf *aux_surf = NULL;
 156    uint64_t aux_offset = 0;
 157    enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
 158    if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
 159        !(flags & INTEL_AUX_BUFFER_DISABLED)) {
 160       aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
 161
 162       if (mt->mcs_buf) {
 163          aux_surf = &mt->mcs_buf->surf;
 164
 165          aux_bo = mt->mcs_buf->bo;
 166          aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
 167       } else {
 168          aux_surf = &mt->hiz_buf->surf;
 169
 170          aux_bo = mt->hiz_buf->bo;
 171          aux_offset = mt->hiz_buf->bo->offset64;
 172       }
 173
 174       /* We only really need a clear color if we also have an auxiliary
 175        * surface.  Without one, it does nothing.
 176        */
 177       clear_color = mt->fast_clear_color;
 178    }
 179
 180    void *state = brw_state_batch(brw,
 181                                  brw->isl_dev.ss.size,
 182                                  brw->isl_dev.ss.align,
 183                                  surf_offset);
 184
 185    isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
 186                        .address = mt->bo->offset64 + offset,
 187                        .aux_surf = aux_surf, .aux_usage = aux_usage,
 188                        .aux_address = aux_offset,
 189                        .mocs = mocs, .clear_color = clear_color,
 190                        .x_offset_sa = tile_x, .y_offset_sa = tile_y);
 191
 192    brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
 193                   mt->bo, offset, read_domains, write_domains);
 194
 195    if (aux_surf) {
 196       /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
 197        * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
 198        * contain other control information.  Since buffer addresses are always
 199        * on 4k boundaries (and thus have their lower 12 bits zero), we can use
 200        * an ordinary reloc to do the necessary address translation.
 201        */
 202       assert((aux_offset & 0xfff) == 0);
 203       uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
 204       brw_emit_reloc(&brw->batch,
 205                      *surf_offset + brw->isl_dev.ss.aux_addr_offset,
 206                      aux_bo, *aux_addr - aux_bo->offset64,
 207                      read_domains, write_domains);
 208    }
 209 }
 210
 211 uint32_t
 212 brw_update_renderbuffer_surface(struct brw_context *brw,
 213                                 struct gl_renderbuffer *rb,
 214                                 uint32_t flags, unsigned unit /* unused */,
 215                                 uint32_t surf_index)
 216 {
 217    struct gl_context *ctx = &brw->ctx;
 218    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 219    struct intel_mipmap_tree *mt = irb->mt;
 220
 221    if (brw->gen < 9) {
 222       assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
 223    }
 224
 225    assert(brw_render_target_supported(brw, rb));
 226
 227    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 228    if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
 229       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
 230                     __func__, _mesa_get_format_name(rb_format));
 231    }
 232
 233    struct isl_view view = {
 234       .format = brw->mesa_to_isl_render_format[rb_format],
 235       .base_level = irb->mt_level - irb->mt->first_level,
 236       .levels = 1,
 237       .base_array_layer = irb->mt_layer,
 238       .array_len = MAX2(irb->layer_count, 1),
 239       .swizzle = ISL_SWIZZLE_IDENTITY,
 240       .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
 241    };
 242
 243    uint32_t offset;
 244    brw_emit_surface_state(brw, mt, flags, mt->target, view,
 245                           rb_mocs[brw->gen],
 246                           &offset, surf_index,
 247                           I915_GEM_DOMAIN_RENDER,
 248                           I915_GEM_DOMAIN_RENDER);
 249    return offset;
 250 }
 251
 252 GLuint
 253 translate_tex_target(GLenum target)
 254 {
 255    switch (target) {
 256    case GL_TEXTURE_1D:
 257    case GL_TEXTURE_1D_ARRAY_EXT:
 258       return BRW_SURFACE_1D;
 259
 260    case GL_TEXTURE_RECTANGLE_NV:
 261       return BRW_SURFACE_2D;
 262
 263    case GL_TEXTURE_2D:
 264    case GL_TEXTURE_2D_ARRAY_EXT:
 265    case GL_TEXTURE_EXTERNAL_OES:
 266    case GL_TEXTURE_2D_MULTISAMPLE:
 267    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
 268       return BRW_SURFACE_2D;
 269
 270    case GL_TEXTURE_3D:
 271       return BRW_SURFACE_3D;
 272
 273    case GL_TEXTURE_CUBE_MAP:
 274    case GL_TEXTURE_CUBE_MAP_ARRAY:
 275       return BRW_SURFACE_CUBE;
 276
 277    default:
 278       unreachable("not reached");
 279    }
 280 }
 281
 282 uint32_t
 283 brw_get_surface_tiling_bits(enum isl_tiling tiling)
 284 {
 285    switch (tiling) {
 286    case ISL_TILING_X:
 287       return BRW_SURFACE_TILED;
 288    case ISL_TILING_Y0:
 289       return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
 290    default:
 291       return 0;
 292    }
 293 }
 294
 295
 296 uint32_t
 297 brw_get_surface_num_multisamples(unsigned num_samples)
 298 {
 299    if (num_samples > 1)
 300       return BRW_SURFACE_MULTISAMPLECOUNT_4;
 301    else
 302       return BRW_SURFACE_MULTISAMPLECOUNT_1;
 303 }
 304
 305 /**
 306  * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 307  * swizzling.
 308  */
 309 int
 310 brw_get_texture_swizzle(const struct gl_context *ctx,
 311                         const struct gl_texture_object *t)
 312 {
 313    const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 314
 315    int swizzles[SWIZZLE_NIL + 1] = {
 316       SWIZZLE_X,
 317       SWIZZLE_Y,
 318       SWIZZLE_Z,
 319       SWIZZLE_W,
 320       SWIZZLE_ZERO,
 321       SWIZZLE_ONE,
 322       SWIZZLE_NIL
 323    };
 324
 325    if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
 326        img->_BaseFormat == GL_DEPTH_STENCIL) {
 327       GLenum depth_mode = t->DepthMode;
 328
 329       /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
 330        * with depth component data specified with a sized internal format.
 331        * Otherwise, it's left at the old default, GL_LUMINANCE.
 332        */
 333       if (_mesa_is_gles3(ctx) &&
 334           img->InternalFormat != GL_DEPTH_COMPONENT &&
 335           img->InternalFormat != GL_DEPTH_STENCIL) {
 336          depth_mode = GL_RED;
 337       }
 338
 339       switch (depth_mode) {
 340       case GL_ALPHA:
 341          swizzles[0] = SWIZZLE_ZERO;
 342          swizzles[1] = SWIZZLE_ZERO;
 343          swizzles[2] = SWIZZLE_ZERO;
 344          swizzles[3] = SWIZZLE_X;
 345          break;
 346       case GL_LUMINANCE:
 347          swizzles[0] = SWIZZLE_X;
 348          swizzles[1] = SWIZZLE_X;
 349          swizzles[2] = SWIZZLE_X;
 350          swizzles[3] = SWIZZLE_ONE;
 351          break;
 352       case GL_INTENSITY:
 353          swizzles[0] = SWIZZLE_X;
 354          swizzles[1] = SWIZZLE_X;
 355          swizzles[2] = SWIZZLE_X;
 356          swizzles[3] = SWIZZLE_X;
 357          break;
 358       case GL_RED:
 359          swizzles[0] = SWIZZLE_X;
 360          swizzles[1] = SWIZZLE_ZERO;
 361          swizzles[2] = SWIZZLE_ZERO;
 362          swizzles[3] = SWIZZLE_ONE;
 363          break;
 364       }
 365    }
 366
 367    GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
 368
 369    /* If the texture's format is alpha-only, force R, G, and B to
 370     * 0.0. Similarly, if the texture's format has no alpha channel,
 371     * force the alpha value read to 1.0. This allows for the
 372     * implementation to use an RGBA texture for any of these formats
 373     * without leaking any unexpected values.
 374     */
 375    switch (img->_BaseFormat) {
 376    case GL_ALPHA:
 377       swizzles[0] = SWIZZLE_ZERO;
 378       swizzles[1] = SWIZZLE_ZERO;
 379       swizzles[2] = SWIZZLE_ZERO;
 380       break;
 381    case GL_LUMINANCE:
 382       if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
 383          swizzles[0] = SWIZZLE_X;
 384          swizzles[1] = SWIZZLE_X;
 385          swizzles[2] = SWIZZLE_X;
 386          swizzles[3] = SWIZZLE_ONE;
 387       }
 388       break;
 389    case GL_LUMINANCE_ALPHA:
 390       if (datatype == GL_SIGNED_NORMALIZED) {
 391          swizzles[0] = SWIZZLE_X;
 392          swizzles[1] = SWIZZLE_X;
 393          swizzles[2] = SWIZZLE_X;
 394          swizzles[3] = SWIZZLE_W;
 395       }
 396       break;
 397    case GL_INTENSITY:
 398       if (datatype == GL_SIGNED_NORMALIZED) {
 399          swizzles[0] = SWIZZLE_X;
 400          swizzles[1] = SWIZZLE_X;
 401          swizzles[2] = SWIZZLE_X;
 402          swizzles[3] = SWIZZLE_X;
 403       }
 404       break;
 405    case GL_RED:
 406    case GL_RG:
 407    case GL_RGB:
 408       if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
 409           img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
 410           img->TexFormat == MESA_FORMAT_SRGB_DXT1)
 411          swizzles[3] = SWIZZLE_ONE;
 412       break;
 413    }
 414
 415    return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
 416                         swizzles[GET_SWZ(t->_Swizzle, 1)],
 417                         swizzles[GET_SWZ(t->_Swizzle, 2)],
 418                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 419 }
 420
 421 /**
 422  * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 423  * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 424  *
 425  * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 426  *         0          1          2          3             4            5
 427  *         4          5          6          7             0            1
 428  *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 429  *
 430  * which is simply adding 4 then modding by 8 (or anding with 7).
 431  *
 432  * We then may need to apply workarounds for textureGather hardware bugs.
 433  */
 434 static unsigned
 435 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
 436 {
 437    unsigned scs = (swizzle + 4) & 7;
 438
 439    return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
 440 }
 441
 442 static unsigned
 443 brw_find_matching_rb(const struct gl_framebuffer *fb,
 444                      const struct intel_mipmap_tree *mt)
 445 {
 446    for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
 447       const struct intel_renderbuffer *irb =
 448          intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 449
 450       if (irb && irb->mt == mt)
 451          return i;
 452    }
 453
 454    return fb->_NumColorDrawBuffers;
 455 }
 456
 457 static inline bool
 458 brw_texture_view_sane(const struct brw_context *brw,
 459                       const struct intel_mipmap_tree *mt,
 460                       const struct isl_view *view)
 461 {
 462    /* There are special cases only for lossless compression. */
 463    if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
 464       return true;
 465
 466    if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
 467       return true;
 468
 469    /* Logic elsewhere needs to take care to resolve the color buffer prior
 470     * to sampling it as non-compressed.
 471     */
 472    if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
 473                                           view->base_array_layer,
 474                                           view->array_len))
 475       return false;
 476
 477    const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
 478    const unsigned rb_index = brw_find_matching_rb(fb, mt);
 479
 480    if (rb_index == fb->_NumColorDrawBuffers)
 481       return true;
 482
 483    /* Underlying surface is compressed but it is sampled using a format that
 484     * the sampling engine doesn't support as compressed. Compression must be
 485     * disabled for both sampling engine and data port in case the same surface
 486     * is used also as render target.
 487     */
 488    return brw->draw_aux_buffer_disabled[rb_index];
 489 }
 490
 491 static bool
 492 brw_disable_aux_surface(const struct brw_context *brw,
 493                         const struct intel_mipmap_tree *mt,
 494                         const struct isl_view *view)
 495 {
 496    /* Nothing to disable. */
 497    if (!mt->mcs_buf)
 498       return false;
 499
 500    const bool is_unresolved = intel_miptree_has_color_unresolved(
 501                                  mt, view->base_level, view->levels,
 502                                  view->base_array_layer, view->array_len);
 503
 504    /* There are special cases only for lossless compression. */
 505    if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
 506       return !is_unresolved;
 507
 508    const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
 509    const unsigned rb_index = brw_find_matching_rb(fb, mt);
 510
 511    /* If we are drawing into this with compression enabled, then we must also
 512     * enable compression when texturing from it regardless of
 513     * fast_clear_state.  If we don't then, after the first draw call with
 514     * this setup, there will be data in the CCS which won't get picked up by
 515     * subsequent texturing operations as required by ARB_texture_barrier.
 516     * Since we don't want to re-emit the binding table or do a resolve
 517     * operation every draw call, the easiest thing to do is just enable
 518     * compression on the texturing side.  This is completely safe to do
 519     * since, if compressed texturing weren't allowed, we would have disabled
 520     * compression of render targets in whatever_that_function_is_called().
 521     */
 522    if (rb_index < fb->_NumColorDrawBuffers) {
 523       if (brw->draw_aux_buffer_disabled[rb_index]) {
 524          assert(!is_unresolved);
 525       }
 526
 527       return brw->draw_aux_buffer_disabled[rb_index];
 528    }
 529
 530    return !is_unresolved;
 531 }
 532
 533 void
 534 brw_update_texture_surface(struct gl_context *ctx,
 535                            unsigned unit,
 536                            uint32_t *surf_offset,
 537                            bool for_gather,
 538                            uint32_t plane)
 539 {
 540    struct brw_context *brw = brw_context(ctx);
 541    struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
 542
 543    if (obj->Target == GL_TEXTURE_BUFFER) {
 544       brw_update_buffer_texture_surface(ctx, unit, surf_offset);
 545
 546    } else {
 547       struct intel_texture_object *intel_obj = intel_texture_object(obj);
 548       struct intel_mipmap_tree *mt = intel_obj->mt;
 549
 550       if (plane > 0) {
 551          if (mt->plane[plane - 1] == NULL)
 552             return;
 553          mt = mt->plane[plane - 1];
 554       }
 555
 556       struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
 557       /* If this is a view with restricted NumLayers, then our effective depth
 558        * is not just the miptree depth.
 559        */
 560       unsigned view_num_layers;
 561       if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
 562          view_num_layers = obj->NumLayers;
 563       } else if (mt->surf.size > 0) {
 564          view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
 565                               mt->surf.logical_level0_px.depth :
 566                               mt->surf.logical_level0_px.array_len;
 567       } else {
 568          view_num_layers = mt->logical_depth0;
 569       }
 570
 571       /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
 572        * texturing functions that return a float, as our code generation always
 573        * selects the .x channel (which would always be 0).
 574        */
 575       struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
 576       const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
 577          (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
 578           firstImage->_BaseFormat == GL_DEPTH_STENCIL);
 579       const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
 580                                 brw_get_texture_swizzle(&brw->ctx, obj));
 581
 582       mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
 583       enum isl_format format = translate_tex_format(brw, mesa_fmt,
 584                                                     sampler->sRGBDecode);
 585
 586       /* Implement gen6 and gen7 gather work-around */
 587       bool need_green_to_blue = false;
 588       if (for_gather) {
 589          if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
 590                                format == ISL_FORMAT_R32G32_SINT ||
 591                                format == ISL_FORMAT_R32G32_UINT)) {
 592             format = ISL_FORMAT_R32G32_FLOAT_LD;
 593             need_green_to_blue = brw->is_haswell;
 594          } else if (brw->gen == 6) {
 595             /* Sandybridge's gather4 message is broken for integer formats.
 596              * To work around this, we pretend the surface is UNORM for
 597              * 8 or 16-bit formats, and emit shader instructions to recover
 598              * the real INT/UINT value.  For 32-bit formats, we pretend
 599              * the surface is FLOAT, and simply reinterpret the resulting
 600              * bits.
 601              */
 602             switch (format) {
 603             case ISL_FORMAT_R8_SINT:
 604             case ISL_FORMAT_R8_UINT:
 605                format = ISL_FORMAT_R8_UNORM;
 606                break;
 607
 608             case ISL_FORMAT_R16_SINT:
 609             case ISL_FORMAT_R16_UINT:
 610                format = ISL_FORMAT_R16_UNORM;
 611                break;
 612
 613             case ISL_FORMAT_R32_SINT:
 614             case ISL_FORMAT_R32_UINT:
 615                format = ISL_FORMAT_R32_FLOAT;
 616                break;
 617
 618             default:
 619                break;
 620             }
 621          }
 622       }
 623
 624       if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
 625          if (brw->gen <= 7) {
 626             assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
 627             mt = mt->r8stencil_mt;
 628          } else {
 629             mt = mt->stencil_mt;
 630          }
 631          format = ISL_FORMAT_R8_UINT;
 632       } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
 633          assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
 634          mt = mt->r8stencil_mt;
 635          format = ISL_FORMAT_R8_UINT;
 636       }
 637
 638       const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
 639
 640       struct isl_view view = {
 641          .format = format,
 642          .base_level = obj->MinLevel + obj->BaseLevel,
 643          .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
 644          .base_array_layer = obj->MinLayer,
 645          .array_len = view_num_layers,
 646          .swizzle = {
 647             .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
 648             .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
 649             .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
 650             .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
 651          },
 652          .usage = ISL_SURF_USAGE_TEXTURE_BIT,
 653       };
 654
 655       if (obj->Target == GL_TEXTURE_CUBE_MAP ||
 656           obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
 657          view.usage |= ISL_SURF_USAGE_CUBE_BIT;
 658
 659       assert(brw_texture_view_sane(brw, mt, &view));
 660
 661       const int flags = brw_disable_aux_surface(brw, mt, &view) ?
 662                            INTEL_AUX_BUFFER_DISABLED : 0;
 663       brw_emit_surface_state(brw, mt, flags, mt->target, view,
 664                              tex_mocs[brw->gen],
 665                              surf_offset, surf_index,
 666                              I915_GEM_DOMAIN_SAMPLER, 0);
 667    }
 668 }
 669
 670 void
 671 brw_emit_buffer_surface_state(struct brw_context *brw,
 672                               uint32_t *out_offset,
 673                               struct brw_bo *bo,
 674                               unsigned buffer_offset,
 675                               unsigned surface_format,
 676                               unsigned buffer_size,
 677                               unsigned pitch,
 678                               bool rw)
 679 {
 680    uint32_t *dw = brw_state_batch(brw,
 681                                   brw->isl_dev.ss.size,
 682                                   brw->isl_dev.ss.align,
 683                                   out_offset);
 684
 685    isl_buffer_fill_state(&brw->isl_dev, dw,
 686                          .address = (bo ? bo->offset64 : 0) + buffer_offset,
 687                          .size = buffer_size,
 688                          .format = surface_format,
 689                          .stride = pitch,
 690                          .mocs = tex_mocs[brw->gen]);
 691
 692    if (bo) {
 693       brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
 694                      bo, buffer_offset,
 695                      I915_GEM_DOMAIN_SAMPLER,
 696                      (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
 697    }
 698 }
 699
 700 void
 701 brw_update_buffer_texture_surface(struct gl_context *ctx,
 702                                   unsigned unit,
 703                                   uint32_t *surf_offset)
 704 {
 705    struct brw_context *brw = brw_context(ctx);
 706    struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
 707    struct intel_buffer_object *intel_obj =
 708       intel_buffer_object(tObj->BufferObject);
 709    uint32_t size = tObj->BufferSize;
 710    struct brw_bo *bo = NULL;
 711    mesa_format format = tObj->_BufferObjectFormat;
 712    const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
 713    int texel_size = _mesa_get_format_bytes(format);
 714
 715    if (intel_obj) {
 716       size = MIN2(size, intel_obj->Base.Size);
 717       bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
 718                                   false);
 719    }
 720
 721    /* The ARB_texture_buffer_specification says:
 722     *
 723     *    "The number of texels in the buffer texture's texel array is given by
 724     *
 725     *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
 726     *
 727     *     where <buffer_size> is the size of the buffer object, in basic
 728     *     machine units and <components> and <base_type> are the element count
 729     *     and base data type for elements, as specified in Table X.1.  The
 730     *     number of texels in the texel array is then clamped to the
 731     *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
 732     *
 733     * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
 734     * so that when ISL divides by stride to obtain the number of texels, that
 735     * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
 736     */
 737    size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
 738
 739    if (isl_format == ISL_FORMAT_UNSUPPORTED) {
 740       _mesa_problem(NULL, "bad format %s for texture buffer\n",
 741                     _mesa_get_format_name(format));
 742    }
 743
 744    brw_emit_buffer_surface_state(brw, surf_offset, bo,
 745                                  tObj->BufferOffset,
 746                                  isl_format,
 747                                  size,
 748                                  texel_size,
 749                                  false /* rw */);
 750 }
 751
 752 /**
 753  * Create the constant buffer surface.  Vertex/fragment shader constants will be
 754  * read from this buffer with Data Port Read instructions/messages.
 755  */
 756 void
 757 brw_create_constant_surface(struct brw_context *brw,
 758                             struct brw_bo *bo,
 759                             uint32_t offset,
 760                             uint32_t size,
 761                             uint32_t *out_offset)
 762 {
 763    brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
 764                                  ISL_FORMAT_R32G32B32A32_FLOAT,
 765                                  size, 1, false);
 766 }
 767
 768 /**
 769  * Create the buffer surface. Shader buffer variables will be
 770  * read from / write to this buffer with Data Port Read/Write
 771  * instructions/messages.
 772  */
 773 void
 774 brw_create_buffer_surface(struct brw_context *brw,
 775                           struct brw_bo *bo,
 776                           uint32_t offset,
 777                           uint32_t size,
 778                           uint32_t *out_offset)
 779 {
 780    /* Use a raw surface so we can reuse existing untyped read/write/atomic
 781     * messages. We need these specifically for the fragment shader since they
 782     * include a pixel mask header that we need to ensure correct behavior
 783     * with helper invocations, which cannot write to the buffer.
 784     */
 785    brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
 786                                  ISL_FORMAT_RAW,
 787                                  size, 1, true);
 788 }
 789
 790 /**
 791  * Set up a binding table entry for use by stream output logic (transform
 792  * feedback).
 793  *
 794  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 795  */
 796 void
 797 brw_update_sol_surface(struct brw_context *brw,
 798                        struct gl_buffer_object *buffer_obj,
 799                        uint32_t *out_offset, unsigned num_vector_components,
 800                        unsigned stride_dwords, unsigned offset_dwords)
 801 {
 802    struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
 803    uint32_t offset_bytes = 4 * offset_dwords;
 804    struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
 805                                              offset_bytes,
 806                                              buffer_obj->Size - offset_bytes,
 807                                              true);
 808    uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
 809    uint32_t pitch_minus_1 = 4*stride_dwords - 1;
 810    size_t size_dwords = buffer_obj->Size / 4;
 811    uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 812
 813    /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
 814     * too big to map using a single binding table entry?
 815     */
 816    assert((size_dwords - offset_dwords) / stride_dwords
 817           <= BRW_MAX_NUM_BUFFER_ENTRIES);
 818
 819    if (size_dwords > offset_dwords + num_vector_components) {
 820       /* There is room for at least 1 transform feedback output in the buffer.
 821        * Compute the number of additional transform feedback outputs the
 822        * buffer has room for.
 823        */
 824       buffer_size_minus_1 =
 825          (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
 826    } else {
 827       /* There isn't even room for a single transform feedback output in the
 828        * buffer.  We can't configure the binding table entry to prevent output
 829        * entirely; we'll have to rely on the geometry shader to detect
 830        * overflow.  But to minimize the damage in case of a bug, set up the
 831        * binding table entry to just allow a single output.
 832        */
 833       buffer_size_minus_1 = 0;
 834    }
 835    width = buffer_size_minus_1 & 0x7f;
 836    height = (buffer_size_minus_1 & 0xfff80) >> 7;
 837    depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 838
 839    switch (num_vector_components) {
 840    case 1:
 841       surface_format = ISL_FORMAT_R32_FLOAT;
 842       break;
 843    case 2:
 844       surface_format = ISL_FORMAT_R32G32_FLOAT;
 845       break;
 846    case 3:
 847       surface_format = ISL_FORMAT_R32G32B32_FLOAT;
 848       break;
 849    case 4:
 850       surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
 851       break;
 852    default:
 853       unreachable("Invalid vector size for transform feedback output");
 854    }
 855
 856    surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
 857       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
 858       surface_format << BRW_SURFACE_FORMAT_SHIFT |
 859       BRW_SURFACE_RC_READ_WRITE;
 860    surf[1] = bo->offset64 + offset_bytes; /* reloc */
 861    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 862               height << BRW_SURFACE_HEIGHT_SHIFT);
 863    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
 864               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 865    surf[4] = 0;
 866    surf[5] = 0;
 867
 868    /* Emit relocation to surface contents. */
 869    brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
 870                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 871 }
 872
 873 /* Creates a new WM constant buffer reflecting the current fragment program's
 874  * constants, if needed by the fragment program.
 875  *
 876  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 877  * state atom.
 878  */
 879 static void
 880 brw_upload_wm_pull_constants(struct brw_context *brw)
 881 {
 882    struct brw_stage_state *stage_state = &brw->wm.base;
 883    /* BRW_NEW_FRAGMENT_PROGRAM */
 884    struct brw_program *fp = (struct brw_program *) brw->fragment_program;
 885    /* BRW_NEW_FS_PROG_DATA */
 886    struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
 887
 888    _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
 889    /* _NEW_PROGRAM_CONSTANTS */
 890    brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
 891                              stage_state, prog_data);
 892 }
 893
 894 const struct brw_tracked_state brw_wm_pull_constants = {
 895    .dirty = {
 896       .mesa = _NEW_PROGRAM_CONSTANTS,
 897       .brw = BRW_NEW_BATCH |
 898              BRW_NEW_BLORP |
 899              BRW_NEW_FRAGMENT_PROGRAM |
 900              BRW_NEW_FS_PROG_DATA,
 901    },
 902    .emit = brw_upload_wm_pull_constants,
 903 };
 904
 905 /**
 906  * Creates a null renderbuffer surface.
 907  *
 908  * This is used when the shader doesn't write to any color output.  An FB
 909  * write to target 0 will still be emitted, because that's how the thread is
 910  * terminated (and computed depth is returned), so we need to have the
 911  * hardware discard the target 0 color output..
 912  */
 913 static void
 914 brw_emit_null_surface_state(struct brw_context *brw,
 915                             unsigned width,
 916                             unsigned height,
 917                             unsigned samples,
 918                             uint32_t *out_offset)
 919 {
 920    /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
 921     * Notes):
 922     *
 923     *     A null surface will be used in instances where an actual surface is
 924     *     not bound. When a write message is generated to a null surface, no
 925     *     actual surface is written to. When a read message (including any
 926     *     sampling engine message) is generated to a null surface, the result
 927     *     is all zeros. Note that a null surface type is allowed to be used
 928     *     with all messages, even if it is not specificially indicated as
 929     *     supported. All of the remaining fields in surface state are ignored
 930     *     for null surfaces, with the following exceptions:
 931     *
 932     *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
 933     *       depth buffer’s corresponding state for all render target surfaces,
 934     *       including null.
 935     *
 936     *     - Surface Format must be R8G8B8A8_UNORM.
 937     */
 938    unsigned surface_type = BRW_SURFACE_NULL;
 939    struct brw_bo *bo = NULL;
 940    unsigned pitch_minus_1 = 0;
 941    uint32_t multisampling_state = 0;
 942    uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
 943
 944    if (samples > 1) {
 945       /* On Gen6, null render targets seem to cause GPU hangs when
 946        * multisampling.  So work around this problem by rendering into dummy
 947        * color buffer.
 948        *
 949        * To decrease the amount of memory needed by the workaround buffer, we
 950        * set its pitch to 128 bytes (the width of a Y tile).  This means that
 951        * the amount of memory needed for the workaround buffer is
 952        * (width_in_tiles + height_in_tiles - 1) tiles.
 953        *
 954        * Note that since the workaround buffer will be interpreted by the
 955        * hardware as an interleaved multisampled buffer, we need to compute
 956        * width_in_tiles and height_in_tiles by dividing the width and height
 957        * by 16 rather than the normal Y-tile size of 32.
 958        */
 959       unsigned width_in_tiles = ALIGN(width, 16) / 16;
 960       unsigned height_in_tiles = ALIGN(height, 16) / 16;
 961       unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
 962       brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
 963                          size_needed);
 964       bo = brw->wm.multisampled_null_render_target_bo;
 965       surface_type = BRW_SURFACE_2D;
 966       pitch_minus_1 = 127;
 967       multisampling_state = brw_get_surface_num_multisamples(samples);
 968    }
 969
 970    surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
 971               ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
 972    if (brw->gen < 6) {
 973       surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
 974                   1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
 975                   1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 976                   1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
 977    }
 978    surf[1] = bo ? bo->offset64 : 0;
 979    surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 980               (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 981
 982    /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
 983     * Notes):
 984     *
 985     *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
 986     */
 987    surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
 988               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
 989    surf[4] = multisampling_state;
 990    surf[5] = 0;
 991
 992    if (bo) {
 993       brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
 994                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 995    }
 996 }
 997
 998 /**
 999  * Sets up a surface state structure to point at the given region.
1000  * While it is only used for the front/back buffer currently, it should be
1001  * usable for further buffers when doing ARB_draw_buffer support.
1002  */
1003 static uint32_t
1004 gen4_update_renderbuffer_surface(struct brw_context *brw,
1005                                  struct gl_renderbuffer *rb,
1006                                  uint32_t flags, unsigned unit,
1007                                  uint32_t surf_index)
1008 {
1009    struct gl_context *ctx = &brw->ctx;
1010    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1011    struct intel_mipmap_tree *mt = irb->mt;
1012    uint32_t *surf;
1013    uint32_t tile_x, tile_y;
1014    enum isl_format format;
1015    uint32_t offset;
1016    /* _NEW_BUFFERS */
1017    mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1018    /* BRW_NEW_FS_PROG_DATA */
1019
1020    assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1021    assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1022
1023    if (rb->TexImage && !brw->has_surface_tile_offset) {
1024       intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1025
1026       if (tile_x != 0 || tile_y != 0) {
1027          /* Original gen4 hardware couldn't draw to a non-tile-aligned
1028           * destination in a miptree unless you actually setup your renderbuffer
1029           * as a miptree and used the fragile lod/array_index/etc. controls to
1030           * select the image.  So, instead, we just make a new single-level
1031           * miptree and render into that.
1032           */
1033          intel_renderbuffer_move_to_temp(brw, irb, false);
1034          assert(irb->align_wa_mt);
1035          mt = irb->align_wa_mt;
1036       }
1037    }
1038
1039    surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1040
1041    format = brw->mesa_to_isl_render_format[rb_format];
1042    if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1043       _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1044                     __func__, _mesa_get_format_name(rb_format));
1045    }
1046
1047    surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1048               format << BRW_SURFACE_FORMAT_SHIFT);
1049
1050    /* reloc */
1051    assert(mt->offset % mt->cpp == 0);
1052    surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1053               mt->bo->offset64 + mt->offset);
1054
1055    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1056               (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1057
1058    surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
1059               (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1060
1061    surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);
1062
1063    assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1064    /* Note that the low bits of these fields are missing, so
1065     * there's the possibility of getting in trouble.
1066     */
1067    assert(tile_x % 4 == 0);
1068    assert(tile_y % 2 == 0);
1069    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1070               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1071               (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1072
1073    if (brw->gen < 6) {
1074       /* _NEW_COLOR */
1075       if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1076           (ctx->Color.BlendEnabled & (1 << unit)))
1077          surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1078
1079       if (!ctx->Color.ColorMask[unit][0])
1080          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1081       if (!ctx->Color.ColorMask[unit][1])
1082          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1083       if (!ctx->Color.ColorMask[unit][2])
1084          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1085
1086       /* As mentioned above, disable writes to the alpha component when the
1087        * renderbuffer is XRGB.
1088        */
1089       if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1090           !ctx->Color.ColorMask[unit][3]) {
1091          surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1092       }
1093    }
1094
1095    brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1096                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1097
1098    return offset;
1099 }
1100
1101 /**
1102  * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1103  */
1104 void
1105 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1106                                  const struct gl_framebuffer *fb,
1107                                  uint32_t render_target_start,
1108                                  uint32_t *surf_offset)
1109 {
1110    GLuint i;
1111    const unsigned int w = _mesa_geometric_width(fb);
1112    const unsigned int h = _mesa_geometric_height(fb);
1113    const unsigned int s = _mesa_geometric_samples(fb);
1114
1115    /* Update surfaces for drawing buffers */
1116    if (fb->_NumColorDrawBuffers >= 1) {
1117       for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1118          const uint32_t surf_index = render_target_start + i;
1119          const int flags = (_mesa_geometric_layers(fb) > 0 ?
1120                               INTEL_RENDERBUFFER_LAYERED : 0) |
1121                            (brw->draw_aux_buffer_disabled[i] ?
1122                               INTEL_AUX_BUFFER_DISABLED : 0);
1123
1124          if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1125             surf_offset[surf_index] =
1126                brw->vtbl.update_renderbuffer_surface(
1127                   brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1128          } else {
1129             brw->vtbl.emit_null_surface_state(brw, w, h, s,
1130                &surf_offset[surf_index]);
1131          }
1132       }
1133    } else {
1134       const uint32_t surf_index = render_target_start;
1135       brw->vtbl.emit_null_surface_state(brw, w, h, s,
1136          &surf_offset[surf_index]);
1137    }
1138 }
1139
1140 static void
1141 update_renderbuffer_surfaces(struct brw_context *brw)
1142 {
1143    const struct gl_context *ctx = &brw->ctx;
1144
1145    /* BRW_NEW_FS_PROG_DATA */
1146    const struct brw_wm_prog_data *wm_prog_data =
1147       brw_wm_prog_data(brw->wm.base.prog_data);
1148
1149    /* _NEW_BUFFERS | _NEW_COLOR */
1150    const struct gl_framebuffer *fb = ctx->DrawBuffer;
1151    brw_update_renderbuffer_surfaces(
1152       brw, fb,
1153       wm_prog_data->binding_table.render_target_start,
1154       brw->wm.base.surf_offset);
1155    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1156 }
1157
1158 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1159    .dirty = {
1160       .mesa = _NEW_BUFFERS |
1161               _NEW_COLOR,
1162       .brw = BRW_NEW_BATCH |
1163              BRW_NEW_BLORP |
1164              BRW_NEW_FS_PROG_DATA,
1165    },
1166    .emit = update_renderbuffer_surfaces,
1167 };
1168
1169 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1170    .dirty = {
1171       .mesa = _NEW_BUFFERS,
1172       .brw = BRW_NEW_BATCH |
1173              BRW_NEW_BLORP,
1174    },
1175    .emit = update_renderbuffer_surfaces,
1176 };
1177
1178 static void
1179 update_renderbuffer_read_surfaces(struct brw_context *brw)
1180 {
1181    const struct gl_context *ctx = &brw->ctx;
1182
1183    /* BRW_NEW_FS_PROG_DATA */
1184    const struct brw_wm_prog_data *wm_prog_data =
1185       brw_wm_prog_data(brw->wm.base.prog_data);
1186
1187    /* BRW_NEW_FRAGMENT_PROGRAM */
1188    if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1189        brw->fragment_program && brw->fragment_program->info.outputs_read) {
1190       /* _NEW_BUFFERS */
1191       const struct gl_framebuffer *fb = ctx->DrawBuffer;
1192
1193       for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1194          struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1195          const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1196          const unsigned surf_index =
1197             wm_prog_data->binding_table.render_target_read_start + i;
1198          uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1199
1200          if (irb) {
1201             const enum isl_format format = brw->mesa_to_isl_render_format[
1202                _mesa_get_render_format(ctx, intel_rb_format(irb))];
1203             assert(isl_format_supports_sampling(&brw->screen->devinfo,
1204                                                 format));
1205
1206             /* Override the target of the texture if the render buffer is a
1207              * single slice of a 3D texture (since the minimum array element
1208              * field of the surface state structure is ignored by the sampler
1209              * unit for 3D textures on some hardware), or if the render buffer
1210              * is a 1D array (since shaders always provide the array index
1211              * coordinate at the Z component to avoid state-dependent
1212              * recompiles when changing the texture target of the
1213              * framebuffer).
1214              */
1215             const GLenum target =
1216                (irb->mt->target == GL_TEXTURE_3D &&
1217                 irb->layer_count == 1) ? GL_TEXTURE_2D :
1218                irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1219                irb->mt->target;
1220
1221             const struct isl_view view = {
1222                .format = format,
1223                .base_level = irb->mt_level - irb->mt->first_level,
1224                .levels = 1,
1225                .base_array_layer = irb->mt_layer,
1226                .array_len = irb->layer_count,
1227                .swizzle = ISL_SWIZZLE_IDENTITY,
1228                .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1229             };
1230
1231             const int flags = brw->draw_aux_buffer_disabled[i] ?
1232                                  INTEL_AUX_BUFFER_DISABLED : 0;
1233             brw_emit_surface_state(brw, irb->mt, flags, target, view,
1234                                    tex_mocs[brw->gen],
1235                                    surf_offset, surf_index,
1236                                    I915_GEM_DOMAIN_SAMPLER, 0);
1237
1238          } else {
1239             brw->vtbl.emit_null_surface_state(
1240                brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1241                _mesa_geometric_samples(fb), surf_offset);
1242          }
1243       }
1244
1245       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1246    }
1247 }
1248
1249 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1250    .dirty = {
1251       .mesa = _NEW_BUFFERS,
1252       .brw = BRW_NEW_BATCH |
1253              BRW_NEW_FRAGMENT_PROGRAM |
1254              BRW_NEW_FS_PROG_DATA,
1255    },
1256    .emit = update_renderbuffer_read_surfaces,
1257 };
1258
1259 static void
1260 update_stage_texture_surfaces(struct brw_context *brw,
1261                               const struct gl_program *prog,
1262                               struct brw_stage_state *stage_state,
1263                               bool for_gather, uint32_t plane)
1264 {
1265    if (!prog)
1266       return;
1267
1268    struct gl_context *ctx = &brw->ctx;
1269
1270    uint32_t *surf_offset = stage_state->surf_offset;
1271
1272    /* BRW_NEW_*_PROG_DATA */
1273    if (for_gather)
1274       surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1275    else
1276       surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1277
1278    unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1279    for (unsigned s = 0; s < num_samplers; s++) {
1280       surf_offset[s] = 0;
1281
1282       if (prog->SamplersUsed & (1 << s)) {
1283          const unsigned unit = prog->SamplerUnits[s];
1284
1285          /* _NEW_TEXTURE */
1286          if (ctx->Texture.Unit[unit]._Current) {
1287             brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1288          }
1289       }
1290    }
1291 }
1292
1293
1294 /**
1295  * Construct SURFACE_STATE objects for enabled textures.
1296  */
1297 static void
1298 brw_update_texture_surfaces(struct brw_context *brw)
1299 {
1300    /* BRW_NEW_VERTEX_PROGRAM */
1301    struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1302
1303    /* BRW_NEW_TESS_PROGRAMS */
1304    struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1305    struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1306
1307    /* BRW_NEW_GEOMETRY_PROGRAM */
1308    struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1309
1310    /* BRW_NEW_FRAGMENT_PROGRAM */
1311    struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1312
1313    /* _NEW_TEXTURE */
1314    update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1315    update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1316    update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1317    update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1318    update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1319
1320    /* emit alternate set of surface state for gather. this
1321     * allows the surface format to be overriden for only the
1322     * gather4 messages. */
1323    if (brw->gen < 8) {
1324       if (vs && vs->nir->info.uses_texture_gather)
1325          update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1326       if (tcs && tcs->nir->info.uses_texture_gather)
1327          update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1328       if (tes && tes->nir->info.uses_texture_gather)
1329          update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1330       if (gs && gs->nir->info.uses_texture_gather)
1331          update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1332       if (fs && fs->nir->info.uses_texture_gather)
1333          update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1334    }
1335
1336    if (fs) {
1337       update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1338       update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1339    }
1340
1341    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1342 }
1343
1344 const struct brw_tracked_state brw_texture_surfaces = {
1345    .dirty = {
1346       .mesa = _NEW_TEXTURE,
1347       .brw = BRW_NEW_BATCH |
1348              BRW_NEW_BLORP |
1349              BRW_NEW_FRAGMENT_PROGRAM |
1350              BRW_NEW_FS_PROG_DATA |
1351              BRW_NEW_GEOMETRY_PROGRAM |
1352              BRW_NEW_GS_PROG_DATA |
1353              BRW_NEW_TESS_PROGRAMS |
1354              BRW_NEW_TCS_PROG_DATA |
1355              BRW_NEW_TES_PROG_DATA |
1356              BRW_NEW_TEXTURE_BUFFER |
1357              BRW_NEW_VERTEX_PROGRAM |
1358              BRW_NEW_VS_PROG_DATA,
1359    },
1360    .emit = brw_update_texture_surfaces,
1361 };
1362
1363 static void
1364 brw_update_cs_texture_surfaces(struct brw_context *brw)
1365 {
1366    /* BRW_NEW_COMPUTE_PROGRAM */
1367    struct gl_program *cs = (struct gl_program *) brw->compute_program;
1368
1369    /* _NEW_TEXTURE */
1370    update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1371
1372    /* emit alternate set of surface state for gather. this
1373     * allows the surface format to be overriden for only the
1374     * gather4 messages.
1375     */
1376    if (brw->gen < 8) {
1377       if (cs && cs->nir->info.uses_texture_gather)
1378          update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1379    }
1380
1381    brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1382 }
1383
1384 const struct brw_tracked_state brw_cs_texture_surfaces = {
1385    .dirty = {
1386       .mesa = _NEW_TEXTURE,
1387       .brw = BRW_NEW_BATCH |
1388              BRW_NEW_BLORP |
1389              BRW_NEW_COMPUTE_PROGRAM,
1390    },
1391    .emit = brw_update_cs_texture_surfaces,
1392 };
1393
1394
1395 void
1396 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1397                         struct brw_stage_state *stage_state,
1398                         struct brw_stage_prog_data *prog_data)
1399 {
1400    struct gl_context *ctx = &brw->ctx;
1401
1402    if (!prog)
1403       return;
1404
1405    uint32_t *ubo_surf_offsets =
1406       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1407
1408    for (int i = 0; i < prog->info.num_ubos; i++) {
1409       struct gl_uniform_buffer_binding *binding =
1410          &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1411
1412       if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1413          brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1414       } else {
1415          struct intel_buffer_object *intel_bo =
1416             intel_buffer_object(binding->BufferObject);
1417          GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1418          if (!binding->AutomaticSize)
1419             size = MIN2(size, binding->Size);
1420          struct brw_bo *bo =
1421             intel_bufferobj_buffer(brw, intel_bo,
1422                                    binding->Offset,
1423                                    size, false);
1424          brw_create_constant_surface(brw, bo, binding->Offset,
1425                                      size,
1426                                      &ubo_surf_offsets[i]);
1427       }
1428    }
1429
1430    uint32_t *ssbo_surf_offsets =
1431       &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1432
1433    for (int i = 0; i < prog->info.num_ssbos; i++) {
1434       struct gl_shader_storage_buffer_binding *binding =
1435          &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1436
1437       if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1438          brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1439       } else {
1440          struct intel_buffer_object *intel_bo =
1441             intel_buffer_object(binding->BufferObject);
1442          GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1443          if (!binding->AutomaticSize)
1444             size = MIN2(size, binding->Size);
1445          struct brw_bo *bo =
1446             intel_bufferobj_buffer(brw, intel_bo,
1447                                    binding->Offset,
1448                                    size, true);
1449          brw_create_buffer_surface(brw, bo, binding->Offset,
1450                                    size,
1451                                    &ssbo_surf_offsets[i]);
1452       }
1453    }
1454
1455    stage_state->push_constants_dirty = true;
1456
1457    if (prog->info.num_ubos || prog->info.num_ssbos)
1458       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1459 }
1460
1461 static void
1462 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1463 {
1464    struct gl_context *ctx = &brw->ctx;
1465    /* _NEW_PROGRAM */
1466    struct gl_program *prog = ctx->FragmentProgram._Current;
1467
1468    /* BRW_NEW_FS_PROG_DATA */
1469    brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1470 }
1471
1472 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1473    .dirty = {
1474       .mesa = _NEW_PROGRAM,
1475       .brw = BRW_NEW_BATCH |
1476              BRW_NEW_BLORP |
1477              BRW_NEW_FS_PROG_DATA |
1478              BRW_NEW_UNIFORM_BUFFER,
1479    },
1480    .emit = brw_upload_wm_ubo_surfaces,
1481 };
1482
1483 static void
1484 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1485 {
1486    struct gl_context *ctx = &brw->ctx;
1487    /* _NEW_PROGRAM */
1488    struct gl_program *prog =
1489       ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1490
1491    /* BRW_NEW_CS_PROG_DATA */
1492    brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1493 }
1494
1495 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1496    .dirty = {
1497       .mesa = _NEW_PROGRAM,
1498       .brw = BRW_NEW_BATCH |
1499              BRW_NEW_BLORP |
1500              BRW_NEW_CS_PROG_DATA |
1501              BRW_NEW_UNIFORM_BUFFER,
1502    },
1503    .emit = brw_upload_cs_ubo_surfaces,
1504 };
1505
1506 void
1507 brw_upload_abo_surfaces(struct brw_context *brw,
1508                         const struct gl_program *prog,
1509                         struct brw_stage_state *stage_state,
1510                         struct brw_stage_prog_data *prog_data)
1511 {
1512    struct gl_context *ctx = &brw->ctx;
1513    uint32_t *surf_offsets =
1514       &stage_state->surf_offset[prog_data->binding_table.abo_start];
1515
1516    if (prog->info.num_abos) {
1517       for (unsigned i = 0; i < prog->info.num_abos; i++) {
1518          struct gl_atomic_buffer_binding *binding =
1519             &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1520          struct intel_buffer_object *intel_bo =
1521             intel_buffer_object(binding->BufferObject);
1522          struct brw_bo *bo =
1523             intel_bufferobj_buffer(brw, intel_bo, binding->Offset,
1524                                    intel_bo->Base.Size - binding->Offset,
1525                                    true);
1526
1527          brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1528                                        binding->Offset, ISL_FORMAT_RAW,
1529                                        bo->size - binding->Offset, 1, true);
1530       }
1531
1532       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1533    }
1534 }
1535
1536 static void
1537 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1538 {
1539    /* _NEW_PROGRAM */
1540    const struct gl_program *wm = brw->fragment_program;
1541
1542    if (wm) {
1543       /* BRW_NEW_FS_PROG_DATA */
1544       brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1545    }
1546 }
1547
1548 const struct brw_tracked_state brw_wm_abo_surfaces = {
1549    .dirty = {
1550       .mesa = _NEW_PROGRAM,
1551       .brw = BRW_NEW_ATOMIC_BUFFER |
1552              BRW_NEW_BLORP |
1553              BRW_NEW_BATCH |
1554              BRW_NEW_FS_PROG_DATA,
1555    },
1556    .emit = brw_upload_wm_abo_surfaces,
1557 };
1558
1559 static void
1560 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1561 {
1562    /* _NEW_PROGRAM */
1563    const struct gl_program *cp = brw->compute_program;
1564
1565    if (cp) {
1566       /* BRW_NEW_CS_PROG_DATA */
1567       brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1568    }
1569 }
1570
1571 const struct brw_tracked_state brw_cs_abo_surfaces = {
1572    .dirty = {
1573       .mesa = _NEW_PROGRAM,
1574       .brw = BRW_NEW_ATOMIC_BUFFER |
1575              BRW_NEW_BLORP |
1576              BRW_NEW_BATCH |
1577              BRW_NEW_CS_PROG_DATA,
1578    },
1579    .emit = brw_upload_cs_abo_surfaces,
1580 };
1581
1582 static void
1583 brw_upload_cs_image_surfaces(struct brw_context *brw)
1584 {
1585    /* _NEW_PROGRAM */
1586    const struct gl_program *cp = brw->compute_program;
1587
1588    if (cp) {
1589       /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1590       brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1591                                 brw->cs.base.prog_data);
1592    }
1593 }
1594
1595 const struct brw_tracked_state brw_cs_image_surfaces = {
1596    .dirty = {
1597       .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1598       .brw = BRW_NEW_BATCH |
1599              BRW_NEW_BLORP |
1600              BRW_NEW_CS_PROG_DATA |
1601              BRW_NEW_IMAGE_UNITS
1602    },
1603    .emit = brw_upload_cs_image_surfaces,
1604 };
1605
1606 static uint32_t
1607 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1608 {
1609    const struct gen_device_info *devinfo = &brw->screen->devinfo;
1610    enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1611    if (access == GL_WRITE_ONLY) {
1612       return hw_format;
1613    } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1614       /* Typed surface reads support a very limited subset of the shader
1615        * image formats.  Translate it into the closest format the
1616        * hardware supports.
1617        */
1618       return isl_lower_storage_image_format(devinfo, hw_format);
1619    } else {
1620       /* The hardware doesn't actually support a typed format that we can use
1621        * so we have to fall back to untyped read/write messages.
1622        */
1623       return ISL_FORMAT_RAW;
1624    }
1625 }
1626
1627 static void
1628 update_default_image_param(struct brw_context *brw,
1629                            struct gl_image_unit *u,
1630                            unsigned surface_idx,
1631                            struct brw_image_param *param)
1632 {
1633    memset(param, 0, sizeof(*param));
1634    param->surface_idx = surface_idx;
1635    /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1636     * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1637     * detailed explanation of these parameters.
1638     */
1639    param->swizzling[0] = 0xff;
1640    param->swizzling[1] = 0xff;
1641 }
1642
1643 static void
1644 update_buffer_image_param(struct brw_context *brw,
1645                           struct gl_image_unit *u,
1646                           unsigned surface_idx,
1647                           struct brw_image_param *param)
1648 {
1649    struct gl_buffer_object *obj = u->TexObj->BufferObject;
1650    const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1651    update_default_image_param(brw, u, surface_idx, param);
1652
1653    param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1654    param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1655 }
1656
1657 static unsigned
1658 get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
1659                      unsigned level)
1660 {
1661    if (target == GL_TEXTURE_CUBE_MAP)
1662       return 6;
1663
1664    if (mt->surf.size > 0) {
1665       return target == GL_TEXTURE_3D ?
1666          minify(mt->surf.logical_level0_px.depth, level) :
1667          mt->surf.logical_level0_px.array_len;
1668    }
1669
1670    return target == GL_TEXTURE_3D ?
1671       minify(mt->logical_depth0, level) : mt->logical_depth0;
1672 }
1673
1674 static void
1675 update_image_surface(struct brw_context *brw,
1676                      struct gl_image_unit *u,
1677                      GLenum access,
1678                      unsigned surface_idx,
1679                      uint32_t *surf_offset,
1680                      struct brw_image_param *param)
1681 {
1682    if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1683       struct gl_texture_object *obj = u->TexObj;
1684       const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1685
1686       if (obj->Target == GL_TEXTURE_BUFFER) {
1687          struct intel_buffer_object *intel_obj =
1688             intel_buffer_object(obj->BufferObject);
1689          const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1690                                       _mesa_get_format_bytes(u->_ActualFormat));
1691
1692          brw_emit_buffer_surface_state(
1693             brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1694             format, intel_obj->Base.Size, texel_size,
1695             access != GL_READ_ONLY);
1696
1697          update_buffer_image_param(brw, u, surface_idx, param);
1698
1699       } else {
1700          struct intel_texture_object *intel_obj = intel_texture_object(obj);
1701          struct intel_mipmap_tree *mt = intel_obj->mt;
1702          const unsigned num_layers = u->Layered ?
1703             get_image_num_layers(mt, obj->Target, u->Level) : 1;
1704
1705          struct isl_view view = {
1706             .format = format,
1707             .base_level = obj->MinLevel + u->Level,
1708             .levels = 1,
1709             .base_array_layer = obj->MinLayer + u->_Layer,
1710             .array_len = num_layers,
1711             .swizzle = ISL_SWIZZLE_IDENTITY,
1712             .usage = ISL_SURF_USAGE_STORAGE_BIT,
1713          };
1714
1715          if (format == ISL_FORMAT_RAW) {
1716             brw_emit_buffer_surface_state(
1717                brw, surf_offset, mt->bo, mt->offset,
1718                format, mt->bo->size - mt->offset, 1 /* pitch */,
1719                access != GL_READ_ONLY);
1720
1721          } else {
1722             const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1723             assert(!intel_miptree_has_color_unresolved(mt,
1724                                                        view.base_level, 1,
1725                                                        view.base_array_layer,
1726                                                        view.array_len));
1727             brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1728                                    mt->target, view, tex_mocs[brw->gen],
1729                                    surf_offset, surf_index,
1730                                    I915_GEM_DOMAIN_SAMPLER,
1731                                    access == GL_READ_ONLY ? 0 :
1732                                              I915_GEM_DOMAIN_SAMPLER);
1733          }
1734
1735          struct isl_surf surf;
1736          intel_miptree_get_isl_surf(brw, mt, &surf);
1737
1738          isl_surf_fill_image_param(&brw->isl_dev, param, &surf, &view);
1739          param->surface_idx = surface_idx;
1740       }
1741
1742    } else {
1743       brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1744       update_default_image_param(brw, u, surface_idx, param);
1745    }
1746 }
1747
1748 void
1749 brw_upload_image_surfaces(struct brw_context *brw,
1750                           const struct gl_program *prog,
1751                           struct brw_stage_state *stage_state,
1752                           struct brw_stage_prog_data *prog_data)
1753 {
1754    assert(prog);
1755    struct gl_context *ctx = &brw->ctx;
1756
1757    if (prog->info.num_images) {
1758       for (unsigned i = 0; i < prog->info.num_images; i++) {
1759          struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1760          const unsigned surf_idx = prog_data->binding_table.image_start + i;
1761
1762          update_image_surface(brw, u, prog->sh.ImageAccess[i],
1763                               surf_idx,
1764                               &stage_state->surf_offset[surf_idx],
1765                               &prog_data->image_param[i]);
1766       }
1767
1768       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1769       /* This may have changed the image metadata dependent on the context
1770        * image unit state and passed to the program as uniforms, make sure
1771        * that push and pull constants are reuploaded.
1772        */
1773       brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1774    }
1775 }
1776
1777 static void
1778 brw_upload_wm_image_surfaces(struct brw_context *brw)
1779 {
1780    /* BRW_NEW_FRAGMENT_PROGRAM */
1781    const struct gl_program *wm = brw->fragment_program;
1782
1783    if (wm) {
1784       /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1785       brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1786                                 brw->wm.base.prog_data);
1787    }
1788 }
1789
1790 const struct brw_tracked_state brw_wm_image_surfaces = {
1791    .dirty = {
1792       .mesa = _NEW_TEXTURE,
1793       .brw = BRW_NEW_BATCH |
1794              BRW_NEW_BLORP |
1795              BRW_NEW_FRAGMENT_PROGRAM |
1796              BRW_NEW_FS_PROG_DATA |
1797              BRW_NEW_IMAGE_UNITS
1798    },
1799    .emit = brw_upload_wm_image_surfaces,
1800 };
1801
1802 void
1803 gen4_init_vtable_surface_functions(struct brw_context *brw)
1804 {
1805    brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1806    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1807 }
1808
/**
 * Install the gen6 surface-state hooks into the context vtable.
 *
 * Starts from the gen4 hooks and then replaces the renderbuffer surface
 * updater with brw_update_renderbuffer_surface().
 */
void
gen6_init_vtable_surface_functions(struct brw_context *brw)
{
   gen4_init_vtable_surface_functions(brw);
   /* Must follow the gen4 call, which also assigns this entry. */
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
}
1815
1816 static void
1817 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1818 {
1819    struct gl_context *ctx = &brw->ctx;
1820    /* _NEW_PROGRAM */
1821    struct gl_program *prog =
1822       ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1823    /* BRW_NEW_CS_PROG_DATA */
1824    const struct brw_cs_prog_data *cs_prog_data =
1825       brw_cs_prog_data(brw->cs.base.prog_data);
1826
1827    if (prog && cs_prog_data->uses_num_work_groups) {
1828       const unsigned surf_idx =
1829          cs_prog_data->binding_table.work_groups_start;
1830       uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1831       struct brw_bo *bo;
1832       uint32_t bo_offset;
1833
1834       if (brw->compute.num_work_groups_bo == NULL) {
1835          bo = NULL;
1836          intel_upload_data(brw,
1837                            (void *)brw->compute.num_work_groups,
1838                            3 * sizeof(GLuint),
1839                            sizeof(GLuint),
1840                            &bo,
1841                            &bo_offset);
1842       } else {
1843          bo = brw->compute.num_work_groups_bo;
1844          bo_offset = brw->compute.num_work_groups_offset;
1845       }
1846
1847       brw_emit_buffer_surface_state(brw, surf_offset,
1848                                     bo, bo_offset,
1849                                     ISL_FORMAT_RAW,
1850                                     3 * sizeof(GLuint), 1, true);
1851       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1852    }
1853 }
1854
1855 const struct brw_tracked_state brw_cs_work_groups_surface = {
1856    .dirty = {
1857       .brw = BRW_NEW_BLORP |
1858              BRW_NEW_CS_PROG_DATA |
1859              BRW_NEW_CS_WORK_GROUPS
1860    },
1861    .emit = brw_upload_cs_work_groups_surface,
1862 };