2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keithw@vmware.com>
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
/* Per-surface flags passed through brw_emit_surface_state() and the
 * renderbuffer-surface vtbl hooks.
 */
enum {
   INTEL_RENDERBUFFER_LAYERED = 1 << 0,  /* layered rendering requested */
   INTEL_AUX_BUFFER_DISABLED = 1 << 1,   /* skip MCS/HiZ aux surface setup */
};
/* Memory Object Control State values for texture and render-target
 * surfaces, indexed by hardware generation (used below as
 * tex_mocs[brw->gen] / rb_mocs[brw->gen]).
 * NOTE(review): the initializer entries are missing from this extraction —
 * restore the per-gen MOCS values from the upstream file before building.
 */
63 uint32_t tex_mocs
[] = {
70 uint32_t rb_mocs
[] = {
78 brw_emit_surface_state(struct brw_context
*brw
,
79 struct intel_mipmap_tree
*mt
, uint32_t flags
,
80 GLenum target
, struct isl_view view
,
81 uint32_t mocs
, uint32_t *surf_offset
, int surf_index
,
82 unsigned read_domains
, unsigned write_domains
)
84 uint32_t tile_x
= mt
->level
[0].slice
[0].x_offset
;
85 uint32_t tile_y
= mt
->level
[0].slice
[0].y_offset
;
86 uint32_t offset
= mt
->offset
;
89 intel_miptree_get_isl_surf(brw
, mt
, &surf
);
91 surf
.dim
= get_isl_surf_dim(target
);
93 const enum isl_dim_layout dim_layout
=
94 get_isl_dim_layout(&brw
->screen
->devinfo
, mt
->tiling
, target
,
97 if (surf
.dim_layout
!= dim_layout
) {
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory -- You're entering
100 * dangerous territory, this can only possibly work if you only intended
101 * to access a single level and slice of the texture, and the hardware
102 * supports the tile offset feature in order to allow non-tile-aligned
103 * base offsets, since we'll have to point the hardware to the first
104 * texel of the level instead of relying on the usual base level/layer
107 assert(brw
->has_surface_tile_offset
);
108 assert(view
.levels
== 1 && view
.array_len
== 1);
109 assert(tile_x
== 0 && tile_y
== 0);
111 offset
+= intel_miptree_get_tile_offsets(mt
, view
.base_level
,
112 view
.base_array_layer
,
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l
= view
.base_level
- mt
->first_level
;
117 surf
.logical_level0_px
.width
= minify(surf
.logical_level0_px
.width
, l
);
118 surf
.logical_level0_px
.height
= surf
.dim
<= ISL_SURF_DIM_1D
? 1 :
119 minify(surf
.logical_level0_px
.height
, l
);
120 surf
.logical_level0_px
.depth
= surf
.dim
<= ISL_SURF_DIM_2D
? 1 :
121 minify(surf
.logical_level0_px
.depth
, l
);
123 /* Only the base level and layer can be addressed with the overridden
126 surf
.logical_level0_px
.array_len
= 1;
128 surf
.dim_layout
= dim_layout
;
130 /* The requested slice of the texture is now at the base level and
134 view
.base_array_layer
= 0;
137 union isl_color_value clear_color
= { .u32
= { 0, 0, 0, 0 } };
139 struct brw_bo
*aux_bo
;
140 struct isl_surf
*aux_surf
= NULL
;
141 uint64_t aux_offset
= 0;
142 enum isl_aux_usage aux_usage
= ISL_AUX_USAGE_NONE
;
143 if ((mt
->mcs_buf
|| intel_miptree_sample_with_hiz(brw
, mt
)) &&
144 !(flags
& INTEL_AUX_BUFFER_DISABLED
)) {
145 aux_usage
= intel_miptree_get_aux_isl_usage(brw
, mt
);
148 aux_surf
= &mt
->mcs_buf
->surf
;
150 assert(mt
->mcs_buf
->offset
== 0);
151 aux_bo
= mt
->mcs_buf
->bo
;
152 aux_offset
= mt
->mcs_buf
->bo
->offset64
+ mt
->mcs_buf
->offset
;
154 aux_surf
= &mt
->hiz_buf
->surf
;
156 aux_bo
= mt
->hiz_buf
->bo
;
157 aux_offset
= mt
->hiz_buf
->bo
->offset64
;
160 /* We only really need a clear color if we also have an auxiliary
161 * surface. Without one, it does nothing.
163 clear_color
= mt
->fast_clear_color
;
166 void *state
= brw_state_batch(brw
,
167 brw
->isl_dev
.ss
.size
,
168 brw
->isl_dev
.ss
.align
,
171 isl_surf_fill_state(&brw
->isl_dev
, state
, .surf
= &surf
, .view
= &view
,
172 .address
= mt
->bo
->offset64
+ offset
,
173 .aux_surf
= aux_surf
, .aux_usage
= aux_usage
,
174 .aux_address
= aux_offset
,
175 .mocs
= mocs
, .clear_color
= clear_color
,
176 .x_offset_sa
= tile_x
, .y_offset_sa
= tile_y
);
178 brw_emit_reloc(&brw
->batch
, *surf_offset
+ brw
->isl_dev
.ss
.addr_offset
,
179 mt
->bo
, offset
, read_domains
, write_domains
);
182 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
183 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
184 * contain other control information. Since buffer addresses are always
185 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
186 * an ordinary reloc to do the necessary address translation.
188 assert((aux_offset
& 0xfff) == 0);
189 uint32_t *aux_addr
= state
+ brw
->isl_dev
.ss
.aux_addr_offset
;
190 brw_emit_reloc(&brw
->batch
,
191 *surf_offset
+ brw
->isl_dev
.ss
.aux_addr_offset
,
192 aux_bo
, *aux_addr
- aux_bo
->offset64
,
193 read_domains
, write_domains
);
198 brw_update_renderbuffer_surface(struct brw_context
*brw
,
199 struct gl_renderbuffer
*rb
,
200 uint32_t flags
, unsigned unit
/* unused */,
203 struct gl_context
*ctx
= &brw
->ctx
;
204 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
205 struct intel_mipmap_tree
*mt
= irb
->mt
;
208 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
211 assert(brw_render_target_supported(brw
, rb
));
213 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
214 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
215 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
216 __func__
, _mesa_get_format_name(rb_format
));
219 const unsigned layer_multiplier
=
220 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
221 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
222 MAX2(irb
->mt
->num_samples
, 1) : 1;
224 struct isl_view view
= {
225 .format
= brw
->mesa_to_isl_render_format
[rb_format
],
226 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
228 .base_array_layer
= irb
->mt_layer
/ layer_multiplier
,
229 .array_len
= MAX2(irb
->layer_count
, 1),
230 .swizzle
= ISL_SWIZZLE_IDENTITY
,
231 .usage
= ISL_SURF_USAGE_RENDER_TARGET_BIT
,
235 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
238 I915_GEM_DOMAIN_RENDER
,
239 I915_GEM_DOMAIN_RENDER
);
244 translate_tex_target(GLenum target
)
248 case GL_TEXTURE_1D_ARRAY_EXT
:
249 return BRW_SURFACE_1D
;
251 case GL_TEXTURE_RECTANGLE_NV
:
252 return BRW_SURFACE_2D
;
255 case GL_TEXTURE_2D_ARRAY_EXT
:
256 case GL_TEXTURE_EXTERNAL_OES
:
257 case GL_TEXTURE_2D_MULTISAMPLE
:
258 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY
:
259 return BRW_SURFACE_2D
;
262 return BRW_SURFACE_3D
;
264 case GL_TEXTURE_CUBE_MAP
:
265 case GL_TEXTURE_CUBE_MAP_ARRAY
:
266 return BRW_SURFACE_CUBE
;
269 unreachable("not reached");
274 brw_get_surface_tiling_bits(uint32_t tiling
)
278 return BRW_SURFACE_TILED
;
280 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
288 brw_get_surface_num_multisamples(unsigned num_samples
)
291 return BRW_SURFACE_MULTISAMPLECOUNT_4
;
293 return BRW_SURFACE_MULTISAMPLECOUNT_1
;
297 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
301 brw_get_texture_swizzle(const struct gl_context
*ctx
,
302 const struct gl_texture_object
*t
)
304 const struct gl_texture_image
*img
= t
->Image
[0][t
->BaseLevel
];
306 int swizzles
[SWIZZLE_NIL
+ 1] = {
316 if (img
->_BaseFormat
== GL_DEPTH_COMPONENT
||
317 img
->_BaseFormat
== GL_DEPTH_STENCIL
) {
318 GLenum depth_mode
= t
->DepthMode
;
320 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
321 * with depth component data specified with a sized internal format.
322 * Otherwise, it's left at the old default, GL_LUMINANCE.
324 if (_mesa_is_gles3(ctx
) &&
325 img
->InternalFormat
!= GL_DEPTH_COMPONENT
&&
326 img
->InternalFormat
!= GL_DEPTH_STENCIL
) {
330 switch (depth_mode
) {
332 swizzles
[0] = SWIZZLE_ZERO
;
333 swizzles
[1] = SWIZZLE_ZERO
;
334 swizzles
[2] = SWIZZLE_ZERO
;
335 swizzles
[3] = SWIZZLE_X
;
338 swizzles
[0] = SWIZZLE_X
;
339 swizzles
[1] = SWIZZLE_X
;
340 swizzles
[2] = SWIZZLE_X
;
341 swizzles
[3] = SWIZZLE_ONE
;
344 swizzles
[0] = SWIZZLE_X
;
345 swizzles
[1] = SWIZZLE_X
;
346 swizzles
[2] = SWIZZLE_X
;
347 swizzles
[3] = SWIZZLE_X
;
350 swizzles
[0] = SWIZZLE_X
;
351 swizzles
[1] = SWIZZLE_ZERO
;
352 swizzles
[2] = SWIZZLE_ZERO
;
353 swizzles
[3] = SWIZZLE_ONE
;
358 GLenum datatype
= _mesa_get_format_datatype(img
->TexFormat
);
360 /* If the texture's format is alpha-only, force R, G, and B to
361 * 0.0. Similarly, if the texture's format has no alpha channel,
362 * force the alpha value read to 1.0. This allows for the
363 * implementation to use an RGBA texture for any of these formats
364 * without leaking any unexpected values.
366 switch (img
->_BaseFormat
) {
368 swizzles
[0] = SWIZZLE_ZERO
;
369 swizzles
[1] = SWIZZLE_ZERO
;
370 swizzles
[2] = SWIZZLE_ZERO
;
373 if (t
->_IsIntegerFormat
|| datatype
== GL_SIGNED_NORMALIZED
) {
374 swizzles
[0] = SWIZZLE_X
;
375 swizzles
[1] = SWIZZLE_X
;
376 swizzles
[2] = SWIZZLE_X
;
377 swizzles
[3] = SWIZZLE_ONE
;
380 case GL_LUMINANCE_ALPHA
:
381 if (datatype
== GL_SIGNED_NORMALIZED
) {
382 swizzles
[0] = SWIZZLE_X
;
383 swizzles
[1] = SWIZZLE_X
;
384 swizzles
[2] = SWIZZLE_X
;
385 swizzles
[3] = SWIZZLE_W
;
389 if (datatype
== GL_SIGNED_NORMALIZED
) {
390 swizzles
[0] = SWIZZLE_X
;
391 swizzles
[1] = SWIZZLE_X
;
392 swizzles
[2] = SWIZZLE_X
;
393 swizzles
[3] = SWIZZLE_X
;
399 if (_mesa_get_format_bits(img
->TexFormat
, GL_ALPHA_BITS
) > 0 ||
400 img
->TexFormat
== MESA_FORMAT_RGB_DXT1
||
401 img
->TexFormat
== MESA_FORMAT_SRGB_DXT1
)
402 swizzles
[3] = SWIZZLE_ONE
;
406 return MAKE_SWIZZLE4(swizzles
[GET_SWZ(t
->_Swizzle
, 0)],
407 swizzles
[GET_SWZ(t
->_Swizzle
, 1)],
408 swizzles
[GET_SWZ(t
->_Swizzle
, 2)],
409 swizzles
[GET_SWZ(t
->_Swizzle
, 3)]);
413 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
414 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
421 * which is simply adding 4 then modding by 8 (or anding with 7).
423 * We then may need to apply workarounds for textureGather hardware bugs.
426 swizzle_to_scs(GLenum swizzle
, bool need_green_to_blue
)
428 unsigned scs
= (swizzle
+ 4) & 7;
430 return (need_green_to_blue
&& scs
== HSW_SCS_GREEN
) ? HSW_SCS_BLUE
: scs
;
434 brw_find_matching_rb(const struct gl_framebuffer
*fb
,
435 const struct intel_mipmap_tree
*mt
)
437 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
438 const struct intel_renderbuffer
*irb
=
439 intel_renderbuffer(fb
->_ColorDrawBuffers
[i
]);
441 if (irb
&& irb
->mt
== mt
)
445 return fb
->_NumColorDrawBuffers
;
449 brw_texture_view_sane(const struct brw_context
*brw
,
450 const struct intel_mipmap_tree
*mt
,
451 const struct isl_view
*view
)
453 /* There are special cases only for lossless compression. */
454 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
457 if (isl_format_supports_ccs_e(&brw
->screen
->devinfo
, view
->format
))
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
463 if (intel_miptree_has_color_unresolved(mt
, view
->base_level
, view
->levels
,
464 view
->base_array_layer
,
468 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
469 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
471 if (rb_index
== fb
->_NumColorDrawBuffers
)
474 /* Underlying surface is compressed but it is sampled using a format that
475 * the sampling engine doesn't support as compressed. Compression must be
476 * disabled for both sampling engine and data port in case the same surface
477 * is used also as render target.
479 return brw
->draw_aux_buffer_disabled
[rb_index
];
483 brw_disable_aux_surface(const struct brw_context
*brw
,
484 const struct intel_mipmap_tree
*mt
,
485 const struct isl_view
*view
)
487 /* Nothing to disable. */
491 const bool is_unresolved
= intel_miptree_has_color_unresolved(
492 mt
, view
->base_level
, view
->levels
,
493 view
->base_array_layer
, view
->array_len
);
495 /* There are special cases only for lossless compression. */
496 if (mt
->aux_usage
!= ISL_AUX_USAGE_CCS_E
)
497 return !is_unresolved
;
499 const struct gl_framebuffer
*fb
= brw
->ctx
.DrawBuffer
;
500 const unsigned rb_index
= brw_find_matching_rb(fb
, mt
);
502 /* If we are drawing into this with compression enabled, then we must also
503 * enable compression when texturing from it regardless of
504 * fast_clear_state. If we don't then, after the first draw call with
505 * this setup, there will be data in the CCS which won't get picked up by
506 * subsequent texturing operations as required by ARB_texture_barrier.
507 * Since we don't want to re-emit the binding table or do a resolve
508 * operation every draw call, the easiest thing to do is just enable
509 * compression on the texturing side. This is completely safe to do
510 * since, if compressed texturing weren't allowed, we would have disabled
511 * compression of render targets in whatever_that_function_is_called().
513 if (rb_index
< fb
->_NumColorDrawBuffers
) {
514 if (brw
->draw_aux_buffer_disabled
[rb_index
]) {
515 assert(!is_unresolved
);
518 return brw
->draw_aux_buffer_disabled
[rb_index
];
521 return !is_unresolved
;
525 brw_update_texture_surface(struct gl_context
*ctx
,
527 uint32_t *surf_offset
,
531 struct brw_context
*brw
= brw_context(ctx
);
532 struct gl_texture_object
*obj
= ctx
->Texture
.Unit
[unit
]._Current
;
534 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
535 brw_update_buffer_texture_surface(ctx
, unit
, surf_offset
);
538 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
539 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
542 if (mt
->plane
[plane
- 1] == NULL
)
544 mt
= mt
->plane
[plane
- 1];
547 struct gl_sampler_object
*sampler
= _mesa_get_samplerobj(ctx
, unit
);
548 /* If this is a view with restricted NumLayers, then our effective depth
549 * is not just the miptree depth.
551 const unsigned view_num_layers
=
552 (obj
->Immutable
&& obj
->Target
!= GL_TEXTURE_3D
) ? obj
->NumLayers
:
555 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
556 * texturing functions that return a float, as our code generation always
557 * selects the .x channel (which would always be 0).
559 struct gl_texture_image
*firstImage
= obj
->Image
[0][obj
->BaseLevel
];
560 const bool alpha_depth
= obj
->DepthMode
== GL_ALPHA
&&
561 (firstImage
->_BaseFormat
== GL_DEPTH_COMPONENT
||
562 firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
);
563 const unsigned swizzle
= (unlikely(alpha_depth
) ? SWIZZLE_XYZW
:
564 brw_get_texture_swizzle(&brw
->ctx
, obj
));
566 mesa_format mesa_fmt
= plane
== 0 ? intel_obj
->_Format
: mt
->format
;
567 enum isl_format format
= translate_tex_format(brw
, mesa_fmt
,
568 sampler
->sRGBDecode
);
570 /* Implement gen6 and gen7 gather work-around */
571 bool need_green_to_blue
= false;
573 if (brw
->gen
== 7 && (format
== ISL_FORMAT_R32G32_FLOAT
||
574 format
== ISL_FORMAT_R32G32_SINT
||
575 format
== ISL_FORMAT_R32G32_UINT
)) {
576 format
= ISL_FORMAT_R32G32_FLOAT_LD
;
577 need_green_to_blue
= brw
->is_haswell
;
578 } else if (brw
->gen
== 6) {
579 /* Sandybridge's gather4 message is broken for integer formats.
580 * To work around this, we pretend the surface is UNORM for
581 * 8 or 16-bit formats, and emit shader instructions to recover
582 * the real INT/UINT value. For 32-bit formats, we pretend
583 * the surface is FLOAT, and simply reinterpret the resulting
587 case ISL_FORMAT_R8_SINT
:
588 case ISL_FORMAT_R8_UINT
:
589 format
= ISL_FORMAT_R8_UNORM
;
592 case ISL_FORMAT_R16_SINT
:
593 case ISL_FORMAT_R16_UINT
:
594 format
= ISL_FORMAT_R16_UNORM
;
597 case ISL_FORMAT_R32_SINT
:
598 case ISL_FORMAT_R32_UINT
:
599 format
= ISL_FORMAT_R32_FLOAT
;
608 if (obj
->StencilSampling
&& firstImage
->_BaseFormat
== GL_DEPTH_STENCIL
) {
610 assert(mt
->r8stencil_mt
&& !mt
->stencil_mt
->r8stencil_needs_update
);
611 mt
= mt
->r8stencil_mt
;
615 format
= ISL_FORMAT_R8_UINT
;
616 } else if (brw
->gen
<= 7 && mt
->format
== MESA_FORMAT_S_UINT8
) {
617 assert(mt
->r8stencil_mt
&& !mt
->r8stencil_needs_update
);
618 mt
= mt
->r8stencil_mt
;
619 format
= ISL_FORMAT_R8_UINT
;
622 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
624 struct isl_view view
= {
626 .base_level
= obj
->MinLevel
+ obj
->BaseLevel
,
627 .levels
= intel_obj
->_MaxLevel
- obj
->BaseLevel
+ 1,
628 .base_array_layer
= obj
->MinLayer
,
629 .array_len
= view_num_layers
,
631 .r
= swizzle_to_scs(GET_SWZ(swizzle
, 0), need_green_to_blue
),
632 .g
= swizzle_to_scs(GET_SWZ(swizzle
, 1), need_green_to_blue
),
633 .b
= swizzle_to_scs(GET_SWZ(swizzle
, 2), need_green_to_blue
),
634 .a
= swizzle_to_scs(GET_SWZ(swizzle
, 3), need_green_to_blue
),
636 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
639 if (obj
->Target
== GL_TEXTURE_CUBE_MAP
||
640 obj
->Target
== GL_TEXTURE_CUBE_MAP_ARRAY
)
641 view
.usage
|= ISL_SURF_USAGE_CUBE_BIT
;
643 assert(brw_texture_view_sane(brw
, mt
, &view
));
645 const int flags
= brw_disable_aux_surface(brw
, mt
, &view
) ?
646 INTEL_AUX_BUFFER_DISABLED
: 0;
647 brw_emit_surface_state(brw
, mt
, flags
, mt
->target
, view
,
649 surf_offset
, surf_index
,
650 I915_GEM_DOMAIN_SAMPLER
, 0);
655 brw_emit_buffer_surface_state(struct brw_context
*brw
,
656 uint32_t *out_offset
,
658 unsigned buffer_offset
,
659 unsigned surface_format
,
660 unsigned buffer_size
,
664 uint32_t *dw
= brw_state_batch(brw
,
665 brw
->isl_dev
.ss
.size
,
666 brw
->isl_dev
.ss
.align
,
669 isl_buffer_fill_state(&brw
->isl_dev
, dw
,
670 .address
= (bo
? bo
->offset64
: 0) + buffer_offset
,
672 .format
= surface_format
,
674 .mocs
= tex_mocs
[brw
->gen
]);
677 brw_emit_reloc(&brw
->batch
, *out_offset
+ brw
->isl_dev
.ss
.addr_offset
,
679 I915_GEM_DOMAIN_SAMPLER
,
680 (rw
? I915_GEM_DOMAIN_SAMPLER
: 0));
685 brw_update_buffer_texture_surface(struct gl_context
*ctx
,
687 uint32_t *surf_offset
)
689 struct brw_context
*brw
= brw_context(ctx
);
690 struct gl_texture_object
*tObj
= ctx
->Texture
.Unit
[unit
]._Current
;
691 struct intel_buffer_object
*intel_obj
=
692 intel_buffer_object(tObj
->BufferObject
);
693 uint32_t size
= tObj
->BufferSize
;
694 struct brw_bo
*bo
= NULL
;
695 mesa_format format
= tObj
->_BufferObjectFormat
;
696 const enum isl_format isl_format
= brw_isl_format_for_mesa_format(format
);
697 int texel_size
= _mesa_get_format_bytes(format
);
700 size
= MIN2(size
, intel_obj
->Base
.Size
);
701 bo
= intel_bufferobj_buffer(brw
, intel_obj
, tObj
->BufferOffset
, size
,
705 /* The ARB_texture_buffer_specification says:
707 * "The number of texels in the buffer texture's texel array is given by
709 * floor(<buffer_size> / (<components> * sizeof(<base_type>)),
711 * where <buffer_size> is the size of the buffer object, in basic
712 * machine units and <components> and <base_type> are the element count
713 * and base data type for elements, as specified in Table X.1. The
714 * number of texels in the texel array is then clamped to the
715 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
717 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
718 * so that when ISL divides by stride to obtain the number of texels, that
719 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
721 size
= MIN2(size
, ctx
->Const
.MaxTextureBufferSize
* (unsigned) texel_size
);
723 if (isl_format
== ISL_FORMAT_UNSUPPORTED
) {
724 _mesa_problem(NULL
, "bad format %s for texture buffer\n",
725 _mesa_get_format_name(format
));
728 brw_emit_buffer_surface_state(brw
, surf_offset
, bo
,
737 * Create the constant buffer surface. Vertex/fragment shader constants will be
738 * read from this buffer with Data Port Read instructions/messages.
741 brw_create_constant_surface(struct brw_context
*brw
,
745 uint32_t *out_offset
)
747 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
748 ISL_FORMAT_R32G32B32A32_FLOAT
,
753 * Create the buffer surface. Shader buffer variables will be
754 * read from / write to this buffer with Data Port Read/Write
755 * instructions/messages.
758 brw_create_buffer_surface(struct brw_context
*brw
,
762 uint32_t *out_offset
)
764 /* Use a raw surface so we can reuse existing untyped read/write/atomic
765 * messages. We need these specifically for the fragment shader since they
766 * include a pixel mask header that we need to ensure correct behavior
767 * with helper invocations, which cannot write to the buffer.
769 brw_emit_buffer_surface_state(brw
, out_offset
, bo
, offset
,
775 * Set up a binding table entry for use by stream output logic (transform
778 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
781 brw_update_sol_surface(struct brw_context
*brw
,
782 struct gl_buffer_object
*buffer_obj
,
783 uint32_t *out_offset
, unsigned num_vector_components
,
784 unsigned stride_dwords
, unsigned offset_dwords
)
786 struct intel_buffer_object
*intel_bo
= intel_buffer_object(buffer_obj
);
787 uint32_t offset_bytes
= 4 * offset_dwords
;
788 struct brw_bo
*bo
= intel_bufferobj_buffer(brw
, intel_bo
,
790 buffer_obj
->Size
- offset_bytes
,
792 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
793 uint32_t pitch_minus_1
= 4*stride_dwords
- 1;
794 size_t size_dwords
= buffer_obj
->Size
/ 4;
795 uint32_t buffer_size_minus_1
, width
, height
, depth
, surface_format
;
797 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
798 * too big to map using a single binding table entry?
800 assert((size_dwords
- offset_dwords
) / stride_dwords
801 <= BRW_MAX_NUM_BUFFER_ENTRIES
);
803 if (size_dwords
> offset_dwords
+ num_vector_components
) {
804 /* There is room for at least 1 transform feedback output in the buffer.
805 * Compute the number of additional transform feedback outputs the
806 * buffer has room for.
808 buffer_size_minus_1
=
809 (size_dwords
- offset_dwords
- num_vector_components
) / stride_dwords
;
811 /* There isn't even room for a single transform feedback output in the
812 * buffer. We can't configure the binding table entry to prevent output
813 * entirely; we'll have to rely on the geometry shader to detect
814 * overflow. But to minimize the damage in case of a bug, set up the
815 * binding table entry to just allow a single output.
817 buffer_size_minus_1
= 0;
819 width
= buffer_size_minus_1
& 0x7f;
820 height
= (buffer_size_minus_1
& 0xfff80) >> 7;
821 depth
= (buffer_size_minus_1
& 0x7f00000) >> 20;
823 switch (num_vector_components
) {
825 surface_format
= ISL_FORMAT_R32_FLOAT
;
828 surface_format
= ISL_FORMAT_R32G32_FLOAT
;
831 surface_format
= ISL_FORMAT_R32G32B32_FLOAT
;
834 surface_format
= ISL_FORMAT_R32G32B32A32_FLOAT
;
837 unreachable("Invalid vector size for transform feedback output");
840 surf
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
841 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
|
842 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
843 BRW_SURFACE_RC_READ_WRITE
;
844 surf
[1] = bo
->offset64
+ offset_bytes
; /* reloc */
845 surf
[2] = (width
<< BRW_SURFACE_WIDTH_SHIFT
|
846 height
<< BRW_SURFACE_HEIGHT_SHIFT
);
847 surf
[3] = (depth
<< BRW_SURFACE_DEPTH_SHIFT
|
848 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
852 /* Emit relocation to surface contents. */
853 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, offset_bytes
,
854 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
857 /* Creates a new WM constant buffer reflecting the current fragment program's
858 * constants, if needed by the fragment program.
860 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
864 brw_upload_wm_pull_constants(struct brw_context
*brw
)
866 struct brw_stage_state
*stage_state
= &brw
->wm
.base
;
867 /* BRW_NEW_FRAGMENT_PROGRAM */
868 struct brw_program
*fp
= (struct brw_program
*) brw
->fragment_program
;
869 /* BRW_NEW_FS_PROG_DATA */
870 struct brw_stage_prog_data
*prog_data
= brw
->wm
.base
.prog_data
;
872 _mesa_shader_write_subroutine_indices(&brw
->ctx
, MESA_SHADER_FRAGMENT
);
873 /* _NEW_PROGRAM_CONSTANTS */
874 brw_upload_pull_constants(brw
, BRW_NEW_SURFACES
, &fp
->program
,
875 stage_state
, prog_data
);
878 const struct brw_tracked_state brw_wm_pull_constants
= {
880 .mesa
= _NEW_PROGRAM_CONSTANTS
,
881 .brw
= BRW_NEW_BATCH
|
883 BRW_NEW_FRAGMENT_PROGRAM
|
884 BRW_NEW_FS_PROG_DATA
,
886 .emit
= brw_upload_wm_pull_constants
,
890 * Creates a null renderbuffer surface.
892 * This is used when the shader doesn't write to any color output. An FB
893 * write to target 0 will still be emitted, because that's how the thread is
894 * terminated (and computed depth is returned), so we need to have the
895 * hardware discard the target 0 color output..
898 brw_emit_null_surface_state(struct brw_context
*brw
,
902 uint32_t *out_offset
)
904 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
907 * A null surface will be used in instances where an actual surface is
908 * not bound. When a write message is generated to a null surface, no
909 * actual surface is written to. When a read message (including any
910 * sampling engine message) is generated to a null surface, the result
911 * is all zeros. Note that a null surface type is allowed to be used
912 * with all messages, even if it is not specificially indicated as
913 * supported. All of the remaining fields in surface state are ignored
914 * for null surfaces, with the following exceptions:
916 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
917 * depth buffer’s corresponding state for all render target surfaces,
920 * - Surface Format must be R8G8B8A8_UNORM.
922 unsigned surface_type
= BRW_SURFACE_NULL
;
923 struct brw_bo
*bo
= NULL
;
924 unsigned pitch_minus_1
= 0;
925 uint32_t multisampling_state
= 0;
926 uint32_t *surf
= brw_state_batch(brw
, 6 * 4, 32, out_offset
);
929 /* On Gen6, null render targets seem to cause GPU hangs when
930 * multisampling. So work around this problem by rendering into dummy
933 * To decrease the amount of memory needed by the workaround buffer, we
934 * set its pitch to 128 bytes (the width of a Y tile). This means that
935 * the amount of memory needed for the workaround buffer is
936 * (width_in_tiles + height_in_tiles - 1) tiles.
938 * Note that since the workaround buffer will be interpreted by the
939 * hardware as an interleaved multisampled buffer, we need to compute
940 * width_in_tiles and height_in_tiles by dividing the width and height
941 * by 16 rather than the normal Y-tile size of 32.
943 unsigned width_in_tiles
= ALIGN(width
, 16) / 16;
944 unsigned height_in_tiles
= ALIGN(height
, 16) / 16;
945 unsigned size_needed
= (width_in_tiles
+ height_in_tiles
- 1) * 4096;
946 brw_get_scratch_bo(brw
, &brw
->wm
.multisampled_null_render_target_bo
,
948 bo
= brw
->wm
.multisampled_null_render_target_bo
;
949 surface_type
= BRW_SURFACE_2D
;
951 multisampling_state
= brw_get_surface_num_multisamples(samples
);
954 surf
[0] = (surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
955 ISL_FORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
);
957 surf
[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
|
958 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
|
959 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
|
960 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
);
962 surf
[1] = bo
? bo
->offset64
: 0;
963 surf
[2] = ((width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
964 (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
966 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
969 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
971 surf
[3] = (BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
|
972 pitch_minus_1
<< BRW_SURFACE_PITCH_SHIFT
);
973 surf
[4] = multisampling_state
;
977 brw_emit_reloc(&brw
->batch
, *out_offset
+ 4, bo
, 0,
978 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
983 * Sets up a surface state structure to point at the given region.
984 * While it is only used for the front/back buffer currently, it should be
985 * usable for further buffers when doing ARB_draw_buffer support.
988 gen4_update_renderbuffer_surface(struct brw_context
*brw
,
989 struct gl_renderbuffer
*rb
,
990 uint32_t flags
, unsigned unit
,
993 struct gl_context
*ctx
= &brw
->ctx
;
994 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
995 struct intel_mipmap_tree
*mt
= irb
->mt
;
997 uint32_t tile_x
, tile_y
;
998 enum isl_format format
;
1001 mesa_format rb_format
= _mesa_get_render_format(ctx
, intel_rb_format(irb
));
1002 /* BRW_NEW_FS_PROG_DATA */
1004 assert(!(flags
& INTEL_RENDERBUFFER_LAYERED
));
1005 assert(!(flags
& INTEL_AUX_BUFFER_DISABLED
));
1007 if (rb
->TexImage
&& !brw
->has_surface_tile_offset
) {
1008 intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
);
1010 if (tile_x
!= 0 || tile_y
!= 0) {
1011 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1012 * destination in a miptree unless you actually setup your renderbuffer
1013 * as a miptree and used the fragile lod/array_index/etc. controls to
1014 * select the image. So, instead, we just make a new single-level
1015 * miptree and render into that.
1017 intel_renderbuffer_move_to_temp(brw
, irb
, false);
1018 assert(irb
->align_wa_mt
);
1019 mt
= irb
->align_wa_mt
;
1023 surf
= brw_state_batch(brw
, 6 * 4, 32, &offset
);
1025 format
= brw
->mesa_to_isl_render_format
[rb_format
];
1026 if (unlikely(!brw
->mesa_format_supports_render
[rb_format
])) {
1027 _mesa_problem(ctx
, "%s: renderbuffer format %s unsupported\n",
1028 __func__
, _mesa_get_format_name(rb_format
));
1031 surf
[0] = (BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
1032 format
<< BRW_SURFACE_FORMAT_SHIFT
);
1035 assert(mt
->offset
% mt
->cpp
== 0);
1036 surf
[1] = (intel_renderbuffer_get_tile_offsets(irb
, &tile_x
, &tile_y
) +
1037 mt
->bo
->offset64
+ mt
->offset
);
1039 surf
[2] = ((rb
->Width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1040 (rb
->Height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
);
1042 surf
[3] = (brw_get_surface_tiling_bits(mt
->tiling
) |
1043 (mt
->pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
);
1045 surf
[4] = brw_get_surface_num_multisamples(mt
->num_samples
);
1047 assert(brw
->has_surface_tile_offset
|| (tile_x
== 0 && tile_y
== 0));
1048 /* Note that the low bits of these fields are missing, so
1049 * there's the possibility of getting in trouble.
1051 assert(tile_x
% 4 == 0);
1052 assert(tile_y
% 2 == 0);
1053 surf
[5] = ((tile_x
/ 4) << BRW_SURFACE_X_OFFSET_SHIFT
|
1054 (tile_y
/ 2) << BRW_SURFACE_Y_OFFSET_SHIFT
|
1055 (mt
->valign
== 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE
: 0));
1059 if (!ctx
->Color
.ColorLogicOpEnabled
&& !ctx
->Color
._AdvancedBlendMode
&&
1060 (ctx
->Color
.BlendEnabled
& (1 << unit
)))
1061 surf
[0] |= BRW_SURFACE_BLEND_ENABLED
;
1063 if (!ctx
->Color
.ColorMask
[unit
][0])
1064 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT
;
1065 if (!ctx
->Color
.ColorMask
[unit
][1])
1066 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT
;
1067 if (!ctx
->Color
.ColorMask
[unit
][2])
1068 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT
;
1070 /* As mentioned above, disable writes to the alpha component when the
1071 * renderbuffer is XRGB.
1073 if (ctx
->DrawBuffer
->Visual
.alphaBits
== 0 ||
1074 !ctx
->Color
.ColorMask
[unit
][3]) {
1075 surf
[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT
;
1079 brw_emit_reloc(&brw
->batch
, offset
+ 4, mt
->bo
, surf
[1] - mt
->bo
->offset64
,
1080 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
);
1086 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1089 brw_update_renderbuffer_surfaces(struct brw_context
*brw
,
1090 const struct gl_framebuffer
*fb
,
1091 uint32_t render_target_start
,
1092 uint32_t *surf_offset
)
1095 const unsigned int w
= _mesa_geometric_width(fb
);
1096 const unsigned int h
= _mesa_geometric_height(fb
);
1097 const unsigned int s
= _mesa_geometric_samples(fb
);
1099 /* Update surfaces for drawing buffers */
1100 if (fb
->_NumColorDrawBuffers
>= 1) {
1101 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1102 const uint32_t surf_index
= render_target_start
+ i
;
1103 const int flags
= (_mesa_geometric_layers(fb
) > 0 ?
1104 INTEL_RENDERBUFFER_LAYERED
: 0) |
1105 (brw
->draw_aux_buffer_disabled
[i
] ?
1106 INTEL_AUX_BUFFER_DISABLED
: 0);
1108 if (intel_renderbuffer(fb
->_ColorDrawBuffers
[i
])) {
1109 surf_offset
[surf_index
] =
1110 brw
->vtbl
.update_renderbuffer_surface(
1111 brw
, fb
->_ColorDrawBuffers
[i
], flags
, i
, surf_index
);
1113 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1114 &surf_offset
[surf_index
]);
1118 const uint32_t surf_index
= render_target_start
;
1119 brw
->vtbl
.emit_null_surface_state(brw
, w
, h
, s
,
1120 &surf_offset
[surf_index
]);
1125 update_renderbuffer_surfaces(struct brw_context
*brw
)
1127 const struct gl_context
*ctx
= &brw
->ctx
;
1129 /* BRW_NEW_FS_PROG_DATA */
1130 const struct brw_wm_prog_data
*wm_prog_data
=
1131 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1133 /* _NEW_BUFFERS | _NEW_COLOR */
1134 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1135 brw_update_renderbuffer_surfaces(
1137 wm_prog_data
->binding_table
.render_target_start
,
1138 brw
->wm
.base
.surf_offset
);
1139 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1142 const struct brw_tracked_state brw_renderbuffer_surfaces
= {
1144 .mesa
= _NEW_BUFFERS
|
1146 .brw
= BRW_NEW_BATCH
|
1148 BRW_NEW_FS_PROG_DATA
,
1150 .emit
= update_renderbuffer_surfaces
,
1153 const struct brw_tracked_state gen6_renderbuffer_surfaces
= {
1155 .mesa
= _NEW_BUFFERS
,
1156 .brw
= BRW_NEW_BATCH
|
1159 .emit
= update_renderbuffer_surfaces
,
1163 update_renderbuffer_read_surfaces(struct brw_context
*brw
)
1165 const struct gl_context
*ctx
= &brw
->ctx
;
1167 /* BRW_NEW_FS_PROG_DATA */
1168 const struct brw_wm_prog_data
*wm_prog_data
=
1169 brw_wm_prog_data(brw
->wm
.base
.prog_data
);
1171 /* BRW_NEW_FRAGMENT_PROGRAM */
1172 if (!ctx
->Extensions
.MESA_shader_framebuffer_fetch
&&
1173 brw
->fragment_program
&& brw
->fragment_program
->info
.outputs_read
) {
1175 const struct gl_framebuffer
*fb
= ctx
->DrawBuffer
;
1177 for (unsigned i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
1178 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[i
];
1179 const struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1180 const unsigned surf_index
=
1181 wm_prog_data
->binding_table
.render_target_read_start
+ i
;
1182 uint32_t *surf_offset
= &brw
->wm
.base
.surf_offset
[surf_index
];
1185 const enum isl_format format
= brw
->mesa_to_isl_render_format
[
1186 _mesa_get_render_format(ctx
, intel_rb_format(irb
))];
1187 assert(isl_format_supports_sampling(&brw
->screen
->devinfo
,
1190 /* Override the target of the texture if the render buffer is a
1191 * single slice of a 3D texture (since the minimum array element
1192 * field of the surface state structure is ignored by the sampler
1193 * unit for 3D textures on some hardware), or if the render buffer
1194 * is a 1D array (since shaders always provide the array index
1195 * coordinate at the Z component to avoid state-dependent
1196 * recompiles when changing the texture target of the
1199 const GLenum target
=
1200 (irb
->mt
->target
== GL_TEXTURE_3D
&&
1201 irb
->layer_count
== 1) ? GL_TEXTURE_2D
:
1202 irb
->mt
->target
== GL_TEXTURE_1D_ARRAY
? GL_TEXTURE_2D_ARRAY
:
1205 /* intel_renderbuffer::mt_layer is expressed in sample units for
1206 * the UMS and CMS multisample layouts, but
1207 * intel_renderbuffer::layer_count is expressed in units of whole
1208 * logical layers regardless of the multisample layout.
1210 const unsigned mt_layer_unit
=
1211 (irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
1212 irb
->mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
1213 MAX2(irb
->mt
->num_samples
, 1) : 1;
1215 const struct isl_view view
= {
1217 .base_level
= irb
->mt_level
- irb
->mt
->first_level
,
1219 .base_array_layer
= irb
->mt_layer
/ mt_layer_unit
,
1220 .array_len
= irb
->layer_count
,
1221 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1222 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
1225 const int flags
= brw
->draw_aux_buffer_disabled
[i
] ?
1226 INTEL_AUX_BUFFER_DISABLED
: 0;
1227 brw_emit_surface_state(brw
, irb
->mt
, flags
, target
, view
,
1229 surf_offset
, surf_index
,
1230 I915_GEM_DOMAIN_SAMPLER
, 0);
1233 brw
->vtbl
.emit_null_surface_state(
1234 brw
, _mesa_geometric_width(fb
), _mesa_geometric_height(fb
),
1235 _mesa_geometric_samples(fb
), surf_offset
);
1239 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1243 const struct brw_tracked_state brw_renderbuffer_read_surfaces
= {
1245 .mesa
= _NEW_BUFFERS
,
1246 .brw
= BRW_NEW_BATCH
|
1247 BRW_NEW_FRAGMENT_PROGRAM
|
1248 BRW_NEW_FS_PROG_DATA
,
1250 .emit
= update_renderbuffer_read_surfaces
,
1254 update_stage_texture_surfaces(struct brw_context
*brw
,
1255 const struct gl_program
*prog
,
1256 struct brw_stage_state
*stage_state
,
1257 bool for_gather
, uint32_t plane
)
1262 struct gl_context
*ctx
= &brw
->ctx
;
1264 uint32_t *surf_offset
= stage_state
->surf_offset
;
1266 /* BRW_NEW_*_PROG_DATA */
1268 surf_offset
+= stage_state
->prog_data
->binding_table
.gather_texture_start
;
1270 surf_offset
+= stage_state
->prog_data
->binding_table
.plane_start
[plane
];
1272 unsigned num_samplers
= util_last_bit(prog
->SamplersUsed
);
1273 for (unsigned s
= 0; s
< num_samplers
; s
++) {
1276 if (prog
->SamplersUsed
& (1 << s
)) {
1277 const unsigned unit
= prog
->SamplerUnits
[s
];
1280 if (ctx
->Texture
.Unit
[unit
]._Current
) {
1281 brw_update_texture_surface(ctx
, unit
, surf_offset
+ s
, for_gather
, plane
);
1289 * Construct SURFACE_STATE objects for enabled textures.
1292 brw_update_texture_surfaces(struct brw_context
*brw
)
1294 /* BRW_NEW_VERTEX_PROGRAM */
1295 struct gl_program
*vs
= (struct gl_program
*) brw
->vertex_program
;
1297 /* BRW_NEW_TESS_PROGRAMS */
1298 struct gl_program
*tcs
= (struct gl_program
*) brw
->tess_ctrl_program
;
1299 struct gl_program
*tes
= (struct gl_program
*) brw
->tess_eval_program
;
1301 /* BRW_NEW_GEOMETRY_PROGRAM */
1302 struct gl_program
*gs
= (struct gl_program
*) brw
->geometry_program
;
1304 /* BRW_NEW_FRAGMENT_PROGRAM */
1305 struct gl_program
*fs
= (struct gl_program
*) brw
->fragment_program
;
1308 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, false, 0);
1309 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, false, 0);
1310 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, false, 0);
1311 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, false, 0);
1312 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 0);
1314 /* emit alternate set of surface state for gather. this
1315 * allows the surface format to be overriden for only the
1316 * gather4 messages. */
1318 if (vs
&& vs
->nir
->info
.uses_texture_gather
)
1319 update_stage_texture_surfaces(brw
, vs
, &brw
->vs
.base
, true, 0);
1320 if (tcs
&& tcs
->nir
->info
.uses_texture_gather
)
1321 update_stage_texture_surfaces(brw
, tcs
, &brw
->tcs
.base
, true, 0);
1322 if (tes
&& tes
->nir
->info
.uses_texture_gather
)
1323 update_stage_texture_surfaces(brw
, tes
, &brw
->tes
.base
, true, 0);
1324 if (gs
&& gs
->nir
->info
.uses_texture_gather
)
1325 update_stage_texture_surfaces(brw
, gs
, &brw
->gs
.base
, true, 0);
1326 if (fs
&& fs
->nir
->info
.uses_texture_gather
)
1327 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, true, 0);
1331 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 1);
1332 update_stage_texture_surfaces(brw
, fs
, &brw
->wm
.base
, false, 2);
1335 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1338 const struct brw_tracked_state brw_texture_surfaces
= {
1340 .mesa
= _NEW_TEXTURE
,
1341 .brw
= BRW_NEW_BATCH
|
1343 BRW_NEW_FRAGMENT_PROGRAM
|
1344 BRW_NEW_FS_PROG_DATA
|
1345 BRW_NEW_GEOMETRY_PROGRAM
|
1346 BRW_NEW_GS_PROG_DATA
|
1347 BRW_NEW_TESS_PROGRAMS
|
1348 BRW_NEW_TCS_PROG_DATA
|
1349 BRW_NEW_TES_PROG_DATA
|
1350 BRW_NEW_TEXTURE_BUFFER
|
1351 BRW_NEW_VERTEX_PROGRAM
|
1352 BRW_NEW_VS_PROG_DATA
,
1354 .emit
= brw_update_texture_surfaces
,
1358 brw_update_cs_texture_surfaces(struct brw_context
*brw
)
1360 /* BRW_NEW_COMPUTE_PROGRAM */
1361 struct gl_program
*cs
= (struct gl_program
*) brw
->compute_program
;
1364 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, false, 0);
1366 /* emit alternate set of surface state for gather. this
1367 * allows the surface format to be overriden for only the
1371 if (cs
&& cs
->nir
->info
.uses_texture_gather
)
1372 update_stage_texture_surfaces(brw
, cs
, &brw
->cs
.base
, true, 0);
1375 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1378 const struct brw_tracked_state brw_cs_texture_surfaces
= {
1380 .mesa
= _NEW_TEXTURE
,
1381 .brw
= BRW_NEW_BATCH
|
1383 BRW_NEW_COMPUTE_PROGRAM
,
1385 .emit
= brw_update_cs_texture_surfaces
,
1390 brw_upload_ubo_surfaces(struct brw_context
*brw
, struct gl_program
*prog
,
1391 struct brw_stage_state
*stage_state
,
1392 struct brw_stage_prog_data
*prog_data
)
1394 struct gl_context
*ctx
= &brw
->ctx
;
1399 uint32_t *ubo_surf_offsets
=
1400 &stage_state
->surf_offset
[prog_data
->binding_table
.ubo_start
];
1402 for (int i
= 0; i
< prog
->info
.num_ubos
; i
++) {
1403 struct gl_uniform_buffer_binding
*binding
=
1404 &ctx
->UniformBufferBindings
[prog
->sh
.UniformBlocks
[i
]->Binding
];
1406 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1407 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ubo_surf_offsets
[i
]);
1409 struct intel_buffer_object
*intel_bo
=
1410 intel_buffer_object(binding
->BufferObject
);
1411 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1412 if (!binding
->AutomaticSize
)
1413 size
= MIN2(size
, binding
->Size
);
1415 intel_bufferobj_buffer(brw
, intel_bo
,
1418 brw_create_constant_surface(brw
, bo
, binding
->Offset
,
1420 &ubo_surf_offsets
[i
]);
1424 uint32_t *ssbo_surf_offsets
=
1425 &stage_state
->surf_offset
[prog_data
->binding_table
.ssbo_start
];
1427 for (int i
= 0; i
< prog
->info
.num_ssbos
; i
++) {
1428 struct gl_shader_storage_buffer_binding
*binding
=
1429 &ctx
->ShaderStorageBufferBindings
[prog
->sh
.ShaderStorageBlocks
[i
]->Binding
];
1431 if (binding
->BufferObject
== ctx
->Shared
->NullBufferObj
) {
1432 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, &ssbo_surf_offsets
[i
]);
1434 struct intel_buffer_object
*intel_bo
=
1435 intel_buffer_object(binding
->BufferObject
);
1436 GLsizeiptr size
= binding
->BufferObject
->Size
- binding
->Offset
;
1437 if (!binding
->AutomaticSize
)
1438 size
= MIN2(size
, binding
->Size
);
1440 intel_bufferobj_buffer(brw
, intel_bo
,
1443 brw_create_buffer_surface(brw
, bo
, binding
->Offset
,
1445 &ssbo_surf_offsets
[i
]);
1449 stage_state
->push_constants_dirty
= true;
1451 if (prog
->info
.num_ubos
|| prog
->info
.num_ssbos
)
1452 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1456 brw_upload_wm_ubo_surfaces(struct brw_context
*brw
)
1458 struct gl_context
*ctx
= &brw
->ctx
;
1460 struct gl_program
*prog
= ctx
->FragmentProgram
._Current
;
1462 /* BRW_NEW_FS_PROG_DATA */
1463 brw_upload_ubo_surfaces(brw
, prog
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1466 const struct brw_tracked_state brw_wm_ubo_surfaces
= {
1468 .mesa
= _NEW_PROGRAM
,
1469 .brw
= BRW_NEW_BATCH
|
1471 BRW_NEW_FS_PROG_DATA
|
1472 BRW_NEW_UNIFORM_BUFFER
,
1474 .emit
= brw_upload_wm_ubo_surfaces
,
1478 brw_upload_cs_ubo_surfaces(struct brw_context
*brw
)
1480 struct gl_context
*ctx
= &brw
->ctx
;
1482 struct gl_program
*prog
=
1483 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1485 /* BRW_NEW_CS_PROG_DATA */
1486 brw_upload_ubo_surfaces(brw
, prog
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1489 const struct brw_tracked_state brw_cs_ubo_surfaces
= {
1491 .mesa
= _NEW_PROGRAM
,
1492 .brw
= BRW_NEW_BATCH
|
1494 BRW_NEW_CS_PROG_DATA
|
1495 BRW_NEW_UNIFORM_BUFFER
,
1497 .emit
= brw_upload_cs_ubo_surfaces
,
1501 brw_upload_abo_surfaces(struct brw_context
*brw
,
1502 const struct gl_program
*prog
,
1503 struct brw_stage_state
*stage_state
,
1504 struct brw_stage_prog_data
*prog_data
)
1506 struct gl_context
*ctx
= &brw
->ctx
;
1507 uint32_t *surf_offsets
=
1508 &stage_state
->surf_offset
[prog_data
->binding_table
.abo_start
];
1510 if (prog
->info
.num_abos
) {
1511 for (unsigned i
= 0; i
< prog
->info
.num_abos
; i
++) {
1512 struct gl_atomic_buffer_binding
*binding
=
1513 &ctx
->AtomicBufferBindings
[prog
->sh
.AtomicBuffers
[i
]->Binding
];
1514 struct intel_buffer_object
*intel_bo
=
1515 intel_buffer_object(binding
->BufferObject
);
1517 intel_bufferobj_buffer(brw
, intel_bo
, binding
->Offset
,
1518 intel_bo
->Base
.Size
- binding
->Offset
,
1521 brw_emit_buffer_surface_state(brw
, &surf_offsets
[i
], bo
,
1522 binding
->Offset
, ISL_FORMAT_RAW
,
1523 bo
->size
- binding
->Offset
, 1, true);
1526 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1531 brw_upload_wm_abo_surfaces(struct brw_context
*brw
)
1534 const struct gl_program
*wm
= brw
->fragment_program
;
1537 /* BRW_NEW_FS_PROG_DATA */
1538 brw_upload_abo_surfaces(brw
, wm
, &brw
->wm
.base
, brw
->wm
.base
.prog_data
);
1542 const struct brw_tracked_state brw_wm_abo_surfaces
= {
1544 .mesa
= _NEW_PROGRAM
,
1545 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1548 BRW_NEW_FS_PROG_DATA
,
1550 .emit
= brw_upload_wm_abo_surfaces
,
1554 brw_upload_cs_abo_surfaces(struct brw_context
*brw
)
1557 const struct gl_program
*cp
= brw
->compute_program
;
1560 /* BRW_NEW_CS_PROG_DATA */
1561 brw_upload_abo_surfaces(brw
, cp
, &brw
->cs
.base
, brw
->cs
.base
.prog_data
);
1565 const struct brw_tracked_state brw_cs_abo_surfaces
= {
1567 .mesa
= _NEW_PROGRAM
,
1568 .brw
= BRW_NEW_ATOMIC_BUFFER
|
1571 BRW_NEW_CS_PROG_DATA
,
1573 .emit
= brw_upload_cs_abo_surfaces
,
1577 brw_upload_cs_image_surfaces(struct brw_context
*brw
)
1580 const struct gl_program
*cp
= brw
->compute_program
;
1583 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1584 brw_upload_image_surfaces(brw
, cp
, &brw
->cs
.base
,
1585 brw
->cs
.base
.prog_data
);
1589 const struct brw_tracked_state brw_cs_image_surfaces
= {
1591 .mesa
= _NEW_TEXTURE
| _NEW_PROGRAM
,
1592 .brw
= BRW_NEW_BATCH
|
1594 BRW_NEW_CS_PROG_DATA
|
1597 .emit
= brw_upload_cs_image_surfaces
,
1601 get_image_format(struct brw_context
*brw
, mesa_format format
, GLenum access
)
1603 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1604 enum isl_format hw_format
= brw_isl_format_for_mesa_format(format
);
1605 if (access
== GL_WRITE_ONLY
) {
1607 } else if (isl_has_matching_typed_storage_image_format(devinfo
, hw_format
)) {
1608 /* Typed surface reads support a very limited subset of the shader
1609 * image formats. Translate it into the closest format the
1610 * hardware supports.
1612 return isl_lower_storage_image_format(devinfo
, hw_format
);
1614 /* The hardware doesn't actually support a typed format that we can use
1615 * so we have to fall back to untyped read/write messages.
1617 return ISL_FORMAT_RAW
;
1622 update_default_image_param(struct brw_context
*brw
,
1623 struct gl_image_unit
*u
,
1624 unsigned surface_idx
,
1625 struct brw_image_param
*param
)
1627 memset(param
, 0, sizeof(*param
));
1628 param
->surface_idx
= surface_idx
;
1629 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1630 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1631 * detailed explanation of these parameters.
1633 param
->swizzling
[0] = 0xff;
1634 param
->swizzling
[1] = 0xff;
1638 update_buffer_image_param(struct brw_context
*brw
,
1639 struct gl_image_unit
*u
,
1640 unsigned surface_idx
,
1641 struct brw_image_param
*param
)
1643 struct gl_buffer_object
*obj
= u
->TexObj
->BufferObject
;
1644 const uint32_t size
= MIN2((uint32_t)u
->TexObj
->BufferSize
, obj
->Size
);
1645 update_default_image_param(brw
, u
, surface_idx
, param
);
1647 param
->size
[0] = size
/ _mesa_get_format_bytes(u
->_ActualFormat
);
1648 param
->stride
[0] = _mesa_get_format_bytes(u
->_ActualFormat
);
1652 update_texture_image_param(struct brw_context
*brw
,
1653 struct gl_image_unit
*u
,
1654 unsigned surface_idx
,
1655 struct brw_image_param
*param
)
1657 struct intel_mipmap_tree
*mt
= intel_texture_object(u
->TexObj
)->mt
;
1659 update_default_image_param(brw
, u
, surface_idx
, param
);
1661 param
->size
[0] = minify(mt
->logical_width0
, u
->Level
);
1662 param
->size
[1] = minify(mt
->logical_height0
, u
->Level
);
1663 param
->size
[2] = (!u
->Layered
? 1 :
1664 u
->TexObj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1665 u
->TexObj
->Target
== GL_TEXTURE_3D
?
1666 minify(mt
->logical_depth0
, u
->Level
) :
1667 mt
->logical_depth0
);
1669 intel_miptree_get_image_offset(mt
, u
->Level
, u
->_Layer
,
1673 param
->stride
[0] = mt
->cpp
;
1674 param
->stride
[1] = mt
->pitch
/ mt
->cpp
;
1676 brw_miptree_get_horizontal_slice_pitch(brw
, mt
, u
->Level
);
1678 brw_miptree_get_vertical_slice_pitch(brw
, mt
, u
->Level
);
1680 if (mt
->tiling
== I915_TILING_X
) {
1681 /* An X tile is a rectangular block of 512x8 bytes. */
1682 param
->tiling
[0] = _mesa_logbase2(512 / mt
->cpp
);
1683 param
->tiling
[1] = _mesa_logbase2(8);
1685 if (brw
->has_swizzling
) {
1686 /* Right shifts required to swizzle bits 9 and 10 of the memory
1687 * address with bit 6.
1689 param
->swizzling
[0] = 3;
1690 param
->swizzling
[1] = 4;
1692 } else if (mt
->tiling
== I915_TILING_Y
) {
1693 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1694 * different to the layout of an X-tiled surface, we simply pretend that
1695 * the surface is broken up in a number of smaller 16Bx32 tiles, each
1696 * one arranged in X-major order just like is the case for X-tiling.
1698 param
->tiling
[0] = _mesa_logbase2(16 / mt
->cpp
);
1699 param
->tiling
[1] = _mesa_logbase2(32);
1701 if (brw
->has_swizzling
) {
1702 /* Right shift required to swizzle bit 9 of the memory address with
1705 param
->swizzling
[0] = 3;
1709 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1710 * address calculation algorithm (emit_address_calculation() in
1711 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1712 * modulus equal to the LOD.
1714 param
->tiling
[2] = (u
->TexObj
->Target
== GL_TEXTURE_3D
? u
->Level
:
1719 update_image_surface(struct brw_context
*brw
,
1720 struct gl_image_unit
*u
,
1722 unsigned surface_idx
,
1723 uint32_t *surf_offset
,
1724 struct brw_image_param
*param
)
1726 if (_mesa_is_image_unit_valid(&brw
->ctx
, u
)) {
1727 struct gl_texture_object
*obj
= u
->TexObj
;
1728 const unsigned format
= get_image_format(brw
, u
->_ActualFormat
, access
);
1730 if (obj
->Target
== GL_TEXTURE_BUFFER
) {
1731 struct intel_buffer_object
*intel_obj
=
1732 intel_buffer_object(obj
->BufferObject
);
1733 const unsigned texel_size
= (format
== ISL_FORMAT_RAW
? 1 :
1734 _mesa_get_format_bytes(u
->_ActualFormat
));
1736 brw_emit_buffer_surface_state(
1737 brw
, surf_offset
, intel_obj
->buffer
, obj
->BufferOffset
,
1738 format
, intel_obj
->Base
.Size
, texel_size
,
1739 access
!= GL_READ_ONLY
);
1741 update_buffer_image_param(brw
, u
, surface_idx
, param
);
1744 struct intel_texture_object
*intel_obj
= intel_texture_object(obj
);
1745 struct intel_mipmap_tree
*mt
= intel_obj
->mt
;
1747 if (format
== ISL_FORMAT_RAW
) {
1748 brw_emit_buffer_surface_state(
1749 brw
, surf_offset
, mt
->bo
, mt
->offset
,
1750 format
, mt
->bo
->size
- mt
->offset
, 1 /* pitch */,
1751 access
!= GL_READ_ONLY
);
1754 const unsigned num_layers
= (!u
->Layered
? 1 :
1755 obj
->Target
== GL_TEXTURE_CUBE_MAP
? 6 :
1756 mt
->logical_depth0
);
1758 struct isl_view view
= {
1760 .base_level
= obj
->MinLevel
+ u
->Level
,
1762 .base_array_layer
= obj
->MinLayer
+ u
->_Layer
,
1763 .array_len
= num_layers
,
1764 .swizzle
= ISL_SWIZZLE_IDENTITY
,
1765 .usage
= ISL_SURF_USAGE_STORAGE_BIT
,
1768 const int surf_index
= surf_offset
- &brw
->wm
.base
.surf_offset
[0];
1769 assert(!intel_miptree_has_color_unresolved(mt
,
1771 view
.base_array_layer
,
1773 brw_emit_surface_state(brw
, mt
, INTEL_AUX_BUFFER_DISABLED
,
1774 mt
->target
, view
, tex_mocs
[brw
->gen
],
1775 surf_offset
, surf_index
,
1776 I915_GEM_DOMAIN_SAMPLER
,
1777 access
== GL_READ_ONLY
? 0 :
1778 I915_GEM_DOMAIN_SAMPLER
);
1781 update_texture_image_param(brw
, u
, surface_idx
, param
);
1785 brw
->vtbl
.emit_null_surface_state(brw
, 1, 1, 1, surf_offset
);
1786 update_default_image_param(brw
, u
, surface_idx
, param
);
1791 brw_upload_image_surfaces(struct brw_context
*brw
,
1792 const struct gl_program
*prog
,
1793 struct brw_stage_state
*stage_state
,
1794 struct brw_stage_prog_data
*prog_data
)
1797 struct gl_context
*ctx
= &brw
->ctx
;
1799 if (prog
->info
.num_images
) {
1800 for (unsigned i
= 0; i
< prog
->info
.num_images
; i
++) {
1801 struct gl_image_unit
*u
= &ctx
->ImageUnits
[prog
->sh
.ImageUnits
[i
]];
1802 const unsigned surf_idx
= prog_data
->binding_table
.image_start
+ i
;
1804 update_image_surface(brw
, u
, prog
->sh
.ImageAccess
[i
],
1806 &stage_state
->surf_offset
[surf_idx
],
1807 &prog_data
->image_param
[i
]);
1810 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1811 /* This may have changed the image metadata dependent on the context
1812 * image unit state and passed to the program as uniforms, make sure
1813 * that push and pull constants are reuploaded.
1815 brw
->NewGLState
|= _NEW_PROGRAM_CONSTANTS
;
1820 brw_upload_wm_image_surfaces(struct brw_context
*brw
)
1822 /* BRW_NEW_FRAGMENT_PROGRAM */
1823 const struct gl_program
*wm
= brw
->fragment_program
;
1826 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1827 brw_upload_image_surfaces(brw
, wm
, &brw
->wm
.base
,
1828 brw
->wm
.base
.prog_data
);
1832 const struct brw_tracked_state brw_wm_image_surfaces
= {
1834 .mesa
= _NEW_TEXTURE
,
1835 .brw
= BRW_NEW_BATCH
|
1837 BRW_NEW_FRAGMENT_PROGRAM
|
1838 BRW_NEW_FS_PROG_DATA
|
1841 .emit
= brw_upload_wm_image_surfaces
,
1845 gen4_init_vtable_surface_functions(struct brw_context
*brw
)
1847 brw
->vtbl
.update_renderbuffer_surface
= gen4_update_renderbuffer_surface
;
1848 brw
->vtbl
.emit_null_surface_state
= brw_emit_null_surface_state
;
1852 gen6_init_vtable_surface_functions(struct brw_context
*brw
)
1854 gen4_init_vtable_surface_functions(brw
);
1855 brw
->vtbl
.update_renderbuffer_surface
= brw_update_renderbuffer_surface
;
1859 brw_upload_cs_work_groups_surface(struct brw_context
*brw
)
1861 struct gl_context
*ctx
= &brw
->ctx
;
1863 struct gl_program
*prog
=
1864 ctx
->_Shader
->CurrentProgram
[MESA_SHADER_COMPUTE
];
1865 /* BRW_NEW_CS_PROG_DATA */
1866 const struct brw_cs_prog_data
*cs_prog_data
=
1867 brw_cs_prog_data(brw
->cs
.base
.prog_data
);
1869 if (prog
&& cs_prog_data
->uses_num_work_groups
) {
1870 const unsigned surf_idx
=
1871 cs_prog_data
->binding_table
.work_groups_start
;
1872 uint32_t *surf_offset
= &brw
->cs
.base
.surf_offset
[surf_idx
];
1876 if (brw
->compute
.num_work_groups_bo
== NULL
) {
1878 intel_upload_data(brw
,
1879 (void *)brw
->compute
.num_work_groups
,
1885 bo
= brw
->compute
.num_work_groups_bo
;
1886 bo_offset
= brw
->compute
.num_work_groups_offset
;
1889 brw_emit_buffer_surface_state(brw
, surf_offset
,
1892 3 * sizeof(GLuint
), 1, true);
1893 brw
->ctx
.NewDriverState
|= BRW_NEW_SURFACES
;
1897 const struct brw_tracked_state brw_cs_work_groups_surface
= {
1899 .brw
= BRW_NEW_BLORP
|
1900 BRW_NEW_CS_PROG_DATA
|
1901 BRW_NEW_CS_WORK_GROUPS
1903 .emit
= brw_upload_cs_work_groups_surface
,