i965: Rename some vague format members of brw_context
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "compiler/nir/nir.h"
34 #include "main/context.h"
35 #include "main/blend.h"
36 #include "main/mtypes.h"
37 #include "main/samplerobj.h"
38 #include "main/shaderimage.h"
39 #include "main/teximage.h"
40 #include "program/prog_parameter.h"
41 #include "program/prog_instruction.h"
42 #include "main/framebuffer.h"
43 #include "main/shaderapi.h"
44
45 #include "isl/isl.h"
46
47 #include "intel_mipmap_tree.h"
48 #include "intel_batchbuffer.h"
49 #include "intel_tex.h"
50 #include "intel_fbo.h"
51 #include "intel_buffer_objects.h"
52
53 #include "brw_context.h"
54 #include "brw_state.h"
55 #include "brw_defines.h"
56 #include "brw_wm.h"
57
58 enum {
59 INTEL_RENDERBUFFER_LAYERED = 1 << 0,
60 INTEL_AUX_BUFFER_DISABLED = 1 << 1,
61 };
62
63 uint32_t tex_mocs[] = {
64 [7] = GEN7_MOCS_L3,
65 [8] = BDW_MOCS_WB,
66 [9] = SKL_MOCS_WB,
67 [10] = CNL_MOCS_WB,
68 };
69
70 uint32_t rb_mocs[] = {
71 [7] = GEN7_MOCS_L3,
72 [8] = BDW_MOCS_PTE,
73 [9] = SKL_MOCS_PTE,
74 [10] = CNL_MOCS_PTE,
75 };
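/* These tables are indexed by hardware generation (brw->gen).  As an
 * illustration, on a gen9 part a texture surface picks up tex_mocs[9]
 * (SKL_MOCS_WB) while a render target picks up rb_mocs[9] (SKL_MOCS_PTE);
 * the chosen value is simply forwarded as the mocs argument of
 * brw_emit_surface_state() below.
 */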
76
77 static void
78 brw_emit_surface_state(struct brw_context *brw,
79 struct intel_mipmap_tree *mt, uint32_t flags,
80 GLenum target, struct isl_view view,
81 uint32_t mocs, uint32_t *surf_offset, int surf_index,
82 unsigned read_domains, unsigned write_domains)
83 {
84 uint32_t tile_x = mt->level[0].slice[0].x_offset;
85 uint32_t tile_y = mt->level[0].slice[0].y_offset;
86 uint32_t offset = mt->offset;
87
88 struct isl_surf surf;
89 intel_miptree_get_isl_surf(brw, mt, &surf);
90
91 surf.dim = get_isl_surf_dim(target);
92
93 const enum isl_dim_layout dim_layout =
94 get_isl_dim_layout(&brw->screen->devinfo, mt->tiling, target,
95 mt->array_layout);
96
97 if (surf.dim_layout != dim_layout) {
98 /* The layout of the specified texture target is not compatible with the
99 * actual layout of the miptree structure in memory.  You're entering
100 * dangerous territory: this can only work if you intend to access a
101 * single level and slice of the texture, and if the hardware supports
102 * the tile offset feature in order to allow non-tile-aligned base
103 * offsets, since we'll have to point the hardware to the first texel
104 * of the level instead of relying on the usual base level/layer
105 * controls.
106 */
107 assert(brw->has_surface_tile_offset);
108 assert(view.levels == 1 && view.array_len == 1);
109 assert(tile_x == 0 && tile_y == 0);
110
111 offset += intel_miptree_get_tile_offsets(mt, view.base_level,
112 view.base_array_layer,
113 &tile_x, &tile_y);
114
115 /* Minify the logical dimensions of the texture. */
116 const unsigned l = view.base_level - mt->first_level;
117 surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
118 surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
119 minify(surf.logical_level0_px.height, l);
120 surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
121 minify(surf.logical_level0_px.depth, l);
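/* For example (hypothetical sizes): viewing base_level 2 of a 256x128 2D
 * miptree whose first_level is 0 gives l == 2, so the overridden surface
 * is described as a single 64x32 level.
 */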
122
123 /* Only the base level and layer can be addressed with the overridden
124 * layout.
125 */
126 surf.logical_level0_px.array_len = 1;
127 surf.levels = 1;
128 surf.dim_layout = dim_layout;
129
130 /* The requested slice of the texture is now at the base level and
131 * layer.
132 */
133 view.base_level = 0;
134 view.base_array_layer = 0;
135 }
136
137 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
138
139 struct brw_bo *aux_bo;
140 struct isl_surf *aux_surf = NULL;
141 uint64_t aux_offset = 0;
142 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
143 if ((mt->mcs_buf || intel_miptree_sample_with_hiz(brw, mt)) &&
144 !(flags & INTEL_AUX_BUFFER_DISABLED)) {
145 aux_usage = intel_miptree_get_aux_isl_usage(brw, mt);
146
147 if (mt->mcs_buf) {
148 aux_surf = &mt->mcs_buf->surf;
149
150 assert(mt->mcs_buf->offset == 0);
151 aux_bo = mt->mcs_buf->bo;
152 aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
153 } else {
154 aux_surf = &mt->hiz_buf->surf;
155
156 aux_bo = mt->hiz_buf->bo;
157 aux_offset = mt->hiz_buf->bo->offset64;
158 }
159
160 /* We only really need a clear color if we also have an auxiliary
161 * surface. Without one, it does nothing.
162 */
163 clear_color = mt->fast_clear_color;
164 }
165
166 void *state = brw_state_batch(brw,
167 brw->isl_dev.ss.size,
168 brw->isl_dev.ss.align,
169 surf_offset);
170
171 isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
172 .address = mt->bo->offset64 + offset,
173 .aux_surf = aux_surf, .aux_usage = aux_usage,
174 .aux_address = aux_offset,
175 .mocs = mocs, .clear_color = clear_color,
176 .x_offset_sa = tile_x, .y_offset_sa = tile_y);
177
178 brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
179 mt->bo, offset, read_domains, write_domains);
180
181 if (aux_surf) {
182 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
183 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
184 * contain other control information. Since buffer addresses are always
185 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
186 * an ordinary reloc to do the necessary address translation.
187 */
188 assert((aux_offset & 0xfff) == 0);
189 uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
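/* isl_surf_fill_state() above wrote the presumed aux address (aux_offset,
 * i.e. the aux BO's offset64 plus any offset within it) into this DWORD,
 * OR'd with the control bits in its low 12 bits, so subtracting
 * aux_bo->offset64 leaves exactly the reloc delta we want: the offset
 * within the BO plus the control bits.
 */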
190 brw_emit_reloc(&brw->batch,
191 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
192 aux_bo, *aux_addr - aux_bo->offset64,
193 read_domains, write_domains);
194 }
195 }
196
197 uint32_t
198 brw_update_renderbuffer_surface(struct brw_context *brw,
199 struct gl_renderbuffer *rb,
200 uint32_t flags, unsigned unit /* unused */,
201 uint32_t surf_index)
202 {
203 struct gl_context *ctx = &brw->ctx;
204 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
205 struct intel_mipmap_tree *mt = irb->mt;
206
207 if (brw->gen < 9) {
208 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
209 }
210
211 assert(brw_render_target_supported(brw, rb));
212
213 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
214 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
215 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
216 __func__, _mesa_get_format_name(rb_format));
217 }
218
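/* With the UMS and CMS multisample layouts irb->mt_layer is expressed in
 * sample units rather than whole logical layers (see the matching comment
 * in update_renderbuffer_read_surfaces()), so e.g. a 4x multisampled
 * renderbuffer divides mt_layer by 4 to recover the logical layer index.
 */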
219 const unsigned layer_multiplier =
220 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
221 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
222 MAX2(irb->mt->num_samples, 1) : 1;
223
224 struct isl_view view = {
225 .format = brw->mesa_to_isl_render_format[rb_format],
226 .base_level = irb->mt_level - irb->mt->first_level,
227 .levels = 1,
228 .base_array_layer = irb->mt_layer / layer_multiplier,
229 .array_len = MAX2(irb->layer_count, 1),
230 .swizzle = ISL_SWIZZLE_IDENTITY,
231 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
232 };
233
234 uint32_t offset;
235 brw_emit_surface_state(brw, mt, flags, mt->target, view,
236 rb_mocs[brw->gen],
237 &offset, surf_index,
238 I915_GEM_DOMAIN_RENDER,
239 I915_GEM_DOMAIN_RENDER);
240 return offset;
241 }
242
243 GLuint
244 translate_tex_target(GLenum target)
245 {
246 switch (target) {
247 case GL_TEXTURE_1D:
248 case GL_TEXTURE_1D_ARRAY_EXT:
249 return BRW_SURFACE_1D;
250
251 case GL_TEXTURE_RECTANGLE_NV:
252 return BRW_SURFACE_2D;
253
254 case GL_TEXTURE_2D:
255 case GL_TEXTURE_2D_ARRAY_EXT:
256 case GL_TEXTURE_EXTERNAL_OES:
257 case GL_TEXTURE_2D_MULTISAMPLE:
258 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
259 return BRW_SURFACE_2D;
260
261 case GL_TEXTURE_3D:
262 return BRW_SURFACE_3D;
263
264 case GL_TEXTURE_CUBE_MAP:
265 case GL_TEXTURE_CUBE_MAP_ARRAY:
266 return BRW_SURFACE_CUBE;
267
268 default:
269 unreachable("not reached");
270 }
271 }
272
273 uint32_t
274 brw_get_surface_tiling_bits(uint32_t tiling)
275 {
276 switch (tiling) {
277 case I915_TILING_X:
278 return BRW_SURFACE_TILED;
279 case I915_TILING_Y:
280 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
281 default:
282 return 0;
283 }
284 }
285
286
287 uint32_t
288 brw_get_surface_num_multisamples(unsigned num_samples)
289 {
290 if (num_samples > 1)
291 return BRW_SURFACE_MULTISAMPLECOUNT_4;
292 else
293 return BRW_SURFACE_MULTISAMPLECOUNT_1;
294 }
295
296 /**
297 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
298 * swizzling.
299 */
300 int
301 brw_get_texture_swizzle(const struct gl_context *ctx,
302 const struct gl_texture_object *t)
303 {
304 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
305
306 int swizzles[SWIZZLE_NIL + 1] = {
307 SWIZZLE_X,
308 SWIZZLE_Y,
309 SWIZZLE_Z,
310 SWIZZLE_W,
311 SWIZZLE_ZERO,
312 SWIZZLE_ONE,
313 SWIZZLE_NIL
314 };
315
316 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
317 img->_BaseFormat == GL_DEPTH_STENCIL) {
318 GLenum depth_mode = t->DepthMode;
319
320 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
321 * with depth component data specified with a sized internal format.
322 * Otherwise, it's left at the old default, GL_LUMINANCE.
323 */
324 if (_mesa_is_gles3(ctx) &&
325 img->InternalFormat != GL_DEPTH_COMPONENT &&
326 img->InternalFormat != GL_DEPTH_STENCIL) {
327 depth_mode = GL_RED;
328 }
329
330 switch (depth_mode) {
331 case GL_ALPHA:
332 swizzles[0] = SWIZZLE_ZERO;
333 swizzles[1] = SWIZZLE_ZERO;
334 swizzles[2] = SWIZZLE_ZERO;
335 swizzles[3] = SWIZZLE_X;
336 break;
337 case GL_LUMINANCE:
338 swizzles[0] = SWIZZLE_X;
339 swizzles[1] = SWIZZLE_X;
340 swizzles[2] = SWIZZLE_X;
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 case GL_INTENSITY:
344 swizzles[0] = SWIZZLE_X;
345 swizzles[1] = SWIZZLE_X;
346 swizzles[2] = SWIZZLE_X;
347 swizzles[3] = SWIZZLE_X;
348 break;
349 case GL_RED:
350 swizzles[0] = SWIZZLE_X;
351 swizzles[1] = SWIZZLE_ZERO;
352 swizzles[2] = SWIZZLE_ZERO;
353 swizzles[3] = SWIZZLE_ONE;
354 break;
355 }
356 }
357
358 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
359
360 /* If the texture's format is alpha-only, force R, G, and B to
361 * 0.0. Similarly, if the texture's format has no alpha channel,
362 * force the alpha value read to 1.0. This allows the
363 * implementation to use an RGBA texture for any of these formats
364 * without leaking any unexpected values.
365 */
366 switch (img->_BaseFormat) {
367 case GL_ALPHA:
368 swizzles[0] = SWIZZLE_ZERO;
369 swizzles[1] = SWIZZLE_ZERO;
370 swizzles[2] = SWIZZLE_ZERO;
371 break;
372 case GL_LUMINANCE:
373 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
374 swizzles[0] = SWIZZLE_X;
375 swizzles[1] = SWIZZLE_X;
376 swizzles[2] = SWIZZLE_X;
377 swizzles[3] = SWIZZLE_ONE;
378 }
379 break;
380 case GL_LUMINANCE_ALPHA:
381 if (datatype == GL_SIGNED_NORMALIZED) {
382 swizzles[0] = SWIZZLE_X;
383 swizzles[1] = SWIZZLE_X;
384 swizzles[2] = SWIZZLE_X;
385 swizzles[3] = SWIZZLE_W;
386 }
387 break;
388 case GL_INTENSITY:
389 if (datatype == GL_SIGNED_NORMALIZED) {
390 swizzles[0] = SWIZZLE_X;
391 swizzles[1] = SWIZZLE_X;
392 swizzles[2] = SWIZZLE_X;
393 swizzles[3] = SWIZZLE_X;
394 }
395 break;
396 case GL_RED:
397 case GL_RG:
398 case GL_RGB:
399 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
400 img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
401 img->TexFormat == MESA_FORMAT_SRGB_DXT1)
402 swizzles[3] = SWIZZLE_ONE;
403 break;
404 }
405
406 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
407 swizzles[GET_SWZ(t->_Swizzle, 1)],
408 swizzles[GET_SWZ(t->_Swizzle, 2)],
409 swizzles[GET_SWZ(t->_Swizzle, 3)]);
410 }
411
412 /**
413 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
414 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
415 *
416 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
417 * 0 1 2 3 4 5
418 * 4 5 6 7 0 1
419 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
420 *
421 * which is simply adding 4 then modding by 8 (or anding with 7).
422 *
423 * We then may need to apply workarounds for textureGather hardware bugs.
424 */
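/* Worked examples of that mapping: SWIZZLE_X is 0, so (0 + 4) & 7 == 4,
 * i.e. SCS_RED; SWIZZLE_ZERO is 4, so (4 + 4) & 7 == 0, i.e. SCS_ZERO.
 * With need_green_to_blue set, SWIZZLE_Y maps to HSW_SCS_GREEN and is
 * then swapped for HSW_SCS_BLUE by the helper below.
 */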
425 static unsigned
426 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
427 {
428 unsigned scs = (swizzle + 4) & 7;
429
430 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
431 }
432
433 static unsigned
434 brw_find_matching_rb(const struct gl_framebuffer *fb,
435 const struct intel_mipmap_tree *mt)
436 {
437 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
438 const struct intel_renderbuffer *irb =
439 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
440
441 if (irb && irb->mt == mt)
442 return i;
443 }
444
445 return fb->_NumColorDrawBuffers;
446 }
447
448 static inline bool
449 brw_texture_view_sane(const struct brw_context *brw,
450 const struct intel_mipmap_tree *mt,
451 const struct isl_view *view)
452 {
453 /* There are special cases only for lossless compression. */
454 if (!intel_miptree_is_lossless_compressed(brw, mt))
455 return true;
456
457 if (isl_format_supports_ccs_e(&brw->screen->devinfo, view->format))
458 return true;
459
460 /* Logic elsewhere needs to take care to resolve the color buffer prior
461 * to sampling it as non-compressed.
462 */
463 if (intel_miptree_has_color_unresolved(mt, view->base_level, view->levels,
464 view->base_array_layer,
465 view->array_len))
466 return false;
467
468 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
469 const unsigned rb_index = brw_find_matching_rb(fb, mt);
470
471 if (rb_index == fb->_NumColorDrawBuffers)
472 return true;
473
474 /* The underlying surface is compressed but it is sampled using a format
475 * that the sampling engine doesn't support as compressed. Compression must
476 * be disabled for both the sampling engine and the data port in case the
477 * same surface is also used as a render target.
478 */
479 return brw->draw_aux_buffer_disabled[rb_index];
480 }
481
482 static bool
483 brw_disable_aux_surface(const struct brw_context *brw,
484 const struct intel_mipmap_tree *mt,
485 const struct isl_view *view)
486 {
487 /* Nothing to disable. */
488 if (!mt->mcs_buf)
489 return false;
490
491 const bool is_unresolved = intel_miptree_has_color_unresolved(
492 mt, view->base_level, view->levels,
493 view->base_array_layer, view->array_len);
494
495 /* There are special cases only for lossless compression. */
496 if (!intel_miptree_is_lossless_compressed(brw, mt))
497 return !is_unresolved;
498
499 const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
500 const unsigned rb_index = brw_find_matching_rb(fb, mt);
501
502 /* If we are drawing into this with compression enabled, then we must also
503 * enable compression when texturing from it regardless of
504 fast_clear_state. If we don't, then after the first draw call with
505 * this setup, there will be data in the CCS which won't get picked up by
506 * subsequent texturing operations as required by ARB_texture_barrier.
507 * Since we don't want to re-emit the binding table or do a resolve
508 * operation every draw call, the easiest thing to do is just enable
509 * compression on the texturing side. This is completely safe to do
510 * since, if compressed texturing weren't allowed, we would have disabled
511 * compression of render targets in whatever_that_function_is_called().
512 */
513 if (rb_index < fb->_NumColorDrawBuffers) {
514 if (brw->draw_aux_buffer_disabled[rb_index]) {
515 assert(!is_unresolved);
516 }
517
518 return brw->draw_aux_buffer_disabled[rb_index];
519 }
520
521 return !is_unresolved;
522 }
523
524 void
525 brw_update_texture_surface(struct gl_context *ctx,
526 unsigned unit,
527 uint32_t *surf_offset,
528 bool for_gather,
529 uint32_t plane)
530 {
531 struct brw_context *brw = brw_context(ctx);
532 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
533
534 if (obj->Target == GL_TEXTURE_BUFFER) {
535 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
536
537 } else {
538 struct intel_texture_object *intel_obj = intel_texture_object(obj);
539 struct intel_mipmap_tree *mt = intel_obj->mt;
540
541 if (plane > 0) {
542 if (mt->plane[plane - 1] == NULL)
543 return;
544 mt = mt->plane[plane - 1];
545 }
546
547 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
548 /* If this is a view with restricted NumLayers, then our effective depth
549 * is not just the miptree depth.
550 */
551 const unsigned view_num_layers =
552 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
553 mt->logical_depth0;
554
555 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
556 * texturing functions that return a float, as our code generation always
557 * selects the .x channel (which would always be 0).
558 */
559 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
560 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
561 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
562 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
563 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
564 brw_get_texture_swizzle(&brw->ctx, obj));
565
566 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
567 enum isl_format format = translate_tex_format(brw, mesa_fmt,
568 sampler->sRGBDecode);
569
570 /* Implement gen6 and gen7 gather work-around */
571 bool need_green_to_blue = false;
572 if (for_gather) {
573 if (brw->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
574 format == ISL_FORMAT_R32G32_SINT ||
575 format == ISL_FORMAT_R32G32_UINT)) {
576 format = ISL_FORMAT_R32G32_FLOAT_LD;
577 need_green_to_blue = brw->is_haswell;
578 } else if (brw->gen == 6) {
579 /* Sandybridge's gather4 message is broken for integer formats.
580 * To work around this, we pretend the surface is UNORM for
581 * 8 or 16-bit formats, and emit shader instructions to recover
582 * the real INT/UINT value. For 32-bit formats, we pretend
583 * the surface is FLOAT, and simply reinterpret the resulting
584 * bits.
585 */
586 switch (format) {
587 case ISL_FORMAT_R8_SINT:
588 case ISL_FORMAT_R8_UINT:
589 format = ISL_FORMAT_R8_UNORM;
590 break;
591
592 case ISL_FORMAT_R16_SINT:
593 case ISL_FORMAT_R16_UINT:
594 format = ISL_FORMAT_R16_UNORM;
595 break;
596
597 case ISL_FORMAT_R32_SINT:
598 case ISL_FORMAT_R32_UINT:
599 format = ISL_FORMAT_R32_FLOAT;
600 break;
601
602 default:
603 break;
604 }
605 }
606 }
607
608 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
609 if (brw->gen <= 7) {
610 assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
611 mt = mt->r8stencil_mt;
612 } else {
613 mt = mt->stencil_mt;
614 }
615 format = ISL_FORMAT_R8_UINT;
616 } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
617 assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
618 mt = mt->r8stencil_mt;
619 format = ISL_FORMAT_R8_UINT;
620 }
621
622 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
623
624 struct isl_view view = {
625 .format = format,
626 .base_level = obj->MinLevel + obj->BaseLevel,
627 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
628 .base_array_layer = obj->MinLayer,
629 .array_len = view_num_layers,
630 .swizzle = {
631 .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
632 .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
633 .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
634 .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
635 },
636 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
637 };
638
639 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
640 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
641 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
642
643 assert(brw_texture_view_sane(brw, mt, &view));
644
645 const int flags = brw_disable_aux_surface(brw, mt, &view) ?
646 INTEL_AUX_BUFFER_DISABLED : 0;
647 brw_emit_surface_state(brw, mt, flags, mt->target, view,
648 tex_mocs[brw->gen],
649 surf_offset, surf_index,
650 I915_GEM_DOMAIN_SAMPLER, 0);
651 }
652 }
653
654 void
655 brw_emit_buffer_surface_state(struct brw_context *brw,
656 uint32_t *out_offset,
657 struct brw_bo *bo,
658 unsigned buffer_offset,
659 unsigned surface_format,
660 unsigned buffer_size,
661 unsigned pitch,
662 bool rw)
663 {
664 uint32_t *dw = brw_state_batch(brw,
665 brw->isl_dev.ss.size,
666 brw->isl_dev.ss.align,
667 out_offset);
668
669 isl_buffer_fill_state(&brw->isl_dev, dw,
670 .address = (bo ? bo->offset64 : 0) + buffer_offset,
671 .size = buffer_size,
672 .format = surface_format,
673 .stride = pitch,
674 .mocs = tex_mocs[brw->gen]);
675
676 if (bo) {
677 brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
678 bo, buffer_offset,
679 I915_GEM_DOMAIN_SAMPLER,
680 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
681 }
682 }
683
684 void
685 brw_update_buffer_texture_surface(struct gl_context *ctx,
686 unsigned unit,
687 uint32_t *surf_offset)
688 {
689 struct brw_context *brw = brw_context(ctx);
690 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
691 struct intel_buffer_object *intel_obj =
692 intel_buffer_object(tObj->BufferObject);
693 uint32_t size = tObj->BufferSize;
694 struct brw_bo *bo = NULL;
695 mesa_format format = tObj->_BufferObjectFormat;
696 const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
697 int texel_size = _mesa_get_format_bytes(format);
698
699 if (intel_obj) {
700 size = MIN2(size, intel_obj->Base.Size);
701 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
702 }
703
704 /* The ARB_texture_buffer_object specification says:
705 *
706 * "The number of texels in the buffer texture's texel array is given by
707 *
708 * floor(<buffer_size> / (<components> * sizeof(<base_type>))),
709 *
710 * where <buffer_size> is the size of the buffer object, in basic
711 * machine units and <components> and <base_type> are the element count
712 * and base data type for elements, as specified in Table X.1. The
713 * number of texels in the texel array is then clamped to the
714 * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
715 *
716 * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
717 * so that when ISL divides by stride to obtain the number of texels, that
718 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
719 */
720 size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
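/* Worked example (made-up limit): for an RGBA32F buffer texture the texel
 * size is 16 bytes, so with a MAX_TEXTURE_BUFFER_SIZE of 2^27 texels the
 * byte size is clamped to 2^31, and ISL's divide-by-stride then yields at
 * most 2^27 texels as required.
 */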
721
722 if (isl_format == ISL_FORMAT_UNSUPPORTED) {
723 _mesa_problem(NULL, "bad format %s for texture buffer\n",
724 _mesa_get_format_name(format));
725 }
726
727 brw_emit_buffer_surface_state(brw, surf_offset, bo,
728 tObj->BufferOffset,
729 isl_format,
730 size,
731 texel_size,
732 false /* rw */);
733 }
734
735 /**
736 * Create the constant buffer surface. Vertex/fragment shader constants will be
737 * read from this buffer with Data Port Read instructions/messages.
738 */
739 void
740 brw_create_constant_surface(struct brw_context *brw,
741 struct brw_bo *bo,
742 uint32_t offset,
743 uint32_t size,
744 uint32_t *out_offset)
745 {
746 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
747 ISL_FORMAT_R32G32B32A32_FLOAT,
748 size, 1, false);
749 }
750
751 /**
752 * Create the buffer surface. Shader buffer variables will be
753 * read from / written to this buffer with Data Port Read/Write
754 * instructions/messages.
755 */
756 void
757 brw_create_buffer_surface(struct brw_context *brw,
758 struct brw_bo *bo,
759 uint32_t offset,
760 uint32_t size,
761 uint32_t *out_offset)
762 {
763 /* Use a raw surface so we can reuse existing untyped read/write/atomic
764 * messages. The fragment shader needs these in particular because they
765 * include a pixel mask header that we need in order to ensure correct
766 * behavior with helper invocations, which cannot write to the buffer.
767 */
768 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
769 ISL_FORMAT_RAW,
770 size, 1, true);
771 }
772
773 /**
774 * Set up a binding table entry for use by stream output logic (transform
775 * feedback).
776 *
777 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
778 */
779 void
780 brw_update_sol_surface(struct brw_context *brw,
781 struct gl_buffer_object *buffer_obj,
782 uint32_t *out_offset, unsigned num_vector_components,
783 unsigned stride_dwords, unsigned offset_dwords)
784 {
785 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
786 uint32_t offset_bytes = 4 * offset_dwords;
787 struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
788 offset_bytes,
789 buffer_obj->Size - offset_bytes);
790 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
791 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
792 size_t size_dwords = buffer_obj->Size / 4;
793 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
794
795 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
796 * too big to map using a single binding table entry?
797 */
798 assert((size_dwords - offset_dwords) / stride_dwords
799 <= BRW_MAX_NUM_BUFFER_ENTRIES);
800
801 if (size_dwords > offset_dwords + num_vector_components) {
802 /* There is room for at least 1 transform feedback output in the buffer.
803 * Compute the number of additional transform feedback outputs the
804 * buffer has room for.
805 */
806 buffer_size_minus_1 =
807 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
808 } else {
809 /* There isn't even room for a single transform feedback output in the
810 * buffer. We can't configure the binding table entry to prevent output
811 * entirely; we'll have to rely on the geometry shader to detect
812 * overflow. But to minimize the damage in case of a bug, set up the
813 * binding table entry to just allow a single output.
814 */
815 buffer_size_minus_1 = 0;
816 }
817 width = buffer_size_minus_1 & 0x7f;
818 height = (buffer_size_minus_1 & 0xfff80) >> 7;
819 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
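/* Worked example with a hypothetical buffer_size_minus_1 of 0x123456:
 * the fields come out as width = 0x56 (bits 6:0), height = 0x468
 * (bits 19:7) and depth = 0x1 (bits 26:20), which the hardware
 * reassembles into the same 27-bit entry count.
 */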
820
821 switch (num_vector_components) {
822 case 1:
823 surface_format = ISL_FORMAT_R32_FLOAT;
824 break;
825 case 2:
826 surface_format = ISL_FORMAT_R32G32_FLOAT;
827 break;
828 case 3:
829 surface_format = ISL_FORMAT_R32G32B32_FLOAT;
830 break;
831 case 4:
832 surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
833 break;
834 default:
835 unreachable("Invalid vector size for transform feedback output");
836 }
837
838 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
839 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
840 surface_format << BRW_SURFACE_FORMAT_SHIFT |
841 BRW_SURFACE_RC_READ_WRITE;
842 surf[1] = bo->offset64 + offset_bytes; /* reloc */
843 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
844 height << BRW_SURFACE_HEIGHT_SHIFT);
845 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
846 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
847 surf[4] = 0;
848 surf[5] = 0;
849
850 /* Emit relocation to surface contents. */
851 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
852 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
853 }
854
855 /* Creates a new WM constant buffer reflecting the current fragment program's
856 * constants, if needed by the fragment program.
857 *
858 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
859 * state atom.
860 */
861 static void
862 brw_upload_wm_pull_constants(struct brw_context *brw)
863 {
864 struct brw_stage_state *stage_state = &brw->wm.base;
865 /* BRW_NEW_FRAGMENT_PROGRAM */
866 struct brw_program *fp = (struct brw_program *) brw->fragment_program;
867 /* BRW_NEW_FS_PROG_DATA */
868 struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
869
870 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
871 /* _NEW_PROGRAM_CONSTANTS */
872 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
873 stage_state, prog_data);
874 }
875
876 const struct brw_tracked_state brw_wm_pull_constants = {
877 .dirty = {
878 .mesa = _NEW_PROGRAM_CONSTANTS,
879 .brw = BRW_NEW_BATCH |
880 BRW_NEW_BLORP |
881 BRW_NEW_FRAGMENT_PROGRAM |
882 BRW_NEW_FS_PROG_DATA,
883 },
884 .emit = brw_upload_wm_pull_constants,
885 };
886
887 /**
888 * Creates a null renderbuffer surface.
889 *
890 * This is used when the shader doesn't write to any color output. An FB
891 * write to target 0 will still be emitted, because that's how the thread is
892 * terminated (and computed depth is returned), so we need to have the
893 * hardware discard the target 0 color output..
894 */
895 static void
896 brw_emit_null_surface_state(struct brw_context *brw,
897 unsigned width,
898 unsigned height,
899 unsigned samples,
900 uint32_t *out_offset)
901 {
902 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
903 * Notes):
904 *
905 * A null surface will be used in instances where an actual surface is
906 * not bound. When a write message is generated to a null surface, no
907 * actual surface is written to. When a read message (including any
908 * sampling engine message) is generated to a null surface, the result
909 * is all zeros. Note that a null surface type is allowed to be used
910 * with all messages, even if it is not specifically indicated as
911 * supported. All of the remaining fields in surface state are ignored
912 * for null surfaces, with the following exceptions:
913 *
914 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
915 * depth buffer’s corresponding state for all render target surfaces,
916 * including null.
917 *
918 * - Surface Format must be R8G8B8A8_UNORM.
919 */
920 unsigned surface_type = BRW_SURFACE_NULL;
921 struct brw_bo *bo = NULL;
922 unsigned pitch_minus_1 = 0;
923 uint32_t multisampling_state = 0;
924 uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
925
926 if (samples > 1) {
927 /* On Gen6, null render targets seem to cause GPU hangs when
928 * multisampling. So work around this problem by rendering into a dummy
929 * color buffer.
930 *
931 * To decrease the amount of memory needed by the workaround buffer, we
932 * set its pitch to 128 bytes (the width of a Y tile). This means that
933 * the amount of memory needed for the workaround buffer is
934 * (width_in_tiles + height_in_tiles - 1) tiles.
935 *
936 * Note that since the workaround buffer will be interpreted by the
937 * hardware as an interleaved multisampled buffer, we need to compute
938 * width_in_tiles and height_in_tiles by dividing the width and height
939 * by 16 rather than the normal Y-tile size of 32.
940 */
941 unsigned width_in_tiles = ALIGN(width, 16) / 16;
942 unsigned height_in_tiles = ALIGN(height, 16) / 16;
943 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
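/* As a rough example (hypothetical framebuffer): 1920x1080 gives
 * width_in_tiles = 120 and height_in_tiles = 68, so the dummy buffer only
 * needs (120 + 68 - 1) * 4096 bytes, i.e. about 748 KB.
 */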
944 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
945 size_needed);
946 bo = brw->wm.multisampled_null_render_target_bo;
947 surface_type = BRW_SURFACE_2D;
948 pitch_minus_1 = 127;
949 multisampling_state = brw_get_surface_num_multisamples(samples);
950 }
951
952 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
953 ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
954 if (brw->gen < 6) {
955 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
956 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
957 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
958 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
959 }
960 surf[1] = bo ? bo->offset64 : 0;
961 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
962 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
963
964 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
965 * Notes):
966 *
967 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
968 */
969 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
970 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
971 surf[4] = multisampling_state;
972 surf[5] = 0;
973
974 if (bo) {
975 brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
976 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
977 }
978 }
979
980 /**
981 * Sets up a surface state structure to point at the given region.
982 * While it is only used for the front/back buffer currently, it should be
983 * usable for further buffers when doing ARB_draw_buffers support.
984 */
985 static uint32_t
986 gen4_update_renderbuffer_surface(struct brw_context *brw,
987 struct gl_renderbuffer *rb,
988 uint32_t flags, unsigned unit,
989 uint32_t surf_index)
990 {
991 struct gl_context *ctx = &brw->ctx;
992 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
993 struct intel_mipmap_tree *mt = irb->mt;
994 uint32_t *surf;
995 uint32_t tile_x, tile_y;
996 enum isl_format format;
997 uint32_t offset;
998 /* _NEW_BUFFERS */
999 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
1000 /* BRW_NEW_FS_PROG_DATA */
1001
1002 assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
1003 assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
1004
1005 if (rb->TexImage && !brw->has_surface_tile_offset) {
1006 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
1007
1008 if (tile_x != 0 || tile_y != 0) {
1009 /* Original gen4 hardware couldn't draw to a non-tile-aligned
1010 * destination in a miptree unless you actually set up your renderbuffer
1011 * as a miptree and used the fragile lod/array_index/etc. controls to
1012 * select the image. So, instead, we just make a new single-level
1013 * miptree and render into that.
1014 */
1015 intel_renderbuffer_move_to_temp(brw, irb, false);
1016 assert(irb->align_wa_mt);
1017 mt = irb->align_wa_mt;
1018 }
1019 }
1020
1021 surf = brw_state_batch(brw, 6 * 4, 32, &offset);
1022
1023 format = brw->mesa_to_isl_render_format[rb_format];
1024 if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
1025 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
1026 __func__, _mesa_get_format_name(rb_format));
1027 }
1028
1029 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
1030 format << BRW_SURFACE_FORMAT_SHIFT);
1031
1032 /* reloc */
1033 assert(mt->offset % mt->cpp == 0);
1034 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
1035 mt->bo->offset64 + mt->offset);
1036
1037 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1038 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
1039
1040 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
1041 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
1042
1043 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
1044
1045 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
1046 /* Note that the low bits of these fields are missing, so
1047 * there's the possibility of getting in trouble.
1048 */
1049 assert(tile_x % 4 == 0);
1050 assert(tile_y % 2 == 0);
1051 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
1052 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
1053 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
1054
1055 if (brw->gen < 6) {
1056 /* _NEW_COLOR */
1057 if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
1058 (ctx->Color.BlendEnabled & (1 << unit)))
1059 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
1060
1061 if (!ctx->Color.ColorMask[unit][0])
1062 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
1063 if (!ctx->Color.ColorMask[unit][1])
1064 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
1065 if (!ctx->Color.ColorMask[unit][2])
1066 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
1067
1068 /* As mentioned above, disable writes to the alpha component when the
1069 * renderbuffer is XRGB.
1070 */
1071 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
1072 !ctx->Color.ColorMask[unit][3]) {
1073 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
1074 }
1075 }
1076
1077 brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
1078 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
1079
1080 return offset;
1081 }
1082
1083 /**
1084 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
1085 */
1086 void
1087 brw_update_renderbuffer_surfaces(struct brw_context *brw,
1088 const struct gl_framebuffer *fb,
1089 uint32_t render_target_start,
1090 uint32_t *surf_offset)
1091 {
1092 GLuint i;
1093 const unsigned int w = _mesa_geometric_width(fb);
1094 const unsigned int h = _mesa_geometric_height(fb);
1095 const unsigned int s = _mesa_geometric_samples(fb);
1096
1097 /* Update surfaces for drawing buffers */
1098 if (fb->_NumColorDrawBuffers >= 1) {
1099 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
1100 const uint32_t surf_index = render_target_start + i;
1101 const int flags = (_mesa_geometric_layers(fb) > 0 ?
1102 INTEL_RENDERBUFFER_LAYERED : 0) |
1103 (brw->draw_aux_buffer_disabled[i] ?
1104 INTEL_AUX_BUFFER_DISABLED : 0);
1105
1106 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
1107 surf_offset[surf_index] =
1108 brw->vtbl.update_renderbuffer_surface(
1109 brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
1110 } else {
1111 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1112 &surf_offset[surf_index]);
1113 }
1114 }
1115 } else {
1116 const uint32_t surf_index = render_target_start;
1117 brw->vtbl.emit_null_surface_state(brw, w, h, s,
1118 &surf_offset[surf_index]);
1119 }
1120 }
1121
1122 static void
1123 update_renderbuffer_surfaces(struct brw_context *brw)
1124 {
1125 const struct gl_context *ctx = &brw->ctx;
1126
1127 /* BRW_NEW_FS_PROG_DATA */
1128 const struct brw_wm_prog_data *wm_prog_data =
1129 brw_wm_prog_data(brw->wm.base.prog_data);
1130
1131 /* _NEW_BUFFERS | _NEW_COLOR */
1132 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1133 brw_update_renderbuffer_surfaces(
1134 brw, fb,
1135 wm_prog_data->binding_table.render_target_start,
1136 brw->wm.base.surf_offset);
1137 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1138 }
1139
1140 const struct brw_tracked_state brw_renderbuffer_surfaces = {
1141 .dirty = {
1142 .mesa = _NEW_BUFFERS |
1143 _NEW_COLOR,
1144 .brw = BRW_NEW_BATCH |
1145 BRW_NEW_BLORP |
1146 BRW_NEW_FS_PROG_DATA,
1147 },
1148 .emit = update_renderbuffer_surfaces,
1149 };
1150
1151 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
1152 .dirty = {
1153 .mesa = _NEW_BUFFERS,
1154 .brw = BRW_NEW_BATCH |
1155 BRW_NEW_BLORP,
1156 },
1157 .emit = update_renderbuffer_surfaces,
1158 };
1159
1160 static void
1161 update_renderbuffer_read_surfaces(struct brw_context *brw)
1162 {
1163 const struct gl_context *ctx = &brw->ctx;
1164
1165 /* BRW_NEW_FS_PROG_DATA */
1166 const struct brw_wm_prog_data *wm_prog_data =
1167 brw_wm_prog_data(brw->wm.base.prog_data);
1168
1169 /* BRW_NEW_FRAGMENT_PROGRAM */
1170 if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
1171 brw->fragment_program && brw->fragment_program->info.outputs_read) {
1172 /* _NEW_BUFFERS */
1173 const struct gl_framebuffer *fb = ctx->DrawBuffer;
1174
1175 for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
1176 struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
1177 const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
1178 const unsigned surf_index =
1179 wm_prog_data->binding_table.render_target_read_start + i;
1180 uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
1181
1182 if (irb) {
1183 const enum isl_format format = brw->mesa_to_isl_render_format[
1184 _mesa_get_render_format(ctx, intel_rb_format(irb))];
1185 assert(isl_format_supports_sampling(&brw->screen->devinfo,
1186 format));
1187
1188 /* Override the target of the texture if the render buffer is a
1189 * single slice of a 3D texture (since the minimum array element
1190 * field of the surface state structure is ignored by the sampler
1191 * unit for 3D textures on some hardware), or if the render buffer
1192 * is a 1D array (since shaders always provide the array index
1193 * coordinate at the Z component to avoid state-dependent
1194 * recompiles when changing the texture target of the
1195 * framebuffer).
1196 */
1197 const GLenum target =
1198 (irb->mt->target == GL_TEXTURE_3D &&
1199 irb->layer_count == 1) ? GL_TEXTURE_2D :
1200 irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
1201 irb->mt->target;
1202
1203 /* intel_renderbuffer::mt_layer is expressed in sample units for
1204 * the UMS and CMS multisample layouts, but
1205 * intel_renderbuffer::layer_count is expressed in units of whole
1206 * logical layers regardless of the multisample layout.
1207 */
1208 const unsigned mt_layer_unit =
1209 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
1210 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
1211 MAX2(irb->mt->num_samples, 1) : 1;
1212
1213 const struct isl_view view = {
1214 .format = format,
1215 .base_level = irb->mt_level - irb->mt->first_level,
1216 .levels = 1,
1217 .base_array_layer = irb->mt_layer / mt_layer_unit,
1218 .array_len = irb->layer_count,
1219 .swizzle = ISL_SWIZZLE_IDENTITY,
1220 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
1221 };
1222
1223 const int flags = brw->draw_aux_buffer_disabled[i] ?
1224 INTEL_AUX_BUFFER_DISABLED : 0;
1225 brw_emit_surface_state(brw, irb->mt, flags, target, view,
1226 tex_mocs[brw->gen],
1227 surf_offset, surf_index,
1228 I915_GEM_DOMAIN_SAMPLER, 0);
1229
1230 } else {
1231 brw->vtbl.emit_null_surface_state(
1232 brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
1233 _mesa_geometric_samples(fb), surf_offset);
1234 }
1235 }
1236
1237 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1238 }
1239 }
1240
1241 const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
1242 .dirty = {
1243 .mesa = _NEW_BUFFERS,
1244 .brw = BRW_NEW_BATCH |
1245 BRW_NEW_FRAGMENT_PROGRAM |
1246 BRW_NEW_FS_PROG_DATA,
1247 },
1248 .emit = update_renderbuffer_read_surfaces,
1249 };
1250
1251 static void
1252 update_stage_texture_surfaces(struct brw_context *brw,
1253 const struct gl_program *prog,
1254 struct brw_stage_state *stage_state,
1255 bool for_gather, uint32_t plane)
1256 {
1257 if (!prog)
1258 return;
1259
1260 struct gl_context *ctx = &brw->ctx;
1261
1262 uint32_t *surf_offset = stage_state->surf_offset;
1263
1264 /* BRW_NEW_*_PROG_DATA */
1265 if (for_gather)
1266 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1267 else
1268 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1269
1270 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1271 for (unsigned s = 0; s < num_samplers; s++) {
1272 surf_offset[s] = 0;
1273
1274 if (prog->SamplersUsed & (1 << s)) {
1275 const unsigned unit = prog->SamplerUnits[s];
1276
1277 /* _NEW_TEXTURE */
1278 if (ctx->Texture.Unit[unit]._Current) {
1279 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1280 }
1281 }
1282 }
1283 }
1284
1285
1286 /**
1287 * Construct SURFACE_STATE objects for enabled textures.
1288 */
1289 static void
1290 brw_update_texture_surfaces(struct brw_context *brw)
1291 {
1292 /* BRW_NEW_VERTEX_PROGRAM */
1293 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1294
1295 /* BRW_NEW_TESS_PROGRAMS */
1296 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1297 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1298
1299 /* BRW_NEW_GEOMETRY_PROGRAM */
1300 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1301
1302 /* BRW_NEW_FRAGMENT_PROGRAM */
1303 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1304
1305 /* _NEW_TEXTURE */
1306 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1307 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1308 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1309 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1310 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1311
1312 /* Emit an alternate set of surface state for gather. This
1313 * allows the surface format to be overridden for only the
1314 * gather4 messages. */
1315 if (brw->gen < 8) {
1316 if (vs && vs->nir->info.uses_texture_gather)
1317 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1318 if (tcs && tcs->nir->info.uses_texture_gather)
1319 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1320 if (tes && tes->nir->info.uses_texture_gather)
1321 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1322 if (gs && gs->nir->info.uses_texture_gather)
1323 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1324 if (fs && fs->nir->info.uses_texture_gather)
1325 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1326 }
1327
1328 if (fs) {
1329 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1330 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1331 }
1332
1333 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1334 }
1335
1336 const struct brw_tracked_state brw_texture_surfaces = {
1337 .dirty = {
1338 .mesa = _NEW_TEXTURE,
1339 .brw = BRW_NEW_BATCH |
1340 BRW_NEW_BLORP |
1341 BRW_NEW_FRAGMENT_PROGRAM |
1342 BRW_NEW_FS_PROG_DATA |
1343 BRW_NEW_GEOMETRY_PROGRAM |
1344 BRW_NEW_GS_PROG_DATA |
1345 BRW_NEW_TESS_PROGRAMS |
1346 BRW_NEW_TCS_PROG_DATA |
1347 BRW_NEW_TES_PROG_DATA |
1348 BRW_NEW_TEXTURE_BUFFER |
1349 BRW_NEW_VERTEX_PROGRAM |
1350 BRW_NEW_VS_PROG_DATA,
1351 },
1352 .emit = brw_update_texture_surfaces,
1353 };
1354
1355 static void
1356 brw_update_cs_texture_surfaces(struct brw_context *brw)
1357 {
1358 /* BRW_NEW_COMPUTE_PROGRAM */
1359 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1360
1361 /* _NEW_TEXTURE */
1362 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1363
1364 /* Emit an alternate set of surface state for gather. This
1365 * allows the surface format to be overridden for only the
1366 * gather4 messages.
1367 */
1368 if (brw->gen < 8) {
1369 if (cs && cs->nir->info.uses_texture_gather)
1370 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1371 }
1372
1373 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1374 }
1375
1376 const struct brw_tracked_state brw_cs_texture_surfaces = {
1377 .dirty = {
1378 .mesa = _NEW_TEXTURE,
1379 .brw = BRW_NEW_BATCH |
1380 BRW_NEW_BLORP |
1381 BRW_NEW_COMPUTE_PROGRAM,
1382 },
1383 .emit = brw_update_cs_texture_surfaces,
1384 };
1385
1386
1387 void
1388 brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
1389 struct brw_stage_state *stage_state,
1390 struct brw_stage_prog_data *prog_data)
1391 {
1392 struct gl_context *ctx = &brw->ctx;
1393
1394 if (!prog)
1395 return;
1396
1397 uint32_t *ubo_surf_offsets =
1398 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1399
1400 for (int i = 0; i < prog->info.num_ubos; i++) {
1401 struct gl_uniform_buffer_binding *binding =
1402 &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
1403
1404 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1405 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1406 } else {
1407 struct intel_buffer_object *intel_bo =
1408 intel_buffer_object(binding->BufferObject);
1409 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1410 if (!binding->AutomaticSize)
1411 size = MIN2(size, binding->Size);
1412 struct brw_bo *bo =
1413 intel_bufferobj_buffer(brw, intel_bo,
1414 binding->Offset,
1415 size);
1416 brw_create_constant_surface(brw, bo, binding->Offset,
1417 size,
1418 &ubo_surf_offsets[i]);
1419 }
1420 }
1421
1422 uint32_t *ssbo_surf_offsets =
1423 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1424
1425 for (int i = 0; i < prog->info.num_ssbos; i++) {
1426 struct gl_shader_storage_buffer_binding *binding =
1427 &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];
1428
1429 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1430 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1431 } else {
1432 struct intel_buffer_object *intel_bo =
1433 intel_buffer_object(binding->BufferObject);
1434 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1435 if (!binding->AutomaticSize)
1436 size = MIN2(size, binding->Size);
1437 struct brw_bo *bo =
1438 intel_bufferobj_buffer(brw, intel_bo,
1439 binding->Offset,
1440 size);
1441 brw_create_buffer_surface(brw, bo, binding->Offset,
1442 size,
1443 &ssbo_surf_offsets[i]);
1444 }
1445 }
1446
1447 if (prog->info.num_ubos || prog->info.num_ssbos)
1448 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1449 }
1450
1451 static void
1452 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1453 {
1454 struct gl_context *ctx = &brw->ctx;
1455 /* _NEW_PROGRAM */
1456 struct gl_program *prog = ctx->FragmentProgram._Current;
1457
1458 /* BRW_NEW_FS_PROG_DATA */
1459 brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
1460 }
1461
1462 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1463 .dirty = {
1464 .mesa = _NEW_PROGRAM,
1465 .brw = BRW_NEW_BATCH |
1466 BRW_NEW_BLORP |
1467 BRW_NEW_FS_PROG_DATA |
1468 BRW_NEW_UNIFORM_BUFFER,
1469 },
1470 .emit = brw_upload_wm_ubo_surfaces,
1471 };
1472
1473 static void
1474 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1475 {
1476 struct gl_context *ctx = &brw->ctx;
1477 /* _NEW_PROGRAM */
1478 struct gl_program *prog =
1479 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1480
1481 /* BRW_NEW_CS_PROG_DATA */
1482 brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
1483 }
1484
1485 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1486 .dirty = {
1487 .mesa = _NEW_PROGRAM,
1488 .brw = BRW_NEW_BATCH |
1489 BRW_NEW_BLORP |
1490 BRW_NEW_CS_PROG_DATA |
1491 BRW_NEW_UNIFORM_BUFFER,
1492 },
1493 .emit = brw_upload_cs_ubo_surfaces,
1494 };
1495
1496 void
1497 brw_upload_abo_surfaces(struct brw_context *brw,
1498 const struct gl_program *prog,
1499 struct brw_stage_state *stage_state,
1500 struct brw_stage_prog_data *prog_data)
1501 {
1502 struct gl_context *ctx = &brw->ctx;
1503 uint32_t *surf_offsets =
1504 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1505
1506 if (prog->info.num_abos) {
1507 for (unsigned i = 0; i < prog->info.num_abos; i++) {
1508 struct gl_atomic_buffer_binding *binding =
1509 &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
1510 struct intel_buffer_object *intel_bo =
1511 intel_buffer_object(binding->BufferObject);
1512 struct brw_bo *bo = intel_bufferobj_buffer(
1513 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1514
1515 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1516 binding->Offset, ISL_FORMAT_RAW,
1517 bo->size - binding->Offset, 1, true);
1518 }
1519
1520 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1521 }
1522 }
1523
1524 static void
1525 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1526 {
1527 /* _NEW_PROGRAM */
1528 const struct gl_program *wm = brw->fragment_program;
1529
1530 if (wm) {
1531 /* BRW_NEW_FS_PROG_DATA */
1532 brw_upload_abo_surfaces(brw, wm, &brw->wm.base, brw->wm.base.prog_data);
1533 }
1534 }
1535
1536 const struct brw_tracked_state brw_wm_abo_surfaces = {
1537 .dirty = {
1538 .mesa = _NEW_PROGRAM,
1539 .brw = BRW_NEW_ATOMIC_BUFFER |
1540 BRW_NEW_BLORP |
1541 BRW_NEW_BATCH |
1542 BRW_NEW_FS_PROG_DATA,
1543 },
1544 .emit = brw_upload_wm_abo_surfaces,
1545 };
1546
1547 static void
1548 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1549 {
1550 /* _NEW_PROGRAM */
1551 const struct gl_program *cp = brw->compute_program;
1552
1553 if (cp) {
1554 /* BRW_NEW_CS_PROG_DATA */
1555 brw_upload_abo_surfaces(brw, cp, &brw->cs.base, brw->cs.base.prog_data);
1556 }
1557 }
1558
1559 const struct brw_tracked_state brw_cs_abo_surfaces = {
1560 .dirty = {
1561 .mesa = _NEW_PROGRAM,
1562 .brw = BRW_NEW_ATOMIC_BUFFER |
1563 BRW_NEW_BLORP |
1564 BRW_NEW_BATCH |
1565 BRW_NEW_CS_PROG_DATA,
1566 },
1567 .emit = brw_upload_cs_abo_surfaces,
1568 };
1569
1570 static void
1571 brw_upload_cs_image_surfaces(struct brw_context *brw)
1572 {
1573 /* _NEW_PROGRAM */
1574 const struct gl_program *cp = brw->compute_program;
1575
1576 if (cp) {
1577 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1578 brw_upload_image_surfaces(brw, cp, &brw->cs.base,
1579 brw->cs.base.prog_data);
1580 }
1581 }
1582
1583 const struct brw_tracked_state brw_cs_image_surfaces = {
1584 .dirty = {
1585 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1586 .brw = BRW_NEW_BATCH |
1587 BRW_NEW_BLORP |
1588 BRW_NEW_CS_PROG_DATA |
1589 BRW_NEW_IMAGE_UNITS
1590 },
1591 .emit = brw_upload_cs_image_surfaces,
1592 };
1593
1594 static uint32_t
1595 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1596 {
1597 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1598 enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
1599 if (access == GL_WRITE_ONLY) {
1600 return hw_format;
1601 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1602 /* Typed surface reads support a very limited subset of the shader
1603 * image formats. Translate it into the closest format the
1604 * hardware supports.
1605 */
1606 return isl_lower_storage_image_format(devinfo, hw_format);
1607 } else {
1608 /* The hardware doesn't actually support a typed format that we can use
1609 * so we have to fall back to untyped read/write messages.
1610 */
1611 return ISL_FORMAT_RAW;
1612 }
1613 }
1614
1615 static void
1616 update_default_image_param(struct brw_context *brw,
1617 struct gl_image_unit *u,
1618 unsigned surface_idx,
1619 struct brw_image_param *param)
1620 {
1621 memset(param, 0, sizeof(*param));
1622 param->surface_idx = surface_idx;
1623 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1624 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1625 * detailed explanation of these parameters.
1626 */
1627 param->swizzling[0] = 0xff;
1628 param->swizzling[1] = 0xff;
1629 }
1630
1631 static void
1632 update_buffer_image_param(struct brw_context *brw,
1633 struct gl_image_unit *u,
1634 unsigned surface_idx,
1635 struct brw_image_param *param)
1636 {
1637 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1638 const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
1639 update_default_image_param(brw, u, surface_idx, param);
1640
1641 param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
1642 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1643 }
1644
1645 static void
1646 update_texture_image_param(struct brw_context *brw,
1647 struct gl_image_unit *u,
1648 unsigned surface_idx,
1649 struct brw_image_param *param)
1650 {
1651 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1652
1653 update_default_image_param(brw, u, surface_idx, param);
1654
1655 param->size[0] = minify(mt->logical_width0, u->Level);
1656 param->size[1] = minify(mt->logical_height0, u->Level);
1657 param->size[2] = (!u->Layered ? 1 :
1658 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1659 u->TexObj->Target == GL_TEXTURE_3D ?
1660 minify(mt->logical_depth0, u->Level) :
1661 mt->logical_depth0);
1662
1663 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1664 &param->offset[0],
1665 &param->offset[1]);
1666
1667 param->stride[0] = mt->cpp;
1668 param->stride[1] = mt->pitch / mt->cpp;
1669 param->stride[2] =
1670 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1671 param->stride[3] =
1672 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1673
1674 if (mt->tiling == I915_TILING_X) {
1675 /* An X tile is a rectangular block of 512x8 bytes. */
1676 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1677 param->tiling[1] = _mesa_logbase2(8);
1678
1679 if (brw->has_swizzling) {
1680 /* Right shifts required to swizzle bits 9 and 10 of the memory
1681 * address with bit 6.
1682 */
1683 param->swizzling[0] = 3;
1684 param->swizzling[1] = 4;
1685 }
1686 } else if (mt->tiling == I915_TILING_Y) {
1687 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1688 * different from the layout of an X-tiled surface; we simply pretend that
1689 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1690 * one arranged in X-major order just as is the case for X-tiling.
1691 */
1692 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1693 param->tiling[1] = _mesa_logbase2(32);
1694
1695 if (brw->has_swizzling) {
1696 /* Right shift required to swizzle bit 9 of the memory address with
1697 * bit 6.
1698 */
1699 param->swizzling[0] = 3;
1700 }
1701 }
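/* For instance, with a hypothetical 4-byte-per-texel image the X-tiled
 * case above gives tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8)
 * = 3, while the Y-tiled case gives tiling[0] = log2(16 / 4) = 2 and
 * tiling[1] = log2(32) = 5.
 */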
1702
1703 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1704 * address calculation algorithm (emit_address_calculation() in
1705 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1706 * modulus equal to the LOD.
1707 */
1708 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1709 0);
1710 }
1711
1712 static void
1713 update_image_surface(struct brw_context *brw,
1714 struct gl_image_unit *u,
1715 GLenum access,
1716 unsigned surface_idx,
1717 uint32_t *surf_offset,
1718 struct brw_image_param *param)
1719 {
1720 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1721 struct gl_texture_object *obj = u->TexObj;
1722 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1723
1724 if (obj->Target == GL_TEXTURE_BUFFER) {
1725 struct intel_buffer_object *intel_obj =
1726 intel_buffer_object(obj->BufferObject);
1727 const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
1728 _mesa_get_format_bytes(u->_ActualFormat));
1729
1730 brw_emit_buffer_surface_state(
1731 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1732 format, intel_obj->Base.Size, texel_size,
1733 access != GL_READ_ONLY);
1734
1735 update_buffer_image_param(brw, u, surface_idx, param);
1736
1737 } else {
1738 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1739 struct intel_mipmap_tree *mt = intel_obj->mt;
1740
1741 if (format == ISL_FORMAT_RAW) {
1742 brw_emit_buffer_surface_state(
1743 brw, surf_offset, mt->bo, mt->offset,
1744 format, mt->bo->size - mt->offset, 1 /* pitch */,
1745 access != GL_READ_ONLY);
1746
1747 } else {
1748 const unsigned num_layers = (!u->Layered ? 1 :
1749 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1750 mt->logical_depth0);
1751
1752 struct isl_view view = {
1753 .format = format,
1754 .base_level = obj->MinLevel + u->Level,
1755 .levels = 1,
1756 .base_array_layer = obj->MinLayer + u->_Layer,
1757 .array_len = num_layers,
1758 .swizzle = ISL_SWIZZLE_IDENTITY,
1759 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1760 };
1761
1762 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1763 assert(!intel_miptree_has_color_unresolved(mt,
1764 view.base_level, 1,
1765 view.base_array_layer,
1766 view.array_len));
1767 brw_emit_surface_state(brw, mt, INTEL_AUX_BUFFER_DISABLED,
1768 mt->target, view, tex_mocs[brw->gen],
1769 surf_offset, surf_index,
1770 I915_GEM_DOMAIN_SAMPLER,
1771 access == GL_READ_ONLY ? 0 :
1772 I915_GEM_DOMAIN_SAMPLER);
1773 }
1774
1775 update_texture_image_param(brw, u, surface_idx, param);
1776 }
1777
1778 } else {
1779 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1780 update_default_image_param(brw, u, surface_idx, param);
1781 }
1782 }
1783
1784 void
1785 brw_upload_image_surfaces(struct brw_context *brw,
1786 const struct gl_program *prog,
1787 struct brw_stage_state *stage_state,
1788 struct brw_stage_prog_data *prog_data)
1789 {
1790 assert(prog);
1791 struct gl_context *ctx = &brw->ctx;
1792
1793 if (prog->info.num_images) {
1794 for (unsigned i = 0; i < prog->info.num_images; i++) {
1795 struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
1796 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1797
1798 update_image_surface(brw, u, prog->sh.ImageAccess[i],
1799 surf_idx,
1800 &stage_state->surf_offset[surf_idx],
1801 &prog_data->image_param[i]);
1802 }
1803
1804 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1805 /* This may have changed the image metadata that depends on the context
1806 * image unit state and is passed to the program as uniforms, so make
1807 * sure that push and pull constants are reuploaded.
1808 */
1809 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1810 }
1811 }
1812
1813 static void
1814 brw_upload_wm_image_surfaces(struct brw_context *brw)
1815 {
1816 /* BRW_NEW_FRAGMENT_PROGRAM */
1817 const struct gl_program *wm = brw->fragment_program;
1818
1819 if (wm) {
1820 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1821 brw_upload_image_surfaces(brw, wm, &brw->wm.base,
1822 brw->wm.base.prog_data);
1823 }
1824 }
1825
1826 const struct brw_tracked_state brw_wm_image_surfaces = {
1827 .dirty = {
1828 .mesa = _NEW_TEXTURE,
1829 .brw = BRW_NEW_BATCH |
1830 BRW_NEW_BLORP |
1831 BRW_NEW_FRAGMENT_PROGRAM |
1832 BRW_NEW_FS_PROG_DATA |
1833 BRW_NEW_IMAGE_UNITS
1834 },
1835 .emit = brw_upload_wm_image_surfaces,
1836 };
1837
1838 void
1839 gen4_init_vtable_surface_functions(struct brw_context *brw)
1840 {
1841 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1842 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1843 }
1844
1845 void
1846 gen6_init_vtable_surface_functions(struct brw_context *brw)
1847 {
1848 gen4_init_vtable_surface_functions(brw);
1849 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1850 }
1851
1852 static void
1853 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1854 {
1855 struct gl_context *ctx = &brw->ctx;
1856 /* _NEW_PROGRAM */
1857 struct gl_program *prog =
1858 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1859 /* BRW_NEW_CS_PROG_DATA */
1860 const struct brw_cs_prog_data *cs_prog_data =
1861 brw_cs_prog_data(brw->cs.base.prog_data);
1862
1863 if (prog && cs_prog_data->uses_num_work_groups) {
1864 const unsigned surf_idx =
1865 cs_prog_data->binding_table.work_groups_start;
1866 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1867 struct brw_bo *bo;
1868 uint32_t bo_offset;
1869
1870 if (brw->compute.num_work_groups_bo == NULL) {
1871 bo = NULL;
1872 intel_upload_data(brw,
1873 (void *)brw->compute.num_work_groups,
1874 3 * sizeof(GLuint),
1875 sizeof(GLuint),
1876 &bo,
1877 &bo_offset);
1878 } else {
1879 bo = brw->compute.num_work_groups_bo;
1880 bo_offset = brw->compute.num_work_groups_offset;
1881 }
1882
1883 brw_emit_buffer_surface_state(brw, surf_offset,
1884 bo, bo_offset,
1885 ISL_FORMAT_RAW,
1886 3 * sizeof(GLuint), 1, true);
1887 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1888 }
1889 }
1890
1891 const struct brw_tracked_state brw_cs_work_groups_surface = {
1892 .dirty = {
1893 .brw = BRW_NEW_BLORP |
1894 BRW_NEW_CS_PROG_DATA |
1895 BRW_NEW_CS_WORK_GROUPS
1896 },
1897 .emit = brw_upload_cs_work_groups_surface,
1898 };