i965/miptree: Add an aux_disabled parameter to render_aux_usage
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

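/* Memory Object Control State values, indexed by devinfo->gen.  Internal
 * buffers get the write-back (WB) entries; buffers shared with external
 * consumers (bo->external below) get the PTE entries, so the caching mode
 * the kernel chose for the mapping is respected.
 */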
uint32_t wb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

uint32_t pte_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};

uint32_t
brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo)
{
   return (bo && bo->external ? pte_mocs : wb_mocs)[devinfo->gen];
}

static void
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
             GLenum target, struct isl_view *view,
             uint32_t *tile_x, uint32_t *tile_y,
             uint32_t *offset, struct isl_surf *surf)
{
   *surf = mt->surf;

   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const enum isl_dim_layout dim_layout =
      get_isl_dim_layout(devinfo, mt->surf.tiling, target);

   if (surf->dim_layout == dim_layout)
      return;

   /* The layout of the specified texture target is not compatible with the
    * actual layout of the miptree structure in memory -- you're entering
    * dangerous territory.  This can only possibly work if you only intended
    * to access a single level and slice of the texture, and the hardware
    * supports the tile offset feature in order to allow non-tile-aligned
    * base offsets, since we'll have to point the hardware to the first
    * texel of the level instead of relying on the usual base level/layer
    * controls.
    */
   assert(devinfo->has_surface_tile_offset);
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
                                             view->base_array_layer,
                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
   surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
   surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
      minify(surf->logical_level0_px.height, l);
   surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
      minify(surf->logical_level0_px.depth, l);

   /* Only the base level and layer can be addressed with the overridden
    * layout.
    */
   surf->logical_level0_px.array_len = 1;
   surf->levels = 1;
   surf->dim_layout = dim_layout;

   /* The requested slice of the texture is now at the base level and
    * layer.
    */
   view->base_level = 0;
   view->base_array_layer = 0;
}

static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
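   /* Find the auxiliary surface, if any.  MCS and CCS data live in the
    * miptree's mcs_buf; HiZ has a BO of its own, which is why aux_offset
    * stays 0 in that case.
    */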
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = brw_get_bo_mocs(devinfo, mt->bo),
                       .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_state_reloc(&brw->batch,
                                  *surf_offset +
                                  brw->isl_dev.ss.aux_addr_offset,
                                  aux_bo, *aux_addr,
                                  reloc_flags);
   }
}

static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   enum isl_aux_usage aux_usage =
      intel_miptree_render_aux_usage(brw, mt, isl_format,
                                     ctx->Color.BlendEnabled & (1 << unit),
                                     brw->draw_aux_buffer_disabled[unit]);

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

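/* Gen4-6 SURFACE_STATE has encodings only for 1x and 4x multisampling, so
 * any multisampled surface is programmed as 4x on this legacy path.
 */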
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3            4            5
 *         4          5          6          7            0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
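 * For example, SWIZZLE_Y = 1 maps to (1 + 4) & 7 = 5 = SCS_GREEN, and
 * SWIZZLE_ZERO = 4 maps to (4 + 4) & 7 = 0 = SCS_ZERO.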
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

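/* Determine whether the given miptree is bound as a color attachment whose
 * auxiliary buffer has been disabled for the current draw (typically because
 * the same surface is bound for texturing at the same time); if so, sampling
 * from it must skip the aux surface as well.
 */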
static bool
brw_aux_surface_disabled(const struct brw_context *brw,
                         const struct intel_mipmap_tree *mt)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt == mt)
         return brw->draw_aux_buffer_disabled[i];
   }

   return false;
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather,
                           bool for_txf,
                           uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      if (brw_aux_surface_disabled(brw, mt))
         aux_usage = ISL_AUX_USAGE_NONE;

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             surf_offset, surf_index,
                             0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset,
                                                    reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = brw_get_bo_mocs(devinfo, bo));
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
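   /* SURFACE_STATE for a buffer splits the entry count across three fields:
    * bits 6:0 go in Width, bits 19:7 in Height, and bits 26:20 in Depth.
    */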
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_buffer_disabled[i])
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

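/* Whether the texture will actually be sampled as depth: a GL_DEPTH_STENCIL
 * texture with StencilSampling enabled reads the stencil component instead,
 * so it does not count as a depth texture for sampler*Shadow purposes.
 */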
static bool
is_depth_texture(struct intel_texture_object *iobj)
{
   GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
   return base_format == GL_DEPTH_COMPONENT ||
          (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
}

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);
         struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
         struct intel_texture_object *iobj = intel_texture_object(obj);

         /* _NEW_TEXTURE */
         if (!obj)
            continue;

         if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
            /* A programming note for the sample_c message says:
             *
             *    "The Surface Format of the associated surface must be
             *     indicated as supporting shadow mapping as indicated in the
             *     surface format table."
             *
             * Accessing non-depth textures via a sampler*Shadow type is
             * undefined.  GLSL 4.50 page 162 says:
             *
             *    "If a shadow texture call is made to a sampler that does not
             *     represent a depth texture, then results are undefined."
             *
             * We give them a null surface (zeros) for undefined.  We've seen
             * GPU hangs with color buffers and sample_c, so we try and avoid
             * those with this hack.
             */
            emit_null_surface_state(brw, NULL, surf_offset + s);
         } else {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages. */
   if (devinfo->gen < 8) {
      if (vs && vs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overridden for only the
    * gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};

static void
upload_buffer_surface(struct brw_context *brw,
                      struct gl_buffer_binding *binding,
                      uint32_t *out_offset,
                      enum isl_format format,
                      unsigned reloc_flags)
{
   struct gl_context *ctx = &brw->ctx;

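   /* An unbound binding point gets a null surface; reads from a null
    * surface return zero and writes to it are dropped.
    */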
   if (binding->BufferObject == ctx->Shared->NullBufferObj) {
      emit_null_surface_state(brw, NULL, out_offset);
   } else {
      ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
      if (!binding->AutomaticSize)
         size = MIN2(size, binding->Size);

      struct intel_buffer_object *iobj =
         intel_buffer_object(binding->BufferObject);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, iobj, binding->Offset, size,
                                (reloc_flags & RELOC_WRITE) != 0);

      brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
                                    format, size, 1, reloc_flags);
   }
}

void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog || (prog->info.num_ubos == 0 &&
                 prog->info.num_ssbos == 0 &&
                 prog->info.num_abos == 0))
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
      upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
                            ISL_FORMAT_R32G32B32A32_FLOAT, 0);
   }

   uint32_t *abo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
   uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos;

   for (int i = 0; i < prog->info.num_abos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
      upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   stage_state->push_constants_dirty = true;
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t) u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE,
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms, make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

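      /* With no BO recorded, the group counts came from a direct dispatch
       * and are only known on the CPU, so upload them; otherwise (e.g. an
       * indirect dispatch) the counts already live in a buffer object.
       */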
      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};