[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "main/framebuffer.h"
40
41 #include "intel_mipmap_tree.h"
42 #include "intel_batchbuffer.h"
43 #include "intel_tex.h"
44 #include "intel_fbo.h"
45 #include "intel_buffer_objects.h"
46
47 #include "brw_context.h"
48 #include "brw_state.h"
49 #include "brw_defines.h"
50 #include "brw_wm.h"
51
52 GLuint
53 translate_tex_target(GLenum target)
54 {
55 switch (target) {
56 case GL_TEXTURE_1D:
57 case GL_TEXTURE_1D_ARRAY_EXT:
58 return BRW_SURFACE_1D;
59
60 case GL_TEXTURE_RECTANGLE_NV:
61 return BRW_SURFACE_2D;
62
63 case GL_TEXTURE_2D:
64 case GL_TEXTURE_2D_ARRAY_EXT:
65 case GL_TEXTURE_EXTERNAL_OES:
66 case GL_TEXTURE_2D_MULTISAMPLE:
67 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
68 return BRW_SURFACE_2D;
69
70 case GL_TEXTURE_3D:
71 return BRW_SURFACE_3D;
72
73 case GL_TEXTURE_CUBE_MAP:
74 case GL_TEXTURE_CUBE_MAP_ARRAY:
75 return BRW_SURFACE_CUBE;
76
77 default:
78 unreachable("not reached");
79 }
80 }
81
82 uint32_t
83 brw_get_surface_tiling_bits(uint32_t tiling)
84 {
85 switch (tiling) {
86 case I915_TILING_X:
87 return BRW_SURFACE_TILED;
88 case I915_TILING_Y:
89 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
90 default:
91 return 0;
92 }
93 }
94
95
96 uint32_t
97 brw_get_surface_num_multisamples(unsigned num_samples)
98 {
99 if (num_samples > 1)
100 return BRW_SURFACE_MULTISAMPLECOUNT_4;
101 else
102 return BRW_SURFACE_MULTISAMPLECOUNT_1;
103 }
104
105 void
106 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
107 bool is_render_target,
108 unsigned *width, unsigned *height,
109 unsigned *pitch, uint32_t *tiling, unsigned *format)
110 {
111 static const unsigned halign_stencil = 8;
112
113 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
114 * there are half as many rows.
115 * In addition, mip levels are accessed manually by the program, and
116 * therefore the surface is set up to cover all the mip levels of one slice.
117 * (Hardware is still used to access individual slices.)
118 */
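/* Worked example with hypothetical numbers (not from the original source):
 * a stencil miptree with pitch 128 B, total_width 64, total_height 256 and
 * physical_depth0 2 comes out below as pitch = 256, width = 128 and
 * height = 64 once re-expressed in Y-tiling terms.
 */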
119 *tiling = I915_TILING_Y;
120 *pitch = mt->pitch * 2;
121 *width = ALIGN(mt->total_width, halign_stencil) * 2;
122 *height = (mt->total_height / mt->physical_depth0) / 2;
123
124 if (is_render_target) {
125 *format = BRW_SURFACEFORMAT_R8_UINT;
126 }
127 }
128
129
130 /**
131 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
132 * swizzling.
133 */
134 int
135 brw_get_texture_swizzle(const struct gl_context *ctx,
136 const struct gl_texture_object *t)
137 {
138 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
139
140 int swizzles[SWIZZLE_NIL + 1] = {
141 SWIZZLE_X,
142 SWIZZLE_Y,
143 SWIZZLE_Z,
144 SWIZZLE_W,
145 SWIZZLE_ZERO,
146 SWIZZLE_ONE,
147 SWIZZLE_NIL
148 };
149
150 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
151 img->_BaseFormat == GL_DEPTH_STENCIL) {
152 GLenum depth_mode = t->DepthMode;
153
154 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
155 * with depth component data specified with a sized internal format.
156 * Otherwise, it's left at the old default, GL_LUMINANCE.
157 */
158 if (_mesa_is_gles3(ctx) &&
159 img->InternalFormat != GL_DEPTH_COMPONENT &&
160 img->InternalFormat != GL_DEPTH_STENCIL) {
161 depth_mode = GL_RED;
162 }
163
164 switch (depth_mode) {
165 case GL_ALPHA:
166 swizzles[0] = SWIZZLE_ZERO;
167 swizzles[1] = SWIZZLE_ZERO;
168 swizzles[2] = SWIZZLE_ZERO;
169 swizzles[3] = SWIZZLE_X;
170 break;
171 case GL_LUMINANCE:
172 swizzles[0] = SWIZZLE_X;
173 swizzles[1] = SWIZZLE_X;
174 swizzles[2] = SWIZZLE_X;
175 swizzles[3] = SWIZZLE_ONE;
176 break;
177 case GL_INTENSITY:
178 swizzles[0] = SWIZZLE_X;
179 swizzles[1] = SWIZZLE_X;
180 swizzles[2] = SWIZZLE_X;
181 swizzles[3] = SWIZZLE_X;
182 break;
183 case GL_RED:
184 swizzles[0] = SWIZZLE_X;
185 swizzles[1] = SWIZZLE_ZERO;
186 swizzles[2] = SWIZZLE_ZERO;
187 swizzles[3] = SWIZZLE_ONE;
188 break;
189 }
190 }
191
192 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
193
194 /* If the texture's format is alpha-only, force R, G, and B to
195 * 0.0. Similarly, if the texture's format has no alpha channel,
196 * force the alpha value read to 1.0. This allows for the
197 * implementation to use an RGBA texture for any of these formats
198 * without leaking any unexpected values.
199 */
200 switch (img->_BaseFormat) {
201 case GL_ALPHA:
202 swizzles[0] = SWIZZLE_ZERO;
203 swizzles[1] = SWIZZLE_ZERO;
204 swizzles[2] = SWIZZLE_ZERO;
205 break;
206 case GL_LUMINANCE:
207 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
208 swizzles[0] = SWIZZLE_X;
209 swizzles[1] = SWIZZLE_X;
210 swizzles[2] = SWIZZLE_X;
211 swizzles[3] = SWIZZLE_ONE;
212 }
213 break;
214 case GL_LUMINANCE_ALPHA:
215 if (datatype == GL_SIGNED_NORMALIZED) {
216 swizzles[0] = SWIZZLE_X;
217 swizzles[1] = SWIZZLE_X;
218 swizzles[2] = SWIZZLE_X;
219 swizzles[3] = SWIZZLE_W;
220 }
221 break;
222 case GL_INTENSITY:
223 if (datatype == GL_SIGNED_NORMALIZED) {
224 swizzles[0] = SWIZZLE_X;
225 swizzles[1] = SWIZZLE_X;
226 swizzles[2] = SWIZZLE_X;
227 swizzles[3] = SWIZZLE_X;
228 }
229 break;
230 case GL_RED:
231 case GL_RG:
232 case GL_RGB:
233 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
234 swizzles[3] = SWIZZLE_ONE;
235 break;
236 }
237
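/* Compose the application swizzle with the format-based swizzle table built
 * above. Illustrative example: for a depth texture in GL_RED depth mode the
 * table is (X, ZERO, ZERO, ONE), so an application swizzle of RGBA resolves
 * to (X, ZERO, ZERO, ONE), while a swizzle of AAAA resolves to
 * (ONE, ONE, ONE, ONE).
 */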
238 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
239 swizzles[GET_SWZ(t->_Swizzle, 1)],
240 swizzles[GET_SWZ(t->_Swizzle, 2)],
241 swizzles[GET_SWZ(t->_Swizzle, 3)]);
242 }
243
244 static void
245 gen4_emit_buffer_surface_state(struct brw_context *brw,
246 uint32_t *out_offset,
247 drm_intel_bo *bo,
248 unsigned buffer_offset,
249 unsigned surface_format,
250 unsigned buffer_size,
251 unsigned pitch,
252 bool rw)
253 {
254 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
255 6 * 4, 32, out_offset);
256 memset(surf, 0, 6 * 4);
257
258 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
259 surface_format << BRW_SURFACE_FORMAT_SHIFT |
260 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
261 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
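/* The element count is split across three SURFACE_STATE fields: bits 6:0 go
 * in Width, bits 19:7 in Height and bits 26:20 in Depth, allowing up to 2^27
 * elements. Illustrative example: buffer_size = 0x123456 packs as Width 0x56,
 * Height 0x468 and Depth 0x1.
 */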
262 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
263 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
264 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
265 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
266
267 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
268 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
269 * physical cache. It is mapped in hardware to the sampler cache."
270 */
271 if (bo) {
272 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
273 bo, buffer_offset,
274 I915_GEM_DOMAIN_SAMPLER,
275 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
276 }
277 }
278
279 void
280 brw_update_buffer_texture_surface(struct gl_context *ctx,
281 unsigned unit,
282 uint32_t *surf_offset)
283 {
284 struct brw_context *brw = brw_context(ctx);
285 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
286 struct intel_buffer_object *intel_obj =
287 intel_buffer_object(tObj->BufferObject);
288 uint32_t size = tObj->BufferSize;
289 drm_intel_bo *bo = NULL;
290 mesa_format format = tObj->_BufferObjectFormat;
291 uint32_t brw_format = brw_format_for_mesa_format(format);
292 int texel_size = _mesa_get_format_bytes(format);
293
294 if (intel_obj) {
295 size = MIN2(size, intel_obj->Base.Size);
296 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
297 }
298
299 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
300 _mesa_problem(NULL, "bad format %s for texture buffer\n",
301 _mesa_get_format_name(format));
302 }
303
304 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
305 tObj->BufferOffset,
306 brw_format,
307 size / texel_size,
308 texel_size,
309 false /* rw */);
310 }
311
312 static void
313 brw_update_texture_surface(struct gl_context *ctx,
314 unsigned unit,
315 uint32_t *surf_offset,
316 bool for_gather)
317 {
318 struct brw_context *brw = brw_context(ctx);
319 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
320 struct intel_texture_object *intelObj = intel_texture_object(tObj);
321 struct intel_mipmap_tree *mt = intelObj->mt;
322 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
323 uint32_t *surf;
324
325 /* BRW_NEW_TEXTURE_BUFFER */
326 if (tObj->Target == GL_TEXTURE_BUFFER) {
327 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
328 return;
329 }
330
331 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
332 6 * 4, 32, surf_offset);
333
334 uint32_t tex_format = translate_tex_format(brw, mt->format,
335 sampler->sRGBDecode);
336
337 if (for_gather) {
338 /* Sandybridge's gather4 message is broken for integer formats.
339 * To work around this, we pretend the surface is UNORM for
340 * 8 or 16-bit formats, and emit shader instructions to recover
341 * the real INT/UINT value. For 32-bit formats, we pretend
342 * the surface is FLOAT, and simply reinterpret the resulting
343 * bits.
344 */
345 switch (tex_format) {
346 case BRW_SURFACEFORMAT_R8_SINT:
347 case BRW_SURFACEFORMAT_R8_UINT:
348 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
349 break;
350
351 case BRW_SURFACEFORMAT_R16_SINT:
352 case BRW_SURFACEFORMAT_R16_UINT:
353 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
354 break;
355
356 case BRW_SURFACEFORMAT_R32_SINT:
357 case BRW_SURFACEFORMAT_R32_UINT:
358 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
359 break;
360
361 default:
362 break;
363 }
364 }
365
366 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
367 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
368 BRW_SURFACE_CUBEFACE_ENABLES |
369 tex_format << BRW_SURFACE_FORMAT_SHIFT);
370
371 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
372
373 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
374 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
375 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
376
377 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
378 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
379 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
380
381 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
382 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
383
384 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
385
386 /* Emit relocation to surface contents */
387 drm_intel_bo_emit_reloc(brw->batch.bo,
388 *surf_offset + 4,
389 mt->bo,
390 surf[1] - mt->bo->offset64,
391 I915_GEM_DOMAIN_SAMPLER, 0);
392 }
393
394 /**
395 * Create the constant buffer surface. Vertex/fragment shader constants will be
396 * read from this buffer with Data Port Read instructions/messages.
397 */
398 void
399 brw_create_constant_surface(struct brw_context *brw,
400 drm_intel_bo *bo,
401 uint32_t offset,
402 uint32_t size,
403 uint32_t *out_offset,
404 bool dword_pitch)
405 {
406 uint32_t stride = dword_pitch ? 4 : 16;
407 uint32_t elements = ALIGN(size, stride) / stride;
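/* Illustrative example (hypothetical size): a 100-byte UBO with
 * dword_pitch == false (stride 16) is padded to 112 bytes and exposed as 7
 * elements; with dword_pitch == true (stride 4) it is exposed as 25 elements.
 */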
408
409 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
410 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
411 elements, stride, false);
412 }
413
414 /**
415 * Create the buffer surface. Shader buffer variables will be
416 * read from / write to this buffer with Data Port Read/Write
417 * instructions/messages.
418 */
419 void
420 brw_create_buffer_surface(struct brw_context *brw,
421 drm_intel_bo *bo,
422 uint32_t offset,
423 uint32_t size,
424 uint32_t *out_offset,
425 bool dword_pitch)
426 {
427 /* Use a raw surface so we can reuse existing untyped read/write/atomic
428 * messages. We need these specifically for the fragment shader since they
429 * include a pixel mask header that we need to ensure correct behavior
430 * with helper invocations, which cannot write to the buffer.
431 */
432 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
433 BRW_SURFACEFORMAT_RAW,
434 size, 1, true);
435 }
436
437 /**
438 * Set up a binding table entry for use by stream output logic (transform
439 * feedback).
440 *
441 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
442 */
443 void
444 brw_update_sol_surface(struct brw_context *brw,
445 struct gl_buffer_object *buffer_obj,
446 uint32_t *out_offset, unsigned num_vector_components,
447 unsigned stride_dwords, unsigned offset_dwords)
448 {
449 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
450 uint32_t offset_bytes = 4 * offset_dwords;
451 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
452 offset_bytes,
453 buffer_obj->Size - offset_bytes);
454 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
455 out_offset);
456 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
457 size_t size_dwords = buffer_obj->Size / 4;
458 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
459
460 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
461 * too big to map using a single binding table entry?
462 */
463 assert((size_dwords - offset_dwords) / stride_dwords
464 <= BRW_MAX_NUM_BUFFER_ENTRIES);
465
466 if (size_dwords > offset_dwords + num_vector_components) {
467 /* There is room for at least 1 transform feedback output in the buffer.
468 * Compute the number of additional transform feedback outputs the
469 * buffer has room for.
470 */
471 buffer_size_minus_1 =
472 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
473 } else {
474 /* There isn't even room for a single transform feedback output in the
475 * buffer. We can't configure the binding table entry to prevent output
476 * entirely; we'll have to rely on the geometry shader to detect
477 * overflow. But to minimize the damage in case of a bug, set up the
478 * binding table entry to just allow a single output.
479 */
480 buffer_size_minus_1 = 0;
481 }
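/* As with other buffer surfaces, the entry count is split across the Width
 * (bits 6:0), Height (bits 19:7) and Depth (bits 26:20) fields below.
 * Illustrative example: buffer_size_minus_1 = 1000 packs as width 0x68,
 * height 0x7 and depth 0.
 */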
482 width = buffer_size_minus_1 & 0x7f;
483 height = (buffer_size_minus_1 & 0xfff80) >> 7;
484 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
485
486 switch (num_vector_components) {
487 case 1:
488 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
489 break;
490 case 2:
491 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
492 break;
493 case 3:
494 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
495 break;
496 case 4:
497 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
498 break;
499 default:
500 unreachable("Invalid vector size for transform feedback output");
501 }
502
503 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
504 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
505 surface_format << BRW_SURFACE_FORMAT_SHIFT |
506 BRW_SURFACE_RC_READ_WRITE;
507 surf[1] = bo->offset64 + offset_bytes; /* reloc */
508 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
509 height << BRW_SURFACE_HEIGHT_SHIFT);
510 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
511 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
512 surf[4] = 0;
513 surf[5] = 0;
514
515 /* Emit relocation to surface contents. */
516 drm_intel_bo_emit_reloc(brw->batch.bo,
517 *out_offset + 4,
518 bo, offset_bytes,
519 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
520 }
521
522 /* Creates a new WM constant buffer reflecting the current fragment program's
523 * constants, if needed by the fragment program.
524 *
525 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
526 * state atom.
527 */
528 static void
529 brw_upload_wm_pull_constants(struct brw_context *brw)
530 {
531 struct brw_stage_state *stage_state = &brw->wm.base;
532 /* BRW_NEW_FRAGMENT_PROGRAM */
533 struct brw_fragment_program *fp =
534 (struct brw_fragment_program *) brw->fragment_program;
535 /* BRW_NEW_FS_PROG_DATA */
536 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
537
538 /* _NEW_PROGRAM_CONSTANTS */
539 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
540 stage_state, prog_data, true);
541 }
542
543 const struct brw_tracked_state brw_wm_pull_constants = {
544 .dirty = {
545 .mesa = _NEW_PROGRAM_CONSTANTS,
546 .brw = BRW_NEW_BATCH |
547 BRW_NEW_FRAGMENT_PROGRAM |
548 BRW_NEW_FS_PROG_DATA,
549 },
550 .emit = brw_upload_wm_pull_constants,
551 };
552
553 /**
554 * Creates a null renderbuffer surface.
555 *
556 * This is used when the shader doesn't write to any color output. An FB
557 * write to target 0 will still be emitted, because that's how the thread is
558 * terminated (and computed depth is returned), so we need to have the
560 * hardware discard the target 0 color output.
560 */
561 static void
562 brw_emit_null_surface_state(struct brw_context *brw,
563 unsigned width,
564 unsigned height,
565 unsigned samples,
566 uint32_t *out_offset)
567 {
568 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
569 * Notes):
570 *
571 * A null surface will be used in instances where an actual surface is
572 * not bound. When a write message is generated to a null surface, no
573 * actual surface is written to. When a read message (including any
574 * sampling engine message) is generated to a null surface, the result
575 * is all zeros. Note that a null surface type is allowed to be used
576 * with all messages, even if it is not specifically indicated as
577 * supported. All of the remaining fields in surface state are ignored
578 * for null surfaces, with the following exceptions:
579 *
580 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
581 * depth buffer’s corresponding state for all render target surfaces,
582 * including null.
583 *
584 * - Surface Format must be R8G8B8A8_UNORM.
585 */
586 unsigned surface_type = BRW_SURFACE_NULL;
587 drm_intel_bo *bo = NULL;
588 unsigned pitch_minus_1 = 0;
589 uint32_t multisampling_state = 0;
590 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
591 out_offset);
592
593 if (samples > 1) {
594 /* On Gen6, null render targets seem to cause GPU hangs when
595 * multisampling. So work around this problem by rendering into a dummy
596 * color buffer.
597 *
598 * To decrease the amount of memory needed by the workaround buffer, we
599 * set its pitch to 128 bytes (the width of a Y tile). This means that
600 * the amount of memory needed for the workaround buffer is
601 * (width_in_tiles + height_in_tiles - 1) tiles.
602 *
603 * Note that since the workaround buffer will be interpreted by the
604 * hardware as an interleaved multisampled buffer, we need to compute
605 * width_in_tiles and height_in_tiles by dividing the width and height
606 * by 16 rather than the normal Y-tile size of 32.
607 */
608 unsigned width_in_tiles = ALIGN(width, 16) / 16;
609 unsigned height_in_tiles = ALIGN(height, 16) / 16;
610 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
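/* Illustrative example (hypothetical framebuffer): at 1920x1080 this gives
 * width_in_tiles = 120 and height_in_tiles = 68, so the dummy buffer needs
 * only (120 + 68 - 1) * 4096 bytes, i.e. about 748 KiB.
 */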
611 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
612 size_needed);
613 bo = brw->wm.multisampled_null_render_target_bo;
614 surface_type = BRW_SURFACE_2D;
615 pitch_minus_1 = 127;
616 multisampling_state = brw_get_surface_num_multisamples(samples);
617 }
618
619 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
620 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
621 if (brw->gen < 6) {
622 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
623 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
626 }
627 surf[1] = bo ? bo->offset64 : 0;
628 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
629 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
630
631 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
632 * Notes):
633 *
634 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
635 */
636 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
637 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
638 surf[4] = multisampling_state;
639 surf[5] = 0;
640
641 if (bo) {
642 drm_intel_bo_emit_reloc(brw->batch.bo,
643 *out_offset + 4,
644 bo, 0,
645 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
646 }
647 }
648
649 /**
650 * Sets up a surface state structure to point at the given region.
651 * While it is only used for the front/back buffer currently, it should be
652 * usable for further buffers when doing ARB_draw_buffers support.
653 */
654 static uint32_t
655 brw_update_renderbuffer_surface(struct brw_context *brw,
656 struct gl_renderbuffer *rb,
657 bool layered, unsigned unit,
658 uint32_t surf_index)
659 {
660 struct gl_context *ctx = &brw->ctx;
661 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
662 struct intel_mipmap_tree *mt = irb->mt;
663 uint32_t *surf;
664 uint32_t tile_x, tile_y;
665 uint32_t format = 0;
666 uint32_t offset;
667 /* _NEW_BUFFERS */
668 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
669 /* BRW_NEW_FS_PROG_DATA */
670
671 assert(!layered);
672
673 if (rb->TexImage && !brw->has_surface_tile_offset) {
674 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
675
676 if (tile_x != 0 || tile_y != 0) {
677 /* Original gen4 hardware couldn't draw to a non-tile-aligned
678 * destination in a miptree unless you actually set up your renderbuffer
679 * as a miptree and used the fragile lod/array_index/etc. controls to
680 * select the image. So, instead, we just make a new single-level
681 * miptree and render into that.
682 */
683 intel_renderbuffer_move_to_temp(brw, irb, false);
684 mt = irb->mt;
685 }
686 }
687
688 intel_miptree_used_for_rendering(irb->mt);
689
690 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
691
692 format = brw->render_target_format[rb_format];
693 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
694 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
695 __func__, _mesa_get_format_name(rb_format));
696 }
697
698 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
699 format << BRW_SURFACE_FORMAT_SHIFT);
700
701 /* reloc */
702 assert(mt->offset % mt->cpp == 0);
703 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
704 mt->bo->offset64 + mt->offset);
705
706 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
707 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
708
709 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
710 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
711
712 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
713
714 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
715 /* Note that the low bits of these fields are missing, so
716 * there's the possibility of getting in trouble.
717 */
718 assert(tile_x % 4 == 0);
719 assert(tile_y % 2 == 0);
720 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
721 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
722 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
723
724 if (brw->gen < 6) {
725 /* _NEW_COLOR */
726 if (!ctx->Color.ColorLogicOpEnabled &&
727 (ctx->Color.BlendEnabled & (1 << unit)))
728 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
729
730 if (!ctx->Color.ColorMask[unit][0])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
732 if (!ctx->Color.ColorMask[unit][1])
733 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
734 if (!ctx->Color.ColorMask[unit][2])
735 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
736
737 /* As mentioned above, disable writes to the alpha component when the
738 * renderbuffer is XRGB.
739 */
740 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
741 !ctx->Color.ColorMask[unit][3]) {
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
743 }
744 }
745
746 drm_intel_bo_emit_reloc(brw->batch.bo,
747 offset + 4,
748 mt->bo,
749 surf[1] - mt->bo->offset64,
750 I915_GEM_DOMAIN_RENDER,
751 I915_GEM_DOMAIN_RENDER);
752
753 return offset;
754 }
755
756 /**
757 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
758 */
759 void
760 brw_update_renderbuffer_surfaces(struct brw_context *brw,
761 const struct gl_framebuffer *fb,
762 uint32_t render_target_start,
763 uint32_t *surf_offset)
764 {
765 GLuint i;
766 const unsigned int w = _mesa_geometric_width(fb);
767 const unsigned int h = _mesa_geometric_height(fb);
768 const unsigned int s = _mesa_geometric_samples(fb);
769
770 /* Update surfaces for drawing buffers */
771 if (fb->_NumColorDrawBuffers >= 1) {
772 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
773 const uint32_t surf_index = render_target_start + i;
774
775 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
776 surf_offset[surf_index] =
777 brw->vtbl.update_renderbuffer_surface(
778 brw, fb->_ColorDrawBuffers[i],
779 _mesa_geometric_layers(fb) > 0, i, surf_index);
780 } else {
781 brw->vtbl.emit_null_surface_state(brw, w, h, s,
782 &surf_offset[surf_index]);
783 }
784 }
785 } else {
786 const uint32_t surf_index = render_target_start;
787 brw->vtbl.emit_null_surface_state(brw, w, h, s,
788 &surf_offset[surf_index]);
789 }
790 }
791
792 static void
793 update_renderbuffer_surfaces(struct brw_context *brw)
794 {
795 const struct gl_context *ctx = &brw->ctx;
796
797 /* _NEW_BUFFERS | _NEW_COLOR */
798 const struct gl_framebuffer *fb = ctx->DrawBuffer;
799 brw_update_renderbuffer_surfaces(
800 brw, fb,
801 brw->wm.prog_data->binding_table.render_target_start,
802 brw->wm.base.surf_offset);
803 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_renderbuffer_surfaces = {
807 .dirty = {
808 .mesa = _NEW_BUFFERS |
809 _NEW_COLOR,
810 .brw = BRW_NEW_BATCH |
811 BRW_NEW_FS_PROG_DATA,
812 },
813 .emit = update_renderbuffer_surfaces,
814 };
815
816 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
817 .dirty = {
818 .mesa = _NEW_BUFFERS,
819 .brw = BRW_NEW_BATCH,
820 },
821 .emit = update_renderbuffer_surfaces,
822 };
823
824
825 static void
826 update_stage_texture_surfaces(struct brw_context *brw,
827 const struct gl_program *prog,
828 struct brw_stage_state *stage_state,
829 bool for_gather)
830 {
831 if (!prog)
832 return;
833
834 struct gl_context *ctx = &brw->ctx;
835
836 uint32_t *surf_offset = stage_state->surf_offset;
837
838 /* BRW_NEW_*_PROG_DATA */
839 if (for_gather)
840 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
841 else
842 surf_offset += stage_state->prog_data->binding_table.texture_start;
843
844 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
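/* Walk every sampler slot up to the highest one in use. Illustrative
 * example: SamplersUsed == 0b1001 gives num_samplers == 4, so slots 0 and 3
 * get real surface state while slots 1 and 2 merely have their offsets
 * zeroed.
 */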
845 for (unsigned s = 0; s < num_samplers; s++) {
846 surf_offset[s] = 0;
847
848 if (prog->SamplersUsed & (1 << s)) {
849 const unsigned unit = prog->SamplerUnits[s];
850
851 /* _NEW_TEXTURE */
852 if (ctx->Texture.Unit[unit]._Current) {
853 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
854 }
855 }
856 }
857 }
858
859
860 /**
861 * Construct SURFACE_STATE objects for enabled textures.
862 */
863 static void
864 brw_update_texture_surfaces(struct brw_context *brw)
865 {
866 /* BRW_NEW_VERTEX_PROGRAM */
867 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
881 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
882 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
883
884 /* Emit an alternate set of surface state for gather. This
885 * allows the surface format to be overridden for only the
886 * gather4 messages. */
887 if (brw->gen < 8) {
888 if (vs && vs->UsesGather)
889 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
890 if (gs && gs->UsesGather)
891 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
892 if (fs && fs->UsesGather)
893 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
894 if (cs && cs->UsesGather)
895 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
896 }
897
898 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
899 }
900
901 const struct brw_tracked_state brw_texture_surfaces = {
902 .dirty = {
903 .mesa = _NEW_TEXTURE,
904 .brw = BRW_NEW_BATCH |
905 BRW_NEW_COMPUTE_PROGRAM |
906 BRW_NEW_FRAGMENT_PROGRAM |
907 BRW_NEW_FS_PROG_DATA |
908 BRW_NEW_GEOMETRY_PROGRAM |
909 BRW_NEW_GS_PROG_DATA |
910 BRW_NEW_TEXTURE_BUFFER |
911 BRW_NEW_VERTEX_PROGRAM |
912 BRW_NEW_VS_PROG_DATA,
913 },
914 .emit = brw_update_texture_surfaces,
915 };
916
917 void
918 brw_upload_ubo_surfaces(struct brw_context *brw,
919 struct gl_shader *shader,
920 struct brw_stage_state *stage_state,
921 struct brw_stage_prog_data *prog_data,
922 bool dword_pitch)
923 {
924 struct gl_context *ctx = &brw->ctx;
925
926 if (!shader)
927 return;
928
929 uint32_t *ubo_surf_offsets =
930 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
931
932 for (int i = 0; i < shader->NumUniformBlocks; i++) {
933 struct gl_uniform_buffer_binding *binding =
934 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
935
936 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
937 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
938 } else {
939 struct intel_buffer_object *intel_bo =
940 intel_buffer_object(binding->BufferObject);
941 drm_intel_bo *bo =
942 intel_bufferobj_buffer(brw, intel_bo,
943 binding->Offset,
944 binding->BufferObject->Size - binding->Offset);
945 brw_create_constant_surface(brw, bo, binding->Offset,
946 binding->BufferObject->Size - binding->Offset,
947 &ubo_surf_offsets[i],
948 dword_pitch);
949 }
950 }
951
952 uint32_t *ssbo_surf_offsets =
953 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
954
955 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
956 struct gl_shader_storage_buffer_binding *binding =
957 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
958
959 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
960 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
961 } else {
962 struct intel_buffer_object *intel_bo =
963 intel_buffer_object(binding->BufferObject);
964 drm_intel_bo *bo =
965 intel_bufferobj_buffer(brw, intel_bo,
966 binding->Offset,
967 binding->BufferObject->Size - binding->Offset);
968 brw_create_buffer_surface(brw, bo, binding->Offset,
969 binding->BufferObject->Size - binding->Offset,
970 &ssbo_surf_offsets[i],
971 dword_pitch);
972 }
973 }
974
975 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
976 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
977 }
978
979 static void
980 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
981 {
982 struct gl_context *ctx = &brw->ctx;
983 /* _NEW_PROGRAM */
984 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
985
986 if (!prog)
987 return;
988
989 /* BRW_NEW_FS_PROG_DATA */
990 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
991 &brw->wm.base, &brw->wm.prog_data->base, true);
992 }
993
994 const struct brw_tracked_state brw_wm_ubo_surfaces = {
995 .dirty = {
996 .mesa = _NEW_PROGRAM,
997 .brw = BRW_NEW_BATCH |
998 BRW_NEW_FS_PROG_DATA |
999 BRW_NEW_UNIFORM_BUFFER,
1000 },
1001 .emit = brw_upload_wm_ubo_surfaces,
1002 };
1003
1004 static void
1005 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1006 {
1007 struct gl_context *ctx = &brw->ctx;
1008 /* _NEW_PROGRAM */
1009 struct gl_shader_program *prog =
1010 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1011
1012 if (!prog)
1013 return;
1014
1015 /* BRW_NEW_CS_PROG_DATA */
1016 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1017 &brw->cs.base, &brw->cs.prog_data->base, true);
1018 }
1019
1020 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1021 .dirty = {
1022 .mesa = _NEW_PROGRAM,
1023 .brw = BRW_NEW_BATCH |
1024 BRW_NEW_CS_PROG_DATA |
1025 BRW_NEW_UNIFORM_BUFFER,
1026 },
1027 .emit = brw_upload_cs_ubo_surfaces,
1028 };
1029
1030 void
1031 brw_upload_abo_surfaces(struct brw_context *brw,
1032 struct gl_shader_program *prog,
1033 struct brw_stage_state *stage_state,
1034 struct brw_stage_prog_data *prog_data)
1035 {
1036 struct gl_context *ctx = &brw->ctx;
1037 uint32_t *surf_offsets =
1038 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1039
1040 for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
1041 struct gl_atomic_buffer_binding *binding =
1042 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
1043 struct intel_buffer_object *intel_bo =
1044 intel_buffer_object(binding->BufferObject);
1045 drm_intel_bo *bo = intel_bufferobj_buffer(
1046 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1047
1048 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1049 binding->Offset, BRW_SURFACEFORMAT_RAW,
1050 bo->size - binding->Offset, 1, true);
1051 }
1052
1053 if (prog->NumAtomicBuffers)
1054 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1055 }
1056
1057 static void
1058 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1059 {
1060 struct gl_context *ctx = &brw->ctx;
1061 /* _NEW_PROGRAM */
1062 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1063
1064 if (prog) {
1065 /* BRW_NEW_FS_PROG_DATA */
1066 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
1067 &brw->wm.prog_data->base);
1068 }
1069 }
1070
1071 const struct brw_tracked_state brw_wm_abo_surfaces = {
1072 .dirty = {
1073 .mesa = _NEW_PROGRAM,
1074 .brw = BRW_NEW_ATOMIC_BUFFER |
1075 BRW_NEW_BATCH |
1076 BRW_NEW_FS_PROG_DATA,
1077 },
1078 .emit = brw_upload_wm_abo_surfaces,
1079 };
1080
1081 static void
1082 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1083 {
1084 struct gl_context *ctx = &brw->ctx;
1085 /* _NEW_PROGRAM */
1086 struct gl_shader_program *prog =
1087 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1088
1089 if (prog) {
1090 /* BRW_NEW_CS_PROG_DATA */
1091 brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1092 &brw->cs.prog_data->base);
1093 }
1094 }
1095
1096 const struct brw_tracked_state brw_cs_abo_surfaces = {
1097 .dirty = {
1098 .mesa = _NEW_PROGRAM,
1099 .brw = BRW_NEW_ATOMIC_BUFFER |
1100 BRW_NEW_BATCH |
1101 BRW_NEW_CS_PROG_DATA,
1102 },
1103 .emit = brw_upload_cs_abo_surfaces,
1104 };
1105
1106 static void
1107 brw_upload_cs_image_surfaces(struct brw_context *brw)
1108 {
1109 struct gl_context *ctx = &brw->ctx;
1110 /* _NEW_PROGRAM */
1111 struct gl_shader_program *prog =
1112 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1113
1114 if (prog) {
1115 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1116 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1117 &brw->cs.base, &brw->cs.prog_data->base);
1118 }
1119 }
1120
1121 const struct brw_tracked_state brw_cs_image_surfaces = {
1122 .dirty = {
1123 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1124 .brw = BRW_NEW_BATCH |
1125 BRW_NEW_CS_PROG_DATA |
1126 BRW_NEW_IMAGE_UNITS
1127 },
1128 .emit = brw_upload_cs_image_surfaces,
1129 };
1130
1131 static uint32_t
1132 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1133 {
1134 if (access == GL_WRITE_ONLY) {
1135 return brw_format_for_mesa_format(format);
1136 } else {
1137 /* Typed surface reads support a very limited subset of the shader
1138 * image formats. Translate it into the closest format the
1139 * hardware supports.
1140 */
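/* For example, 16-byte-per-texel formats such as RGBA32F fall back to RAW
 * (untyped) access on Gen8 and earlier, and 8-byte formats such as RG32F
 * additionally fall back on Gen7 parts other than Haswell.
 */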
1141 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1142 (_mesa_get_format_bytes(format) >= 8 &&
1143 (brw->gen == 7 && !brw->is_haswell)))
1144 return BRW_SURFACEFORMAT_RAW;
1145 else
1146 return brw_format_for_mesa_format(
1147 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1148 }
1149 }
1150
1151 static void
1152 update_default_image_param(struct brw_context *brw,
1153 struct gl_image_unit *u,
1154 unsigned surface_idx,
1155 struct brw_image_param *param)
1156 {
1157 memset(param, 0, sizeof(*param));
1158 param->surface_idx = surface_idx;
1159 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1160 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1161 * detailed explanation of these parameters.
1162 */
1163 param->swizzling[0] = 0xff;
1164 param->swizzling[1] = 0xff;
1165 }
1166
1167 static void
1168 update_buffer_image_param(struct brw_context *brw,
1169 struct gl_image_unit *u,
1170 unsigned surface_idx,
1171 struct brw_image_param *param)
1172 {
1173 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1174
1175 update_default_image_param(brw, u, surface_idx, param);
1176
1177 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1178 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1179 }
1180
1181 static void
1182 update_texture_image_param(struct brw_context *brw,
1183 struct gl_image_unit *u,
1184 unsigned surface_idx,
1185 struct brw_image_param *param)
1186 {
1187 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1188
1189 update_default_image_param(brw, u, surface_idx, param);
1190
1191 param->size[0] = minify(mt->logical_width0, u->Level);
1192 param->size[1] = minify(mt->logical_height0, u->Level);
1193 param->size[2] = (!u->Layered ? 1 :
1194 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1195 u->TexObj->Target == GL_TEXTURE_3D ?
1196 minify(mt->logical_depth0, u->Level) :
1197 mt->logical_depth0);
1198
1199 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1200 &param->offset[0],
1201 &param->offset[1]);
1202
1203 param->stride[0] = mt->cpp;
1204 param->stride[1] = mt->pitch / mt->cpp;
1205 param->stride[2] =
1206 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1207 param->stride[3] =
1208 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1209
1210 if (mt->tiling == I915_TILING_X) {
1211 /* An X tile is a rectangular block of 512x8 bytes. */
1212 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1213 param->tiling[1] = _mesa_logbase2(8);
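/* Illustrative example: with a 4-byte-per-texel format an X tile covers
 * 128x8 texels, so tiling[] ends up as { 7, 3 }.
 */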
1214
1215 if (brw->has_swizzling) {
1216 /* Right shifts required to swizzle bits 9 and 10 of the memory
1217 * address with bit 6.
1218 */
1219 param->swizzling[0] = 3;
1220 param->swizzling[1] = 4;
1221 }
1222 } else if (mt->tiling == I915_TILING_Y) {
1223 /* The layout of a Y-tiled surface in memory isn't fundamentally
1224 * different from the layout of an X-tiled surface; we simply pretend that
1225 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1226 * one arranged in X-major order just as is the case for X-tiling.
1227 */
1228 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1229 param->tiling[1] = _mesa_logbase2(32);
1230
1231 if (brw->has_swizzling) {
1232 /* Right shift required to swizzle bit 9 of the memory address with
1233 * bit 6.
1234 */
1235 param->swizzling[0] = 3;
1236 }
1237 }
1238
1239 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1240 * address calculation algorithm (emit_address_calculation() in
1241 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1242 * modulus equal to the LOD.
1243 */
1244 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1245 0);
1246 }
1247
1248 static void
1249 update_image_surface(struct brw_context *brw,
1250 struct gl_image_unit *u,
1251 GLenum access,
1252 unsigned surface_idx,
1253 uint32_t *surf_offset,
1254 struct brw_image_param *param)
1255 {
1256 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1257 struct gl_texture_object *obj = u->TexObj;
1258 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1259
1260 if (obj->Target == GL_TEXTURE_BUFFER) {
1261 struct intel_buffer_object *intel_obj =
1262 intel_buffer_object(obj->BufferObject);
1263 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1264 _mesa_get_format_bytes(u->_ActualFormat));
1265
1266 brw->vtbl.emit_buffer_surface_state(
1267 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1268 format, intel_obj->Base.Size / texel_size, texel_size,
1269 access != GL_READ_ONLY);
1270
1271 update_buffer_image_param(brw, u, surface_idx, param);
1272
1273 } else {
1274 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1275 struct intel_mipmap_tree *mt = intel_obj->mt;
1276
1277 if (format == BRW_SURFACEFORMAT_RAW) {
1278 brw->vtbl.emit_buffer_surface_state(
1279 brw, surf_offset, mt->bo, mt->offset,
1280 format, mt->bo->size - mt->offset, 1 /* pitch */,
1281 access != GL_READ_ONLY);
1282
1283 } else {
1284 const unsigned min_layer = obj->MinLayer + u->_Layer;
1285 const unsigned min_level = obj->MinLevel + u->Level;
1286 const unsigned num_layers = (!u->Layered ? 1 :
1287 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1288 mt->logical_depth0);
1289 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1290 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1291 GL_TEXTURE_2D_ARRAY : obj->Target);
1292
1293 brw->vtbl.emit_texture_surface_state(
1294 brw, mt, target,
1295 min_layer, min_layer + num_layers,
1296 min_level, min_level + 1,
1297 format, SWIZZLE_XYZW,
1298 surf_offset, access != GL_READ_ONLY, false);
1299 }
1300
1301 update_texture_image_param(brw, u, surface_idx, param);
1302 }
1303
1304 } else {
1305 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1306 update_default_image_param(brw, u, surface_idx, param);
1307 }
1308 }
1309
1310 void
1311 brw_upload_image_surfaces(struct brw_context *brw,
1312 struct gl_shader *shader,
1313 struct brw_stage_state *stage_state,
1314 struct brw_stage_prog_data *prog_data)
1315 {
1316 struct gl_context *ctx = &brw->ctx;
1317
1318 if (shader && shader->NumImages) {
1319 for (unsigned i = 0; i < shader->NumImages; i++) {
1320 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1321 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1322
1323 update_image_surface(brw, u, shader->ImageAccess[i],
1324 surf_idx,
1325 &stage_state->surf_offset[surf_idx],
1326 &prog_data->image_param[i]);
1327 }
1328
1329 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1330 }
1331 }
1332
1333 static void
1334 brw_upload_wm_image_surfaces(struct brw_context *brw)
1335 {
1336 struct gl_context *ctx = &brw->ctx;
1337 /* BRW_NEW_FRAGMENT_PROGRAM */
1338 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1339
1340 if (prog) {
1341 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1342 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1343 &brw->wm.base, &brw->wm.prog_data->base);
1344 }
1345 }
1346
1347 const struct brw_tracked_state brw_wm_image_surfaces = {
1348 .dirty = {
1349 .mesa = _NEW_TEXTURE,
1350 .brw = BRW_NEW_BATCH |
1351 BRW_NEW_FRAGMENT_PROGRAM |
1352 BRW_NEW_FS_PROG_DATA |
1353 BRW_NEW_IMAGE_UNITS
1354 },
1355 .emit = brw_upload_wm_image_surfaces,
1356 };
1357
1358 void
1359 gen4_init_vtable_surface_functions(struct brw_context *brw)
1360 {
1361 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1362 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1363 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1364 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1365 }
1366
1367 static void
1368 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1369 {
1370 struct gl_context *ctx = &brw->ctx;
1371 /* _NEW_PROGRAM */
1372 struct gl_shader_program *prog =
1373 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1374
1375 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1376 const unsigned surf_idx =
1377 brw->cs.prog_data->binding_table.work_groups_start;
1378 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1379 drm_intel_bo *bo;
1380 uint32_t bo_offset;
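/* If no BO already holds the work-group counts (a direct dispatch), upload
 * the three GLuints from brw->compute.num_work_groups -- what the shader
 * reads as gl_NumWorkGroups -- now; otherwise reuse the existing BO, e.g.
 * from an indirect dispatch.
 */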
1381
1382 if (brw->compute.num_work_groups_bo == NULL) {
1383 bo = NULL;
1384 intel_upload_data(brw,
1385 (void *)brw->compute.num_work_groups,
1386 3 * sizeof(GLuint),
1387 sizeof(GLuint),
1388 &bo,
1389 &bo_offset);
1390 } else {
1391 bo = brw->compute.num_work_groups_bo;
1392 bo_offset = brw->compute.num_work_groups_offset;
1393 }
1394
1395 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1396 bo, bo_offset,
1397 BRW_SURFACEFORMAT_RAW,
1398 3 * sizeof(GLuint), 1, true);
1399 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1400 }
1401 }
1402
1403 const struct brw_tracked_state brw_cs_work_groups_surface = {
1404 .dirty = {
1405 .brw = BRW_NEW_CS_WORK_GROUPS
1406 },
1407 .emit = brw_upload_cs_work_groups_surface,
1408 };