i965/compute: Fix uniform init issue when SIMD8 is skipped
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "isl/isl.h"
43
44 #include "intel_mipmap_tree.h"
45 #include "intel_batchbuffer.h"
46 #include "intel_tex.h"
47 #include "intel_fbo.h"
48 #include "intel_buffer_objects.h"
49
50 #include "brw_context.h"
51 #include "brw_state.h"
52 #include "brw_defines.h"
53 #include "brw_wm.h"
54
55 GLuint
56 translate_tex_target(GLenum target)
57 {
58 switch (target) {
59 case GL_TEXTURE_1D:
60 case GL_TEXTURE_1D_ARRAY_EXT:
61 return BRW_SURFACE_1D;
62
63 case GL_TEXTURE_RECTANGLE_NV:
64 return BRW_SURFACE_2D;
65
66 case GL_TEXTURE_2D:
67 case GL_TEXTURE_2D_ARRAY_EXT:
68 case GL_TEXTURE_EXTERNAL_OES:
69 case GL_TEXTURE_2D_MULTISAMPLE:
70 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
71 return BRW_SURFACE_2D;
72
73 case GL_TEXTURE_3D:
74 return BRW_SURFACE_3D;
75
76 case GL_TEXTURE_CUBE_MAP:
77 case GL_TEXTURE_CUBE_MAP_ARRAY:
78 return BRW_SURFACE_CUBE;
79
80 default:
81 unreachable("not reached");
82 }
83 }
84
85 uint32_t
86 brw_get_surface_tiling_bits(uint32_t tiling)
87 {
88 switch (tiling) {
89 case I915_TILING_X:
90 return BRW_SURFACE_TILED;
91 case I915_TILING_Y:
92 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
93 default:
94 return 0;
95 }
96 }
97
98
99 uint32_t
100 brw_get_surface_num_multisamples(unsigned num_samples)
101 {
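/* Gen6 is the only pre-Gen7 part with multisampling support, and it only
 * supports 4x, so any multisampled surface can use the 4x encoding here.
 */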
102 if (num_samples > 1)
103 return BRW_SURFACE_MULTISAMPLECOUNT_4;
104 else
105 return BRW_SURFACE_MULTISAMPLECOUNT_1;
106 }
107
108 void
109 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
110 bool is_render_target,
111 unsigned *width, unsigned *height,
112 unsigned *pitch, uint32_t *tiling, unsigned *format)
113 {
114 static const unsigned halign_stencil = 8;
115
116 /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
117 * there are half as many rows.
118 * In addition, mip-levels are accessed manually by the program and
119 * therefore the surface is set up to cover all the mip-levels for one slice.
120 * (Hardware is still used to access individual slices).
121 */
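/* For reference: a W tile is 64 bytes x 64 rows while a Y tile is
 * 128 bytes x 32 rows, which is why the pitch doubles and the row count
 * halves when the stencil miptree is presented to the hardware as Y-tiled.
 */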
122 *tiling = I915_TILING_Y;
123 *pitch = mt->pitch * 2;
124 *width = ALIGN(mt->total_width, halign_stencil) * 2;
125 *height = (mt->total_height / mt->physical_depth0) / 2;
126
127 if (is_render_target) {
128 *format = BRW_SURFACEFORMAT_R8_UINT;
129 }
130 }
131
132
133 /**
134 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
135 * swizzling.
136 */
137 int
138 brw_get_texture_swizzle(const struct gl_context *ctx,
139 const struct gl_texture_object *t)
140 {
141 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
142
143 int swizzles[SWIZZLE_NIL + 1] = {
144 SWIZZLE_X,
145 SWIZZLE_Y,
146 SWIZZLE_Z,
147 SWIZZLE_W,
148 SWIZZLE_ZERO,
149 SWIZZLE_ONE,
150 SWIZZLE_NIL
151 };
152
153 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
154 img->_BaseFormat == GL_DEPTH_STENCIL) {
155 GLenum depth_mode = t->DepthMode;
156
157 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
158 * with depth component data specified with a sized internal format.
159 * Otherwise, it's left at the old default, GL_LUMINANCE.
160 */
161 if (_mesa_is_gles3(ctx) &&
162 img->InternalFormat != GL_DEPTH_COMPONENT &&
163 img->InternalFormat != GL_DEPTH_STENCIL) {
164 depth_mode = GL_RED;
165 }
166
167 switch (depth_mode) {
168 case GL_ALPHA:
169 swizzles[0] = SWIZZLE_ZERO;
170 swizzles[1] = SWIZZLE_ZERO;
171 swizzles[2] = SWIZZLE_ZERO;
172 swizzles[3] = SWIZZLE_X;
173 break;
174 case GL_LUMINANCE:
175 swizzles[0] = SWIZZLE_X;
176 swizzles[1] = SWIZZLE_X;
177 swizzles[2] = SWIZZLE_X;
178 swizzles[3] = SWIZZLE_ONE;
179 break;
180 case GL_INTENSITY:
181 swizzles[0] = SWIZZLE_X;
182 swizzles[1] = SWIZZLE_X;
183 swizzles[2] = SWIZZLE_X;
184 swizzles[3] = SWIZZLE_X;
185 break;
186 case GL_RED:
187 swizzles[0] = SWIZZLE_X;
188 swizzles[1] = SWIZZLE_ZERO;
189 swizzles[2] = SWIZZLE_ZERO;
190 swizzles[3] = SWIZZLE_ONE;
191 break;
192 }
193 }
194
195 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
196
197 /* If the texture's format is alpha-only, force R, G, and B to
198 * 0.0. Similarly, if the texture's format has no alpha channel,
199 * force the alpha value read to 1.0. This allows for the
200 * implementation to use an RGBA texture for any of these formats
201 * without leaking any unexpected values.
202 */
203 switch (img->_BaseFormat) {
204 case GL_ALPHA:
205 swizzles[0] = SWIZZLE_ZERO;
206 swizzles[1] = SWIZZLE_ZERO;
207 swizzles[2] = SWIZZLE_ZERO;
208 break;
209 case GL_LUMINANCE:
210 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
211 swizzles[0] = SWIZZLE_X;
212 swizzles[1] = SWIZZLE_X;
213 swizzles[2] = SWIZZLE_X;
214 swizzles[3] = SWIZZLE_ONE;
215 }
216 break;
217 case GL_LUMINANCE_ALPHA:
218 if (datatype == GL_SIGNED_NORMALIZED) {
219 swizzles[0] = SWIZZLE_X;
220 swizzles[1] = SWIZZLE_X;
221 swizzles[2] = SWIZZLE_X;
222 swizzles[3] = SWIZZLE_W;
223 }
224 break;
225 case GL_INTENSITY:
226 if (datatype == GL_SIGNED_NORMALIZED) {
227 swizzles[0] = SWIZZLE_X;
228 swizzles[1] = SWIZZLE_X;
229 swizzles[2] = SWIZZLE_X;
230 swizzles[3] = SWIZZLE_X;
231 }
232 break;
233 case GL_RED:
234 case GL_RG:
235 case GL_RGB:
236 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
237 swizzles[3] = SWIZZLE_ONE;
238 break;
239 }
240
241 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
242 swizzles[GET_SWZ(t->_Swizzle, 1)],
243 swizzles[GET_SWZ(t->_Swizzle, 2)],
244 swizzles[GET_SWZ(t->_Swizzle, 3)]);
245 }
246
247 static void
248 gen4_emit_buffer_surface_state(struct brw_context *brw,
249 uint32_t *out_offset,
250 drm_intel_bo *bo,
251 unsigned buffer_offset,
252 unsigned surface_format,
253 unsigned buffer_size,
254 unsigned pitch,
255 bool rw)
256 {
257 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
258 6 * 4, 32, out_offset);
259 memset(surf, 0, 6 * 4);
260
261 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
262 surface_format << BRW_SURFACE_FORMAT_SHIFT |
263 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
264 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
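/* The buffer size is programmed as the element count minus one, split
 * across the 7-bit width, 13-bit height and 7-bit depth fields below.
 * E.g. a 1000-element buffer stores 999: width = 103, height = 7, depth = 0.
 */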
265 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
266 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
267 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
268 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
269
270 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
271 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
272 * physical cache. It is mapped in hardware to the sampler cache."
273 */
274 if (bo) {
275 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
276 bo, buffer_offset,
277 I915_GEM_DOMAIN_SAMPLER,
278 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
279 }
280 }
281
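/* Sets up a SURFACE_STATE for a GL_TEXTURE_BUFFER texture: the backing
 * buffer object is exposed to the sampler as a buffer surface whose element
 * count and stride are derived from the buffer format's texel size.
 */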
282 void
283 brw_update_buffer_texture_surface(struct gl_context *ctx,
284 unsigned unit,
285 uint32_t *surf_offset)
286 {
287 struct brw_context *brw = brw_context(ctx);
288 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
289 struct intel_buffer_object *intel_obj =
290 intel_buffer_object(tObj->BufferObject);
291 uint32_t size = tObj->BufferSize;
292 drm_intel_bo *bo = NULL;
293 mesa_format format = tObj->_BufferObjectFormat;
294 uint32_t brw_format = brw_format_for_mesa_format(format);
295 int texel_size = _mesa_get_format_bytes(format);
296
297 if (intel_obj) {
298 size = MIN2(size, intel_obj->Base.Size);
299 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
300 }
301
302 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
303 _mesa_problem(NULL, "bad format %s for texture buffer\n",
304 _mesa_get_format_name(format));
305 }
306
307 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
308 tObj->BufferOffset,
309 brw_format,
310 size / texel_size,
311 texel_size,
312 false /* rw */);
313 }
314
315 static void
316 brw_update_texture_surface(struct gl_context *ctx,
317 unsigned unit,
318 uint32_t *surf_offset,
319 bool for_gather,
320 uint32_t plane)
321 {
322 struct brw_context *brw = brw_context(ctx);
323 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
324 struct intel_texture_object *intelObj = intel_texture_object(tObj);
325 struct intel_mipmap_tree *mt = intelObj->mt;
326 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
327 uint32_t *surf;
328
329 /* BRW_NEW_TEXTURE_BUFFER */
330 if (tObj->Target == GL_TEXTURE_BUFFER) {
331 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
332 return;
333 }
334
335 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
336 6 * 4, 32, surf_offset);
337
338 uint32_t tex_format = translate_tex_format(brw, mt->format,
339 sampler->sRGBDecode);
340
341 if (for_gather) {
342 /* Sandybridge's gather4 message is broken for integer formats.
343 * To work around this, we pretend the surface is UNORM for
344 * 8 or 16-bit formats, and emit shader instructions to recover
345 * the real INT/UINT value. For 32-bit formats, we pretend
346 * the surface is FLOAT, and simply reinterpret the resulting
347 * bits.
348 */
349 switch (tex_format) {
350 case BRW_SURFACEFORMAT_R8_SINT:
351 case BRW_SURFACEFORMAT_R8_UINT:
352 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
353 break;
354
355 case BRW_SURFACEFORMAT_R16_SINT:
356 case BRW_SURFACEFORMAT_R16_UINT:
357 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
358 break;
359
360 case BRW_SURFACEFORMAT_R32_SINT:
361 case BRW_SURFACEFORMAT_R32_UINT:
362 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
363 break;
364
365 default:
366 break;
367 }
368 }
369
370 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
371 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
372 BRW_SURFACE_CUBEFACE_ENABLES |
373 tex_format << BRW_SURFACE_FORMAT_SHIFT);
374
375 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
376
377 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
378 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
379 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
380
381 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
382 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
383 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
384
385 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
386 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
387
388 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
389
390 /* Emit relocation to surface contents */
391 drm_intel_bo_emit_reloc(brw->batch.bo,
392 *surf_offset + 4,
393 mt->bo,
394 surf[1] - mt->bo->offset64,
395 I915_GEM_DOMAIN_SAMPLER, 0);
396 }
397
398 /**
399 * Create the constant buffer surface. Vertex/fragment shader constants will be
400 * read from this buffer with Data Port Read instructions/messages.
401 */
402 void
403 brw_create_constant_surface(struct brw_context *brw,
404 drm_intel_bo *bo,
405 uint32_t offset,
406 uint32_t size,
407 uint32_t *out_offset)
408 {
409 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
410 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
411 size, 1, false);
412 }
413
414 /**
415 * Create the buffer surface. Shader buffer variables will be
416 * read from / written to this buffer with Data Port Read/Write
417 * instructions/messages.
418 */
419 void
420 brw_create_buffer_surface(struct brw_context *brw,
421 drm_intel_bo *bo,
422 uint32_t offset,
423 uint32_t size,
424 uint32_t *out_offset)
425 {
426 /* Use a raw surface so we can reuse existing untyped read/write/atomic
427 * messages. We need these specifically for the fragment shader since they
428 * include a pixel mask header that we need to ensure correct behavior
429 * with helper invocations, which cannot write to the buffer.
430 */
431 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
432 BRW_SURFACEFORMAT_RAW,
433 size, 1, true);
434 }
435
436 /**
437 * Set up a binding table entry for use by stream output logic (transform
438 * feedback).
439 *
440 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
441 */
442 void
443 brw_update_sol_surface(struct brw_context *brw,
444 struct gl_buffer_object *buffer_obj,
445 uint32_t *out_offset, unsigned num_vector_components,
446 unsigned stride_dwords, unsigned offset_dwords)
447 {
448 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
449 uint32_t offset_bytes = 4 * offset_dwords;
450 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
451 offset_bytes,
452 buffer_obj->Size - offset_bytes);
453 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
454 out_offset);
455 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
456 size_t size_dwords = buffer_obj->Size / 4;
457 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
458
459 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
460 * too big to map using a single binding table entry?
461 */
462 assert((size_dwords - offset_dwords) / stride_dwords
463 <= BRW_MAX_NUM_BUFFER_ENTRIES);
464
465 if (size_dwords > offset_dwords + num_vector_components) {
466 /* There is room for at least 1 transform feedback output in the buffer.
467 * Compute the number of additional transform feedback outputs the
468 * buffer has room for.
469 */
470 buffer_size_minus_1 =
471 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
472 } else {
473 /* There isn't even room for a single transform feedback output in the
474 * buffer. We can't configure the binding table entry to prevent output
475 * entirely; we'll have to rely on the geometry shader to detect
476 * overflow. But to minimize the damage in case of a bug, set up the
477 * binding table entry to just allow a single output.
478 */
479 buffer_size_minus_1 = 0;
480 }
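/* buffer_size_minus_1 is split across the 7-bit width, 13-bit height and
 * 7-bit depth fields of the surface state; e.g. a value of 0x12345 gives
 * width = 0x45, height = 0x246, depth = 0.
 */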
481 width = buffer_size_minus_1 & 0x7f;
482 height = (buffer_size_minus_1 & 0xfff80) >> 7;
483 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
484
485 switch (num_vector_components) {
486 case 1:
487 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
488 break;
489 case 2:
490 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
491 break;
492 case 3:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
494 break;
495 case 4:
496 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
497 break;
498 default:
499 unreachable("Invalid vector size for transform feedback output");
500 }
501
502 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
503 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
504 surface_format << BRW_SURFACE_FORMAT_SHIFT |
505 BRW_SURFACE_RC_READ_WRITE;
506 surf[1] = bo->offset64 + offset_bytes; /* reloc */
507 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
508 height << BRW_SURFACE_HEIGHT_SHIFT);
509 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
510 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
511 surf[4] = 0;
512 surf[5] = 0;
513
514 /* Emit relocation to surface contents. */
515 drm_intel_bo_emit_reloc(brw->batch.bo,
516 *out_offset + 4,
517 bo, offset_bytes,
518 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
519 }
520
521 /* Creates a new WM constant buffer reflecting the current fragment program's
522 * constants, if needed by the fragment program.
523 *
524 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
525 * state atom.
526 */
527 static void
528 brw_upload_wm_pull_constants(struct brw_context *brw)
529 {
530 struct brw_stage_state *stage_state = &brw->wm.base;
531 /* BRW_NEW_FRAGMENT_PROGRAM */
532 struct brw_fragment_program *fp =
533 (struct brw_fragment_program *) brw->fragment_program;
534 /* BRW_NEW_FS_PROG_DATA */
535 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
536
537 /* _NEW_PROGRAM_CONSTANTS */
538 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
539 stage_state, prog_data);
540 }
541
542 const struct brw_tracked_state brw_wm_pull_constants = {
543 .dirty = {
544 .mesa = _NEW_PROGRAM_CONSTANTS,
545 .brw = BRW_NEW_BATCH |
546 BRW_NEW_BLORP |
547 BRW_NEW_FRAGMENT_PROGRAM |
548 BRW_NEW_FS_PROG_DATA,
549 },
550 .emit = brw_upload_wm_pull_constants,
551 };
552
553 /**
554 * Creates a null renderbuffer surface.
555 *
556 * This is used when the shader doesn't write to any color output. An FB
557 * write to target 0 will still be emitted, because that's how the thread is
558 * terminated (and computed depth is returned), so we need to have the
559 * hardware discard the target 0 color output.
560 */
561 static void
562 brw_emit_null_surface_state(struct brw_context *brw,
563 unsigned width,
564 unsigned height,
565 unsigned samples,
566 uint32_t *out_offset)
567 {
568 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
569 * Notes):
570 *
571 * A null surface will be used in instances where an actual surface is
572 * not bound. When a write message is generated to a null surface, no
573 * actual surface is written to. When a read message (including any
574 * sampling engine message) is generated to a null surface, the result
575 * is all zeros. Note that a null surface type is allowed to be used
576 * with all messages, even if it is not specifically indicated as
577 * supported. All of the remaining fields in surface state are ignored
578 * for null surfaces, with the following exceptions:
579 *
580 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
581 * depth buffer’s corresponding state for all render target surfaces,
582 * including null.
583 *
584 * - Surface Format must be R8G8B8A8_UNORM.
585 */
586 unsigned surface_type = BRW_SURFACE_NULL;
587 drm_intel_bo *bo = NULL;
588 unsigned pitch_minus_1 = 0;
589 uint32_t multisampling_state = 0;
590 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
591 out_offset);
592
593 if (samples > 1) {
594 /* On Gen6, null render targets seem to cause GPU hangs when
595 * multisampling. So work around this problem by rendering into a dummy
596 * color buffer.
597 *
598 * To decrease the amount of memory needed by the workaround buffer, we
599 * set its pitch to 128 bytes (the width of a Y tile). This means that
600 * the amount of memory needed for the workaround buffer is
601 * (width_in_tiles + height_in_tiles - 1) tiles.
602 *
603 * Note that since the workaround buffer will be interpreted by the
604 * hardware as an interleaved multisampled buffer, we need to compute
605 * width_in_tiles and height_in_tiles by dividing the width and height
606 * by 16 rather than the normal Y-tile size of 32.
607 */
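/* For example, a hypothetical 1920x1080 multisampled target gives
 * width_in_tiles = 120 and height_in_tiles = 68, so the workaround buffer
 * only needs (120 + 68 - 1) * 4096 bytes, i.e. about 748 KiB.
 */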
608 unsigned width_in_tiles = ALIGN(width, 16) / 16;
609 unsigned height_in_tiles = ALIGN(height, 16) / 16;
610 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
611 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
612 size_needed);
613 bo = brw->wm.multisampled_null_render_target_bo;
614 surface_type = BRW_SURFACE_2D;
615 pitch_minus_1 = 127;
616 multisampling_state = brw_get_surface_num_multisamples(samples);
617 }
618
619 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
620 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
621 if (brw->gen < 6) {
622 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
623 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
626 }
627 surf[1] = bo ? bo->offset64 : 0;
628 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
629 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
630
631 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
632 * Notes):
633 *
634 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
635 */
636 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
637 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
638 surf[4] = multisampling_state;
639 surf[5] = 0;
640
641 if (bo) {
642 drm_intel_bo_emit_reloc(brw->batch.bo,
643 *out_offset + 4,
644 bo, 0,
645 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
646 }
647 }
648
649 /**
650 * Sets up a surface state structure to point at the given region.
651 * While it is only used for the front/back buffer currently, it should be
652 * usable for further buffers when doing ARB_draw_buffers support.
653 */
654 static uint32_t
655 brw_update_renderbuffer_surface(struct brw_context *brw,
656 struct gl_renderbuffer *rb,
657 bool layered, unsigned unit,
658 uint32_t surf_index)
659 {
660 struct gl_context *ctx = &brw->ctx;
661 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
662 struct intel_mipmap_tree *mt = irb->mt;
663 uint32_t *surf;
664 uint32_t tile_x, tile_y;
665 uint32_t format = 0;
666 uint32_t offset;
667 /* _NEW_BUFFERS */
668 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
669 /* BRW_NEW_FS_PROG_DATA */
670
671 assert(!layered);
672
673 if (rb->TexImage && !brw->has_surface_tile_offset) {
674 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
675
676 if (tile_x != 0 || tile_y != 0) {
677 /* Original gen4 hardware couldn't draw to a non-tile-aligned
678 * destination in a miptree unless you actually set up your renderbuffer
679 * as a miptree and used the fragile lod/array_index/etc. controls to
680 * select the image. So, instead, we just make a new single-level
681 * miptree and render into that.
682 */
683 intel_renderbuffer_move_to_temp(brw, irb, false);
684 mt = irb->mt;
685 }
686 }
687
688 intel_miptree_used_for_rendering(irb->mt);
689
690 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
691
692 format = brw->render_target_format[rb_format];
693 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
694 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
695 __func__, _mesa_get_format_name(rb_format));
696 }
697
698 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
699 format << BRW_SURFACE_FORMAT_SHIFT);
700
701 /* reloc */
702 assert(mt->offset % mt->cpp == 0);
703 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
704 mt->bo->offset64 + mt->offset);
705
706 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
707 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
708
709 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
710 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
711
712 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
713
714 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
715 /* Note that the low bits of these fields are missing, so
716 * there's the possibility of getting in trouble.
717 */
718 assert(tile_x % 4 == 0);
719 assert(tile_y % 2 == 0);
720 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
721 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
722 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
723
724 if (brw->gen < 6) {
725 /* _NEW_COLOR */
726 if (!ctx->Color.ColorLogicOpEnabled &&
727 (ctx->Color.BlendEnabled & (1 << unit)))
728 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
729
730 if (!ctx->Color.ColorMask[unit][0])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
732 if (!ctx->Color.ColorMask[unit][1])
733 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
734 if (!ctx->Color.ColorMask[unit][2])
735 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
736
737 /* As mentioned above, disable writes to the alpha component when the
738 * renderbuffer is XRGB.
739 */
740 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
741 !ctx->Color.ColorMask[unit][3]) {
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
743 }
744 }
745
746 drm_intel_bo_emit_reloc(brw->batch.bo,
747 offset + 4,
748 mt->bo,
749 surf[1] - mt->bo->offset64,
750 I915_GEM_DOMAIN_RENDER,
751 I915_GEM_DOMAIN_RENDER);
752
753 return offset;
754 }
755
756 /**
757 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
758 */
759 void
760 brw_update_renderbuffer_surfaces(struct brw_context *brw,
761 const struct gl_framebuffer *fb,
762 uint32_t render_target_start,
763 uint32_t *surf_offset)
764 {
765 GLuint i;
766 const unsigned int w = _mesa_geometric_width(fb);
767 const unsigned int h = _mesa_geometric_height(fb);
768 const unsigned int s = _mesa_geometric_samples(fb);
769
770 /* Update surfaces for drawing buffers */
771 if (fb->_NumColorDrawBuffers >= 1) {
772 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
773 const uint32_t surf_index = render_target_start + i;
774
775 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
776 surf_offset[surf_index] =
777 brw->vtbl.update_renderbuffer_surface(
778 brw, fb->_ColorDrawBuffers[i],
779 _mesa_geometric_layers(fb) > 0, i, surf_index);
780 } else {
781 brw->vtbl.emit_null_surface_state(brw, w, h, s,
782 &surf_offset[surf_index]);
783 }
784 }
785 } else {
786 const uint32_t surf_index = render_target_start;
787 brw->vtbl.emit_null_surface_state(brw, w, h, s,
788 &surf_offset[surf_index]);
789 }
790 }
791
792 static void
793 update_renderbuffer_surfaces(struct brw_context *brw)
794 {
795 const struct gl_context *ctx = &brw->ctx;
796
797 /* _NEW_BUFFERS | _NEW_COLOR */
798 const struct gl_framebuffer *fb = ctx->DrawBuffer;
799 brw_update_renderbuffer_surfaces(
800 brw, fb,
801 brw->wm.prog_data->binding_table.render_target_start,
802 brw->wm.base.surf_offset);
803 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_renderbuffer_surfaces = {
807 .dirty = {
808 .mesa = _NEW_BUFFERS |
809 _NEW_COLOR,
810 .brw = BRW_NEW_BATCH |
811 BRW_NEW_BLORP |
812 BRW_NEW_FS_PROG_DATA,
813 },
814 .emit = update_renderbuffer_surfaces,
815 };
816
817 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
818 .dirty = {
819 .mesa = _NEW_BUFFERS,
820 .brw = BRW_NEW_BATCH |
821 BRW_NEW_BLORP,
822 },
823 .emit = update_renderbuffer_surfaces,
824 };
825
826
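/* Walks the samplers used by one shader stage and emits a SURFACE_STATE for
 * each one that has a texture bound, writing the resulting offsets into the
 * stage's binding table region (regular, gather or per-plane, as selected by
 * the for_gather/plane arguments).
 */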
827 static void
828 update_stage_texture_surfaces(struct brw_context *brw,
829 const struct gl_program *prog,
830 struct brw_stage_state *stage_state,
831 bool for_gather, uint32_t plane)
832 {
833 if (!prog)
834 return;
835
836 struct gl_context *ctx = &brw->ctx;
837
838 uint32_t *surf_offset = stage_state->surf_offset;
839
840 /* BRW_NEW_*_PROG_DATA */
841 if (for_gather)
842 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
843 else
844 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
845
846 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
847 for (unsigned s = 0; s < num_samplers; s++) {
848 surf_offset[s] = 0;
849
850 if (prog->SamplersUsed & (1 << s)) {
851 const unsigned unit = prog->SamplerUnits[s];
852
853 /* _NEW_TEXTURE */
854 if (ctx->Texture.Unit[unit]._Current) {
855 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
856 }
857 }
858 }
859 }
860
861
862 /**
863 * Construct SURFACE_STATE objects for enabled textures.
864 */
865 static void
866 brw_update_texture_surfaces(struct brw_context *brw)
867 {
868 /* BRW_NEW_VERTEX_PROGRAM */
869 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
870
871 /* BRW_NEW_TESS_PROGRAMS */
872 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
873 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
874
875 /* BRW_NEW_GEOMETRY_PROGRAM */
876 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
877
878 /* BRW_NEW_FRAGMENT_PROGRAM */
879 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
880
881 /* _NEW_TEXTURE */
882 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
883 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
884 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
885 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
886 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
887
888 /* Emit an alternate set of surface state for gather. This
889 * allows the surface format to be overridden for only the
890 * gather4 messages. */
891 if (brw->gen < 8) {
892 if (vs && vs->UsesGather)
893 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
894 if (tcs && tcs->UsesGather)
895 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
896 if (tes && tes->UsesGather)
897 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
898 if (gs && gs->UsesGather)
899 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
900 if (fs && fs->UsesGather)
901 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
902 }
903
904 if (fs) {
905 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
906 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
907 }
908
909 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
910 }
911
912 const struct brw_tracked_state brw_texture_surfaces = {
913 .dirty = {
914 .mesa = _NEW_TEXTURE,
915 .brw = BRW_NEW_BATCH |
916 BRW_NEW_BLORP |
917 BRW_NEW_FRAGMENT_PROGRAM |
918 BRW_NEW_FS_PROG_DATA |
919 BRW_NEW_GEOMETRY_PROGRAM |
920 BRW_NEW_GS_PROG_DATA |
921 BRW_NEW_TESS_PROGRAMS |
922 BRW_NEW_TCS_PROG_DATA |
923 BRW_NEW_TES_PROG_DATA |
924 BRW_NEW_TEXTURE_BUFFER |
925 BRW_NEW_VERTEX_PROGRAM |
926 BRW_NEW_VS_PROG_DATA,
927 },
928 .emit = brw_update_texture_surfaces,
929 };
930
931 static void
932 brw_update_cs_texture_surfaces(struct brw_context *brw)
933 {
934 /* BRW_NEW_COMPUTE_PROGRAM */
935 struct gl_program *cs = (struct gl_program *) brw->compute_program;
936
937 /* _NEW_TEXTURE */
938 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
939
940 /* Emit an alternate set of surface state for gather. This
941 * allows the surface format to be overridden for only the
942 * gather4 messages.
943 */
944 if (brw->gen < 8) {
945 if (cs && cs->UsesGather)
946 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
947 }
948
949 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
950 }
951
952 const struct brw_tracked_state brw_cs_texture_surfaces = {
953 .dirty = {
954 .mesa = _NEW_TEXTURE,
955 .brw = BRW_NEW_BATCH |
956 BRW_NEW_BLORP |
957 BRW_NEW_COMPUTE_PROGRAM,
958 },
959 .emit = brw_update_cs_texture_surfaces,
960 };
961
962
963 void
964 brw_upload_ubo_surfaces(struct brw_context *brw,
965 struct gl_shader *shader,
966 struct brw_stage_state *stage_state,
967 struct brw_stage_prog_data *prog_data)
968 {
969 struct gl_context *ctx = &brw->ctx;
970
971 if (!shader)
972 return;
973
974 uint32_t *ubo_surf_offsets =
975 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
976
977 for (int i = 0; i < shader->NumUniformBlocks; i++) {
978 struct gl_uniform_buffer_binding *binding =
979 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
980
981 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
982 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
983 } else {
984 struct intel_buffer_object *intel_bo =
985 intel_buffer_object(binding->BufferObject);
986 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
987 if (!binding->AutomaticSize)
988 size = MIN2(size, binding->Size);
989 drm_intel_bo *bo =
990 intel_bufferobj_buffer(brw, intel_bo,
991 binding->Offset,
992 size);
993 brw_create_constant_surface(brw, bo, binding->Offset,
994 size,
995 &ubo_surf_offsets[i]);
996 }
997 }
998
999 uint32_t *ssbo_surf_offsets =
1000 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1001
1002 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1003 struct gl_shader_storage_buffer_binding *binding =
1004 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1005
1006 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1007 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1008 } else {
1009 struct intel_buffer_object *intel_bo =
1010 intel_buffer_object(binding->BufferObject);
1011 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1012 if (!binding->AutomaticSize)
1013 size = MIN2(size, binding->Size);
1014 drm_intel_bo *bo =
1015 intel_bufferobj_buffer(brw, intel_bo,
1016 binding->Offset,
1017 size);
1018 brw_create_buffer_surface(brw, bo, binding->Offset,
1019 size,
1020 &ssbo_surf_offsets[i]);
1021 }
1022 }
1023
1024 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1025 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1026 }
1027
1028 static void
1029 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1030 {
1031 struct gl_context *ctx = &brw->ctx;
1032 /* _NEW_PROGRAM */
1033 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1034
1035 if (!prog)
1036 return;
1037
1038 /* BRW_NEW_FS_PROG_DATA */
1039 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1040 &brw->wm.base, &brw->wm.prog_data->base);
1041 }
1042
1043 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1044 .dirty = {
1045 .mesa = _NEW_PROGRAM,
1046 .brw = BRW_NEW_BATCH |
1047 BRW_NEW_BLORP |
1048 BRW_NEW_FS_PROG_DATA |
1049 BRW_NEW_UNIFORM_BUFFER,
1050 },
1051 .emit = brw_upload_wm_ubo_surfaces,
1052 };
1053
1054 static void
1055 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1056 {
1057 struct gl_context *ctx = &brw->ctx;
1058 /* _NEW_PROGRAM */
1059 struct gl_shader_program *prog =
1060 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1061
1062 if (!prog)
1063 return;
1064
1065 /* BRW_NEW_CS_PROG_DATA */
1066 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1067 &brw->cs.base, &brw->cs.prog_data->base);
1068 }
1069
1070 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1071 .dirty = {
1072 .mesa = _NEW_PROGRAM,
1073 .brw = BRW_NEW_BATCH |
1074 BRW_NEW_BLORP |
1075 BRW_NEW_CS_PROG_DATA |
1076 BRW_NEW_UNIFORM_BUFFER,
1077 },
1078 .emit = brw_upload_cs_ubo_surfaces,
1079 };
1080
1081 void
1082 brw_upload_abo_surfaces(struct brw_context *brw,
1083 struct gl_shader *shader,
1084 struct brw_stage_state *stage_state,
1085 struct brw_stage_prog_data *prog_data)
1086 {
1087 struct gl_context *ctx = &brw->ctx;
1088 uint32_t *surf_offsets =
1089 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1090
1091 if (shader && shader->NumAtomicBuffers) {
1092 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1093 struct gl_atomic_buffer_binding *binding =
1094 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1095 struct intel_buffer_object *intel_bo =
1096 intel_buffer_object(binding->BufferObject);
1097 drm_intel_bo *bo = intel_bufferobj_buffer(
1098 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1099
1100 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1101 binding->Offset, BRW_SURFACEFORMAT_RAW,
1102 bo->size - binding->Offset, 1, true);
1103 }
1104
1105 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1106 }
1107 }
1108
1109 static void
1110 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1111 {
1112 struct gl_context *ctx = &brw->ctx;
1113 /* _NEW_PROGRAM */
1114 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1115
1116 if (prog) {
1117 /* BRW_NEW_FS_PROG_DATA */
1118 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1119 &brw->wm.base, &brw->wm.prog_data->base);
1120 }
1121 }
1122
1123 const struct brw_tracked_state brw_wm_abo_surfaces = {
1124 .dirty = {
1125 .mesa = _NEW_PROGRAM,
1126 .brw = BRW_NEW_ATOMIC_BUFFER |
1127 BRW_NEW_BLORP |
1128 BRW_NEW_BATCH |
1129 BRW_NEW_FS_PROG_DATA,
1130 },
1131 .emit = brw_upload_wm_abo_surfaces,
1132 };
1133
1134 static void
1135 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1136 {
1137 struct gl_context *ctx = &brw->ctx;
1138 /* _NEW_PROGRAM */
1139 struct gl_shader_program *prog =
1140 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1141
1142 if (prog) {
1143 /* BRW_NEW_CS_PROG_DATA */
1144 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1145 &brw->cs.base, &brw->cs.prog_data->base);
1146 }
1147 }
1148
1149 const struct brw_tracked_state brw_cs_abo_surfaces = {
1150 .dirty = {
1151 .mesa = _NEW_PROGRAM,
1152 .brw = BRW_NEW_ATOMIC_BUFFER |
1153 BRW_NEW_BLORP |
1154 BRW_NEW_BATCH |
1155 BRW_NEW_CS_PROG_DATA,
1156 },
1157 .emit = brw_upload_cs_abo_surfaces,
1158 };
1159
1160 static void
1161 brw_upload_cs_image_surfaces(struct brw_context *brw)
1162 {
1163 struct gl_context *ctx = &brw->ctx;
1164 /* _NEW_PROGRAM */
1165 struct gl_shader_program *prog =
1166 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1167
1168 if (prog) {
1169 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1170 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1171 &brw->cs.base, &brw->cs.prog_data->base);
1172 }
1173 }
1174
1175 const struct brw_tracked_state brw_cs_image_surfaces = {
1176 .dirty = {
1177 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1178 .brw = BRW_NEW_BATCH |
1179 BRW_NEW_BLORP |
1180 BRW_NEW_CS_PROG_DATA |
1181 BRW_NEW_IMAGE_UNITS
1182 },
1183 .emit = brw_upload_cs_image_surfaces,
1184 };
1185
1186 static uint32_t
1187 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1188 {
1189 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1190 uint32_t hw_format = brw_format_for_mesa_format(format);
1191 if (access == GL_WRITE_ONLY) {
1192 return hw_format;
1193 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1194 /* Typed surface reads support a very limited subset of the shader
1195 * image formats. Translate it into the closest format the
1196 * hardware supports.
1197 */
1198 return isl_lower_storage_image_format(devinfo, hw_format);
1199 } else {
1200 /* The hardware doesn't actually support a typed format that we can use
1201 * so we have to fall back to untyped read/write messages.
1202 */
1203 return BRW_SURFACEFORMAT_RAW;
1204 }
1205 }
1206
1207 static void
1208 update_default_image_param(struct brw_context *brw,
1209 struct gl_image_unit *u,
1210 unsigned surface_idx,
1211 struct brw_image_param *param)
1212 {
1213 memset(param, 0, sizeof(*param));
1214 param->surface_idx = surface_idx;
1215 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1216 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1217 * detailed explanation of these parameters.
1218 */
1219 param->swizzling[0] = 0xff;
1220 param->swizzling[1] = 0xff;
1221 }
1222
1223 static void
1224 update_buffer_image_param(struct brw_context *brw,
1225 struct gl_image_unit *u,
1226 unsigned surface_idx,
1227 struct brw_image_param *param)
1228 {
1229 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1230
1231 update_default_image_param(brw, u, surface_idx, param);
1232
1233 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1234 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1235 }
1236
1237 static void
1238 update_texture_image_param(struct brw_context *brw,
1239 struct gl_image_unit *u,
1240 unsigned surface_idx,
1241 struct brw_image_param *param)
1242 {
1243 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1244
1245 update_default_image_param(brw, u, surface_idx, param);
1246
1247 param->size[0] = minify(mt->logical_width0, u->Level);
1248 param->size[1] = minify(mt->logical_height0, u->Level);
1249 param->size[2] = (!u->Layered ? 1 :
1250 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1251 u->TexObj->Target == GL_TEXTURE_3D ?
1252 minify(mt->logical_depth0, u->Level) :
1253 mt->logical_depth0);
1254
1255 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1256 &param->offset[0],
1257 &param->offset[1]);
1258
1259 param->stride[0] = mt->cpp;
1260 param->stride[1] = mt->pitch / mt->cpp;
1261 param->stride[2] =
1262 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1263 param->stride[3] =
1264 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1265
1266 if (mt->tiling == I915_TILING_X) {
1267 /* An X tile is a rectangular block of 512x8 bytes. */
1268 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1269 param->tiling[1] = _mesa_logbase2(8);
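/* E.g. for a 4-byte-per-texel format this gives tiling[0] = log2(128) = 7
 * and tiling[1] = log2(8) = 3.
 */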
1270
1271 if (brw->has_swizzling) {
1272 /* Right shifts required to swizzle bits 9 and 10 of the memory
1273 * address with bit 6.
1274 */
1275 param->swizzling[0] = 3;
1276 param->swizzling[1] = 4;
1277 }
1278 } else if (mt->tiling == I915_TILING_Y) {
1279 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1280 * different from the layout of an X-tiled surface; we simply pretend that
1281 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1282 * one arranged in X-major order just as is the case for X-tiling.
1283 */
1284 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1285 param->tiling[1] = _mesa_logbase2(32);
1286
1287 if (brw->has_swizzling) {
1288 /* Right shift required to swizzle bit 9 of the memory address with
1289 * bit 6.
1290 */
1291 param->swizzling[0] = 3;
1292 }
1293 }
1294
1295 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1296 * address calculation algorithm (emit_address_calculation() in
1297 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1298 * modulus equal to the LOD.
1299 */
1300 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1301 0);
1302 }
1303
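/* Emits the SURFACE_STATE and fills in the brw_image_param metadata for one
 * image unit: buffer textures get a buffer surface, images that had to fall
 * back to the RAW format get an untyped surface over the whole miptree BO,
 * and everything else gets a typed 2D surface for the bound level/layer
 * range. Invalid image units get a null surface.
 */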
1304 static void
1305 update_image_surface(struct brw_context *brw,
1306 struct gl_image_unit *u,
1307 GLenum access,
1308 unsigned surface_idx,
1309 uint32_t *surf_offset,
1310 struct brw_image_param *param)
1311 {
1312 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1313 struct gl_texture_object *obj = u->TexObj;
1314 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1315
1316 if (obj->Target == GL_TEXTURE_BUFFER) {
1317 struct intel_buffer_object *intel_obj =
1318 intel_buffer_object(obj->BufferObject);
1319 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1320 _mesa_get_format_bytes(u->_ActualFormat));
1321
1322 brw->vtbl.emit_buffer_surface_state(
1323 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1324 format, intel_obj->Base.Size / texel_size, texel_size,
1325 access != GL_READ_ONLY);
1326
1327 update_buffer_image_param(brw, u, surface_idx, param);
1328
1329 } else {
1330 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1331 struct intel_mipmap_tree *mt = intel_obj->mt;
1332
1333 if (format == BRW_SURFACEFORMAT_RAW) {
1334 brw->vtbl.emit_buffer_surface_state(
1335 brw, surf_offset, mt->bo, mt->offset,
1336 format, mt->bo->size - mt->offset, 1 /* pitch */,
1337 access != GL_READ_ONLY);
1338
1339 } else {
1340 const unsigned min_layer = obj->MinLayer + u->_Layer;
1341 const unsigned min_level = obj->MinLevel + u->Level;
1342 const unsigned num_layers = (!u->Layered ? 1 :
1343 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1344 mt->logical_depth0);
1345 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1346 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1347 GL_TEXTURE_2D_ARRAY : obj->Target);
1348 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1349
1350 brw->vtbl.emit_texture_surface_state(
1351 brw, mt, target,
1352 min_layer, min_layer + num_layers,
1353 min_level, min_level + 1,
1354 format, SWIZZLE_XYZW,
1355 surf_offset, surf_index, access != GL_READ_ONLY, false);
1356 }
1357
1358 update_texture_image_param(brw, u, surface_idx, param);
1359 }
1360
1361 } else {
1362 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1363 update_default_image_param(brw, u, surface_idx, param);
1364 }
1365 }
1366
1367 void
1368 brw_upload_image_surfaces(struct brw_context *brw,
1369 struct gl_shader *shader,
1370 struct brw_stage_state *stage_state,
1371 struct brw_stage_prog_data *prog_data)
1372 {
1373 struct gl_context *ctx = &brw->ctx;
1374
1375 if (shader && shader->NumImages) {
1376 for (unsigned i = 0; i < shader->NumImages; i++) {
1377 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1378 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1379
1380 update_image_surface(brw, u, shader->ImageAccess[i],
1381 surf_idx,
1382 &stage_state->surf_offset[surf_idx],
1383 &prog_data->image_param[i]);
1384 }
1385
1386 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1387 /* This may have changed the image metadata dependent on the context
1388 * image unit state and passed to the program as uniforms, so make sure
1389 * that push and pull constants are reuploaded.
1390 */
1391 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1392 }
1393 }
1394
1395 static void
1396 brw_upload_wm_image_surfaces(struct brw_context *brw)
1397 {
1398 struct gl_context *ctx = &brw->ctx;
1399 /* BRW_NEW_FRAGMENT_PROGRAM */
1400 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1401
1402 if (prog) {
1403 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1404 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1405 &brw->wm.base, &brw->wm.prog_data->base);
1406 }
1407 }
1408
1409 const struct brw_tracked_state brw_wm_image_surfaces = {
1410 .dirty = {
1411 .mesa = _NEW_TEXTURE,
1412 .brw = BRW_NEW_BATCH |
1413 BRW_NEW_BLORP |
1414 BRW_NEW_FRAGMENT_PROGRAM |
1415 BRW_NEW_FS_PROG_DATA |
1416 BRW_NEW_IMAGE_UNITS
1417 },
1418 .emit = brw_upload_wm_image_surfaces,
1419 };
1420
1421 void
1422 gen4_init_vtable_surface_functions(struct brw_context *brw)
1423 {
1424 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1425 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1426 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1427 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1428 }
1429
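/* Uploads the buffer surface backing gl_NumWorkGroups: an indirect dispatch
 * reuses the indirect-parameters BO, while a direct glDispatchCompute call
 * has its three counts uploaded inline from brw->compute.num_work_groups.
 */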
1430 static void
1431 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1432 {
1433 struct gl_context *ctx = &brw->ctx;
1434 /* _NEW_PROGRAM */
1435 struct gl_shader_program *prog =
1436 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1437
1438 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1439 const unsigned surf_idx =
1440 brw->cs.prog_data->binding_table.work_groups_start;
1441 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1442 drm_intel_bo *bo;
1443 uint32_t bo_offset;
1444
1445 if (brw->compute.num_work_groups_bo == NULL) {
1446 bo = NULL;
1447 intel_upload_data(brw,
1448 (void *)brw->compute.num_work_groups,
1449 3 * sizeof(GLuint),
1450 sizeof(GLuint),
1451 &bo,
1452 &bo_offset);
1453 } else {
1454 bo = brw->compute.num_work_groups_bo;
1455 bo_offset = brw->compute.num_work_groups_offset;
1456 }
1457
1458 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1459 bo, bo_offset,
1460 BRW_SURFACEFORMAT_RAW,
1461 3 * sizeof(GLuint), 1, true);
1462 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1463 }
1464 }
1465
1466 const struct brw_tracked_state brw_cs_work_groups_surface = {
1467 .dirty = {
1468 .brw = BRW_NEW_BLORP |
1469 BRW_NEW_CS_WORK_GROUPS
1470 },
1471 .emit = brw_upload_cs_work_groups_surface,
1472 };