i965/gen4-6: Handle gl_texture_object::BaseLevel and MinLayer correctly
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "isl/isl.h"
43
44 #include "intel_mipmap_tree.h"
45 #include "intel_batchbuffer.h"
46 #include "intel_tex.h"
47 #include "intel_fbo.h"
48 #include "intel_buffer_objects.h"
49
50 #include "brw_context.h"
51 #include "brw_state.h"
52 #include "brw_defines.h"
53 #include "brw_wm.h"
54
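/**
 * Translate a GL texture target into the corresponding BRW_SURFACE_* type
 * for SURFACE_STATE.
 */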
55 GLuint
56 translate_tex_target(GLenum target)
57 {
58 switch (target) {
59 case GL_TEXTURE_1D:
60 case GL_TEXTURE_1D_ARRAY_EXT:
61 return BRW_SURFACE_1D;
62
63 case GL_TEXTURE_RECTANGLE_NV:
64 return BRW_SURFACE_2D;
65
66 case GL_TEXTURE_2D:
67 case GL_TEXTURE_2D_ARRAY_EXT:
68 case GL_TEXTURE_EXTERNAL_OES:
69 case GL_TEXTURE_2D_MULTISAMPLE:
70 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
71 return BRW_SURFACE_2D;
72
73 case GL_TEXTURE_3D:
74 return BRW_SURFACE_3D;
75
76 case GL_TEXTURE_CUBE_MAP:
77 case GL_TEXTURE_CUBE_MAP_ARRAY:
78 return BRW_SURFACE_CUBE;
79
80 default:
81 unreachable("not reached");
82 }
83 }
84
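/**
 * Translate an I915_TILING_* mode into SURFACE_STATE tiling bits
 * (linear surfaces return 0).
 */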
85 uint32_t
86 brw_get_surface_tiling_bits(uint32_t tiling)
87 {
88 switch (tiling) {
89 case I915_TILING_X:
90 return BRW_SURFACE_TILED;
91 case I915_TILING_Y:
92 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
93 default:
94 return 0;
95 }
96 }
97
98
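/**
 * Map a sample count to the SURFACE_STATE "Number of Multisamples" field;
 * any multisampled surface is programmed as 4x here.
 */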
99 uint32_t
100 brw_get_surface_num_multisamples(unsigned num_samples)
101 {
102 if (num_samples > 1)
103 return BRW_SURFACE_MULTISAMPLECOUNT_4;
104 else
105 return BRW_SURFACE_MULTISAMPLECOUNT_1;
106 }
107
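/**
 * Compute the width/height/pitch/tiling/format overrides used to access a
 * W-tiled stencil miptree through a Y-tiled surface; see the comment below
 * for the layout details.
 */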
108 void
109 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
110 bool is_render_target,
111 unsigned *width, unsigned *height,
112 unsigned *pitch, uint32_t *tiling, unsigned *format)
113 {
114 static const unsigned halign_stencil = 8;
115
116 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
117 * there are half as many rows.
118 * In addition, mip-levels are accessed manually by the program and
119 * therefore the surface is set up to cover all the mip-levels for one slice.
120 * (Hardware is still used to access individual slices.)
121 */
122 *tiling = I915_TILING_Y;
123 *pitch = mt->pitch * 2;
124 *width = ALIGN(mt->total_width, halign_stencil) * 2;
125 *height = (mt->total_height / mt->physical_depth0) / 2;
126
127 if (is_render_target) {
128 *format = BRW_SURFACEFORMAT_R8_UINT;
129 }
130 }
131
132
133 /**
134 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
135 * swizzling.
136 */
137 int
138 brw_get_texture_swizzle(const struct gl_context *ctx,
139 const struct gl_texture_object *t)
140 {
141 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
142
143 int swizzles[SWIZZLE_NIL + 1] = {
144 SWIZZLE_X,
145 SWIZZLE_Y,
146 SWIZZLE_Z,
147 SWIZZLE_W,
148 SWIZZLE_ZERO,
149 SWIZZLE_ONE,
150 SWIZZLE_NIL
151 };
152
153 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
154 img->_BaseFormat == GL_DEPTH_STENCIL) {
155 GLenum depth_mode = t->DepthMode;
156
157 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
158 * with depth component data specified with a sized internal format.
159 * Otherwise, it's left at the old default, GL_LUMINANCE.
160 */
161 if (_mesa_is_gles3(ctx) &&
162 img->InternalFormat != GL_DEPTH_COMPONENT &&
163 img->InternalFormat != GL_DEPTH_STENCIL) {
164 depth_mode = GL_RED;
165 }
166
167 switch (depth_mode) {
168 case GL_ALPHA:
169 swizzles[0] = SWIZZLE_ZERO;
170 swizzles[1] = SWIZZLE_ZERO;
171 swizzles[2] = SWIZZLE_ZERO;
172 swizzles[3] = SWIZZLE_X;
173 break;
174 case GL_LUMINANCE:
175 swizzles[0] = SWIZZLE_X;
176 swizzles[1] = SWIZZLE_X;
177 swizzles[2] = SWIZZLE_X;
178 swizzles[3] = SWIZZLE_ONE;
179 break;
180 case GL_INTENSITY:
181 swizzles[0] = SWIZZLE_X;
182 swizzles[1] = SWIZZLE_X;
183 swizzles[2] = SWIZZLE_X;
184 swizzles[3] = SWIZZLE_X;
185 break;
186 case GL_RED:
187 swizzles[0] = SWIZZLE_X;
188 swizzles[1] = SWIZZLE_ZERO;
189 swizzles[2] = SWIZZLE_ZERO;
190 swizzles[3] = SWIZZLE_ONE;
191 break;
192 }
193 }
194
195 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
196
197 /* If the texture's format is alpha-only, force R, G, and B to
198 * 0.0. Similarly, if the texture's format has no alpha channel,
199 * force the alpha value read to 1.0. This allows the
200 * implementation to use an RGBA texture for any of these formats
201 * without leaking any unexpected values.
202 */
203 switch (img->_BaseFormat) {
204 case GL_ALPHA:
205 swizzles[0] = SWIZZLE_ZERO;
206 swizzles[1] = SWIZZLE_ZERO;
207 swizzles[2] = SWIZZLE_ZERO;
208 break;
209 case GL_LUMINANCE:
210 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
211 swizzles[0] = SWIZZLE_X;
212 swizzles[1] = SWIZZLE_X;
213 swizzles[2] = SWIZZLE_X;
214 swizzles[3] = SWIZZLE_ONE;
215 }
216 break;
217 case GL_LUMINANCE_ALPHA:
218 if (datatype == GL_SIGNED_NORMALIZED) {
219 swizzles[0] = SWIZZLE_X;
220 swizzles[1] = SWIZZLE_X;
221 swizzles[2] = SWIZZLE_X;
222 swizzles[3] = SWIZZLE_W;
223 }
224 break;
225 case GL_INTENSITY:
226 if (datatype == GL_SIGNED_NORMALIZED) {
227 swizzles[0] = SWIZZLE_X;
228 swizzles[1] = SWIZZLE_X;
229 swizzles[2] = SWIZZLE_X;
230 swizzles[3] = SWIZZLE_X;
231 }
232 break;
233 case GL_RED:
234 case GL_RG:
235 case GL_RGB:
236 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
237 swizzles[3] = SWIZZLE_ONE;
238 break;
239 }
240
241 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
242 swizzles[GET_SWZ(t->_Swizzle, 1)],
243 swizzles[GET_SWZ(t->_Swizzle, 2)],
244 swizzles[GET_SWZ(t->_Swizzle, 3)]);
245 }
246
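/**
 * Emit SURFACE_STATE for a buffer surface: the element count is split across
 * the Width/Height/Depth fields, and a relocation is emitted for the buffer
 * address when a BO is present.
 */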
247 static void
248 gen4_emit_buffer_surface_state(struct brw_context *brw,
249 uint32_t *out_offset,
250 drm_intel_bo *bo,
251 unsigned buffer_offset,
252 unsigned surface_format,
253 unsigned buffer_size,
254 unsigned pitch,
255 bool rw)
256 {
257 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
258 6 * 4, 32, out_offset);
259 memset(surf, 0, 6 * 4);
260
261 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
262 surface_format << BRW_SURFACE_FORMAT_SHIFT |
263 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
264 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
265 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
266 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
267 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
268 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
269
270 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
271 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
272 * physical cache. It is mapped in hardware to the sampler cache."
273 */
274 if (bo) {
275 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
276 bo, buffer_offset,
277 I915_GEM_DOMAIN_SAMPLER,
278 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
279 }
280 }
281
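/**
 * Set up the surface state for a GL_TEXTURE_BUFFER texture, clamping the
 * size to the underlying buffer object.
 */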
282 void
283 brw_update_buffer_texture_surface(struct gl_context *ctx,
284 unsigned unit,
285 uint32_t *surf_offset)
286 {
287 struct brw_context *brw = brw_context(ctx);
288 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
289 struct intel_buffer_object *intel_obj =
290 intel_buffer_object(tObj->BufferObject);
291 uint32_t size = tObj->BufferSize;
292 drm_intel_bo *bo = NULL;
293 mesa_format format = tObj->_BufferObjectFormat;
294 uint32_t brw_format = brw_format_for_mesa_format(format);
295 int texel_size = _mesa_get_format_bytes(format);
296
297 if (intel_obj) {
298 size = MIN2(size, intel_obj->Base.Size);
299 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
300 }
301
302 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
303 _mesa_problem(NULL, "bad format %s for texture buffer\n",
304 _mesa_get_format_name(format));
305 }
306
307 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
308 tObj->BufferOffset,
309 brw_format,
310 size / texel_size,
311 texel_size,
312 false /* rw */);
313 }
314
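/**
 * Construct SURFACE_STATE for a sampled texture, delegating buffer textures
 * to brw_update_buffer_texture_surface().
 */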
315 static void
316 brw_update_texture_surface(struct gl_context *ctx,
317 unsigned unit,
318 uint32_t *surf_offset,
319 bool for_gather,
320 uint32_t plane)
321 {
322 struct brw_context *brw = brw_context(ctx);
323 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
324 struct intel_texture_object *intelObj = intel_texture_object(tObj);
325 struct intel_mipmap_tree *mt = intelObj->mt;
326 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
327 uint32_t *surf;
328
329 /* BRW_NEW_TEXTURE_BUFFER */
330 if (tObj->Target == GL_TEXTURE_BUFFER) {
331 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
332 return;
333 }
334
335 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
336 6 * 4, 32, surf_offset);
337
338 uint32_t tex_format = translate_tex_format(brw, intelObj->_Format,
339 sampler->sRGBDecode);
340
341 if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
342 if (plane > 0)
343 mt = mt->plane[plane - 1];
344 if (mt == NULL)
345 return;
346
347 tex_format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
348 }
349
350 if (for_gather) {
351 /* Sandybridge's gather4 message is broken for integer formats.
352 * To work around this, we pretend the surface is UNORM for
353 * 8 or 16-bit formats, and emit shader instructions to recover
354 * the real INT/UINT value. For 32-bit formats, we pretend
355 * the surface is FLOAT, and simply reinterpret the resulting
356 * bits.
357 */
358 switch (tex_format) {
359 case BRW_SURFACEFORMAT_R8_SINT:
360 case BRW_SURFACEFORMAT_R8_UINT:
361 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
362 break;
363
364 case BRW_SURFACEFORMAT_R16_SINT:
365 case BRW_SURFACEFORMAT_R16_UINT:
366 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
367 break;
368
369 case BRW_SURFACEFORMAT_R32_SINT:
370 case BRW_SURFACEFORMAT_R32_UINT:
371 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
372 break;
373
374 default:
375 break;
376 }
377 }
378
379 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
380 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
381 BRW_SURFACE_CUBEFACE_ENABLES |
382 tex_format << BRW_SURFACE_FORMAT_SHIFT);
383
384 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
385
386 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
387 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
388 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
389
390 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
391 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
392 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
393
394 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
395 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
396 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
397 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
398
399 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
400
401 /* Emit relocation to surface contents */
402 drm_intel_bo_emit_reloc(brw->batch.bo,
403 *surf_offset + 4,
404 mt->bo,
405 surf[1] - mt->bo->offset64,
406 I915_GEM_DOMAIN_SAMPLER, 0);
407 }
408
409 /**
410 * Create the constant buffer surface. Vertex/fragment shader constants will be
411 * read from this buffer with Data Port Read instructions/messages.
412 */
413 void
414 brw_create_constant_surface(struct brw_context *brw,
415 drm_intel_bo *bo,
416 uint32_t offset,
417 uint32_t size,
418 uint32_t *out_offset)
419 {
420 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
421 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
422 size, 1, false);
423 }
424
425 /**
426 * Create the buffer surface. Shader buffer variables will be
427 * read from / written to this buffer with Data Port Read/Write
428 * instructions/messages.
429 */
430 void
431 brw_create_buffer_surface(struct brw_context *brw,
432 drm_intel_bo *bo,
433 uint32_t offset,
434 uint32_t size,
435 uint32_t *out_offset)
436 {
437 /* Use a raw surface so we can reuse existing untyped read/write/atomic
438 * messages. We need these specifically for the fragment shader since they
439 * include a pixel mask header that we need in order to ensure correct
440 * behavior with helper invocations, which cannot write to the buffer.
441 */
442 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
443 BRW_SURFACEFORMAT_RAW,
444 size, 1, true);
445 }
446
447 /**
448 * Set up a binding table entry for use by stream output logic (transform
449 * feedback).
450 *
451 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
452 */
453 void
454 brw_update_sol_surface(struct brw_context *brw,
455 struct gl_buffer_object *buffer_obj,
456 uint32_t *out_offset, unsigned num_vector_components,
457 unsigned stride_dwords, unsigned offset_dwords)
458 {
459 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
460 uint32_t offset_bytes = 4 * offset_dwords;
461 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
462 offset_bytes,
463 buffer_obj->Size - offset_bytes);
464 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
465 out_offset);
466 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
467 size_t size_dwords = buffer_obj->Size / 4;
468 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
469
470 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
471 * too big to map using a single binding table entry?
472 */
473 assert((size_dwords - offset_dwords) / stride_dwords
474 <= BRW_MAX_NUM_BUFFER_ENTRIES);
475
476 if (size_dwords > offset_dwords + num_vector_components) {
477 /* There is room for at least 1 transform feedback output in the buffer.
478 * Compute the number of additional transform feedback outputs the
479 * buffer has room for.
480 */
481 buffer_size_minus_1 =
482 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
483 } else {
484 /* There isn't even room for a single transform feedback output in the
485 * buffer. We can't configure the binding table entry to prevent output
486 * entirely; we'll have to rely on the geometry shader to detect
487 * overflow. But to minimize the damage in case of a bug, set up the
488 * binding table entry to just allow a single output.
489 */
490 buffer_size_minus_1 = 0;
491 }
492 width = buffer_size_minus_1 & 0x7f;
493 height = (buffer_size_minus_1 & 0xfff80) >> 7;
494 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
495
496 switch (num_vector_components) {
497 case 1:
498 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
499 break;
500 case 2:
501 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
502 break;
503 case 3:
504 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
505 break;
506 case 4:
507 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
508 break;
509 default:
510 unreachable("Invalid vector size for transform feedback output");
511 }
512
513 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
514 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
515 surface_format << BRW_SURFACE_FORMAT_SHIFT |
516 BRW_SURFACE_RC_READ_WRITE;
517 surf[1] = bo->offset64 + offset_bytes; /* reloc */
518 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
519 height << BRW_SURFACE_HEIGHT_SHIFT);
520 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
521 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
522 surf[4] = 0;
523 surf[5] = 0;
524
525 /* Emit relocation to surface contents. */
526 drm_intel_bo_emit_reloc(brw->batch.bo,
527 *out_offset + 4,
528 bo, offset_bytes,
529 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
530 }
531
532 /* Creates a new WM constant buffer reflecting the current fragment program's
533 * constants, if needed by the fragment program.
534 *
535 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
536 * state atom.
537 */
538 static void
539 brw_upload_wm_pull_constants(struct brw_context *brw)
540 {
541 struct brw_stage_state *stage_state = &brw->wm.base;
542 /* BRW_NEW_FRAGMENT_PROGRAM */
543 struct brw_fragment_program *fp =
544 (struct brw_fragment_program *) brw->fragment_program;
545 /* BRW_NEW_FS_PROG_DATA */
546 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
547
548 /* _NEW_PROGRAM_CONSTANTS */
549 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
550 stage_state, prog_data);
551 }
552
553 const struct brw_tracked_state brw_wm_pull_constants = {
554 .dirty = {
555 .mesa = _NEW_PROGRAM_CONSTANTS,
556 .brw = BRW_NEW_BATCH |
557 BRW_NEW_BLORP |
558 BRW_NEW_FRAGMENT_PROGRAM |
559 BRW_NEW_FS_PROG_DATA,
560 },
561 .emit = brw_upload_wm_pull_constants,
562 };
563
564 /**
565 * Creates a null renderbuffer surface.
566 *
567 * This is used when the shader doesn't write to any color output. An FB
568 * write to target 0 will still be emitted, because that's how the thread is
569 * terminated (and computed depth is returned), so we need to have the
571 * hardware discard the target 0 color output.
571 */
572 static void
573 brw_emit_null_surface_state(struct brw_context *brw,
574 unsigned width,
575 unsigned height,
576 unsigned samples,
577 uint32_t *out_offset)
578 {
579 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
580 * Notes):
581 *
582 * A null surface will be used in instances where an actual surface is
583 * not bound. When a write message is generated to a null surface, no
584 * actual surface is written to. When a read message (including any
585 * sampling engine message) is generated to a null surface, the result
586 * is all zeros. Note that a null surface type is allowed to be used
587 * with all messages, even if it is not specifically indicated as
588 * supported. All of the remaining fields in surface state are ignored
589 * for null surfaces, with the following exceptions:
590 *
591 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
592 * depth buffer’s corresponding state for all render target surfaces,
593 * including null.
594 *
595 * - Surface Format must be R8G8B8A8_UNORM.
596 */
597 unsigned surface_type = BRW_SURFACE_NULL;
598 drm_intel_bo *bo = NULL;
599 unsigned pitch_minus_1 = 0;
600 uint32_t multisampling_state = 0;
601 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
602 out_offset);
603
604 if (samples > 1) {
605 /* On Gen6, null render targets seem to cause GPU hangs when
606 * multisampling. So work around this problem by rendering into a dummy
607 * color buffer.
608 *
609 * To decrease the amount of memory needed by the workaround buffer, we
610 * set its pitch to 128 bytes (the width of a Y tile). This means that
611 * the amount of memory needed for the workaround buffer is
612 * (width_in_tiles + height_in_tiles - 1) tiles.
613 *
614 * Note that since the workaround buffer will be interpreted by the
615 * hardware as an interleaved multisampled buffer, we need to compute
616 * width_in_tiles and height_in_tiles by dividing the width and height
617 * by 16 rather than the normal Y-tile size of 32.
618 */
619 unsigned width_in_tiles = ALIGN(width, 16) / 16;
620 unsigned height_in_tiles = ALIGN(height, 16) / 16;
621 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
622 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
623 size_needed);
624 bo = brw->wm.multisampled_null_render_target_bo;
625 surface_type = BRW_SURFACE_2D;
626 pitch_minus_1 = 127;
627 multisampling_state = brw_get_surface_num_multisamples(samples);
628 }
629
630 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
631 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
632 if (brw->gen < 6) {
633 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
634 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
635 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
636 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
637 }
638 surf[1] = bo ? bo->offset64 : 0;
639 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
640 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
641
642 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
643 * Notes):
644 *
645 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
646 */
647 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
648 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
649 surf[4] = multisampling_state;
650 surf[5] = 0;
651
652 if (bo) {
653 drm_intel_bo_emit_reloc(brw->batch.bo,
654 *out_offset + 4,
655 bo, 0,
656 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
657 }
658 }
659
660 /**
661 * Sets up a surface state structure to point at the given region.
662 * While it is only used for the front/back buffer currently, it should be
663 * usable for further buffers when doing ARB_draw_buffers support.
664 */
665 static uint32_t
666 brw_update_renderbuffer_surface(struct brw_context *brw,
667 struct gl_renderbuffer *rb,
668 bool layered, unsigned unit,
669 uint32_t surf_index)
670 {
671 struct gl_context *ctx = &brw->ctx;
672 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
673 struct intel_mipmap_tree *mt = irb->mt;
674 uint32_t *surf;
675 uint32_t tile_x, tile_y;
676 uint32_t format = 0;
677 uint32_t offset;
678 /* _NEW_BUFFERS */
679 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
680 /* BRW_NEW_FS_PROG_DATA */
681
682 assert(!layered);
683
684 if (rb->TexImage && !brw->has_surface_tile_offset) {
685 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
686
687 if (tile_x != 0 || tile_y != 0) {
688 /* Original gen4 hardware couldn't draw to a non-tile-aligned
689 * destination in a miptree unless you actually set up your renderbuffer
690 * as a miptree and used the fragile lod/array_index/etc. controls to
691 * select the image. So, instead, we just make a new single-level
692 * miptree and render into that.
693 */
694 intel_renderbuffer_move_to_temp(brw, irb, false);
695 mt = irb->mt;
696 }
697 }
698
699 intel_miptree_used_for_rendering(irb->mt);
700
701 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
702
703 format = brw->render_target_format[rb_format];
704 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
705 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
706 __func__, _mesa_get_format_name(rb_format));
707 }
708
709 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
710 format << BRW_SURFACE_FORMAT_SHIFT);
711
712 /* reloc */
713 assert(mt->offset % mt->cpp == 0);
714 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
715 mt->bo->offset64 + mt->offset);
716
717 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
718 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
719
720 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
721 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
722
723 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
724
725 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
726 /* These fields can't encode the low bits of the tile offsets: the X offset
727 * must be a multiple of 4 and the Y offset a multiple of 2, as asserted below.
728 */
729 assert(tile_x % 4 == 0);
730 assert(tile_y % 2 == 0);
731 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
732 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
733 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
734
735 if (brw->gen < 6) {
736 /* _NEW_COLOR */
737 if (!ctx->Color.ColorLogicOpEnabled &&
738 (ctx->Color.BlendEnabled & (1 << unit)))
739 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
740
741 if (!ctx->Color.ColorMask[unit][0])
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
743 if (!ctx->Color.ColorMask[unit][1])
744 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
745 if (!ctx->Color.ColorMask[unit][2])
746 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
747
748 /* Disable writes to the alpha component when the renderbuffer is XRGB
749 * (it has no alpha bits) or when the color mask says to.
750 */
751 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
752 !ctx->Color.ColorMask[unit][3]) {
753 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
754 }
755 }
756
757 drm_intel_bo_emit_reloc(brw->batch.bo,
758 offset + 4,
759 mt->bo,
760 surf[1] - mt->bo->offset64,
761 I915_GEM_DOMAIN_RENDER,
762 I915_GEM_DOMAIN_RENDER);
763
764 return offset;
765 }
766
767 /**
768 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
769 */
770 void
771 brw_update_renderbuffer_surfaces(struct brw_context *brw,
772 const struct gl_framebuffer *fb,
773 uint32_t render_target_start,
774 uint32_t *surf_offset)
775 {
776 GLuint i;
777 const unsigned int w = _mesa_geometric_width(fb);
778 const unsigned int h = _mesa_geometric_height(fb);
779 const unsigned int s = _mesa_geometric_samples(fb);
780
781 /* Update surfaces for drawing buffers */
782 if (fb->_NumColorDrawBuffers >= 1) {
783 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
784 const uint32_t surf_index = render_target_start + i;
785
786 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
787 surf_offset[surf_index] =
788 brw->vtbl.update_renderbuffer_surface(
789 brw, fb->_ColorDrawBuffers[i],
790 _mesa_geometric_layers(fb) > 0, i, surf_index);
791 } else {
792 brw->vtbl.emit_null_surface_state(brw, w, h, s,
793 &surf_offset[surf_index]);
794 }
795 }
796 } else {
797 const uint32_t surf_index = render_target_start;
798 brw->vtbl.emit_null_surface_state(brw, w, h, s,
799 &surf_offset[surf_index]);
800 }
801 }
802
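/**
 * State atom callback: emit surface state for the current draw framebuffer's
 * color attachments.
 */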
803 static void
804 update_renderbuffer_surfaces(struct brw_context *brw)
805 {
806 const struct gl_context *ctx = &brw->ctx;
807
808 /* _NEW_BUFFERS | _NEW_COLOR */
809 const struct gl_framebuffer *fb = ctx->DrawBuffer;
810 brw_update_renderbuffer_surfaces(
811 brw, fb,
812 brw->wm.prog_data->binding_table.render_target_start,
813 brw->wm.base.surf_offset);
814 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
815 }
816
817 const struct brw_tracked_state brw_renderbuffer_surfaces = {
818 .dirty = {
819 .mesa = _NEW_BUFFERS |
820 _NEW_COLOR,
821 .brw = BRW_NEW_BATCH |
822 BRW_NEW_BLORP |
823 BRW_NEW_FS_PROG_DATA,
824 },
825 .emit = update_renderbuffer_surfaces,
826 };
827
828 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
829 .dirty = {
830 .mesa = _NEW_BUFFERS,
831 .brw = BRW_NEW_BATCH |
832 BRW_NEW_BLORP,
833 },
834 .emit = update_renderbuffer_surfaces,
835 };
836
837
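/**
 * Emit texture surface state for every sampler used by the given program,
 * storing the offsets in the regular, gather, or per-plane section of the
 * stage's binding table depending on the arguments.
 */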
838 static void
839 update_stage_texture_surfaces(struct brw_context *brw,
840 const struct gl_program *prog,
841 struct brw_stage_state *stage_state,
842 bool for_gather, uint32_t plane)
843 {
844 if (!prog)
845 return;
846
847 struct gl_context *ctx = &brw->ctx;
848
849 uint32_t *surf_offset = stage_state->surf_offset;
850
851 /* BRW_NEW_*_PROG_DATA */
852 if (for_gather)
853 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
854 else
855 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
856
857 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
858 for (unsigned s = 0; s < num_samplers; s++) {
859 surf_offset[s] = 0;
860
861 if (prog->SamplersUsed & (1 << s)) {
862 const unsigned unit = prog->SamplerUnits[s];
863
864 /* _NEW_TEXTURE */
865 if (ctx->Texture.Unit[unit]._Current) {
866 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
867 }
868 }
869 }
870 }
871
872
873 /**
874 * Construct SURFACE_STATE objects for enabled textures.
875 */
876 static void
877 brw_update_texture_surfaces(struct brw_context *brw)
878 {
879 /* BRW_NEW_VERTEX_PROGRAM */
880 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
881
882 /* BRW_NEW_TESS_PROGRAMS */
883 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
884 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
885
886 /* BRW_NEW_GEOMETRY_PROGRAM */
887 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
888
889 /* BRW_NEW_FRAGMENT_PROGRAM */
890 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
891
892 /* _NEW_TEXTURE */
893 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
894 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
895 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
896 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
897 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
898
899 /* Emit an alternate set of surface state for gather. This
900 * allows the surface format to be overridden for only the
901 * gather4 messages. */
902 if (brw->gen < 8) {
903 if (vs && vs->UsesGather)
904 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
905 if (tcs && tcs->UsesGather)
906 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
907 if (tes && tes->UsesGather)
908 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
909 if (gs && gs->UsesGather)
910 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
911 if (fs && fs->UsesGather)
912 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
913 }
914
915 if (fs) {
916 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
917 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
918 }
919
920 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
921 }
922
923 const struct brw_tracked_state brw_texture_surfaces = {
924 .dirty = {
925 .mesa = _NEW_TEXTURE,
926 .brw = BRW_NEW_BATCH |
927 BRW_NEW_BLORP |
928 BRW_NEW_FRAGMENT_PROGRAM |
929 BRW_NEW_FS_PROG_DATA |
930 BRW_NEW_GEOMETRY_PROGRAM |
931 BRW_NEW_GS_PROG_DATA |
932 BRW_NEW_TESS_PROGRAMS |
933 BRW_NEW_TCS_PROG_DATA |
934 BRW_NEW_TES_PROG_DATA |
935 BRW_NEW_TEXTURE_BUFFER |
936 BRW_NEW_VERTEX_PROGRAM |
937 BRW_NEW_VS_PROG_DATA,
938 },
939 .emit = brw_update_texture_surfaces,
940 };
941
942 static void
943 brw_update_cs_texture_surfaces(struct brw_context *brw)
944 {
945 /* BRW_NEW_COMPUTE_PROGRAM */
946 struct gl_program *cs = (struct gl_program *) brw->compute_program;
947
948 /* _NEW_TEXTURE */
949 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
950
951 /* Emit an alternate set of surface state for gather. This
952 * allows the surface format to be overridden for only the
953 * gather4 messages.
954 */
955 if (brw->gen < 8) {
956 if (cs && cs->UsesGather)
957 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
958 }
959
960 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
961 }
962
963 const struct brw_tracked_state brw_cs_texture_surfaces = {
964 .dirty = {
965 .mesa = _NEW_TEXTURE,
966 .brw = BRW_NEW_BATCH |
967 BRW_NEW_BLORP |
968 BRW_NEW_COMPUTE_PROGRAM,
969 },
970 .emit = brw_update_cs_texture_surfaces,
971 };
972
973
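/**
 * Create constant (UBO) and SSBO buffer surfaces for the given shader stage
 * and record their offsets in the binding table.
 */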
974 void
975 brw_upload_ubo_surfaces(struct brw_context *brw,
976 struct gl_shader *shader,
977 struct brw_stage_state *stage_state,
978 struct brw_stage_prog_data *prog_data)
979 {
980 struct gl_context *ctx = &brw->ctx;
981
982 if (!shader)
983 return;
984
985 uint32_t *ubo_surf_offsets =
986 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
987
988 for (int i = 0; i < shader->NumUniformBlocks; i++) {
989 struct gl_uniform_buffer_binding *binding =
990 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
991
992 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
993 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
994 } else {
995 struct intel_buffer_object *intel_bo =
996 intel_buffer_object(binding->BufferObject);
997 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
998 if (!binding->AutomaticSize)
999 size = MIN2(size, binding->Size);
1000 drm_intel_bo *bo =
1001 intel_bufferobj_buffer(brw, intel_bo,
1002 binding->Offset,
1003 size);
1004 brw_create_constant_surface(brw, bo, binding->Offset,
1005 size,
1006 &ubo_surf_offsets[i]);
1007 }
1008 }
1009
1010 uint32_t *ssbo_surf_offsets =
1011 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1012
1013 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1014 struct gl_shader_storage_buffer_binding *binding =
1015 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1016
1017 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1018 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1019 } else {
1020 struct intel_buffer_object *intel_bo =
1021 intel_buffer_object(binding->BufferObject);
1022 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1023 if (!binding->AutomaticSize)
1024 size = MIN2(size, binding->Size);
1025 drm_intel_bo *bo =
1026 intel_bufferobj_buffer(brw, intel_bo,
1027 binding->Offset,
1028 size);
1029 brw_create_buffer_surface(brw, bo, binding->Offset,
1030 size,
1031 &ssbo_surf_offsets[i]);
1032 }
1033 }
1034
1035 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1036 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1037 }
1038
1039 static void
1040 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1041 {
1042 struct gl_context *ctx = &brw->ctx;
1043 /* _NEW_PROGRAM */
1044 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1045
1046 if (!prog)
1047 return;
1048
1049 /* BRW_NEW_FS_PROG_DATA */
1050 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1051 &brw->wm.base, &brw->wm.prog_data->base);
1052 }
1053
1054 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1055 .dirty = {
1056 .mesa = _NEW_PROGRAM,
1057 .brw = BRW_NEW_BATCH |
1058 BRW_NEW_BLORP |
1059 BRW_NEW_FS_PROG_DATA |
1060 BRW_NEW_UNIFORM_BUFFER,
1061 },
1062 .emit = brw_upload_wm_ubo_surfaces,
1063 };
1064
1065 static void
1066 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1067 {
1068 struct gl_context *ctx = &brw->ctx;
1069 /* _NEW_PROGRAM */
1070 struct gl_shader_program *prog =
1071 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1072
1073 if (!prog)
1074 return;
1075
1076 /* BRW_NEW_CS_PROG_DATA */
1077 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1078 &brw->cs.base, &brw->cs.prog_data->base);
1079 }
1080
1081 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1082 .dirty = {
1083 .mesa = _NEW_PROGRAM,
1084 .brw = BRW_NEW_BATCH |
1085 BRW_NEW_BLORP |
1086 BRW_NEW_CS_PROG_DATA |
1087 BRW_NEW_UNIFORM_BUFFER,
1088 },
1089 .emit = brw_upload_cs_ubo_surfaces,
1090 };
1091
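/**
 * Create RAW buffer surfaces for the atomic counter buffers bound to the
 * given shader stage.
 */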
1092 void
1093 brw_upload_abo_surfaces(struct brw_context *brw,
1094 struct gl_shader *shader,
1095 struct brw_stage_state *stage_state,
1096 struct brw_stage_prog_data *prog_data)
1097 {
1098 struct gl_context *ctx = &brw->ctx;
1099 uint32_t *surf_offsets =
1100 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1101
1102 if (shader && shader->NumAtomicBuffers) {
1103 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1104 struct gl_atomic_buffer_binding *binding =
1105 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1106 struct intel_buffer_object *intel_bo =
1107 intel_buffer_object(binding->BufferObject);
1108 drm_intel_bo *bo = intel_bufferobj_buffer(
1109 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1110
1111 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1112 binding->Offset, BRW_SURFACEFORMAT_RAW,
1113 bo->size - binding->Offset, 1, true);
1114 }
1115
1116 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1117 }
1118 }
1119
1120 static void
1121 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1122 {
1123 struct gl_context *ctx = &brw->ctx;
1124 /* _NEW_PROGRAM */
1125 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1126
1127 if (prog) {
1128 /* BRW_NEW_FS_PROG_DATA */
1129 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1130 &brw->wm.base, &brw->wm.prog_data->base);
1131 }
1132 }
1133
1134 const struct brw_tracked_state brw_wm_abo_surfaces = {
1135 .dirty = {
1136 .mesa = _NEW_PROGRAM,
1137 .brw = BRW_NEW_ATOMIC_BUFFER |
1138 BRW_NEW_BLORP |
1139 BRW_NEW_BATCH |
1140 BRW_NEW_FS_PROG_DATA,
1141 },
1142 .emit = brw_upload_wm_abo_surfaces,
1143 };
1144
1145 static void
1146 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1147 {
1148 struct gl_context *ctx = &brw->ctx;
1149 /* _NEW_PROGRAM */
1150 struct gl_shader_program *prog =
1151 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1152
1153 if (prog) {
1154 /* BRW_NEW_CS_PROG_DATA */
1155 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1156 &brw->cs.base, &brw->cs.prog_data->base);
1157 }
1158 }
1159
1160 const struct brw_tracked_state brw_cs_abo_surfaces = {
1161 .dirty = {
1162 .mesa = _NEW_PROGRAM,
1163 .brw = BRW_NEW_ATOMIC_BUFFER |
1164 BRW_NEW_BLORP |
1165 BRW_NEW_BATCH |
1166 BRW_NEW_CS_PROG_DATA,
1167 },
1168 .emit = brw_upload_cs_abo_surfaces,
1169 };
1170
1171 static void
1172 brw_upload_cs_image_surfaces(struct brw_context *brw)
1173 {
1174 struct gl_context *ctx = &brw->ctx;
1175 /* _NEW_PROGRAM */
1176 struct gl_shader_program *prog =
1177 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1178
1179 if (prog) {
1180 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1181 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1182 &brw->cs.base, &brw->cs.prog_data->base);
1183 }
1184 }
1185
1186 const struct brw_tracked_state brw_cs_image_surfaces = {
1187 .dirty = {
1188 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1189 .brw = BRW_NEW_BATCH |
1190 BRW_NEW_BLORP |
1191 BRW_NEW_CS_PROG_DATA |
1192 BRW_NEW_IMAGE_UNITS
1193 },
1194 .emit = brw_upload_cs_image_surfaces,
1195 };
1196
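/**
 * Choose the hardware surface format used to access a shader image:
 * write-only access can use the format directly, reads are lowered to a
 * compatible typed format when one exists, and otherwise we fall back to
 * untyped (RAW) access.
 */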
1197 static uint32_t
1198 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1199 {
1200 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1201 uint32_t hw_format = brw_format_for_mesa_format(format);
1202 if (access == GL_WRITE_ONLY) {
1203 return hw_format;
1204 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1205 /* Typed surface reads support a very limited subset of the shader
1206 * image formats. Translate it into the closest format the
1207 * hardware supports.
1208 */
1209 return isl_lower_storage_image_format(devinfo, hw_format);
1210 } else {
1211 /* The hardware doesn't actually support a typed format that we can use,
1212 * so we have to fall back to untyped read/write messages.
1213 */
1214 return BRW_SURFACEFORMAT_RAW;
1215 }
1216 }
1217
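/**
 * Fill in the brw_image_param fields that are common to all image types
 * (everything else is zeroed).
 */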
1218 static void
1219 update_default_image_param(struct brw_context *brw,
1220 struct gl_image_unit *u,
1221 unsigned surface_idx,
1222 struct brw_image_param *param)
1223 {
1224 memset(param, 0, sizeof(*param));
1225 param->surface_idx = surface_idx;
1226 /* Set the swizzling shifts to all-ones to effectively disable swizzling;
1227 * see emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1228 * detailed explanation of these parameters.
1229 */
1230 param->swizzling[0] = 0xff;
1231 param->swizzling[1] = 0xff;
1232 }
1233
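/**
 * Fill in brw_image_param for a buffer image: size in texels and the texel
 * stride.
 */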
1234 static void
1235 update_buffer_image_param(struct brw_context *brw,
1236 struct gl_image_unit *u,
1237 unsigned surface_idx,
1238 struct brw_image_param *param)
1239 {
1240 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1241
1242 update_default_image_param(brw, u, surface_idx, param);
1243
1244 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1245 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1246 }
1247
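/**
 * Fill in brw_image_param for a texture image: level dimensions, slice
 * offsets, strides, and the tiling/swizzling parameters consumed by the
 * image lowering code.
 */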
1248 static void
1249 update_texture_image_param(struct brw_context *brw,
1250 struct gl_image_unit *u,
1251 unsigned surface_idx,
1252 struct brw_image_param *param)
1253 {
1254 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1255
1256 update_default_image_param(brw, u, surface_idx, param);
1257
1258 param->size[0] = minify(mt->logical_width0, u->Level);
1259 param->size[1] = minify(mt->logical_height0, u->Level);
1260 param->size[2] = (!u->Layered ? 1 :
1261 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1262 u->TexObj->Target == GL_TEXTURE_3D ?
1263 minify(mt->logical_depth0, u->Level) :
1264 mt->logical_depth0);
1265
1266 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1267 &param->offset[0],
1268 &param->offset[1]);
1269
1270 param->stride[0] = mt->cpp;
1271 param->stride[1] = mt->pitch / mt->cpp;
1272 param->stride[2] =
1273 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1274 param->stride[3] =
1275 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1276
1277 if (mt->tiling == I915_TILING_X) {
1278 /* An X tile is a rectangular block of 512x8 bytes. */
1279 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1280 param->tiling[1] = _mesa_logbase2(8);
1281
1282 if (brw->has_swizzling) {
1283 /* Right shifts required to swizzle bits 9 and 10 of the memory
1284 * address with bit 6.
1285 */
1286 param->swizzling[0] = 3;
1287 param->swizzling[1] = 4;
1288 }
1289 } else if (mt->tiling == I915_TILING_Y) {
1290 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1291 * different to the layout of an X-tiled surface, we simply pretend that
1292 * the surface is broken up in a number of smaller 16Bx32 tiles, each
1293 * one arranged in X-major order just like is the case for X-tiling.
1294 */
1295 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1296 param->tiling[1] = _mesa_logbase2(32);
1297
1298 if (brw->has_swizzling) {
1299 /* Right shift required to swizzle bit 9 of the memory address with
1300 * bit 6.
1301 */
1302 param->swizzling[0] = 3;
1303 }
1304 }
1305
1306 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1307 * address calculation algorithm (emit_address_calculation() in
1308 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1309 * modulus equal to the LOD.
1310 */
1311 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1312 0);
1313 }
1314
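/**
 * Emit surface state and image parameters for a single image unit, using a
 * null surface if the unit isn't in a valid state.
 */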
1315 static void
1316 update_image_surface(struct brw_context *brw,
1317 struct gl_image_unit *u,
1318 GLenum access,
1319 unsigned surface_idx,
1320 uint32_t *surf_offset,
1321 struct brw_image_param *param)
1322 {
1323 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1324 struct gl_texture_object *obj = u->TexObj;
1325 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1326
1327 if (obj->Target == GL_TEXTURE_BUFFER) {
1328 struct intel_buffer_object *intel_obj =
1329 intel_buffer_object(obj->BufferObject);
1330 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1331 _mesa_get_format_bytes(u->_ActualFormat));
1332
1333 brw->vtbl.emit_buffer_surface_state(
1334 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1335 format, intel_obj->Base.Size / texel_size, texel_size,
1336 access != GL_READ_ONLY);
1337
1338 update_buffer_image_param(brw, u, surface_idx, param);
1339
1340 } else {
1341 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1342 struct intel_mipmap_tree *mt = intel_obj->mt;
1343
1344 if (format == BRW_SURFACEFORMAT_RAW) {
1345 brw->vtbl.emit_buffer_surface_state(
1346 brw, surf_offset, mt->bo, mt->offset,
1347 format, mt->bo->size - mt->offset, 1 /* pitch */,
1348 access != GL_READ_ONLY);
1349
1350 } else {
1351 const unsigned min_layer = obj->MinLayer + u->_Layer;
1352 const unsigned min_level = obj->MinLevel + u->Level;
1353 const unsigned num_layers = (!u->Layered ? 1 :
1354 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1355 mt->logical_depth0);
1356 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1357 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1358 GL_TEXTURE_2D_ARRAY : obj->Target);
1359 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1360
1361 brw->vtbl.emit_texture_surface_state(
1362 brw, mt, target,
1363 min_layer, min_layer + num_layers,
1364 min_level, min_level + 1,
1365 format, SWIZZLE_XYZW,
1366 surf_offset, surf_index, access != GL_READ_ONLY, false);
1367 }
1368
1369 update_texture_image_param(brw, u, surface_idx, param);
1370 }
1371
1372 } else {
1373 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1374 update_default_image_param(brw, u, surface_idx, param);
1375 }
1376 }
1377
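/**
 * Emit surface state and image parameters for every image uniform used by
 * the given shader stage.
 */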
1378 void
1379 brw_upload_image_surfaces(struct brw_context *brw,
1380 struct gl_shader *shader,
1381 struct brw_stage_state *stage_state,
1382 struct brw_stage_prog_data *prog_data)
1383 {
1384 struct gl_context *ctx = &brw->ctx;
1385
1386 if (shader && shader->NumImages) {
1387 for (unsigned i = 0; i < shader->NumImages; i++) {
1388 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1389 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1390
1391 update_image_surface(brw, u, shader->ImageAccess[i],
1392 surf_idx,
1393 &stage_state->surf_offset[surf_idx],
1394 &prog_data->image_param[i]);
1395 }
1396
1397 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1398 /* This may have changed the image metadata that depends on the context
1399 * image unit state and is passed to the program as uniforms, so make sure
1400 * that push and pull constants are reuploaded.
1401 */
1402 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1403 }
1404 }
1405
1406 static void
1407 brw_upload_wm_image_surfaces(struct brw_context *brw)
1408 {
1409 struct gl_context *ctx = &brw->ctx;
1410 /* BRW_NEW_FRAGMENT_PROGRAM */
1411 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1412
1413 if (prog) {
1414 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1415 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1416 &brw->wm.base, &brw->wm.prog_data->base);
1417 }
1418 }
1419
1420 const struct brw_tracked_state brw_wm_image_surfaces = {
1421 .dirty = {
1422 .mesa = _NEW_TEXTURE,
1423 .brw = BRW_NEW_BATCH |
1424 BRW_NEW_BLORP |
1425 BRW_NEW_FRAGMENT_PROGRAM |
1426 BRW_NEW_FS_PROG_DATA |
1427 BRW_NEW_IMAGE_UNITS
1428 },
1429 .emit = brw_upload_wm_image_surfaces,
1430 };
1431
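/**
 * Plug the gen4-6 surface state emitters into the context vtable.
 */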
1432 void
1433 gen4_init_vtable_surface_functions(struct brw_context *brw)
1434 {
1435 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1436 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1437 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1438 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1439 }
1440
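/**
 * Upload the surface the compute shader uses to read gl_NumWorkGroups; for
 * non-indirect dispatches the three counts are uploaded to a temporary
 * buffer first.
 */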
1441 static void
1442 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1443 {
1444 struct gl_context *ctx = &brw->ctx;
1445 /* _NEW_PROGRAM */
1446 struct gl_shader_program *prog =
1447 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1448
1449 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1450 const unsigned surf_idx =
1451 brw->cs.prog_data->binding_table.work_groups_start;
1452 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1453 drm_intel_bo *bo;
1454 uint32_t bo_offset;
1455
1456 if (brw->compute.num_work_groups_bo == NULL) {
1457 bo = NULL;
1458 intel_upload_data(brw,
1459 (void *)brw->compute.num_work_groups,
1460 3 * sizeof(GLuint),
1461 sizeof(GLuint),
1462 &bo,
1463 &bo_offset);
1464 } else {
1465 bo = brw->compute.num_work_groups_bo;
1466 bo_offset = brw->compute.num_work_groups_offset;
1467 }
1468
1469 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1470 bo, bo_offset,
1471 BRW_SURFACEFORMAT_RAW,
1472 3 * sizeof(GLuint), 1, true);
1473 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1474 }
1475 }
1476
1477 const struct brw_tracked_state brw_cs_work_groups_surface = {
1478 .dirty = {
1479 .brw = BRW_NEW_BLORP |
1480 BRW_NEW_CS_WORK_GROUPS
1481 },
1482 .emit = brw_upload_cs_work_groups_surface,
1483 };