[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "main/framebuffer.h"
40
41 #include "intel_mipmap_tree.h"
42 #include "intel_batchbuffer.h"
43 #include "intel_tex.h"
44 #include "intel_fbo.h"
45 #include "intel_buffer_objects.h"
46
47 #include "brw_context.h"
48 #include "brw_state.h"
49 #include "brw_defines.h"
50 #include "brw_wm.h"
51
52 GLuint
53 translate_tex_target(GLenum target)
54 {
55 switch (target) {
56 case GL_TEXTURE_1D:
57 case GL_TEXTURE_1D_ARRAY_EXT:
58 return BRW_SURFACE_1D;
59
60 case GL_TEXTURE_RECTANGLE_NV:
61 return BRW_SURFACE_2D;
62
63 case GL_TEXTURE_2D:
64 case GL_TEXTURE_2D_ARRAY_EXT:
65 case GL_TEXTURE_EXTERNAL_OES:
66 case GL_TEXTURE_2D_MULTISAMPLE:
67 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
68 return BRW_SURFACE_2D;
69
70 case GL_TEXTURE_3D:
71 return BRW_SURFACE_3D;
72
73 case GL_TEXTURE_CUBE_MAP:
74 case GL_TEXTURE_CUBE_MAP_ARRAY:
75 return BRW_SURFACE_CUBE;
76
77 default:
78 unreachable("not reached");
79 }
80 }
81
82 uint32_t
83 brw_get_surface_tiling_bits(uint32_t tiling)
84 {
85 switch (tiling) {
86 case I915_TILING_X:
87 return BRW_SURFACE_TILED;
88 case I915_TILING_Y:
89 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
90 default:
91 return 0;
92 }
93 }
94
95
96 uint32_t
97 brw_get_surface_num_multisamples(unsigned num_samples)
98 {
99 if (num_samples > 1)
100 return BRW_SURFACE_MULTISAMPLECOUNT_4;
101 else
102 return BRW_SURFACE_MULTISAMPLECOUNT_1;
103 }
104
105 void
106 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
107 bool is_render_target,
108 unsigned *width, unsigned *height,
109 unsigned *pitch, uint32_t *tiling, unsigned *format)
110 {
111 static const unsigned halign_stencil = 8;
112
113 /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
114 * there are half as many rows.
115 * In addition, mip-levels are accessed manually by the program, and
116 * therefore the surface is set up to cover all the mip-levels for one slice.
117 * (Hardware is still used to access individual slices.)
118 */
119 *tiling = I915_TILING_Y;
120 *pitch = mt->pitch * 2;
121 *width = ALIGN(mt->total_width, halign_stencil) * 2;
122 *height = (mt->total_height / mt->physical_depth0) / 2;
123
124 if (is_render_target) {
125 *format = BRW_SURFACEFORMAT_R8_UINT;
126 }
127 }
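/* Worked example with hypothetical values: for a W-tiled stencil miptree with
 * mt->pitch == 128, mt->total_width == 64, mt->total_height == 128 and
 * mt->physical_depth0 == 1, the code above reports the surface to the
 * hardware as Y-tiled with pitch = 128 * 2 = 256, width = ALIGN(64, 8) * 2 =
 * 128 and height = (128 / 1) / 2 = 64.
 */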
128
129
130 /**
131 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
132 * swizzling.
133 */
134 int
135 brw_get_texture_swizzle(const struct gl_context *ctx,
136 const struct gl_texture_object *t)
137 {
138 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
139
140 int swizzles[SWIZZLE_NIL + 1] = {
141 SWIZZLE_X,
142 SWIZZLE_Y,
143 SWIZZLE_Z,
144 SWIZZLE_W,
145 SWIZZLE_ZERO,
146 SWIZZLE_ONE,
147 SWIZZLE_NIL
148 };
149
150 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
151 img->_BaseFormat == GL_DEPTH_STENCIL) {
152 GLenum depth_mode = t->DepthMode;
153
154 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
155 * with depth component data specified with a sized internal format.
156 * Otherwise, it's left at the old default, GL_LUMINANCE.
157 */
158 if (_mesa_is_gles3(ctx) &&
159 img->InternalFormat != GL_DEPTH_COMPONENT &&
160 img->InternalFormat != GL_DEPTH_STENCIL) {
161 depth_mode = GL_RED;
162 }
163
164 switch (depth_mode) {
165 case GL_ALPHA:
166 swizzles[0] = SWIZZLE_ZERO;
167 swizzles[1] = SWIZZLE_ZERO;
168 swizzles[2] = SWIZZLE_ZERO;
169 swizzles[3] = SWIZZLE_X;
170 break;
171 case GL_LUMINANCE:
172 swizzles[0] = SWIZZLE_X;
173 swizzles[1] = SWIZZLE_X;
174 swizzles[2] = SWIZZLE_X;
175 swizzles[3] = SWIZZLE_ONE;
176 break;
177 case GL_INTENSITY:
178 swizzles[0] = SWIZZLE_X;
179 swizzles[1] = SWIZZLE_X;
180 swizzles[2] = SWIZZLE_X;
181 swizzles[3] = SWIZZLE_X;
182 break;
183 case GL_RED:
184 swizzles[0] = SWIZZLE_X;
185 swizzles[1] = SWIZZLE_ZERO;
186 swizzles[2] = SWIZZLE_ZERO;
187 swizzles[3] = SWIZZLE_ONE;
188 break;
189 }
190 }
191
192 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
193
194 /* If the texture's format is alpha-only, force R, G, and B to
195 * 0.0. Similarly, if the texture's format has no alpha channel,
196 * force the alpha value read to 1.0. This allows for the
197 * implementation to use an RGBA texture for any of these formats
198 * without leaking any unexpected values.
199 */
200 switch (img->_BaseFormat) {
201 case GL_ALPHA:
202 swizzles[0] = SWIZZLE_ZERO;
203 swizzles[1] = SWIZZLE_ZERO;
204 swizzles[2] = SWIZZLE_ZERO;
205 break;
206 case GL_LUMINANCE:
207 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
208 swizzles[0] = SWIZZLE_X;
209 swizzles[1] = SWIZZLE_X;
210 swizzles[2] = SWIZZLE_X;
211 swizzles[3] = SWIZZLE_ONE;
212 }
213 break;
214 case GL_LUMINANCE_ALPHA:
215 if (datatype == GL_SIGNED_NORMALIZED) {
216 swizzles[0] = SWIZZLE_X;
217 swizzles[1] = SWIZZLE_X;
218 swizzles[2] = SWIZZLE_X;
219 swizzles[3] = SWIZZLE_W;
220 }
221 break;
222 case GL_INTENSITY:
223 if (datatype == GL_SIGNED_NORMALIZED) {
224 swizzles[0] = SWIZZLE_X;
225 swizzles[1] = SWIZZLE_X;
226 swizzles[2] = SWIZZLE_X;
227 swizzles[3] = SWIZZLE_X;
228 }
229 break;
230 case GL_RED:
231 case GL_RG:
232 case GL_RGB:
233 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
234 swizzles[3] = SWIZZLE_ONE;
235 break;
236 }
237
238 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
239 swizzles[GET_SWZ(t->_Swizzle, 1)],
240 swizzles[GET_SWZ(t->_Swizzle, 2)],
241 swizzles[GET_SWZ(t->_Swizzle, 3)]);
242 }
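/* Worked example with hypothetical values: for a depth texture with
 * DepthMode GL_RED, the table above becomes {X, ZERO, ZERO, ONE}.  If the
 * application additionally sets a texture swizzle of (ALPHA, ALPHA, ALPHA,
 * RED), the composition performed by MAKE_SWIZZLE4() yields
 * (swizzles[W], swizzles[W], swizzles[W], swizzles[X]) = (ONE, ONE, ONE, X).
 */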
243
244 static void
245 gen4_emit_buffer_surface_state(struct brw_context *brw,
246 uint32_t *out_offset,
247 drm_intel_bo *bo,
248 unsigned buffer_offset,
249 unsigned surface_format,
250 unsigned buffer_size,
251 unsigned pitch,
252 bool rw)
253 {
254 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
255 6 * 4, 32, out_offset);
256 memset(surf, 0, 6 * 4);
257
258 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
259 surface_format << BRW_SURFACE_FORMAT_SHIFT |
260 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
261 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
262 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
263 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
264 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
265 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
266
267 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
268 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
269 * physical cache. It is mapped in hardware to the sampler cache."
270 */
271 if (bo) {
272 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
273 bo, buffer_offset,
274 I915_GEM_DOMAIN_SAMPLER,
275 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
276 }
277 }
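/* Worked example with hypothetical values: for buffer_size == 100000, the
 * packing above splits the value across the three fields as
 * width = 100000 & 0x7f = 32, height = (100000 >> 7) & 0x1fff = 781 and
 * depth = (100000 >> 20) & 0x7f = 0, i.e. 0 * 2^20 + 781 * 128 + 32 = 100000.
 */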
278
279 void
280 brw_update_buffer_texture_surface(struct gl_context *ctx,
281 unsigned unit,
282 uint32_t *surf_offset)
283 {
284 struct brw_context *brw = brw_context(ctx);
285 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
286 struct intel_buffer_object *intel_obj =
287 intel_buffer_object(tObj->BufferObject);
288 uint32_t size = tObj->BufferSize;
289 drm_intel_bo *bo = NULL;
290 mesa_format format = tObj->_BufferObjectFormat;
291 uint32_t brw_format = brw_format_for_mesa_format(format);
292 int texel_size = _mesa_get_format_bytes(format);
293
294 if (intel_obj) {
295 size = MIN2(size, intel_obj->Base.Size);
296 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
297 }
298
299 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
300 _mesa_problem(NULL, "bad format %s for texture buffer\n",
301 _mesa_get_format_name(format));
302 }
303
304 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
305 tObj->BufferOffset,
306 brw_format,
307 size / texel_size,
308 texel_size,
309 false /* rw */);
310 }
311
312 static void
313 brw_update_texture_surface(struct gl_context *ctx,
314 unsigned unit,
315 uint32_t *surf_offset,
316 bool for_gather)
317 {
318 struct brw_context *brw = brw_context(ctx);
319 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
320 struct intel_texture_object *intelObj = intel_texture_object(tObj);
321 struct intel_mipmap_tree *mt = intelObj->mt;
322 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
323 uint32_t *surf;
324
325 /* BRW_NEW_TEXTURE_BUFFER */
326 if (tObj->Target == GL_TEXTURE_BUFFER) {
327 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
328 return;
329 }
330
331 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
332 6 * 4, 32, surf_offset);
333
334 uint32_t tex_format = translate_tex_format(brw, mt->format,
335 sampler->sRGBDecode);
336
337 if (for_gather) {
338 /* Sandybridge's gather4 message is broken for integer formats.
339 * To work around this, we pretend the surface is UNORM for
340 * 8 or 16-bit formats, and emit shader instructions to recover
341 * the real INT/UINT value. For 32-bit formats, we pretend
342 * the surface is FLOAT, and simply reinterpret the resulting
343 * bits.
344 */
345 switch (tex_format) {
346 case BRW_SURFACEFORMAT_R8_SINT:
347 case BRW_SURFACEFORMAT_R8_UINT:
348 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
349 break;
350
351 case BRW_SURFACEFORMAT_R16_SINT:
352 case BRW_SURFACEFORMAT_R16_UINT:
353 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
354 break;
355
356 case BRW_SURFACEFORMAT_R32_SINT:
357 case BRW_SURFACEFORMAT_R32_UINT:
358 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
359 break;
360
361 default:
362 break;
363 }
364 }
365
366 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
367 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
368 BRW_SURFACE_CUBEFACE_ENABLES |
369 tex_format << BRW_SURFACE_FORMAT_SHIFT);
370
371 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
372
373 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
374 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
375 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
376
377 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
378 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
379 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
380
381 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
382 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
383
384 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
385
386 /* Emit relocation to surface contents */
387 drm_intel_bo_emit_reloc(brw->batch.bo,
388 *surf_offset + 4,
389 mt->bo,
390 surf[1] - mt->bo->offset64,
391 I915_GEM_DOMAIN_SAMPLER, 0);
392 }
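/* Worked example with hypothetical values: for a 1024x768 2D texture with
 * BaseLevel == 0 and intelObj->_MaxLevel == 10, surf[2] above packs
 * LOD = 10 - 0 = 10, width = 1024 - 1 = 1023 and height = 768 - 1 = 767 into
 * their respective fields.
 */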
393
394 /**
395 * Create the constant buffer surface. Vertex/fragment shader constants will be
396 * read from this buffer with Data Port Read instructions/messages.
397 */
398 void
399 brw_create_constant_surface(struct brw_context *brw,
400 drm_intel_bo *bo,
401 uint32_t offset,
402 uint32_t size,
403 uint32_t *out_offset,
404 bool dword_pitch)
405 {
406 uint32_t stride = dword_pitch ? 4 : 16;
407 uint32_t elements = ALIGN(size, stride) / stride;
408
409 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
410 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
411 elements, stride, false);
412 }
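/* Worked example with hypothetical values: a 300-byte constant buffer with
 * dword_pitch == false uses a stride of 16 bytes (one R32G32B32A32_FLOAT
 * element), so elements = ALIGN(300, 16) / 16 = 304 / 16 = 19 surface
 * elements.
 */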
413
414 /**
415 * Create the buffer surface. Shader buffer variables will be
416 * read from / written to this buffer with Data Port Read/Write
417 * instructions/messages.
418 */
419 void
420 brw_create_buffer_surface(struct brw_context *brw,
421 drm_intel_bo *bo,
422 uint32_t offset,
423 uint32_t size,
424 uint32_t *out_offset,
425 bool dword_pitch)
426 {
427 /* Use a raw surface so we can reuse existing untyped read/write/atomic
428 * messages. We need these specifically for the fragment shader, since they
429 * include a pixel mask header that we need in order to ensure correct
430 * behavior with helper invocations, which cannot write to the buffer.
431 */
432 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
433 BRW_SURFACEFORMAT_RAW,
434 size, 1, true);
435 }
436
437 /**
438 * Set up a binding table entry for use by stream output logic (transform
439 * feedback).
440 *
441 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
442 */
443 void
444 brw_update_sol_surface(struct brw_context *brw,
445 struct gl_buffer_object *buffer_obj,
446 uint32_t *out_offset, unsigned num_vector_components,
447 unsigned stride_dwords, unsigned offset_dwords)
448 {
449 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
450 uint32_t offset_bytes = 4 * offset_dwords;
451 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
452 offset_bytes,
453 buffer_obj->Size - offset_bytes);
454 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
455 out_offset);
456 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
457 size_t size_dwords = buffer_obj->Size / 4;
458 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
459
460 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
461 * too big to map using a single binding table entry?
462 */
463 assert((size_dwords - offset_dwords) / stride_dwords
464 <= BRW_MAX_NUM_BUFFER_ENTRIES);
465
466 if (size_dwords > offset_dwords + num_vector_components) {
467 /* There is room for at least 1 transform feedback output in the buffer.
468 * Compute the number of additional transform feedback outputs the
469 * buffer has room for.
470 */
471 buffer_size_minus_1 =
472 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
473 } else {
474 /* There isn't even room for a single transform feedback output in the
475 * buffer. We can't configure the binding table entry to prevent output
476 * entirely; we'll have to rely on the geometry shader to detect
477 * overflow. But to minimize the damage in case of a bug, set up the
478 * binding table entry to just allow a single output.
479 */
480 buffer_size_minus_1 = 0;
481 }
482 width = buffer_size_minus_1 & 0x7f;
483 height = (buffer_size_minus_1 & 0xfff80) >> 7;
484 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
485
486 switch (num_vector_components) {
487 case 1:
488 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
489 break;
490 case 2:
491 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
492 break;
493 case 3:
494 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
495 break;
496 case 4:
497 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
498 break;
499 default:
500 unreachable("Invalid vector size for transform feedback output");
501 }
502
503 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
504 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
505 surface_format << BRW_SURFACE_FORMAT_SHIFT |
506 BRW_SURFACE_RC_READ_WRITE;
507 surf[1] = bo->offset64 + offset_bytes; /* reloc */
508 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
509 height << BRW_SURFACE_HEIGHT_SHIFT);
510 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
511 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
512 surf[4] = 0;
513 surf[5] = 0;
514
515 /* Emit relocation to surface contents. */
516 drm_intel_bo_emit_reloc(brw->batch.bo,
517 *out_offset + 4,
518 bo, offset_bytes,
519 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
520 }
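/* Worked example with hypothetical values: for a 4096-byte transform
 * feedback buffer (size_dwords == 1024) with offset_dwords == 0,
 * num_vector_components == 4 and stride_dwords == 16, there is room for at
 * least one output, so buffer_size_minus_1 = (1024 - 0 - 4) / 16 = 63, i.e.
 * room for one output plus 63 additional ones; width = 63, height = 0 and
 * depth = 0 in the fields below.
 */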
521
522 /* Creates a new WM constant buffer reflecting the current fragment program's
523 * constants, if needed by the fragment program.
524 *
525 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
526 * state atom.
527 */
528 static void
529 brw_upload_wm_pull_constants(struct brw_context *brw)
530 {
531 struct brw_stage_state *stage_state = &brw->wm.base;
532 /* BRW_NEW_FRAGMENT_PROGRAM */
533 struct brw_fragment_program *fp =
534 (struct brw_fragment_program *) brw->fragment_program;
535 /* BRW_NEW_FS_PROG_DATA */
536 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
537
538 /* _NEW_PROGRAM_CONSTANTS */
539 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
540 stage_state, prog_data, true);
541 }
542
543 const struct brw_tracked_state brw_wm_pull_constants = {
544 .dirty = {
545 .mesa = _NEW_PROGRAM_CONSTANTS,
546 .brw = BRW_NEW_BATCH |
547 BRW_NEW_FRAGMENT_PROGRAM |
548 BRW_NEW_FS_PROG_DATA,
549 },
550 .emit = brw_upload_wm_pull_constants,
551 };
552
553 /**
554 * Creates a null renderbuffer surface.
555 *
556 * This is used when the shader doesn't write to any color output. An FB
557 * write to target 0 will still be emitted, because that's how the thread is
558 * terminated (and computed depth is returned), so we need to have the
559 * hardware discard the target 0 color output.
560 */
561 static void
562 brw_emit_null_surface_state(struct brw_context *brw,
563 unsigned width,
564 unsigned height,
565 unsigned samples,
566 uint32_t *out_offset)
567 {
568 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
569 * Notes):
570 *
571 * A null surface will be used in instances where an actual surface is
572 * not bound. When a write message is generated to a null surface, no
573 * actual surface is written to. When a read message (including any
574 * sampling engine message) is generated to a null surface, the result
575 * is all zeros. Note that a null surface type is allowed to be used
576 * with all messages, even if it is not specifically indicated as
577 * supported. All of the remaining fields in surface state are ignored
578 * for null surfaces, with the following exceptions:
579 *
580 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
581 * depth buffer’s corresponding state for all render target surfaces,
582 * including null.
583 *
584 * - Surface Format must be R8G8B8A8_UNORM.
585 */
586 unsigned surface_type = BRW_SURFACE_NULL;
587 drm_intel_bo *bo = NULL;
588 unsigned pitch_minus_1 = 0;
589 uint32_t multisampling_state = 0;
590 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
591 out_offset);
592
593 if (samples > 1) {
594 /* On Gen6, null render targets seem to cause GPU hangs when
595 * multisampling. So work around this problem by rendering into a dummy
596 * color buffer.
597 *
598 * To decrease the amount of memory needed by the workaround buffer, we
599 * set its pitch to 128 bytes (the width of a Y tile). This means that
600 * the amount of memory needed for the workaround buffer is
601 * (width_in_tiles + height_in_tiles - 1) tiles.
602 *
603 * Note that since the workaround buffer will be interpreted by the
604 * hardware as an interleaved multisampled buffer, we need to compute
605 * width_in_tiles and height_in_tiles by dividing the width and height
606 * by 16 rather than the normal Y-tile size of 32.
607 */
608 unsigned width_in_tiles = ALIGN(width, 16) / 16;
609 unsigned height_in_tiles = ALIGN(height, 16) / 16;
610 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
611 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
612 size_needed);
613 bo = brw->wm.multisampled_null_render_target_bo;
614 surface_type = BRW_SURFACE_2D;
615 pitch_minus_1 = 127;
616 multisampling_state = brw_get_surface_num_multisamples(samples);
617 }
618
619 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
620 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
621 if (brw->gen < 6) {
622 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
623 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
626 }
627 surf[1] = bo ? bo->offset64 : 0;
628 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
629 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
630
631 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
632 * Notes):
633 *
634 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
635 */
636 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
637 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
638 surf[4] = multisampling_state;
639 surf[5] = 0;
640
641 if (bo) {
642 drm_intel_bo_emit_reloc(brw->batch.bo,
643 *out_offset + 4,
644 bo, 0,
645 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
646 }
647 }
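/* Worked example with hypothetical values: for a 1920x1080 multisampled
 * framebuffer, the workaround buffer above needs
 * width_in_tiles = ALIGN(1920, 16) / 16 = 120 and
 * height_in_tiles = ALIGN(1080, 16) / 16 = 68 tiles, so
 * size_needed = (120 + 68 - 1) * 4096 = 765952 bytes (748 KB).
 */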
648
649 /**
650 * Sets up a surface state structure to point at the given region.
651 * While it is only used for the front/back buffer currently, it should be
652 * usable for further buffers when doing ARB_draw_buffers support.
653 */
654 static uint32_t
655 brw_update_renderbuffer_surface(struct brw_context *brw,
656 struct gl_renderbuffer *rb,
657 bool layered, unsigned unit,
658 uint32_t surf_index)
659 {
660 struct gl_context *ctx = &brw->ctx;
661 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
662 struct intel_mipmap_tree *mt = irb->mt;
663 uint32_t *surf;
664 uint32_t tile_x, tile_y;
665 uint32_t format = 0;
666 uint32_t offset;
667 /* _NEW_BUFFERS */
668 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
669 /* BRW_NEW_FS_PROG_DATA */
670
671 assert(!layered);
672
673 if (rb->TexImage && !brw->has_surface_tile_offset) {
674 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
675
676 if (tile_x != 0 || tile_y != 0) {
677 /* Original gen4 hardware couldn't draw to a non-tile-aligned
678 * destination in a miptree unless you actually setup your renderbuffer
679 * as a miptree and used the fragile lod/array_index/etc. controls to
680 * select the image. So, instead, we just make a new single-level
681 * miptree and render into that.
682 */
683 intel_renderbuffer_move_to_temp(brw, irb, false);
684 mt = irb->mt;
685 }
686 }
687
688 intel_miptree_used_for_rendering(irb->mt);
689
690 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
691
692 format = brw->render_target_format[rb_format];
693 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
694 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
695 __func__, _mesa_get_format_name(rb_format));
696 }
697
698 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
699 format << BRW_SURFACE_FORMAT_SHIFT);
700
701 /* reloc */
702 assert(mt->offset % mt->cpp == 0);
703 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
704 mt->bo->offset64 + mt->offset);
705
706 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
707 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
708
709 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
710 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
711
712 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
713
714 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
715 /* Note that the low bits of these fields are missing, so
716 * there's the possibility of getting in trouble.
717 */
718 assert(tile_x % 4 == 0);
719 assert(tile_y % 2 == 0);
720 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
721 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
722 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
723
724 if (brw->gen < 6) {
725 /* _NEW_COLOR */
726 if (!ctx->Color.ColorLogicOpEnabled &&
727 (ctx->Color.BlendEnabled & (1 << unit)))
728 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
729
730 if (!ctx->Color.ColorMask[unit][0])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
732 if (!ctx->Color.ColorMask[unit][1])
733 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
734 if (!ctx->Color.ColorMask[unit][2])
735 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
736
737 /* Disable writes to the alpha component when the
738 * renderbuffer is XRGB.
739 */
740 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
741 !ctx->Color.ColorMask[unit][3]) {
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
743 }
744 }
745
746 drm_intel_bo_emit_reloc(brw->batch.bo,
747 offset + 4,
748 mt->bo,
749 surf[1] - mt->bo->offset64,
750 I915_GEM_DOMAIN_RENDER,
751 I915_GEM_DOMAIN_RENDER);
752
753 return offset;
754 }
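/* Worked example with hypothetical values: for tile offsets tile_x == 28 and
 * tile_y == 10 (both satisfy the alignment asserts above), surf[5] stores
 * 28 / 4 = 7 in the X offset field and 10 / 2 = 5 in the Y offset field.
 */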
755
756 /**
757 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
758 */
759 void
760 brw_update_renderbuffer_surfaces(struct brw_context *brw,
761 const struct gl_framebuffer *fb,
762 uint32_t render_target_start,
763 uint32_t *surf_offset)
764 {
765 GLuint i;
766 const unsigned int w = _mesa_geometric_width(fb);
767 const unsigned int h = _mesa_geometric_height(fb);
768 const unsigned int s = _mesa_geometric_samples(fb);
769
770 /* Update surfaces for drawing buffers */
771 if (fb->_NumColorDrawBuffers >= 1) {
772 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
773 const uint32_t surf_index = render_target_start + i;
774
775 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
776 surf_offset[surf_index] =
777 brw->vtbl.update_renderbuffer_surface(
778 brw, fb->_ColorDrawBuffers[i],
779 _mesa_geometric_layers(fb) > 0, i, surf_index);
780 } else {
781 brw->vtbl.emit_null_surface_state(brw, w, h, s,
782 &surf_offset[surf_index]);
783 }
784 }
785 } else {
786 const uint32_t surf_index = render_target_start;
787 brw->vtbl.emit_null_surface_state(brw, w, h, s,
788 &surf_offset[surf_index]);
789 }
790 }
791
792 static void
793 update_renderbuffer_surfaces(struct brw_context *brw)
794 {
795 const struct gl_context *ctx = &brw->ctx;
796
797 /* _NEW_BUFFERS | _NEW_COLOR */
798 const struct gl_framebuffer *fb = ctx->DrawBuffer;
799 brw_update_renderbuffer_surfaces(
800 brw, fb,
801 brw->wm.prog_data->binding_table.render_target_start,
802 brw->wm.base.surf_offset);
803 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_renderbuffer_surfaces = {
807 .dirty = {
808 .mesa = _NEW_BUFFERS |
809 _NEW_COLOR,
810 .brw = BRW_NEW_BATCH |
811 BRW_NEW_FS_PROG_DATA,
812 },
813 .emit = update_renderbuffer_surfaces,
814 };
815
816 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
817 .dirty = {
818 .mesa = _NEW_BUFFERS,
819 .brw = BRW_NEW_BATCH,
820 },
821 .emit = update_renderbuffer_surfaces,
822 };
823
824
825 static void
826 update_stage_texture_surfaces(struct brw_context *brw,
827 const struct gl_program *prog,
828 struct brw_stage_state *stage_state,
829 bool for_gather)
830 {
831 if (!prog)
832 return;
833
834 struct gl_context *ctx = &brw->ctx;
835
836 uint32_t *surf_offset = stage_state->surf_offset;
837
838 /* BRW_NEW_*_PROG_DATA */
839 if (for_gather)
840 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
841 else
842 surf_offset += stage_state->prog_data->binding_table.texture_start;
843
844 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
845 for (unsigned s = 0; s < num_samplers; s++) {
846 surf_offset[s] = 0;
847
848 if (prog->SamplersUsed & (1 << s)) {
849 const unsigned unit = prog->SamplerUnits[s];
850
851 /* _NEW_TEXTURE */
852 if (ctx->Texture.Unit[unit]._Current) {
853 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
854 }
855 }
856 }
857 }
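/* Worked example with hypothetical values (assuming _mesa_fls() returns the
 * 1-based index of the most significant set bit): if prog->SamplersUsed ==
 * 0xb (binary 1011), the loop above runs for samplers 0..3, emitting surface
 * state for samplers 0, 1 and 3 and leaving surf_offset[2] at 0.
 */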
858
859
860 /**
861 * Construct SURFACE_STATE objects for enabled textures.
862 */
863 static void
864 brw_update_texture_surfaces(struct brw_context *brw)
865 {
866 /* BRW_NEW_VERTEX_PROGRAM */
867 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
881 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
882 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
883
884 /* Emit an alternate set of surface state for gather. This
885 * allows the surface format to be overridden for only the
886 * gather4 messages. */
887 if (brw->gen < 8) {
888 if (vs && vs->UsesGather)
889 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
890 if (gs && gs->UsesGather)
891 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
892 if (fs && fs->UsesGather)
893 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
894 if (cs && cs->UsesGather)
895 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
896 }
897
898 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
899 }
900
901 const struct brw_tracked_state brw_texture_surfaces = {
902 .dirty = {
903 .mesa = _NEW_TEXTURE,
904 .brw = BRW_NEW_BATCH |
905 BRW_NEW_COMPUTE_PROGRAM |
906 BRW_NEW_FRAGMENT_PROGRAM |
907 BRW_NEW_FS_PROG_DATA |
908 BRW_NEW_GEOMETRY_PROGRAM |
909 BRW_NEW_GS_PROG_DATA |
910 BRW_NEW_TEXTURE_BUFFER |
911 BRW_NEW_VERTEX_PROGRAM |
912 BRW_NEW_VS_PROG_DATA,
913 },
914 .emit = brw_update_texture_surfaces,
915 };
916
917 void
918 brw_upload_ubo_surfaces(struct brw_context *brw,
919 struct gl_shader *shader,
920 struct brw_stage_state *stage_state,
921 struct brw_stage_prog_data *prog_data,
922 bool dword_pitch)
923 {
924 struct gl_context *ctx = &brw->ctx;
925
926 if (!shader)
927 return;
928
929 uint32_t *surf_offsets =
930 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
931
932 for (int i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
933 struct intel_buffer_object *intel_bo;
934
935 /* Because behavior for referencing outside of the binding's size in the
936 * glBindBufferRange case is undefined, we can just bind the whole buffer,
937 * as glBindBufferBase wants, and still be a correct implementation.
938 */
939 if (!shader->BufferInterfaceBlocks[i].IsShaderStorage) {
940 struct gl_uniform_buffer_binding *binding;
941 binding =
942 &ctx->UniformBufferBindings[shader->BufferInterfaceBlocks[i].Binding];
943 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
944 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
945 } else {
946 intel_bo = intel_buffer_object(binding->BufferObject);
947 drm_intel_bo *bo =
948 intel_bufferobj_buffer(brw, intel_bo,
949 binding->Offset,
950 binding->BufferObject->Size - binding->Offset);
951 brw_create_constant_surface(brw, bo, binding->Offset,
952 binding->BufferObject->Size - binding->Offset,
953 &surf_offsets[i],
954 dword_pitch);
955 }
956 } else {
957 struct gl_shader_storage_buffer_binding *binding;
958 binding =
959 &ctx->ShaderStorageBufferBindings[shader->BufferInterfaceBlocks[i].Binding];
960 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
961 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]);
962 } else {
963 intel_bo = intel_buffer_object(binding->BufferObject);
964 drm_intel_bo *bo =
965 intel_bufferobj_buffer(brw, intel_bo,
966 binding->Offset,
967 binding->BufferObject->Size - binding->Offset);
968 brw_create_buffer_surface(brw, bo, binding->Offset,
969 binding->BufferObject->Size - binding->Offset,
970 &surf_offsets[i],
971 dword_pitch);
972 }
973 }
974 }
975
976 if (shader->NumBufferInterfaceBlocks)
977 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
978 }
979
980 static void
981 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
982 {
983 struct gl_context *ctx = &brw->ctx;
984 /* _NEW_PROGRAM */
985 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
986
987 if (!prog)
988 return;
989
990 /* BRW_NEW_FS_PROG_DATA */
991 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
992 &brw->wm.base, &brw->wm.prog_data->base, true);
993 }
994
995 const struct brw_tracked_state brw_wm_ubo_surfaces = {
996 .dirty = {
997 .mesa = _NEW_PROGRAM,
998 .brw = BRW_NEW_BATCH |
999 BRW_NEW_FS_PROG_DATA |
1000 BRW_NEW_UNIFORM_BUFFER,
1001 },
1002 .emit = brw_upload_wm_ubo_surfaces,
1003 };
1004
1005 static void
1006 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1007 {
1008 struct gl_context *ctx = &brw->ctx;
1009 /* _NEW_PROGRAM */
1010 struct gl_shader_program *prog =
1011 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1012
1013 if (!prog)
1014 return;
1015
1016 /* BRW_NEW_CS_PROG_DATA */
1017 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1018 &brw->cs.base, &brw->cs.prog_data->base, true);
1019 }
1020
1021 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1022 .dirty = {
1023 .mesa = _NEW_PROGRAM,
1024 .brw = BRW_NEW_BATCH |
1025 BRW_NEW_CS_PROG_DATA |
1026 BRW_NEW_UNIFORM_BUFFER,
1027 },
1028 .emit = brw_upload_cs_ubo_surfaces,
1029 };
1030
1031 void
1032 brw_upload_abo_surfaces(struct brw_context *brw,
1033 struct gl_shader_program *prog,
1034 struct brw_stage_state *stage_state,
1035 struct brw_stage_prog_data *prog_data)
1036 {
1037 struct gl_context *ctx = &brw->ctx;
1038 uint32_t *surf_offsets =
1039 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1040
1041 for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
1042 struct gl_atomic_buffer_binding *binding =
1043 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
1044 struct intel_buffer_object *intel_bo =
1045 intel_buffer_object(binding->BufferObject);
1046 drm_intel_bo *bo = intel_bufferobj_buffer(
1047 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1048
1049 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1050 binding->Offset, BRW_SURFACEFORMAT_RAW,
1051 bo->size - binding->Offset, 1, true);
1052 }
1053
1054 if (prog->NumAtomicBuffers)
1055 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1056 }
1057
1058 static void
1059 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1060 {
1061 struct gl_context *ctx = &brw->ctx;
1062 /* _NEW_PROGRAM */
1063 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1064
1065 if (prog) {
1066 /* BRW_NEW_FS_PROG_DATA */
1067 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
1068 &brw->wm.prog_data->base);
1069 }
1070 }
1071
1072 const struct brw_tracked_state brw_wm_abo_surfaces = {
1073 .dirty = {
1074 .mesa = _NEW_PROGRAM,
1075 .brw = BRW_NEW_ATOMIC_BUFFER |
1076 BRW_NEW_BATCH |
1077 BRW_NEW_FS_PROG_DATA,
1078 },
1079 .emit = brw_upload_wm_abo_surfaces,
1080 };
1081
1082 static void
1083 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1084 {
1085 struct gl_context *ctx = &brw->ctx;
1086 /* _NEW_PROGRAM */
1087 struct gl_shader_program *prog =
1088 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1089
1090 if (prog) {
1091 /* BRW_NEW_CS_PROG_DATA */
1092 brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1093 &brw->cs.prog_data->base);
1094 }
1095 }
1096
1097 const struct brw_tracked_state brw_cs_abo_surfaces = {
1098 .dirty = {
1099 .mesa = _NEW_PROGRAM,
1100 .brw = BRW_NEW_ATOMIC_BUFFER |
1101 BRW_NEW_BATCH |
1102 BRW_NEW_CS_PROG_DATA,
1103 },
1104 .emit = brw_upload_cs_abo_surfaces,
1105 };
1106
1107 static void
1108 brw_upload_cs_image_surfaces(struct brw_context *brw)
1109 {
1110 struct gl_context *ctx = &brw->ctx;
1111 /* _NEW_PROGRAM */
1112 struct gl_shader_program *prog =
1113 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1114
1115 if (prog) {
1116 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1117 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1118 &brw->cs.base, &brw->cs.prog_data->base);
1119 }
1120 }
1121
1122 const struct brw_tracked_state brw_cs_image_surfaces = {
1123 .dirty = {
1124 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1125 .brw = BRW_NEW_BATCH |
1126 BRW_NEW_CS_PROG_DATA |
1127 BRW_NEW_IMAGE_UNITS
1128 },
1129 .emit = brw_upload_cs_image_surfaces,
1130 };
1131
1132 static uint32_t
1133 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1134 {
1135 if (access == GL_WRITE_ONLY) {
1136 return brw_format_for_mesa_format(format);
1137 } else {
1138 /* Typed surface reads support a very limited subset of the shader
1139 * image formats. Translate it into the closest format the
1140 * hardware supports.
1141 */
1142 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1143 (_mesa_get_format_bytes(format) >= 8 &&
1144 (brw->gen == 7 && !brw->is_haswell)))
1145 return BRW_SURFACEFORMAT_RAW;
1146 else
1147 return brw_format_for_mesa_format(
1148 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1149 }
1150 }
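/* Worked example with hypothetical values: a GL_READ_WRITE image with a
 * 16-byte-per-texel format (e.g. MESA_FORMAT_RGBA_FLOAT32) falls back to
 * BRW_SURFACEFORMAT_RAW on gen8 and earlier, while a GL_WRITE_ONLY image of
 * the same format keeps the format returned by brw_format_for_mesa_format().
 */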
1151
1152 static void
1153 update_default_image_param(struct brw_context *brw,
1154 struct gl_image_unit *u,
1155 unsigned surface_idx,
1156 struct brw_image_param *param)
1157 {
1158 memset(param, 0, sizeof(*param));
1159 param->surface_idx = surface_idx;
1160 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1161 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1162 * detailed explanation of these parameters.
1163 */
1164 param->swizzling[0] = 0xff;
1165 param->swizzling[1] = 0xff;
1166 }
1167
1168 static void
1169 update_buffer_image_param(struct brw_context *brw,
1170 struct gl_image_unit *u,
1171 unsigned surface_idx,
1172 struct brw_image_param *param)
1173 {
1174 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1175
1176 update_default_image_param(brw, u, surface_idx, param);
1177
1178 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1179 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1180 }
1181
1182 static void
1183 update_texture_image_param(struct brw_context *brw,
1184 struct gl_image_unit *u,
1185 unsigned surface_idx,
1186 struct brw_image_param *param)
1187 {
1188 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1189
1190 update_default_image_param(brw, u, surface_idx, param);
1191
1192 param->size[0] = minify(mt->logical_width0, u->Level);
1193 param->size[1] = minify(mt->logical_height0, u->Level);
1194 param->size[2] = (!u->Layered ? 1 :
1195 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1196 u->TexObj->Target == GL_TEXTURE_3D ?
1197 minify(mt->logical_depth0, u->Level) :
1198 mt->logical_depth0);
1199
1200 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1201 &param->offset[0],
1202 &param->offset[1]);
1203
1204 param->stride[0] = mt->cpp;
1205 param->stride[1] = mt->pitch / mt->cpp;
1206 param->stride[2] =
1207 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1208 param->stride[3] =
1209 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1210
1211 if (mt->tiling == I915_TILING_X) {
1212 /* An X tile is a rectangular block of 512x8 bytes. */
1213 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1214 param->tiling[1] = _mesa_logbase2(8);
1215
1216 if (brw->has_swizzling) {
1217 /* Right shifts required to swizzle bits 9 and 10 of the memory
1218 * address with bit 6.
1219 */
1220 param->swizzling[0] = 3;
1221 param->swizzling[1] = 4;
1222 }
1223 } else if (mt->tiling == I915_TILING_Y) {
1224 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1225 * different from the layout of an X-tiled surface; we simply pretend that
1226 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1227 * one arranged in X-major order, just as is the case for X-tiling.
1228 */
1229 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1230 param->tiling[1] = _mesa_logbase2(32);
1231
1232 if (brw->has_swizzling) {
1233 /* Right shift required to swizzle bit 9 of the memory address with
1234 * bit 6.
1235 */
1236 param->swizzling[0] = 3;
1237 }
1238 }
1239
1240 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1241 * address calculation algorithm (emit_address_calculation() in
1242 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1243 * modulus equal to the LOD.
1244 */
1245 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1246 0);
1247 }
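/* Worked example with hypothetical values: for a 4-byte-per-texel X-tiled
 * surface (mt->cpp == 4), the code above sets tiling[0] = log2(512 / 4) = 7
 * and tiling[1] = log2(8) = 3; for Y-tiling it sets
 * tiling[0] = log2(16 / 4) = 2 and tiling[1] = log2(32) = 5.
 */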
1248
1249 static void
1250 update_image_surface(struct brw_context *brw,
1251 struct gl_image_unit *u,
1252 GLenum access,
1253 unsigned surface_idx,
1254 uint32_t *surf_offset,
1255 struct brw_image_param *param)
1256 {
1257 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1258 struct gl_texture_object *obj = u->TexObj;
1259 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1260
1261 if (obj->Target == GL_TEXTURE_BUFFER) {
1262 struct intel_buffer_object *intel_obj =
1263 intel_buffer_object(obj->BufferObject);
1264 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1265 _mesa_get_format_bytes(u->_ActualFormat));
1266
1267 brw->vtbl.emit_buffer_surface_state(
1268 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1269 format, intel_obj->Base.Size / texel_size, texel_size,
1270 access != GL_READ_ONLY);
1271
1272 update_buffer_image_param(brw, u, surface_idx, param);
1273
1274 } else {
1275 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1276 struct intel_mipmap_tree *mt = intel_obj->mt;
1277
1278 if (format == BRW_SURFACEFORMAT_RAW) {
1279 brw->vtbl.emit_buffer_surface_state(
1280 brw, surf_offset, mt->bo, mt->offset,
1281 format, mt->bo->size - mt->offset, 1 /* pitch */,
1282 access != GL_READ_ONLY);
1283
1284 } else {
1285 const unsigned min_layer = obj->MinLayer + u->_Layer;
1286 const unsigned min_level = obj->MinLevel + u->Level;
1287 const unsigned num_layers = (!u->Layered ? 1 :
1288 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1289 mt->logical_depth0);
1290 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1291 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1292 GL_TEXTURE_2D_ARRAY : obj->Target);
1293
1294 brw->vtbl.emit_texture_surface_state(
1295 brw, mt, target,
1296 min_layer, min_layer + num_layers,
1297 min_level, min_level + 1,
1298 format, SWIZZLE_XYZW,
1299 surf_offset, access != GL_READ_ONLY, false);
1300 }
1301
1302 update_texture_image_param(brw, u, surface_idx, param);
1303 }
1304
1305 } else {
1306 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1307 update_default_image_param(brw, u, surface_idx, param);
1308 }
1309 }
1310
1311 void
1312 brw_upload_image_surfaces(struct brw_context *brw,
1313 struct gl_shader *shader,
1314 struct brw_stage_state *stage_state,
1315 struct brw_stage_prog_data *prog_data)
1316 {
1317 struct gl_context *ctx = &brw->ctx;
1318
1319 if (shader && shader->NumImages) {
1320 for (unsigned i = 0; i < shader->NumImages; i++) {
1321 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1322 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1323
1324 update_image_surface(brw, u, shader->ImageAccess[i],
1325 surf_idx,
1326 &stage_state->surf_offset[surf_idx],
1327 &prog_data->image_param[i]);
1328 }
1329
1330 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1331 }
1332 }
1333
1334 static void
1335 brw_upload_wm_image_surfaces(struct brw_context *brw)
1336 {
1337 struct gl_context *ctx = &brw->ctx;
1338 /* BRW_NEW_FRAGMENT_PROGRAM */
1339 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1340
1341 if (prog) {
1342 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1343 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1344 &brw->wm.base, &brw->wm.prog_data->base);
1345 }
1346 }
1347
1348 const struct brw_tracked_state brw_wm_image_surfaces = {
1349 .dirty = {
1350 .mesa = _NEW_TEXTURE,
1351 .brw = BRW_NEW_BATCH |
1352 BRW_NEW_FRAGMENT_PROGRAM |
1353 BRW_NEW_FS_PROG_DATA |
1354 BRW_NEW_IMAGE_UNITS
1355 },
1356 .emit = brw_upload_wm_image_surfaces,
1357 };
1358
1359 void
1360 gen4_init_vtable_surface_functions(struct brw_context *brw)
1361 {
1362 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1363 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1364 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1365 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1366 }
1367
1368 static void
1369 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1370 {
1371 struct gl_context *ctx = &brw->ctx;
1372 /* _NEW_PROGRAM */
1373 struct gl_shader_program *prog =
1374 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1375
1376 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1377 const unsigned surf_idx =
1378 brw->cs.prog_data->binding_table.work_groups_start;
1379 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1380 drm_intel_bo *bo;
1381 uint32_t bo_offset;
1382
1383 if (brw->compute.num_work_groups_bo == NULL) {
1384 bo = NULL;
1385 intel_upload_data(brw,
1386 (void *)brw->compute.num_work_groups,
1387 3 * sizeof(GLuint),
1388 sizeof(GLuint),
1389 &bo,
1390 &bo_offset);
1391 } else {
1392 bo = brw->compute.num_work_groups_bo;
1393 bo_offset = brw->compute.num_work_groups_offset;
1394 }
1395
1396 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1397 bo, bo_offset,
1398 BRW_SURFACEFORMAT_RAW,
1399 3 * sizeof(GLuint), 1, true);
1400 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1401 }
1402 }
1403
1404 const struct brw_tracked_state brw_cs_work_groups_surface = {
1405 .dirty = {
1406 .brw = BRW_NEW_CS_WORK_GROUPS
1407 },
1408 .emit = brw_upload_cs_work_groups_surface,
1409 };