i965: Remove fake W-tiled render target support
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "isl/isl.h"
43
44 #include "intel_mipmap_tree.h"
45 #include "intel_batchbuffer.h"
46 #include "intel_tex.h"
47 #include "intel_fbo.h"
48 #include "intel_buffer_objects.h"
49
50 #include "brw_context.h"
51 #include "brw_state.h"
52 #include "brw_defines.h"
53 #include "brw_wm.h"
54
55 GLuint
56 translate_tex_target(GLenum target)
57 {
58 switch (target) {
59 case GL_TEXTURE_1D:
60 case GL_TEXTURE_1D_ARRAY_EXT:
61 return BRW_SURFACE_1D;
62
63 case GL_TEXTURE_RECTANGLE_NV:
64 return BRW_SURFACE_2D;
65
66 case GL_TEXTURE_2D:
67 case GL_TEXTURE_2D_ARRAY_EXT:
68 case GL_TEXTURE_EXTERNAL_OES:
69 case GL_TEXTURE_2D_MULTISAMPLE:
70 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
71 return BRW_SURFACE_2D;
72
73 case GL_TEXTURE_3D:
74 return BRW_SURFACE_3D;
75
76 case GL_TEXTURE_CUBE_MAP:
77 case GL_TEXTURE_CUBE_MAP_ARRAY:
78 return BRW_SURFACE_CUBE;
79
80 default:
81 unreachable("not reached");
82 }
83 }
84
85 uint32_t
86 brw_get_surface_tiling_bits(uint32_t tiling)
87 {
88 switch (tiling) {
89 case I915_TILING_X:
90 return BRW_SURFACE_TILED;
91 case I915_TILING_Y:
92 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
93 default:
94 return 0;
95 }
96 }
97
98
99 uint32_t
100 brw_get_surface_num_multisamples(unsigned num_samples)
101 {
102 if (num_samples > 1)
103 return BRW_SURFACE_MULTISAMPLECOUNT_4;
104 else
105 return BRW_SURFACE_MULTISAMPLECOUNT_1;
106 }
107
108 /**
109 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
110 * swizzling.
111 */
112 int
113 brw_get_texture_swizzle(const struct gl_context *ctx,
114 const struct gl_texture_object *t)
115 {
116 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
117
118 int swizzles[SWIZZLE_NIL + 1] = {
119 SWIZZLE_X,
120 SWIZZLE_Y,
121 SWIZZLE_Z,
122 SWIZZLE_W,
123 SWIZZLE_ZERO,
124 SWIZZLE_ONE,
125 SWIZZLE_NIL
126 };
127
128 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
129 img->_BaseFormat == GL_DEPTH_STENCIL) {
130 GLenum depth_mode = t->DepthMode;
131
132 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
133 * with depth component data specified with a sized internal format.
134 * Otherwise, it's left at the old default, GL_LUMINANCE.
135 */
136 if (_mesa_is_gles3(ctx) &&
137 img->InternalFormat != GL_DEPTH_COMPONENT &&
138 img->InternalFormat != GL_DEPTH_STENCIL) {
139 depth_mode = GL_RED;
140 }
141
142 switch (depth_mode) {
143 case GL_ALPHA:
144 swizzles[0] = SWIZZLE_ZERO;
145 swizzles[1] = SWIZZLE_ZERO;
146 swizzles[2] = SWIZZLE_ZERO;
147 swizzles[3] = SWIZZLE_X;
148 break;
149 case GL_LUMINANCE:
150 swizzles[0] = SWIZZLE_X;
151 swizzles[1] = SWIZZLE_X;
152 swizzles[2] = SWIZZLE_X;
153 swizzles[3] = SWIZZLE_ONE;
154 break;
155 case GL_INTENSITY:
156 swizzles[0] = SWIZZLE_X;
157 swizzles[1] = SWIZZLE_X;
158 swizzles[2] = SWIZZLE_X;
159 swizzles[3] = SWIZZLE_X;
160 break;
161 case GL_RED:
162 swizzles[0] = SWIZZLE_X;
163 swizzles[1] = SWIZZLE_ZERO;
164 swizzles[2] = SWIZZLE_ZERO;
165 swizzles[3] = SWIZZLE_ONE;
166 break;
167 }
168 }
169
170 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
171
172 /* If the texture's format is alpha-only, force R, G, and B to
173 * 0.0. Similarly, if the texture's format has no alpha channel,
174 * force the alpha value read to 1.0. This allows the
175 * implementation to use an RGBA texture for any of these formats
176 * without leaking any unexpected values.
177 */
178 switch (img->_BaseFormat) {
179 case GL_ALPHA:
180 swizzles[0] = SWIZZLE_ZERO;
181 swizzles[1] = SWIZZLE_ZERO;
182 swizzles[2] = SWIZZLE_ZERO;
183 break;
184 case GL_LUMINANCE:
185 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
186 swizzles[0] = SWIZZLE_X;
187 swizzles[1] = SWIZZLE_X;
188 swizzles[2] = SWIZZLE_X;
189 swizzles[3] = SWIZZLE_ONE;
190 }
191 break;
192 case GL_LUMINANCE_ALPHA:
193 if (datatype == GL_SIGNED_NORMALIZED) {
194 swizzles[0] = SWIZZLE_X;
195 swizzles[1] = SWIZZLE_X;
196 swizzles[2] = SWIZZLE_X;
197 swizzles[3] = SWIZZLE_W;
198 }
199 break;
200 case GL_INTENSITY:
201 if (datatype == GL_SIGNED_NORMALIZED) {
202 swizzles[0] = SWIZZLE_X;
203 swizzles[1] = SWIZZLE_X;
204 swizzles[2] = SWIZZLE_X;
205 swizzles[3] = SWIZZLE_X;
206 }
207 break;
208 case GL_RED:
209 case GL_RG:
210 case GL_RGB:
211 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
212 swizzles[3] = SWIZZLE_ONE;
213 break;
214 }
215
216 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
217 swizzles[GET_SWZ(t->_Swizzle, 1)],
218 swizzles[GET_SWZ(t->_Swizzle, 2)],
219 swizzles[GET_SWZ(t->_Swizzle, 3)]);
220 }
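/* A minimal worked example of the composition above (values assumed for
 * illustration): a depth texture with DepthMode GL_LUMINANCE yields format
 * swizzles (X, X, X, ONE); if the application also sets TEXTURE_SWIZZLE to
 * (ALPHA, RED, GREEN, BLUE), t->_Swizzle selects indices (3, 0, 1, 2), so
 * the composed result is MAKE_SWIZZLE4(ONE, X, X, X).
 */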
221
222 static void
223 gen4_emit_buffer_surface_state(struct brw_context *brw,
224 uint32_t *out_offset,
225 drm_intel_bo *bo,
226 unsigned buffer_offset,
227 unsigned surface_format,
228 unsigned buffer_size,
229 unsigned pitch,
230 bool rw)
231 {
232 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
233 6 * 4, 32, out_offset);
234 memset(surf, 0, 6 * 4);
235
236 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
237 surface_format << BRW_SURFACE_FORMAT_SHIFT |
238 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
239 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
240 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
241 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
242 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
243 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
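   /* A minimal worked example of the packing above, assuming a hypothetical
    * buffer_size of 0x12345 elements:
    *   width  = buffer_size & 0x7f          = 0x45  (bits  6:0)
    *   height = (buffer_size >> 7) & 0x1fff = 0x246 (bits 19:7)
    *   depth  = (buffer_size >> 20) & 0x7f  = 0x0   (bits 26:20)
    * i.e. the element count is split across the WIDTH, HEIGHT and DEPTH
    * fields of SURFACE_STATE, giving a 27-bit addressable range.
    */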
244
245 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
246 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
247 * physical cache. It is mapped in hardware to the sampler cache."
248 */
249 if (bo) {
250 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
251 bo, buffer_offset,
252 I915_GEM_DOMAIN_SAMPLER,
253 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
254 }
255 }
256
257 void
258 brw_update_buffer_texture_surface(struct gl_context *ctx,
259 unsigned unit,
260 uint32_t *surf_offset)
261 {
262 struct brw_context *brw = brw_context(ctx);
263 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
264 struct intel_buffer_object *intel_obj =
265 intel_buffer_object(tObj->BufferObject);
266 uint32_t size = tObj->BufferSize;
267 drm_intel_bo *bo = NULL;
268 mesa_format format = tObj->_BufferObjectFormat;
269 uint32_t brw_format = brw_format_for_mesa_format(format);
270 int texel_size = _mesa_get_format_bytes(format);
271
272 if (intel_obj) {
273 size = MIN2(size, intel_obj->Base.Size);
274 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
275 }
276
277 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
278 _mesa_problem(NULL, "bad format %s for texture buffer\n",
279 _mesa_get_format_name(format));
280 }
281
282 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
283 tObj->BufferOffset,
284 brw_format,
285 size / texel_size,
286 texel_size,
287 false /* rw */);
288 }
289
290 static void
291 brw_update_texture_surface(struct gl_context *ctx,
292 unsigned unit,
293 uint32_t *surf_offset,
294 bool for_gather,
295 uint32_t plane)
296 {
297 struct brw_context *brw = brw_context(ctx);
298 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
299 struct intel_texture_object *intelObj = intel_texture_object(tObj);
300 struct intel_mipmap_tree *mt = intelObj->mt;
301 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
302 uint32_t *surf;
303
304 /* BRW_NEW_TEXTURE_BUFFER */
305 if (tObj->Target == GL_TEXTURE_BUFFER) {
306 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
307 return;
308 }
309
310 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
311 6 * 4, 32, surf_offset);
312
313 uint32_t tex_format = translate_tex_format(brw, intelObj->_Format,
314 sampler->sRGBDecode);
315
316 if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
317 if (plane > 0)
318 mt = mt->plane[plane - 1];
319 if (mt == NULL)
320 return;
321
322 tex_format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
323 }
324
325 if (for_gather) {
326 /* Sandybridge's gather4 message is broken for integer formats.
327 * To work around this, we pretend the surface is UNORM for
328 * 8 or 16-bit formats, and emit shader instructions to recover
329 * the real INT/UINT value. For 32-bit formats, we pretend
330 * the surface is FLOAT, and simply reinterpret the resulting
331 * bits.
332 */
333 switch (tex_format) {
334 case BRW_SURFACEFORMAT_R8_SINT:
335 case BRW_SURFACEFORMAT_R8_UINT:
336 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
337 break;
338
339 case BRW_SURFACEFORMAT_R16_SINT:
340 case BRW_SURFACEFORMAT_R16_UINT:
341 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
342 break;
343
344 case BRW_SURFACEFORMAT_R32_SINT:
345 case BRW_SURFACEFORMAT_R32_UINT:
346 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
347 break;
348
349 default:
350 break;
351 }
352 }
353
354 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
355 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
356 BRW_SURFACE_CUBEFACE_ENABLES |
357 tex_format << BRW_SURFACE_FORMAT_SHIFT);
358
359 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
360
361 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
362 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
363 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
364
365 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
366 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
367 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
368
369 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
370 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
371 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
372 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
373
374 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
375
376 /* Emit relocation to surface contents */
377 drm_intel_bo_emit_reloc(brw->batch.bo,
378 *surf_offset + 4,
379 mt->bo,
380 surf[1] - mt->bo->offset64,
381 I915_GEM_DOMAIN_SAMPLER, 0);
382 }
383
384 /**
385 * Create the constant buffer surface. Vertex/fragment shader constants will be
386 * read from this buffer with Data Port Read instructions/messages.
387 */
388 void
389 brw_create_constant_surface(struct brw_context *brw,
390 drm_intel_bo *bo,
391 uint32_t offset,
392 uint32_t size,
393 uint32_t *out_offset)
394 {
395 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
396 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
397 size, 1, false);
398 }
399
400 /**
401 * Create the buffer surface. Shader buffer variables will be
402 * read from / write to this buffer with Data Port Read/Write
403 * instructions/messages.
404 */
405 void
406 brw_create_buffer_surface(struct brw_context *brw,
407 drm_intel_bo *bo,
408 uint32_t offset,
409 uint32_t size,
410 uint32_t *out_offset)
411 {
412 /* Use a raw surface so we can reuse existing untyped read/write/atomic
413 * messages. We need these specifically for the fragment shader since they
414 * include a pixel mask header that we need in order to ensure correct
415 * behavior with helper invocations, which cannot write to the buffer.
416 */
417 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
418 BRW_SURFACEFORMAT_RAW,
419 size, 1, true);
420 }
421
422 /**
423 * Set up a binding table entry for use by stream output logic (transform
424 * feedback).
425 *
426 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
427 */
428 void
429 brw_update_sol_surface(struct brw_context *brw,
430 struct gl_buffer_object *buffer_obj,
431 uint32_t *out_offset, unsigned num_vector_components,
432 unsigned stride_dwords, unsigned offset_dwords)
433 {
434 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
435 uint32_t offset_bytes = 4 * offset_dwords;
436 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
437 offset_bytes,
438 buffer_obj->Size - offset_bytes);
439 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
440 out_offset);
441 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
442 size_t size_dwords = buffer_obj->Size / 4;
443 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
444
445 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
446 * too big to map using a single binding table entry?
447 */
448 assert((size_dwords - offset_dwords) / stride_dwords
449 <= BRW_MAX_NUM_BUFFER_ENTRIES);
450
451 if (size_dwords > offset_dwords + num_vector_components) {
452 /* There is room for at least 1 transform feedback output in the buffer.
453 * Compute the number of additional transform feedback outputs the
454 * buffer has room for.
455 */
456 buffer_size_minus_1 =
457 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
458 } else {
459 /* There isn't even room for a single transform feedback output in the
460 * buffer. We can't configure the binding table entry to prevent output
461 * entirely; we'll have to rely on the geometry shader to detect
462 * overflow. But to minimize the damage in case of a bug, set up the
463 * binding table entry to just allow a single output.
464 */
465 buffer_size_minus_1 = 0;
466 }
467 width = buffer_size_minus_1 & 0x7f;
468 height = (buffer_size_minus_1 & 0xfff80) >> 7;
469 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
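   /* A minimal worked example with assumed values: a 1024-byte buffer
    * (size_dwords = 256) bound at offset_dwords = 0, writing one vec4 output
    * (num_vector_components = 4) with stride_dwords = 4, gives
    * buffer_size_minus_1 = (256 - 0 - 4) / 4 = 63, i.e. room for 64 outputs;
    * width = 63, height = 0, depth = 0 and pitch_minus_1 = 15.
    */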
470
471 switch (num_vector_components) {
472 case 1:
473 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
474 break;
475 case 2:
476 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
477 break;
478 case 3:
479 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
480 break;
481 case 4:
482 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
483 break;
484 default:
485 unreachable("Invalid vector size for transform feedback output");
486 }
487
488 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
489 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
490 surface_format << BRW_SURFACE_FORMAT_SHIFT |
491 BRW_SURFACE_RC_READ_WRITE;
492 surf[1] = bo->offset64 + offset_bytes; /* reloc */
493 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
494 height << BRW_SURFACE_HEIGHT_SHIFT);
495 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
496 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
497 surf[4] = 0;
498 surf[5] = 0;
499
500 /* Emit relocation to surface contents. */
501 drm_intel_bo_emit_reloc(brw->batch.bo,
502 *out_offset + 4,
503 bo, offset_bytes,
504 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
505 }
506
507 /* Creates a new WM constant buffer reflecting the current fragment program's
508 * constants, if needed by the fragment program.
509 *
510 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
511 * state atom.
512 */
513 static void
514 brw_upload_wm_pull_constants(struct brw_context *brw)
515 {
516 struct brw_stage_state *stage_state = &brw->wm.base;
517 /* BRW_NEW_FRAGMENT_PROGRAM */
518 struct brw_fragment_program *fp =
519 (struct brw_fragment_program *) brw->fragment_program;
520 /* BRW_NEW_FS_PROG_DATA */
521 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
522
523 /* _NEW_PROGRAM_CONSTANTS */
524 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
525 stage_state, prog_data);
526 }
527
528 const struct brw_tracked_state brw_wm_pull_constants = {
529 .dirty = {
530 .mesa = _NEW_PROGRAM_CONSTANTS,
531 .brw = BRW_NEW_BATCH |
532 BRW_NEW_BLORP |
533 BRW_NEW_FRAGMENT_PROGRAM |
534 BRW_NEW_FS_PROG_DATA,
535 },
536 .emit = brw_upload_wm_pull_constants,
537 };
538
539 /**
540 * Creates a null renderbuffer surface.
541 *
542 * This is used when the shader doesn't write to any color output. An FB
543 * write to target 0 will still be emitted, because that's how the thread is
544 * terminated (and computed depth is returned), so we need to have the
545 * hardware discard the target 0 color output.
546 */
547 static void
548 brw_emit_null_surface_state(struct brw_context *brw,
549 unsigned width,
550 unsigned height,
551 unsigned samples,
552 uint32_t *out_offset)
553 {
554 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
555 * Notes):
556 *
557 * A null surface will be used in instances where an actual surface is
558 * not bound. When a write message is generated to a null surface, no
559 * actual surface is written to. When a read message (including any
560 * sampling engine message) is generated to a null surface, the result
561 * is all zeros. Note that a null surface type is allowed to be used
562 * with all messages, even if it is not specifically indicated as
563 * supported. All of the remaining fields in surface state are ignored
564 * for null surfaces, with the following exceptions:
565 *
566 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
567 * depth buffer’s corresponding state for all render target surfaces,
568 * including null.
569 *
570 * - Surface Format must be R8G8B8A8_UNORM.
571 */
572 unsigned surface_type = BRW_SURFACE_NULL;
573 drm_intel_bo *bo = NULL;
574 unsigned pitch_minus_1 = 0;
575 uint32_t multisampling_state = 0;
576 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
577 out_offset);
578
579 if (samples > 1) {
580 /* On Gen6, null render targets seem to cause GPU hangs when
581 * multisampling. So work around this problem by rendering into a
582 * dummy color buffer.
583 *
584 * To decrease the amount of memory needed by the workaround buffer, we
585 * set its pitch to 128 bytes (the width of a Y tile). This means that
586 * the amount of memory needed for the workaround buffer is
587 * (width_in_tiles + height_in_tiles - 1) tiles.
588 *
589 * Note that since the workaround buffer will be interpreted by the
590 * hardware as an interleaved multisampled buffer, we need to compute
591 * width_in_tiles and height_in_tiles by dividing the width and height
592 * by 16 rather than the normal Y-tile size of 32.
593 */
594 unsigned width_in_tiles = ALIGN(width, 16) / 16;
595 unsigned height_in_tiles = ALIGN(height, 16) / 16;
596 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
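      /* A minimal worked example with assumed dimensions: for a 1920x1080
       * multisampled framebuffer, width_in_tiles = 120 and
       * height_in_tiles = 68, so size_needed = (120 + 68 - 1) * 4096 =
       * 765952 bytes (~748 KiB), far less than a full-size dummy color
       * buffer would require.
       */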
597 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
598 size_needed);
599 bo = brw->wm.multisampled_null_render_target_bo;
600 surface_type = BRW_SURFACE_2D;
601 pitch_minus_1 = 127;
602 multisampling_state = brw_get_surface_num_multisamples(samples);
603 }
604
605 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
606 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
607 if (brw->gen < 6) {
608 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
609 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
610 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
611 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
612 }
613 surf[1] = bo ? bo->offset64 : 0;
614 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
615 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
616
617 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
618 * Notes):
619 *
620 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
621 */
622 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
623 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
624 surf[4] = multisampling_state;
625 surf[5] = 0;
626
627 if (bo) {
628 drm_intel_bo_emit_reloc(brw->batch.bo,
629 *out_offset + 4,
630 bo, 0,
631 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
632 }
633 }
634
635 /**
636 * Sets up a surface state structure to point at the given region.
637 * While it is only used for the front/back buffer currently, it should be
638 * usable for further buffers when doing ARB_draw_buffers support.
639 */
640 static uint32_t
641 brw_update_renderbuffer_surface(struct brw_context *brw,
642 struct gl_renderbuffer *rb,
643 bool layered, unsigned unit,
644 uint32_t surf_index)
645 {
646 struct gl_context *ctx = &brw->ctx;
647 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
648 struct intel_mipmap_tree *mt = irb->mt;
649 uint32_t *surf;
650 uint32_t tile_x, tile_y;
651 uint32_t format = 0;
652 uint32_t offset;
653 /* _NEW_BUFFERS */
654 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
655 /* BRW_NEW_FS_PROG_DATA */
656
657 assert(!layered);
658
659 if (rb->TexImage && !brw->has_surface_tile_offset) {
660 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
661
662 if (tile_x != 0 || tile_y != 0) {
663 /* Original gen4 hardware couldn't draw to a non-tile-aligned
664 * destination in a miptree unless you actually set up your renderbuffer
665 * as a miptree and used the fragile lod/array_index/etc. controls to
666 * select the image. So, instead, we just make a new single-level
667 * miptree and render into that.
668 */
669 intel_renderbuffer_move_to_temp(brw, irb, false);
670 mt = irb->mt;
671 }
672 }
673
674 intel_miptree_used_for_rendering(irb->mt);
675
676 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
677
678 format = brw->render_target_format[rb_format];
679 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
680 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
681 __func__, _mesa_get_format_name(rb_format));
682 }
683
684 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
685 format << BRW_SURFACE_FORMAT_SHIFT);
686
687 /* reloc */
688 assert(mt->offset % mt->cpp == 0);
689 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
690 mt->bo->offset64 + mt->offset);
691
692 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
693 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
694
695 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
696 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
697
698 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
699
700 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
701 /* Note that the low bits of these fields are missing, so tile offsets
702 * that aren't aligned to the field granularity can't be represented.
703 */
704 assert(tile_x % 4 == 0);
705 assert(tile_y % 2 == 0);
706 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
707 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
708 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
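   /* A minimal worked example with assumed offsets: tile offsets of
    * (tile_x, tile_y) = (8, 4) satisfy the asserts above and are encoded
    * as X Offset = 8 / 4 = 2 and Y Offset = 4 / 2 = 2 in surf[5].
    */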
709
710 if (brw->gen < 6) {
711 /* _NEW_COLOR */
712 if (!ctx->Color.ColorLogicOpEnabled &&
713 (ctx->Color.BlendEnabled & (1 << unit)))
714 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
715
716 if (!ctx->Color.ColorMask[unit][0])
717 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
718 if (!ctx->Color.ColorMask[unit][1])
719 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
720 if (!ctx->Color.ColorMask[unit][2])
721 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
722
723 /* As mentioned above, disable writes to the alpha component when the
724 * renderbuffer is XRGB.
725 */
726 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
727 !ctx->Color.ColorMask[unit][3]) {
728 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
729 }
730 }
731
732 drm_intel_bo_emit_reloc(brw->batch.bo,
733 offset + 4,
734 mt->bo,
735 surf[1] - mt->bo->offset64,
736 I915_GEM_DOMAIN_RENDER,
737 I915_GEM_DOMAIN_RENDER);
738
739 return offset;
740 }
741
742 /**
743 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
744 */
745 void
746 brw_update_renderbuffer_surfaces(struct brw_context *brw,
747 const struct gl_framebuffer *fb,
748 uint32_t render_target_start,
749 uint32_t *surf_offset)
750 {
751 GLuint i;
752 const unsigned int w = _mesa_geometric_width(fb);
753 const unsigned int h = _mesa_geometric_height(fb);
754 const unsigned int s = _mesa_geometric_samples(fb);
755
756 /* Update surfaces for drawing buffers */
757 if (fb->_NumColorDrawBuffers >= 1) {
758 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
759 const uint32_t surf_index = render_target_start + i;
760
761 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
762 surf_offset[surf_index] =
763 brw->vtbl.update_renderbuffer_surface(
764 brw, fb->_ColorDrawBuffers[i],
765 _mesa_geometric_layers(fb) > 0, i, surf_index);
766 } else {
767 brw->vtbl.emit_null_surface_state(brw, w, h, s,
768 &surf_offset[surf_index]);
769 }
770 }
771 } else {
772 const uint32_t surf_index = render_target_start;
773 brw->vtbl.emit_null_surface_state(brw, w, h, s,
774 &surf_offset[surf_index]);
775 }
776 }
777
778 static void
779 update_renderbuffer_surfaces(struct brw_context *brw)
780 {
781 const struct gl_context *ctx = &brw->ctx;
782
783 /* _NEW_BUFFERS | _NEW_COLOR */
784 const struct gl_framebuffer *fb = ctx->DrawBuffer;
785 brw_update_renderbuffer_surfaces(
786 brw, fb,
787 brw->wm.prog_data->binding_table.render_target_start,
788 brw->wm.base.surf_offset);
789 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
790 }
791
792 const struct brw_tracked_state brw_renderbuffer_surfaces = {
793 .dirty = {
794 .mesa = _NEW_BUFFERS |
795 _NEW_COLOR,
796 .brw = BRW_NEW_BATCH |
797 BRW_NEW_BLORP |
798 BRW_NEW_FS_PROG_DATA,
799 },
800 .emit = update_renderbuffer_surfaces,
801 };
802
803 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
804 .dirty = {
805 .mesa = _NEW_BUFFERS,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_BLORP,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812
813 static void
814 update_stage_texture_surfaces(struct brw_context *brw,
815 const struct gl_program *prog,
816 struct brw_stage_state *stage_state,
817 bool for_gather, uint32_t plane)
818 {
819 if (!prog)
820 return;
821
822 struct gl_context *ctx = &brw->ctx;
823
824 uint32_t *surf_offset = stage_state->surf_offset;
825
826 /* BRW_NEW_*_PROG_DATA */
827 if (for_gather)
828 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
829 else
830 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
831
832 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
833 for (unsigned s = 0; s < num_samplers; s++) {
834 surf_offset[s] = 0;
835
836 if (prog->SamplersUsed & (1 << s)) {
837 const unsigned unit = prog->SamplerUnits[s];
838
839 /* _NEW_TEXTURE */
840 if (ctx->Texture.Unit[unit]._Current) {
841 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
842 }
843 }
844 }
845 }
846
847
848 /**
849 * Construct SURFACE_STATE objects for enabled textures.
850 */
851 static void
852 brw_update_texture_surfaces(struct brw_context *brw)
853 {
854 /* BRW_NEW_VERTEX_PROGRAM */
855 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
856
857 /* BRW_NEW_TESS_PROGRAMS */
858 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
859 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
860
861 /* BRW_NEW_GEOMETRY_PROGRAM */
862 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
863
864 /* BRW_NEW_FRAGMENT_PROGRAM */
865 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
866
867 /* _NEW_TEXTURE */
868 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
869 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
870 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
871 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
872 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
873
874 /* Emit an alternate set of surface state for gather. This
875 * allows the surface format to be overridden for only the
876 * gather4 messages. */
877 if (brw->gen < 8) {
878 if (vs && vs->UsesGather)
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
880 if (tcs && tcs->UsesGather)
881 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
882 if (tes && tes->UsesGather)
883 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
884 if (gs && gs->UsesGather)
885 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
886 if (fs && fs->UsesGather)
887 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
888 }
889
890 if (fs) {
891 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
892 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
893 }
894
895 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
896 }
897
898 const struct brw_tracked_state brw_texture_surfaces = {
899 .dirty = {
900 .mesa = _NEW_TEXTURE,
901 .brw = BRW_NEW_BATCH |
902 BRW_NEW_BLORP |
903 BRW_NEW_FRAGMENT_PROGRAM |
904 BRW_NEW_FS_PROG_DATA |
905 BRW_NEW_GEOMETRY_PROGRAM |
906 BRW_NEW_GS_PROG_DATA |
907 BRW_NEW_TESS_PROGRAMS |
908 BRW_NEW_TCS_PROG_DATA |
909 BRW_NEW_TES_PROG_DATA |
910 BRW_NEW_TEXTURE_BUFFER |
911 BRW_NEW_VERTEX_PROGRAM |
912 BRW_NEW_VS_PROG_DATA,
913 },
914 .emit = brw_update_texture_surfaces,
915 };
916
917 static void
918 brw_update_cs_texture_surfaces(struct brw_context *brw)
919 {
920 /* BRW_NEW_COMPUTE_PROGRAM */
921 struct gl_program *cs = (struct gl_program *) brw->compute_program;
922
923 /* _NEW_TEXTURE */
924 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
925
926 /* Emit an alternate set of surface state for gather. This
927 * allows the surface format to be overridden for only the
928 * gather4 messages.
929 */
930 if (brw->gen < 8) {
931 if (cs && cs->UsesGather)
932 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
933 }
934
935 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
936 }
937
938 const struct brw_tracked_state brw_cs_texture_surfaces = {
939 .dirty = {
940 .mesa = _NEW_TEXTURE,
941 .brw = BRW_NEW_BATCH |
942 BRW_NEW_BLORP |
943 BRW_NEW_COMPUTE_PROGRAM,
944 },
945 .emit = brw_update_cs_texture_surfaces,
946 };
947
948
949 void
950 brw_upload_ubo_surfaces(struct brw_context *brw,
951 struct gl_shader *shader,
952 struct brw_stage_state *stage_state,
953 struct brw_stage_prog_data *prog_data)
954 {
955 struct gl_context *ctx = &brw->ctx;
956
957 if (!shader)
958 return;
959
960 uint32_t *ubo_surf_offsets =
961 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
962
963 for (int i = 0; i < shader->NumUniformBlocks; i++) {
964 struct gl_uniform_buffer_binding *binding =
965 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
966
967 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
968 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
969 } else {
970 struct intel_buffer_object *intel_bo =
971 intel_buffer_object(binding->BufferObject);
972 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
973 if (!binding->AutomaticSize)
974 size = MIN2(size, binding->Size);
975 drm_intel_bo *bo =
976 intel_bufferobj_buffer(brw, intel_bo,
977 binding->Offset,
978 size);
979 brw_create_constant_surface(brw, bo, binding->Offset,
980 size,
981 &ubo_surf_offsets[i]);
982 }
983 }
984
985 uint32_t *ssbo_surf_offsets =
986 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
987
988 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
989 struct gl_shader_storage_buffer_binding *binding =
990 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
991
992 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
993 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
994 } else {
995 struct intel_buffer_object *intel_bo =
996 intel_buffer_object(binding->BufferObject);
997 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
998 if (!binding->AutomaticSize)
999 size = MIN2(size, binding->Size);
1000 drm_intel_bo *bo =
1001 intel_bufferobj_buffer(brw, intel_bo,
1002 binding->Offset,
1003 size);
1004 brw_create_buffer_surface(brw, bo, binding->Offset,
1005 size,
1006 &ssbo_surf_offsets[i]);
1007 }
1008 }
1009
1010 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1011 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1012 }
1013
1014 static void
1015 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1016 {
1017 struct gl_context *ctx = &brw->ctx;
1018 /* _NEW_PROGRAM */
1019 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1020
1021 if (!prog)
1022 return;
1023
1024 /* BRW_NEW_FS_PROG_DATA */
1025 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1026 &brw->wm.base, &brw->wm.prog_data->base);
1027 }
1028
1029 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1030 .dirty = {
1031 .mesa = _NEW_PROGRAM,
1032 .brw = BRW_NEW_BATCH |
1033 BRW_NEW_BLORP |
1034 BRW_NEW_FS_PROG_DATA |
1035 BRW_NEW_UNIFORM_BUFFER,
1036 },
1037 .emit = brw_upload_wm_ubo_surfaces,
1038 };
1039
1040 static void
1041 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1042 {
1043 struct gl_context *ctx = &brw->ctx;
1044 /* _NEW_PROGRAM */
1045 struct gl_shader_program *prog =
1046 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1047
1048 if (!prog)
1049 return;
1050
1051 /* BRW_NEW_CS_PROG_DATA */
1052 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1053 &brw->cs.base, &brw->cs.prog_data->base);
1054 }
1055
1056 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1057 .dirty = {
1058 .mesa = _NEW_PROGRAM,
1059 .brw = BRW_NEW_BATCH |
1060 BRW_NEW_BLORP |
1061 BRW_NEW_CS_PROG_DATA |
1062 BRW_NEW_UNIFORM_BUFFER,
1063 },
1064 .emit = brw_upload_cs_ubo_surfaces,
1065 };
1066
1067 void
1068 brw_upload_abo_surfaces(struct brw_context *brw,
1069 struct gl_shader *shader,
1070 struct brw_stage_state *stage_state,
1071 struct brw_stage_prog_data *prog_data)
1072 {
1073 struct gl_context *ctx = &brw->ctx;
1074 uint32_t *surf_offsets =
1075 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1076
1077 if (shader && shader->NumAtomicBuffers) {
1078 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1079 struct gl_atomic_buffer_binding *binding =
1080 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1081 struct intel_buffer_object *intel_bo =
1082 intel_buffer_object(binding->BufferObject);
1083 drm_intel_bo *bo = intel_bufferobj_buffer(
1084 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1085
1086 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1087 binding->Offset, BRW_SURFACEFORMAT_RAW,
1088 bo->size - binding->Offset, 1, true);
1089 }
1090
1091 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1092 }
1093 }
1094
1095 static void
1096 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1097 {
1098 struct gl_context *ctx = &brw->ctx;
1099 /* _NEW_PROGRAM */
1100 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1101
1102 if (prog) {
1103 /* BRW_NEW_FS_PROG_DATA */
1104 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1105 &brw->wm.base, &brw->wm.prog_data->base);
1106 }
1107 }
1108
1109 const struct brw_tracked_state brw_wm_abo_surfaces = {
1110 .dirty = {
1111 .mesa = _NEW_PROGRAM,
1112 .brw = BRW_NEW_ATOMIC_BUFFER |
1113 BRW_NEW_BLORP |
1114 BRW_NEW_BATCH |
1115 BRW_NEW_FS_PROG_DATA,
1116 },
1117 .emit = brw_upload_wm_abo_surfaces,
1118 };
1119
1120 static void
1121 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1122 {
1123 struct gl_context *ctx = &brw->ctx;
1124 /* _NEW_PROGRAM */
1125 struct gl_shader_program *prog =
1126 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1127
1128 if (prog) {
1129 /* BRW_NEW_CS_PROG_DATA */
1130 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1131 &brw->cs.base, &brw->cs.prog_data->base);
1132 }
1133 }
1134
1135 const struct brw_tracked_state brw_cs_abo_surfaces = {
1136 .dirty = {
1137 .mesa = _NEW_PROGRAM,
1138 .brw = BRW_NEW_ATOMIC_BUFFER |
1139 BRW_NEW_BLORP |
1140 BRW_NEW_BATCH |
1141 BRW_NEW_CS_PROG_DATA,
1142 },
1143 .emit = brw_upload_cs_abo_surfaces,
1144 };
1145
1146 static void
1147 brw_upload_cs_image_surfaces(struct brw_context *brw)
1148 {
1149 struct gl_context *ctx = &brw->ctx;
1150 /* _NEW_PROGRAM */
1151 struct gl_shader_program *prog =
1152 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1153
1154 if (prog) {
1155 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1156 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1157 &brw->cs.base, &brw->cs.prog_data->base);
1158 }
1159 }
1160
1161 const struct brw_tracked_state brw_cs_image_surfaces = {
1162 .dirty = {
1163 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1164 .brw = BRW_NEW_BATCH |
1165 BRW_NEW_BLORP |
1166 BRW_NEW_CS_PROG_DATA |
1167 BRW_NEW_IMAGE_UNITS
1168 },
1169 .emit = brw_upload_cs_image_surfaces,
1170 };
1171
1172 static uint32_t
1173 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1174 {
1175 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1176 uint32_t hw_format = brw_format_for_mesa_format(format);
1177 if (access == GL_WRITE_ONLY) {
1178 return hw_format;
1179 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1180 /* Typed surface reads support a very limited subset of the shader
1181 * image formats. Translate it into the closest format the
1182 * hardware supports.
1183 */
1184 return isl_lower_storage_image_format(devinfo, hw_format);
1185 } else {
1186 /* The hardware doesn't actually support a typed format that we can use,
1187 * so we have to fall back to untyped read/write messages.
1188 */
1189 return BRW_SURFACEFORMAT_RAW;
1190 }
1191 }
1192
1193 static void
1194 update_default_image_param(struct brw_context *brw,
1195 struct gl_image_unit *u,
1196 unsigned surface_idx,
1197 struct brw_image_param *param)
1198 {
1199 memset(param, 0, sizeof(*param));
1200 param->surface_idx = surface_idx;
1201 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1202 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1203 * detailed explanation of these parameters.
1204 */
1205 param->swizzling[0] = 0xff;
1206 param->swizzling[1] = 0xff;
1207 }
1208
1209 static void
1210 update_buffer_image_param(struct brw_context *brw,
1211 struct gl_image_unit *u,
1212 unsigned surface_idx,
1213 struct brw_image_param *param)
1214 {
1215 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1216
1217 update_default_image_param(brw, u, surface_idx, param);
1218
1219 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1220 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1221 }
1222
1223 static void
1224 update_texture_image_param(struct brw_context *brw,
1225 struct gl_image_unit *u,
1226 unsigned surface_idx,
1227 struct brw_image_param *param)
1228 {
1229 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1230
1231 update_default_image_param(brw, u, surface_idx, param);
1232
1233 param->size[0] = minify(mt->logical_width0, u->Level);
1234 param->size[1] = minify(mt->logical_height0, u->Level);
1235 param->size[2] = (!u->Layered ? 1 :
1236 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1237 u->TexObj->Target == GL_TEXTURE_3D ?
1238 minify(mt->logical_depth0, u->Level) :
1239 mt->logical_depth0);
1240
1241 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1242 &param->offset[0],
1243 &param->offset[1]);
1244
1245 param->stride[0] = mt->cpp;
1246 param->stride[1] = mt->pitch / mt->cpp;
1247 param->stride[2] =
1248 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1249 param->stride[3] =
1250 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1251
1252 if (mt->tiling == I915_TILING_X) {
1253 /* An X tile is a rectangular block of 512x8 bytes. */
1254 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1255 param->tiling[1] = _mesa_logbase2(8);
1256
1257 if (brw->has_swizzling) {
1258 /* Right shifts required to swizzle bits 9 and 10 of the memory
1259 * address with bit 6.
1260 */
1261 param->swizzling[0] = 3;
1262 param->swizzling[1] = 4;
1263 }
1264 } else if (mt->tiling == I915_TILING_Y) {
1265 /* The layout of a Y-tiled surface in memory isn't fundamentally
1266 * different from that of an X-tiled surface; we simply pretend that
1267 * the surface is broken up into a number of smaller 16Bx32 tiles,
1268 * each one arranged in X-major order just as is the case for X-tiling.
1269 */
1270 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1271 param->tiling[1] = _mesa_logbase2(32);
1272
1273 if (brw->has_swizzling) {
1274 /* Right shift required to swizzle bit 9 of the memory address with
1275 * bit 6.
1276 */
1277 param->swizzling[0] = 3;
1278 }
1279 }
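   /* A minimal worked example with an assumed 4-byte format (cpp = 4):
    * X tiling gives param->tiling[] = { log2(512 / 4), log2(8) } = { 7, 3 },
    * while Y tiling gives { log2(16 / 4), log2(32) } = { 2, 5 }.
    */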
1280
1281 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1282 * address calculation algorithm (emit_address_calculation() in
1283 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1284 * modulus equal to the LOD.
1285 */
1286 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1287 0);
1288 }
1289
1290 static void
1291 update_image_surface(struct brw_context *brw,
1292 struct gl_image_unit *u,
1293 GLenum access,
1294 unsigned surface_idx,
1295 uint32_t *surf_offset,
1296 struct brw_image_param *param)
1297 {
1298 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1299 struct gl_texture_object *obj = u->TexObj;
1300 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1301
1302 if (obj->Target == GL_TEXTURE_BUFFER) {
1303 struct intel_buffer_object *intel_obj =
1304 intel_buffer_object(obj->BufferObject);
1305 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1306 _mesa_get_format_bytes(u->_ActualFormat));
1307
1308 brw->vtbl.emit_buffer_surface_state(
1309 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1310 format, intel_obj->Base.Size / texel_size, texel_size,
1311 access != GL_READ_ONLY);
1312
1313 update_buffer_image_param(brw, u, surface_idx, param);
1314
1315 } else {
1316 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1317 struct intel_mipmap_tree *mt = intel_obj->mt;
1318
1319 if (format == BRW_SURFACEFORMAT_RAW) {
1320 brw->vtbl.emit_buffer_surface_state(
1321 brw, surf_offset, mt->bo, mt->offset,
1322 format, mt->bo->size - mt->offset, 1 /* pitch */,
1323 access != GL_READ_ONLY);
1324
1325 } else {
1326 const unsigned min_layer = obj->MinLayer + u->_Layer;
1327 const unsigned min_level = obj->MinLevel + u->Level;
1328 const unsigned num_layers = (!u->Layered ? 1 :
1329 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1330 mt->logical_depth0);
1331 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1332 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1333 GL_TEXTURE_2D_ARRAY : obj->Target);
1334 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1335
1336 brw->vtbl.emit_texture_surface_state(
1337 brw, mt, target,
1338 min_layer, min_layer + num_layers,
1339 min_level, min_level + 1,
1340 format, SWIZZLE_XYZW,
1341 surf_offset, surf_index, access != GL_READ_ONLY, false);
1342 }
1343
1344 update_texture_image_param(brw, u, surface_idx, param);
1345 }
1346
1347 } else {
1348 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1349 update_default_image_param(brw, u, surface_idx, param);
1350 }
1351 }
1352
1353 void
1354 brw_upload_image_surfaces(struct brw_context *brw,
1355 struct gl_shader *shader,
1356 struct brw_stage_state *stage_state,
1357 struct brw_stage_prog_data *prog_data)
1358 {
1359 struct gl_context *ctx = &brw->ctx;
1360
1361 if (shader && shader->NumImages) {
1362 for (unsigned i = 0; i < shader->NumImages; i++) {
1363 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1364 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1365
1366 update_image_surface(brw, u, shader->ImageAccess[i],
1367 surf_idx,
1368 &stage_state->surf_offset[surf_idx],
1369 &prog_data->image_param[i]);
1370 }
1371
1372 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1373 /* This may have changed the image metadata that depends on the context
1374 * image unit state and is passed to the program as uniforms, so make sure
1375 * that push and pull constants are reuploaded.
1376 */
1377 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1378 }
1379 }
1380
1381 static void
1382 brw_upload_wm_image_surfaces(struct brw_context *brw)
1383 {
1384 struct gl_context *ctx = &brw->ctx;
1385 /* BRW_NEW_FRAGMENT_PROGRAM */
1386 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1387
1388 if (prog) {
1389 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1390 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1391 &brw->wm.base, &brw->wm.prog_data->base);
1392 }
1393 }
1394
1395 const struct brw_tracked_state brw_wm_image_surfaces = {
1396 .dirty = {
1397 .mesa = _NEW_TEXTURE,
1398 .brw = BRW_NEW_BATCH |
1399 BRW_NEW_BLORP |
1400 BRW_NEW_FRAGMENT_PROGRAM |
1401 BRW_NEW_FS_PROG_DATA |
1402 BRW_NEW_IMAGE_UNITS
1403 },
1404 .emit = brw_upload_wm_image_surfaces,
1405 };
1406
1407 void
1408 gen4_init_vtable_surface_functions(struct brw_context *brw)
1409 {
1410 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1411 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1412 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1413 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1414 }
1415
1416 static void
1417 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1418 {
1419 struct gl_context *ctx = &brw->ctx;
1420 /* _NEW_PROGRAM */
1421 struct gl_shader_program *prog =
1422 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1423
1424 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1425 const unsigned surf_idx =
1426 brw->cs.prog_data->binding_table.work_groups_start;
1427 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1428 drm_intel_bo *bo;
1429 uint32_t bo_offset;
1430
1431 if (brw->compute.num_work_groups_bo == NULL) {
1432 bo = NULL;
1433 intel_upload_data(brw,
1434 (void *)brw->compute.num_work_groups,
1435 3 * sizeof(GLuint),
1436 sizeof(GLuint),
1437 &bo,
1438 &bo_offset);
1439 } else {
1440 bo = brw->compute.num_work_groups_bo;
1441 bo_offset = brw->compute.num_work_groups_offset;
1442 }
1443
1444 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1445 bo, bo_offset,
1446 BRW_SURFACEFORMAT_RAW,
1447 3 * sizeof(GLuint), 1, true);
1448 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1449 }
1450 }
1451
1452 const struct brw_tracked_state brw_cs_work_groups_surface = {
1453 .dirty = {
1454 .brw = BRW_NEW_BLORP |
1455 BRW_NEW_CS_WORK_GROUPS
1456 },
1457 .emit = brw_upload_cs_work_groups_surface,
1458 };