/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

static uint32_t
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

static uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is setup to cover all the mip-levels for one slice.
    * (Hardware is still used to access individual slices).
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}
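
/* Illustrative note (not in the original source): for a 64x64 W-tiled
 * stencil slice with a 64-byte pitch, the remapping above publishes a
 * Y-tiled view with *pitch = 128, *width = ALIGN(64, 8) * 2 = 128 and
 * *height = 64 / 2 = 32 -- twice as wide, half as tall.
 */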

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
      case GL_DEPTH_COMPONENT:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
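
/* Worked example (illustrative, not in the original source): for a
 * GL_DEPTH_COMPONENT texture with DepthMode GL_LUMINANCE, the depth switch
 * above yields swizzles[] = { X, X, X, ONE }.  If the application also sets
 * a texture swizzle of GL_ALPHA for its first channel, GET_SWZ(t->_Swizzle, 0)
 * is SWIZZLE_W, so the composed value for that channel is SWIZZLE_ONE.
 */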

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}
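
/* Worked example (illustrative, not in the original source): for a buffer
 * of 1000000 elements, the packing above is
 *    width  = 1000000 & 0x7f          = 64
 *    height = (1000000 >> 7) & 0x1fff = 7812
 *    depth  = (1000000 >> 20) & 0x7f  = 0
 * i.e. the element count is simply sliced into 7 + 13 + 7 bits across the
 * Width, Height and Depth fields.
 */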

static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value.  For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }
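
   /* Illustrative note (not in the original source): e.g. an R16_UINT texel
    * holding 1000 comes back from the UNORM-typed gather as 1000/65535.0;
    * the compiler's gather fixup is expected to rescale and convert that
    * result to recover the original integer.
    */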

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       size, 1, false);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / write to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          drm_intel_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_RAW,
                                       size, 1, true);
}
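
/* Usage sketch (illustrative, not in the original source): a UBO binding of
 * `size` bytes at `offset` within `bo` becomes a typed RGBA32F constant
 * surface,
 *
 *    brw_create_constant_surface(brw, bo, offset, size, &surf_offset);
 *
 * while an SSBO uses the RAW variant so untyped read/write/atomic messages
 * can address it byte by byte:
 *
 *    brw_create_buffer_surface(brw, bo, offset, size, &surf_offset);
 */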

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
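
   /* Worked example (illustrative, not in the original source): room for
    * 1000 more outputs gives buffer_size_minus_1 = 999, which packs as
    * width = 999 & 0x7f = 103, height = (999 & 0xfff80) >> 7 = 7 and
    * depth = 0 -- the same 7/13/7-bit split used for buffer surfaces
    * elsewhere in this file.
    */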

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specificially indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into dummy
       * color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}
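
/* Sizing example for the multisampled-null workaround above (illustrative,
 * not in the original source): a 1920x1080 target needs width_in_tiles =
 * ALIGN(1920, 16) / 16 = 120 and height_in_tiles = ALIGN(1080, 16) / 16 = 68,
 * so the scratch BO is only (120 + 68 - 1) * 4096 = 765952 bytes instead of
 * a full-size dummy color buffer.
 */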

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered, unsigned unit,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually setup your renderbuffer
          * as a miptree and used the fragile lod/array_index/etc. controls to
          * select the image.  So, instead, we just make a new single-level
          * miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* As mentioned above, disable writes to the alpha component when the
       * renderbuffer is XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);

   return offset;
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i],
                  _mesa_geometric_layers(fb) > 0, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                              &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
                                        &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      brw->wm.prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}
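
/* Binding table sketch (illustrative numbers, not in the original source):
 * with texture_start = 8 and gather_texture_start = 24, a program using
 * samplers 0 and 2 fills surf_offset[8] and surf_offset[10] on the normal
 * pass, and surf_offset[24] and surf_offset[26] on the for_gather pass
 * driven from brw_update_texture_surfaces() below.
 */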

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_CTRL_PROGRAM */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;

   /* BRW_NEW_TESS_EVAL_PROGRAM */
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);

   /* emit alternate set of surface state for gather. this
    * allows the surface format to be overriden for only the
    * gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (tcs && tcs->UsesGather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
      if (tes && tes->UsesGather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
      if (cs && cs->UsesGather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_CTRL_PROGRAM |
             BRW_NEW_TESS_EVAL_PROGRAM |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   binding->BufferObject->Size - binding->Offset);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     binding->BufferObject->Size - binding->Offset,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   binding->BufferObject->Size - binding->Offset);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   binding->BufferObject->Size - binding->Offset,
                                   &ssbo_surf_offsets[i]);
      }
   }

   if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (!prog)
      return;

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                           &brw->cs.base, &brw->cs.prog_data->base);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (shader && shader->NumAtomicBuffers) {
      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo = intel_bufferobj_buffer(
            brw, intel_bo, binding->Offset,
            intel_bo->Base.Size - binding->Offset);

         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                             binding->Offset,
                                             BRW_SURFACEFORMAT_RAW,
                                             bo->size - binding->Offset,
                                             1, true);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                              &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                              &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                                &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_cs_image_surfaces,
};

static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   if (access == GL_WRITE_ONLY) {
      return brw_format_for_mesa_format(format);
   } else {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
          (_mesa_get_format_bytes(format) >= 8 &&
           (brw->gen == 7 && !brw->is_haswell)))
         return BRW_SURFACEFORMAT_RAW;
      else
         return brw_format_for_mesa_format(
            brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
   }
}
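
/* For example (illustrative, not in the original source): a read/write
 * GL_RGBA32F image has 16-byte texels, so on Gen8 and earlier it is bound
 * as BRW_SURFACEFORMAT_RAW and accessed with untyped messages; narrower
 * formats go through brw_lower_mesa_image_format(), which may substitute a
 * related format that typed surface reads can actually decode.
 */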

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
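
/* Worked example (illustrative, not in the original source): a 4096-byte
 * buffer with format GL_RG32F (8 bytes per texel) yields size[0] =
 * 4096 / 8 = 512 addressable texels and stride[0] = 8.
 */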

static void
update_texture_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = minify(mt->logical_width0, u->Level);
   param->size[1] = minify(mt->logical_height0, u->Level);
   param->size[2] = (!u->Layered ? 1 :
                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                     u->TexObj->Target == GL_TEXTURE_3D ?
                     minify(mt->logical_depth0, u->Level) :
                     mt->logical_depth0);

   intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
                                  &param->offset[0],
                                  &param->offset[1]);

   param->stride[0] = mt->cpp;
   param->stride[1] = mt->pitch / mt->cpp;
   param->stride[2] =
      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
   param->stride[3] =
      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);

   if (mt->tiling == I915_TILING_X) {
      /* An X tile is a rectangular block of 512x8 bytes. */
      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(8);

      if (brw->has_swizzling) {
         /* Right shifts required to swizzle bits 9 and 10 of the memory
          * address with bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 4;
      }
   } else if (mt->tiling == I915_TILING_Y) {
      /* The layout of a Y-tiled surface in memory isn't really fundamentally
       * different to the layout of an X-tiled surface, we simply pretend that
       * the surface is broken up in a number of smaller 16Bx32 tiles, each
       * one arranged in X-major order just like is the case for X-tiling.
       */
      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(32);

      if (brw->has_swizzling) {
         /* Right shift required to swizzle bit 9 of the memory address with
          * bit 6.
          */
         param->swizzling[0] = 3;
      }
   }

   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
    * address calculation algorithm (emit_address_calculation() in
    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
    * modulus equal to the LOD.
    */
   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
                       0);
}
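
/* Address-swizzle example (illustrative, not in the original source): with
 * param->swizzling[0] = 3, the shader's address calculation XORs bit 6 of
 * the byte address with the bit 3 places above it (bit6 ^= bit9), matching
 * the bank swizzling described in the comments above.  The 0xff default
 * from update_default_image_param() pushes the shifted term out of range,
 * so no XOR is applied for linear surfaces.
 */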

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw->vtbl.emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size / texel_size, texel_size,
            access != GL_READ_ONLY);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;

         if (format == BRW_SURFACEFORMAT_RAW) {
            brw->vtbl.emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY);

         } else {
            const unsigned min_layer = obj->MinLayer + u->_Layer;
            const unsigned min_level = obj->MinLevel + u->Level;
            const unsigned num_layers = (!u->Layered ? 1 :
                                         obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                                         mt->logical_depth0);
            const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
                                   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
                                   GL_TEXTURE_2D_ARRAY : obj->Target);

            brw->vtbl.emit_texture_surface_state(
               brw, mt, target,
               min_layer, min_layer + num_layers,
               min_level, min_level + 1,
               format, SWIZZLE_XYZW,
               surf_offset, access != GL_READ_ONLY, false);
         }

         update_texture_image_param(brw, u, surface_idx, param);
      }

   } else {
      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          struct gl_shader *shader,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (shader && shader->NumImages) {
      for (unsigned i = 0; i < shader->NumImages; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, shader->ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                                &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_wm_image_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}
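
/* Note (not in the original source): later generations install their own
 * hooks here (e.g. gen7_init_vtable_surface_functions), which is why the
 * rest of this file calls through brw->vtbl instead of using the Gen4
 * functions directly.
 */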

static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog && brw->cs.prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         brw->cs.prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      drm_intel_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo, &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
                                          bo, bo_offset,
                                          BRW_SURFACEFORMAT_RAW,
                                          3 * sizeof(GLuint), 1, true);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};