i965/wm: use binding size for ubo/ssbo when automatic size is unset
[mesa.git] / src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117 * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
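/* Worked example (illustrative numbers only): a W-tiled stencil miptree with
 * pitch 128, total_width 100, total_height 512 and physical_depth0 4 is
 * exposed as a Y-tiled surface with pitch 256, width ALIGN(100, 8) * 2 = 208
 * and height (512 / 4) / 2 = 64.
 */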
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
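/* Compose the application's swizzle (t->_Swizzle) with the format/depth-mode
 * swizzles computed above: each channel of the user swizzle selects which of
 * the computed swizzles feeds the corresponding output channel.
 */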
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
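/* A buffer surface has no 2D layout of its own, so the element count is
 * packed into the Width/Height/Depth fields below: bits 6:0 go into Width,
 * bits 19:7 into Height and bits 26:20 into Depth.
 */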
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413 * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424 * messages. We need these specifically for the fragment shader since they
425 * include a pixel mask header that we need in order to ensure correct
426 * behavior with helper invocations, which cannot write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
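/* Split the entry count (minus one) across Width/Height/Depth the same way
 * as for any buffer surface: bits 6:0, 19:7 and 26:20 respectively.
 */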
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555 * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572 * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577 * depth buffer’s corresponding state for all render target surfaces,
578 * including null.
579 *
580 * - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591 * multisampling. So work around this problem by rendering into a
592 * dummy color buffer.
593 *
594 * To decrease the amount of memory needed by the workaround buffer, we
595 * set its pitch to 128 bytes (the width of a Y tile). This means that
596 * the amount of memory needed for the workaround buffer is
597 * (width_in_tiles + height_in_tiles - 1) tiles.
598 *
599 * Note that since the workaround buffer will be interpreted by the
600 * hardware as an interleaved multisampled buffer, we need to compute
601 * width_in_tiles and height_in_tiles by dividing the width and height
602 * by 16 rather than the normal Y-tile size of 32.
603 */
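/* For example (hypothetical framebuffer size), a 1920x1080 multisampled
 * framebuffer needs ALIGN(1920, 16) / 16 = 120 plus ALIGN(1080, 16) / 16 = 68
 * tiles, i.e. (120 + 68 - 1) * 4096 bytes, roughly 748 KiB of scratch.
 */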
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647 * While it is only used for the front/back buffer currently, it should be
648 * usable for further buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674 * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these offsets cannot be represented (only
712 * tile_x/4 and tile_y/2 are stored), hence the alignment asserts below.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733 /* Disable writes to the alpha component when the renderbuffer is XRGB
734 * (no alpha bits) or when alpha writes are masked off.
735 */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
881 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
882 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
883 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
884 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
885
886 /* Emit an alternate set of surface state for gather. This
887 * allows the surface format to be overridden for only the
888 * gather4 messages. */
889 if (brw->gen < 8) {
890 if (vs && vs->UsesGather)
891 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
892 if (tcs && tcs->UsesGather)
893 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
894 if (tes && tes->UsesGather)
895 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
896 if (gs && gs->UsesGather)
897 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
898 if (fs && fs->UsesGather)
899 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
900 if (cs && cs->UsesGather)
901 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
902 }
903
904 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
905 }
906
907 const struct brw_tracked_state brw_texture_surfaces = {
908 .dirty = {
909 .mesa = _NEW_TEXTURE,
910 .brw = BRW_NEW_BATCH |
911 BRW_NEW_COMPUTE_PROGRAM |
912 BRW_NEW_FRAGMENT_PROGRAM |
913 BRW_NEW_FS_PROG_DATA |
914 BRW_NEW_GEOMETRY_PROGRAM |
915 BRW_NEW_GS_PROG_DATA |
916 BRW_NEW_TESS_PROGRAMS |
917 BRW_NEW_TCS_PROG_DATA |
918 BRW_NEW_TES_PROG_DATA |
919 BRW_NEW_TEXTURE_BUFFER |
920 BRW_NEW_VERTEX_PROGRAM |
921 BRW_NEW_VS_PROG_DATA,
922 },
923 .emit = brw_update_texture_surfaces,
924 };
925
926 void
927 brw_upload_ubo_surfaces(struct brw_context *brw,
928 struct gl_shader *shader,
929 struct brw_stage_state *stage_state,
930 struct brw_stage_prog_data *prog_data)
931 {
932 struct gl_context *ctx = &brw->ctx;
933
934 if (!shader)
935 return;
936
937 uint32_t *ubo_surf_offsets =
938 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
939
940 for (int i = 0; i < shader->NumUniformBlocks; i++) {
941 struct gl_uniform_buffer_binding *binding =
942 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
943
944 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
945 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
946 } else {
947 struct intel_buffer_object *intel_bo =
948 intel_buffer_object(binding->BufferObject);
949 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
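/* AutomaticSize is set for glBindBufferBase bindings, where the surface
 * should track whatever the buffer currently holds.  For glBindBufferRange
 * bindings we additionally clamp to the user-supplied binding size; the
 * SSBO loop below applies the same rule.
 */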
950 if (!binding->AutomaticSize)
951 size = MIN2(size, binding->Size);
952 drm_intel_bo *bo =
953 intel_bufferobj_buffer(brw, intel_bo,
954 binding->Offset,
955 size);
956 brw_create_constant_surface(brw, bo, binding->Offset,
957 size,
958 &ubo_surf_offsets[i]);
959 }
960 }
961
962 uint32_t *ssbo_surf_offsets =
963 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
964
965 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
966 struct gl_shader_storage_buffer_binding *binding =
967 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
968
969 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
970 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
971 } else {
972 struct intel_buffer_object *intel_bo =
973 intel_buffer_object(binding->BufferObject);
974 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
975 if (!binding->AutomaticSize)
976 size = MIN2(size, binding->Size);
977 drm_intel_bo *bo =
978 intel_bufferobj_buffer(brw, intel_bo,
979 binding->Offset,
980 size);
981 brw_create_buffer_surface(brw, bo, binding->Offset,
982 size,
983 &ssbo_surf_offsets[i]);
984 }
985 }
986
987 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
988 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
989 }
990
991 static void
992 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
993 {
994 struct gl_context *ctx = &brw->ctx;
995 /* _NEW_PROGRAM */
996 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
997
998 if (!prog)
999 return;
1000
1001 /* BRW_NEW_FS_PROG_DATA */
1002 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1003 &brw->wm.base, &brw->wm.prog_data->base);
1004 }
1005
1006 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1007 .dirty = {
1008 .mesa = _NEW_PROGRAM,
1009 .brw = BRW_NEW_BATCH |
1010 BRW_NEW_FS_PROG_DATA |
1011 BRW_NEW_UNIFORM_BUFFER,
1012 },
1013 .emit = brw_upload_wm_ubo_surfaces,
1014 };
1015
1016 static void
1017 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1018 {
1019 struct gl_context *ctx = &brw->ctx;
1020 /* _NEW_PROGRAM */
1021 struct gl_shader_program *prog =
1022 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1023
1024 if (!prog)
1025 return;
1026
1027 /* BRW_NEW_CS_PROG_DATA */
1028 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1029 &brw->cs.base, &brw->cs.prog_data->base);
1030 }
1031
1032 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1033 .dirty = {
1034 .mesa = _NEW_PROGRAM,
1035 .brw = BRW_NEW_BATCH |
1036 BRW_NEW_CS_PROG_DATA |
1037 BRW_NEW_UNIFORM_BUFFER,
1038 },
1039 .emit = brw_upload_cs_ubo_surfaces,
1040 };
1041
1042 void
1043 brw_upload_abo_surfaces(struct brw_context *brw,
1044 struct gl_shader *shader,
1045 struct brw_stage_state *stage_state,
1046 struct brw_stage_prog_data *prog_data)
1047 {
1048 struct gl_context *ctx = &brw->ctx;
1049 uint32_t *surf_offsets =
1050 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1051
1052 if (shader && shader->NumAtomicBuffers) {
1053 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1054 struct gl_atomic_buffer_binding *binding =
1055 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1056 struct intel_buffer_object *intel_bo =
1057 intel_buffer_object(binding->BufferObject);
1058 drm_intel_bo *bo = intel_bufferobj_buffer(
1059 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1060
1061 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1062 binding->Offset, BRW_SURFACEFORMAT_RAW,
1063 bo->size - binding->Offset, 1, true);
1064 }
1065
1066 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1067 }
1068 }
1069
1070 static void
1071 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1072 {
1073 struct gl_context *ctx = &brw->ctx;
1074 /* _NEW_PROGRAM */
1075 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1076
1077 if (prog) {
1078 /* BRW_NEW_FS_PROG_DATA */
1079 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1080 &brw->wm.base, &brw->wm.prog_data->base);
1081 }
1082 }
1083
1084 const struct brw_tracked_state brw_wm_abo_surfaces = {
1085 .dirty = {
1086 .mesa = _NEW_PROGRAM,
1087 .brw = BRW_NEW_ATOMIC_BUFFER |
1088 BRW_NEW_BATCH |
1089 BRW_NEW_FS_PROG_DATA,
1090 },
1091 .emit = brw_upload_wm_abo_surfaces,
1092 };
1093
1094 static void
1095 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1096 {
1097 struct gl_context *ctx = &brw->ctx;
1098 /* _NEW_PROGRAM */
1099 struct gl_shader_program *prog =
1100 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1101
1102 if (prog) {
1103 /* BRW_NEW_CS_PROG_DATA */
1104 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1105 &brw->cs.base, &brw->cs.prog_data->base);
1106 }
1107 }
1108
1109 const struct brw_tracked_state brw_cs_abo_surfaces = {
1110 .dirty = {
1111 .mesa = _NEW_PROGRAM,
1112 .brw = BRW_NEW_ATOMIC_BUFFER |
1113 BRW_NEW_BATCH |
1114 BRW_NEW_CS_PROG_DATA,
1115 },
1116 .emit = brw_upload_cs_abo_surfaces,
1117 };
1118
1119 static void
1120 brw_upload_cs_image_surfaces(struct brw_context *brw)
1121 {
1122 struct gl_context *ctx = &brw->ctx;
1123 /* _NEW_PROGRAM */
1124 struct gl_shader_program *prog =
1125 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1126
1127 if (prog) {
1128 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1129 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1130 &brw->cs.base, &brw->cs.prog_data->base);
1131 }
1132 }
1133
1134 const struct brw_tracked_state brw_cs_image_surfaces = {
1135 .dirty = {
1136 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1137 .brw = BRW_NEW_BATCH |
1138 BRW_NEW_CS_PROG_DATA |
1139 BRW_NEW_IMAGE_UNITS
1140 },
1141 .emit = brw_upload_cs_image_surfaces,
1142 };
1143
1144 static uint32_t
1145 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1146 {
1147 if (access == GL_WRITE_ONLY) {
1148 return brw_format_for_mesa_format(format);
1149 } else {
1150 /* Typed surface reads support a very limited subset of the shader
1151 * image formats. Translate it into the closest format the
1152 * hardware supports.
1153 */
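/* For example, a 16-byte format such as MESA_FORMAT_RGBA_FLOAT32 takes the
 * RAW fallback on Gen8 and earlier, and 8-byte formats additionally do so on
 * Ivybridge-class Gen7 parts (gen == 7 && !is_haswell).
 */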
1154 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1155 (_mesa_get_format_bytes(format) >= 8 &&
1156 (brw->gen == 7 && !brw->is_haswell)))
1157 return BRW_SURFACEFORMAT_RAW;
1158 else
1159 return brw_format_for_mesa_format(
1160 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1161 }
1162 }
1163
1164 static void
1165 update_default_image_param(struct brw_context *brw,
1166 struct gl_image_unit *u,
1167 unsigned surface_idx,
1168 struct brw_image_param *param)
1169 {
1170 memset(param, 0, sizeof(*param));
1171 param->surface_idx = surface_idx;
1172 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1173 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1174 * detailed explanation of these parameters.
1175 */
1176 param->swizzling[0] = 0xff;
1177 param->swizzling[1] = 0xff;
1178 }
1179
1180 static void
1181 update_buffer_image_param(struct brw_context *brw,
1182 struct gl_image_unit *u,
1183 unsigned surface_idx,
1184 struct brw_image_param *param)
1185 {
1186 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1187
1188 update_default_image_param(brw, u, surface_idx, param);
1189
1190 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1191 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1192 }
1193
1194 static void
1195 update_texture_image_param(struct brw_context *brw,
1196 struct gl_image_unit *u,
1197 unsigned surface_idx,
1198 struct brw_image_param *param)
1199 {
1200 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1201
1202 update_default_image_param(brw, u, surface_idx, param);
1203
1204 param->size[0] = minify(mt->logical_width0, u->Level);
1205 param->size[1] = minify(mt->logical_height0, u->Level);
1206 param->size[2] = (!u->Layered ? 1 :
1207 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1208 u->TexObj->Target == GL_TEXTURE_3D ?
1209 minify(mt->logical_depth0, u->Level) :
1210 mt->logical_depth0);
1211
1212 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1213 &param->offset[0],
1214 &param->offset[1]);
1215
1216 param->stride[0] = mt->cpp;
1217 param->stride[1] = mt->pitch / mt->cpp;
1218 param->stride[2] =
1219 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1220 param->stride[3] =
1221 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1222
1223 if (mt->tiling == I915_TILING_X) {
1224 /* An X tile is a rectangular block of 512x8 bytes. */
1225 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1226 param->tiling[1] = _mesa_logbase2(8);
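/* E.g. with a 4-byte-per-texel format (cpp == 4) this gives
 * tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8) = 3,
 * i.e. a 128x8-texel tile footprint.
 */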
1227
1228 if (brw->has_swizzling) {
1229 /* Right shifts required to swizzle bits 9 and 10 of the memory
1230 * address with bit 6.
1231 */
1232 param->swizzling[0] = 3;
1233 param->swizzling[1] = 4;
1234 }
1235 } else if (mt->tiling == I915_TILING_Y) {
1236 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1237 * different from the layout of an X-tiled surface; we simply pretend that
1238 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1239 * one arranged in X-major order just as is the case for X-tiling.
1240 */
1241 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1242 param->tiling[1] = _mesa_logbase2(32);
1243
1244 if (brw->has_swizzling) {
1245 /* Right shift required to swizzle bit 9 of the memory address with
1246 * bit 6.
1247 */
1248 param->swizzling[0] = 3;
1249 }
1250 }
1251
1252 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1253 * address calculation algorithm (emit_address_calculation() in
1254 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1255 * modulus equal to the LOD.
1256 */
1257 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1258 0);
1259 }
1260
1261 static void
1262 update_image_surface(struct brw_context *brw,
1263 struct gl_image_unit *u,
1264 GLenum access,
1265 unsigned surface_idx,
1266 uint32_t *surf_offset,
1267 struct brw_image_param *param)
1268 {
1269 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1270 struct gl_texture_object *obj = u->TexObj;
1271 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1272
1273 if (obj->Target == GL_TEXTURE_BUFFER) {
1274 struct intel_buffer_object *intel_obj =
1275 intel_buffer_object(obj->BufferObject);
1276 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1277 _mesa_get_format_bytes(u->_ActualFormat));
1278
1279 brw->vtbl.emit_buffer_surface_state(
1280 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1281 format, intel_obj->Base.Size / texel_size, texel_size,
1282 access != GL_READ_ONLY);
1283
1284 update_buffer_image_param(brw, u, surface_idx, param);
1285
1286 } else {
1287 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1288 struct intel_mipmap_tree *mt = intel_obj->mt;
1289
1290 if (format == BRW_SURFACEFORMAT_RAW) {
1291 brw->vtbl.emit_buffer_surface_state(
1292 brw, surf_offset, mt->bo, mt->offset,
1293 format, mt->bo->size - mt->offset, 1 /* pitch */,
1294 access != GL_READ_ONLY);
1295
1296 } else {
1297 const unsigned min_layer = obj->MinLayer + u->_Layer;
1298 const unsigned min_level = obj->MinLevel + u->Level;
1299 const unsigned num_layers = (!u->Layered ? 1 :
1300 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1301 mt->logical_depth0);
1302 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1303 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1304 GL_TEXTURE_2D_ARRAY : obj->Target);
1305
1306 brw->vtbl.emit_texture_surface_state(
1307 brw, mt, target,
1308 min_layer, min_layer + num_layers,
1309 min_level, min_level + 1,
1310 format, SWIZZLE_XYZW,
1311 surf_offset, access != GL_READ_ONLY, false);
1312 }
1313
1314 update_texture_image_param(brw, u, surface_idx, param);
1315 }
1316
1317 } else {
1318 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1319 update_default_image_param(brw, u, surface_idx, param);
1320 }
1321 }
1322
1323 void
1324 brw_upload_image_surfaces(struct brw_context *brw,
1325 struct gl_shader *shader,
1326 struct brw_stage_state *stage_state,
1327 struct brw_stage_prog_data *prog_data)
1328 {
1329 struct gl_context *ctx = &brw->ctx;
1330
1331 if (shader && shader->NumImages) {
1332 for (unsigned i = 0; i < shader->NumImages; i++) {
1333 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1334 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1335
1336 update_image_surface(brw, u, shader->ImageAccess[i],
1337 surf_idx,
1338 &stage_state->surf_offset[surf_idx],
1339 &prog_data->image_param[i]);
1340 }
1341
1342 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1343 }
1344 }
1345
1346 static void
1347 brw_upload_wm_image_surfaces(struct brw_context *brw)
1348 {
1349 struct gl_context *ctx = &brw->ctx;
1350 /* BRW_NEW_FRAGMENT_PROGRAM */
1351 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1352
1353 if (prog) {
1354 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1355 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1356 &brw->wm.base, &brw->wm.prog_data->base);
1357 }
1358 }
1359
1360 const struct brw_tracked_state brw_wm_image_surfaces = {
1361 .dirty = {
1362 .mesa = _NEW_TEXTURE,
1363 .brw = BRW_NEW_BATCH |
1364 BRW_NEW_FRAGMENT_PROGRAM |
1365 BRW_NEW_FS_PROG_DATA |
1366 BRW_NEW_IMAGE_UNITS
1367 },
1368 .emit = brw_upload_wm_image_surfaces,
1369 };
1370
1371 void
1372 gen4_init_vtable_surface_functions(struct brw_context *brw)
1373 {
1374 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1375 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1376 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1377 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1378 }
1379
1380 static void
1381 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1382 {
1383 struct gl_context *ctx = &brw->ctx;
1384 /* _NEW_PROGRAM */
1385 struct gl_shader_program *prog =
1386 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1387
1388 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1389 const unsigned surf_idx =
1390 brw->cs.prog_data->binding_table.work_groups_start;
1391 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1392 drm_intel_bo *bo;
1393 uint32_t bo_offset;
1394
1395 if (brw->compute.num_work_groups_bo == NULL) {
1396 bo = NULL;
1397 intel_upload_data(brw,
1398 (void *)brw->compute.num_work_groups,
1399 3 * sizeof(GLuint),
1400 sizeof(GLuint),
1401 &bo,
1402 &bo_offset);
1403 } else {
1404 bo = brw->compute.num_work_groups_bo;
1405 bo_offset = brw->compute.num_work_groups_offset;
1406 }
1407
1408 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1409 bo, bo_offset,
1410 BRW_SURFACEFORMAT_RAW,
1411 3 * sizeof(GLuint), 1, true);
1412 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1413 }
1414 }
1415
1416 const struct brw_tracked_state brw_cs_work_groups_surface = {
1417 .dirty = {
1418 .brw = BRW_NEW_CS_WORK_GROUPS
1419 },
1420 .emit = brw_upload_cs_work_groups_surface,
1421 };