Revert "i965/wm: use proper API buffer size for the surfaces."
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114       /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115        * there are half as many rows. In addition, mip-levels are accessed
116        * manually by the program, and therefore the surface is set up to cover
117        * all the mip-levels for one slice.
118        * (Hardware is still used to access individual slices.)
119        */
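      /* A worked example with illustrative numbers (not taken from any real
       * miptree): if mt->pitch = 64, mt->total_width = 64, mt->total_height = 128
       * and mt->physical_depth0 = 1, the code below produces pitch = 128,
       * width = ALIGN(64, 8) * 2 = 128 and height = (128 / 1) / 2 = 64, i.e. the
       * same storage viewed as a Y-tiled surface with twice the width and half
       * the rows.
       */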
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
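      /* The hardware swizzle routes each component of the user swizzle
       * (t->_Swizzle) through the table built above. Illustrative example: with
       * swizzles = {X, ZERO, ZERO, ONE} (GL_RED depth mode) and an identity user
       * swizzle, the result below is MAKE_SWIZZLE4(X, ZERO, ZERO, ONE).
       */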
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
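      /* The width, height and depth fields below jointly hold buffer_size as a
       * 27-bit value: bits 6:0 in width, bits 19:7 in height and bits 26:20 in
       * depth. Illustrative example: buffer_size = 0x12345 encodes as width 0x45,
       * height 0x246 and depth 0.
       */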
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
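      /* The surface below is sized in texels of the translated format:
       * size / texel_size elements, with texel_size bytes per element passed as
       * the pitch.
       */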
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413  * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424  * messages. We need these messages specifically for the fragment shader,
425  * since they include a pixel mask header that lets us ensure correct
426  * behavior for helper invocations, which must not write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
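      /* buffer_size_minus_1 counts how many additional transform feedback
       * outputs (in units of stride_dwords) fit in the buffer, and is split
       * across the width, height and depth fields of the surface: bits 6:0,
       * 19:7 and 26:20 respectively. Illustrative example:
       * buffer_size_minus_1 = 1000 gives width = 104, height = 7 and depth = 0.
       */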
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555  * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572  * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576  *   - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577  *     depth buffer’s corresponding state for all render target surfaces,
578  *     including null.
579  *
580  *   - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591  * multisampling. So work around this problem by rendering into a dummy
592  * color buffer.
593  *
594  * To decrease the amount of memory needed by the workaround buffer, we
595  * set its pitch to 128 bytes (the width of a Y tile). This means that
596  * the amount of memory needed for the workaround buffer is
597  * (width_in_tiles + height_in_tiles - 1) tiles.
598  *
599  * Note that since the workaround buffer will be interpreted by the
600  * hardware as an interleaved multisampled buffer, we need to compute
601  * width_in_tiles and height_in_tiles by dividing the width and height
602  * by 16 rather than the normal Y-tile size of 32.
603 */
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
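      /* Illustrative example: for a 1920x1080 multisampled target this gives
       * width_in_tiles = 120 and height_in_tiles = 68, so size_needed =
       * 187 * 4096 bytes (roughly 750 KiB) rather than a full-size dummy buffer.
       */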
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647  * While it is currently only used for the front/back buffer, it should be
648  * usable for other buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674  * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these fields are missing, so
712  * there's the possibility of getting into trouble.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733  /* Disable writes to the alpha component when the renderbuffer is XRGB
734  * (the visual has no alpha bits) or alpha writes are masked off.
735  */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* BRW_NEW_COMPUTE_PROGRAM */
876 struct gl_program *cs = (struct gl_program *) brw->compute_program;
877
878 /* _NEW_TEXTURE */
879 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
880 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
881 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
882 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
883 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
884 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
885
886  /* Emit an alternate set of surface state for gather. This
887  * allows the surface format to be overridden for only the
888  * gather4 messages. */
889 if (brw->gen < 8) {
890 if (vs && vs->UsesGather)
891 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
892 if (tcs && tcs->UsesGather)
893 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
894 if (tes && tes->UsesGather)
895 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
896 if (gs && gs->UsesGather)
897 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
898 if (fs && fs->UsesGather)
899 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
900 if (cs && cs->UsesGather)
901 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
902 }
903
904 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
905 }
906
907 const struct brw_tracked_state brw_texture_surfaces = {
908 .dirty = {
909 .mesa = _NEW_TEXTURE,
910 .brw = BRW_NEW_BATCH |
911 BRW_NEW_COMPUTE_PROGRAM |
912 BRW_NEW_FRAGMENT_PROGRAM |
913 BRW_NEW_FS_PROG_DATA |
914 BRW_NEW_GEOMETRY_PROGRAM |
915 BRW_NEW_GS_PROG_DATA |
916 BRW_NEW_TESS_PROGRAMS |
917 BRW_NEW_TCS_PROG_DATA |
918 BRW_NEW_TES_PROG_DATA |
919 BRW_NEW_TEXTURE_BUFFER |
920 BRW_NEW_VERTEX_PROGRAM |
921 BRW_NEW_VS_PROG_DATA,
922 },
923 .emit = brw_update_texture_surfaces,
924 };
925
926 void
927 brw_upload_ubo_surfaces(struct brw_context *brw,
928 struct gl_shader *shader,
929 struct brw_stage_state *stage_state,
930 struct brw_stage_prog_data *prog_data)
931 {
932 struct gl_context *ctx = &brw->ctx;
933
934 if (!shader)
935 return;
936
937 uint32_t *ubo_surf_offsets =
938 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
939
940 for (int i = 0; i < shader->NumUniformBlocks; i++) {
941 struct gl_uniform_buffer_binding *binding =
942 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
943
944 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
945 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
946 } else {
947 struct intel_buffer_object *intel_bo =
948 intel_buffer_object(binding->BufferObject);
949 drm_intel_bo *bo =
950 intel_bufferobj_buffer(brw, intel_bo,
951 binding->Offset,
952 binding->BufferObject->Size - binding->Offset);
953 brw_create_constant_surface(brw, bo, binding->Offset,
954 binding->BufferObject->Size - binding->Offset,
955 &ubo_surf_offsets[i]);
956 }
957 }
958
959 uint32_t *ssbo_surf_offsets =
960 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
961
962 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
963 struct gl_shader_storage_buffer_binding *binding =
964 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
965
966 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
967 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
968 } else {
969 struct intel_buffer_object *intel_bo =
970 intel_buffer_object(binding->BufferObject);
971 drm_intel_bo *bo =
972 intel_bufferobj_buffer(brw, intel_bo,
973 binding->Offset,
974 binding->BufferObject->Size - binding->Offset);
975 brw_create_buffer_surface(brw, bo, binding->Offset,
976 binding->BufferObject->Size - binding->Offset,
977 &ssbo_surf_offsets[i]);
978 }
979 }
980
981 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
982 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
983 }
984
985 static void
986 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
987 {
988 struct gl_context *ctx = &brw->ctx;
989 /* _NEW_PROGRAM */
990 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
991
992 if (!prog)
993 return;
994
995 /* BRW_NEW_FS_PROG_DATA */
996 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
997 &brw->wm.base, &brw->wm.prog_data->base);
998 }
999
1000 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1001 .dirty = {
1002 .mesa = _NEW_PROGRAM,
1003 .brw = BRW_NEW_BATCH |
1004 BRW_NEW_FS_PROG_DATA |
1005 BRW_NEW_UNIFORM_BUFFER,
1006 },
1007 .emit = brw_upload_wm_ubo_surfaces,
1008 };
1009
1010 static void
1011 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1012 {
1013 struct gl_context *ctx = &brw->ctx;
1014 /* _NEW_PROGRAM */
1015 struct gl_shader_program *prog =
1016 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1017
1018 if (!prog)
1019 return;
1020
1021 /* BRW_NEW_CS_PROG_DATA */
1022 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1023 &brw->cs.base, &brw->cs.prog_data->base);
1024 }
1025
1026 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1027 .dirty = {
1028 .mesa = _NEW_PROGRAM,
1029 .brw = BRW_NEW_BATCH |
1030 BRW_NEW_CS_PROG_DATA |
1031 BRW_NEW_UNIFORM_BUFFER,
1032 },
1033 .emit = brw_upload_cs_ubo_surfaces,
1034 };
1035
1036 void
1037 brw_upload_abo_surfaces(struct brw_context *brw,
1038 struct gl_shader *shader,
1039 struct brw_stage_state *stage_state,
1040 struct brw_stage_prog_data *prog_data)
1041 {
1042 struct gl_context *ctx = &brw->ctx;
1043 uint32_t *surf_offsets =
1044 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1045
1046 if (shader && shader->NumAtomicBuffers) {
1047 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1048 struct gl_atomic_buffer_binding *binding =
1049 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1050 struct intel_buffer_object *intel_bo =
1051 intel_buffer_object(binding->BufferObject);
1052 drm_intel_bo *bo = intel_bufferobj_buffer(
1053 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1054
1055 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1056 binding->Offset, BRW_SURFACEFORMAT_RAW,
1057 bo->size - binding->Offset, 1, true);
1058 }
1059
1060 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1061 }
1062 }
1063
1064 static void
1065 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1066 {
1067 struct gl_context *ctx = &brw->ctx;
1068 /* _NEW_PROGRAM */
1069 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1070
1071 if (prog) {
1072 /* BRW_NEW_FS_PROG_DATA */
1073 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1074 &brw->wm.base, &brw->wm.prog_data->base);
1075 }
1076 }
1077
1078 const struct brw_tracked_state brw_wm_abo_surfaces = {
1079 .dirty = {
1080 .mesa = _NEW_PROGRAM,
1081 .brw = BRW_NEW_ATOMIC_BUFFER |
1082 BRW_NEW_BATCH |
1083 BRW_NEW_FS_PROG_DATA,
1084 },
1085 .emit = brw_upload_wm_abo_surfaces,
1086 };
1087
1088 static void
1089 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1090 {
1091 struct gl_context *ctx = &brw->ctx;
1092 /* _NEW_PROGRAM */
1093 struct gl_shader_program *prog =
1094 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1095
1096 if (prog) {
1097 /* BRW_NEW_CS_PROG_DATA */
1098 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1099 &brw->cs.base, &brw->cs.prog_data->base);
1100 }
1101 }
1102
1103 const struct brw_tracked_state brw_cs_abo_surfaces = {
1104 .dirty = {
1105 .mesa = _NEW_PROGRAM,
1106 .brw = BRW_NEW_ATOMIC_BUFFER |
1107 BRW_NEW_BATCH |
1108 BRW_NEW_CS_PROG_DATA,
1109 },
1110 .emit = brw_upload_cs_abo_surfaces,
1111 };
1112
1113 static void
1114 brw_upload_cs_image_surfaces(struct brw_context *brw)
1115 {
1116 struct gl_context *ctx = &brw->ctx;
1117 /* _NEW_PROGRAM */
1118 struct gl_shader_program *prog =
1119 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1120
1121 if (prog) {
1122 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1123 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1124 &brw->cs.base, &brw->cs.prog_data->base);
1125 }
1126 }
1127
1128 const struct brw_tracked_state brw_cs_image_surfaces = {
1129 .dirty = {
1130 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1131 .brw = BRW_NEW_BATCH |
1132 BRW_NEW_CS_PROG_DATA |
1133 BRW_NEW_IMAGE_UNITS
1134 },
1135 .emit = brw_upload_cs_image_surfaces,
1136 };
1137
1138 static uint32_t
1139 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1140 {
1141 if (access == GL_WRITE_ONLY) {
1142 return brw_format_for_mesa_format(format);
1143 } else {
1144 /* Typed surface reads support a very limited subset of the shader
1145 * image formats. Translate it into the closest format the
1146 * hardware supports.
1147 */
1148 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1149 (_mesa_get_format_bytes(format) >= 8 &&
1150 (brw->gen == 7 && !brw->is_haswell)))
1151 return BRW_SURFACEFORMAT_RAW;
1152 else
1153 return brw_format_for_mesa_format(
1154 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1155 }
1156 }
1157
1158 static void
1159 update_default_image_param(struct brw_context *brw,
1160 struct gl_image_unit *u,
1161 unsigned surface_idx,
1162 struct brw_image_param *param)
1163 {
1164 memset(param, 0, sizeof(*param));
1165 param->surface_idx = surface_idx;
1166 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1167 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1168 * detailed explanation of these parameters.
1169 */
1170 param->swizzling[0] = 0xff;
1171 param->swizzling[1] = 0xff;
1172 }
1173
1174 static void
1175 update_buffer_image_param(struct brw_context *brw,
1176 struct gl_image_unit *u,
1177 unsigned surface_idx,
1178 struct brw_image_param *param)
1179 {
1180 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1181
1182 update_default_image_param(brw, u, surface_idx, param);
1183
1184 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1185 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1186 }
1187
1188 static void
1189 update_texture_image_param(struct brw_context *brw,
1190 struct gl_image_unit *u,
1191 unsigned surface_idx,
1192 struct brw_image_param *param)
1193 {
1194 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1195
1196 update_default_image_param(brw, u, surface_idx, param);
1197
1198 param->size[0] = minify(mt->logical_width0, u->Level);
1199 param->size[1] = minify(mt->logical_height0, u->Level);
1200 param->size[2] = (!u->Layered ? 1 :
1201 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1202 u->TexObj->Target == GL_TEXTURE_3D ?
1203 minify(mt->logical_depth0, u->Level) :
1204 mt->logical_depth0);
1205
1206 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1207 &param->offset[0],
1208 &param->offset[1]);
1209
1210 param->stride[0] = mt->cpp;
1211 param->stride[1] = mt->pitch / mt->cpp;
1212 param->stride[2] =
1213 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1214 param->stride[3] =
1215 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1216
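      /* Illustrative example of the shifts computed below, assuming a 4-byte
       * format (mt->cpp = 4): X-tiling gives tiling[0] = log2(512 / 4) = 7 and
       * tiling[1] = log2(8) = 3, while Y-tiling gives tiling[0] = log2(16 / 4) = 2
       * and tiling[1] = log2(32) = 5.
       */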
1217 if (mt->tiling == I915_TILING_X) {
1218 /* An X tile is a rectangular block of 512x8 bytes. */
1219 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1220 param->tiling[1] = _mesa_logbase2(8);
1221
1222 if (brw->has_swizzling) {
1223 /* Right shifts required to swizzle bits 9 and 10 of the memory
1224 * address with bit 6.
1225 */
1226 param->swizzling[0] = 3;
1227 param->swizzling[1] = 4;
1228 }
1229 } else if (mt->tiling == I915_TILING_Y) {
1230       /* The layout of a Y-tiled surface in memory isn't fundamentally
1231        * different from the layout of an X-tiled surface: we simply pretend
1232        * that the surface is broken up into a number of smaller 16Bx32 tiles,
1233        * each one arranged in X-major order just as in the case of X-tiling.
1234 */
1235 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1236 param->tiling[1] = _mesa_logbase2(32);
1237
1238 if (brw->has_swizzling) {
1239 /* Right shift required to swizzle bit 9 of the memory address with
1240 * bit 6.
1241 */
1242 param->swizzling[0] = 3;
1243 }
1244 }
1245
1246 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1247 * address calculation algorithm (emit_address_calculation() in
1248 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1249 * modulus equal to the LOD.
1250 */
1251 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1252 0);
1253 }
1254
1255 static void
1256 update_image_surface(struct brw_context *brw,
1257 struct gl_image_unit *u,
1258 GLenum access,
1259 unsigned surface_idx,
1260 uint32_t *surf_offset,
1261 struct brw_image_param *param)
1262 {
1263 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1264 struct gl_texture_object *obj = u->TexObj;
1265 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1266
1267 if (obj->Target == GL_TEXTURE_BUFFER) {
1268 struct intel_buffer_object *intel_obj =
1269 intel_buffer_object(obj->BufferObject);
1270 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1271 _mesa_get_format_bytes(u->_ActualFormat));
1272
1273 brw->vtbl.emit_buffer_surface_state(
1274 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1275 format, intel_obj->Base.Size / texel_size, texel_size,
1276 access != GL_READ_ONLY);
1277
1278 update_buffer_image_param(brw, u, surface_idx, param);
1279
1280 } else {
1281 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1282 struct intel_mipmap_tree *mt = intel_obj->mt;
1283
1284 if (format == BRW_SURFACEFORMAT_RAW) {
1285 brw->vtbl.emit_buffer_surface_state(
1286 brw, surf_offset, mt->bo, mt->offset,
1287 format, mt->bo->size - mt->offset, 1 /* pitch */,
1288 access != GL_READ_ONLY);
1289
1290 } else {
1291 const unsigned min_layer = obj->MinLayer + u->_Layer;
1292 const unsigned min_level = obj->MinLevel + u->Level;
1293 const unsigned num_layers = (!u->Layered ? 1 :
1294 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1295 mt->logical_depth0);
1296 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1297 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1298 GL_TEXTURE_2D_ARRAY : obj->Target);
1299
1300 brw->vtbl.emit_texture_surface_state(
1301 brw, mt, target,
1302 min_layer, min_layer + num_layers,
1303 min_level, min_level + 1,
1304 format, SWIZZLE_XYZW,
1305 surf_offset, access != GL_READ_ONLY, false);
1306 }
1307
1308 update_texture_image_param(brw, u, surface_idx, param);
1309 }
1310
1311 } else {
1312 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1313 update_default_image_param(brw, u, surface_idx, param);
1314 }
1315 }
1316
1317 void
1318 brw_upload_image_surfaces(struct brw_context *brw,
1319 struct gl_shader *shader,
1320 struct brw_stage_state *stage_state,
1321 struct brw_stage_prog_data *prog_data)
1322 {
1323 struct gl_context *ctx = &brw->ctx;
1324
1325 if (shader && shader->NumImages) {
1326 for (unsigned i = 0; i < shader->NumImages; i++) {
1327 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1328 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1329
1330 update_image_surface(brw, u, shader->ImageAccess[i],
1331 surf_idx,
1332 &stage_state->surf_offset[surf_idx],
1333 &prog_data->image_param[i]);
1334 }
1335
1336 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1337 }
1338 }
1339
1340 static void
1341 brw_upload_wm_image_surfaces(struct brw_context *brw)
1342 {
1343 struct gl_context *ctx = &brw->ctx;
1344 /* BRW_NEW_FRAGMENT_PROGRAM */
1345 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1346
1347 if (prog) {
1348 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1349 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1350 &brw->wm.base, &brw->wm.prog_data->base);
1351 }
1352 }
1353
1354 const struct brw_tracked_state brw_wm_image_surfaces = {
1355 .dirty = {
1356 .mesa = _NEW_TEXTURE,
1357 .brw = BRW_NEW_BATCH |
1358 BRW_NEW_FRAGMENT_PROGRAM |
1359 BRW_NEW_FS_PROG_DATA |
1360 BRW_NEW_IMAGE_UNITS
1361 },
1362 .emit = brw_upload_wm_image_surfaces,
1363 };
1364
1365 void
1366 gen4_init_vtable_surface_functions(struct brw_context *brw)
1367 {
1368 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1369 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1370 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1371 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1372 }
1373
1374 static void
1375 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1376 {
1377 struct gl_context *ctx = &brw->ctx;
1378 /* _NEW_PROGRAM */
1379 struct gl_shader_program *prog =
1380 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1381
1382 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1383 const unsigned surf_idx =
1384 brw->cs.prog_data->binding_table.work_groups_start;
1385 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1386 drm_intel_bo *bo;
1387 uint32_t bo_offset;
1388
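      /* This surface backs the gl_NumWorkGroups built-in as a 12-byte RAW
       * buffer. If no BO already holds the counts, upload the three GLuints
       * from brw->compute.num_work_groups; otherwise (presumably the
       * indirect-dispatch path, where the counts already live in a GPU buffer)
       * reuse that BO and offset directly.
       */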
1389 if (brw->compute.num_work_groups_bo == NULL) {
1390 bo = NULL;
1391 intel_upload_data(brw,
1392 (void *)brw->compute.num_work_groups,
1393 3 * sizeof(GLuint),
1394 sizeof(GLuint),
1395 &bo,
1396 &bo_offset);
1397 } else {
1398 bo = brw->compute.num_work_groups_bo;
1399 bo_offset = brw->compute.num_work_groups_offset;
1400 }
1401
1402 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1403 bo, bo_offset,
1404 BRW_SURFACEFORMAT_RAW,
1405 3 * sizeof(GLuint), 1, true);
1406 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1407 }
1408 }
1409
1410 const struct brw_tracked_state brw_cs_work_groups_surface = {
1411 .dirty = {
1412 .brw = BRW_NEW_CS_WORK_GROUPS
1413 },
1414 .emit = brw_upload_cs_work_groups_surface,
1415 };