i965: Allow texture surface state setup to be used by blorp
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
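/**
 * Translate a GL texture target into the corresponding BRW_SURFACE_* type
 * for SURFACE_STATE.  Rectangle textures map to the ordinary 2D surface
 * type, and cube map arrays map to the cube type.
 */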
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
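/**
 * Translate an I915_TILING_* mode into the tiling bits of SURFACE_STATE
 * dword 3.  Linear (untiled) surfaces return 0.
 */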
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
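/**
 * Encode the sample count field of SURFACE_STATE.  The generations using
 * this layout support at most 4x MSAA, so any sample count greater than
 * one is encoded as MULTISAMPLECOUNT_4.
 */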
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114 /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117 * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
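/**
 * Emit a Gen4-style SURFACE_STATE of type BUFFER.  The buffer size is
 * packed into the width/height/depth fields, and a relocation is emitted
 * so the kernel can patch in the buffer address at execution time.
 */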
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
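/**
 * Set up SURFACE_STATE for a buffer texture (GL_TEXTURE_BUFFER).  The size
 * is clamped to the underlying buffer object, and unsupported formats are
 * reported through _mesa_problem().
 */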
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
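/**
 * Construct SURFACE_STATE for a GL texture using the Gen4-6 layout,
 * including the Sandybridge gather4 format workaround when for_gather is
 * set.
 */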
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413 * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424 * messages. We need these specifically for the fragment shader since they
425 * include a pixel mask header that we need in order to ensure correct
426 * behavior with helper invocations, which cannot write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555 * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572 * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577 * depth buffer’s corresponding state for all render target surfaces,
578 * including null.
579 *
580 * - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591 * multisampling. So work around this problem by rendering into a dummy
592 * color buffer.
593 *
594 * To decrease the amount of memory needed by the workaround buffer, we
595 * set its pitch to 128 bytes (the width of a Y tile). This means that
596 * the amount of memory needed for the workaround buffer is
597 * (width_in_tiles + height_in_tiles - 1) tiles.
598 *
599 * Note that since the workaround buffer will be interpreted by the
600 * hardware as an interleaved multisampled buffer, we need to compute
601 * width_in_tiles and height_in_tiles by dividing the width and height
602 * by 16 rather than the normal Y-tile size of 32.
603 */
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647 * While it is only used for the front/back buffer currently, it should be
648 * usable for further buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674 * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these fields are missing, so
712 * there's the possibility of getting into trouble.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733 /* Disable writes to the alpha component when the
734 * renderbuffer is XRGB.
735 */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
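/**
 * Emit texture surface state for every sampler used by the given program
 * stage.  When for_gather is set, the surfaces land in the gather section
 * of the binding table instead of the normal texture section.
 */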
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* _NEW_TEXTURE */
876 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
877 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
878 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
879 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
880 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
881
882 /* Emit an alternate set of surface state for gather. This
883 * allows the surface format to be overridden for only the
884 * gather4 messages. */
885 if (brw->gen < 8) {
886 if (vs && vs->UsesGather)
887 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
888 if (tcs && tcs->UsesGather)
889 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
890 if (tes && tes->UsesGather)
891 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
892 if (gs && gs->UsesGather)
893 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
894 if (fs && fs->UsesGather)
895 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
896 }
897
898 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
899 }
900
901 const struct brw_tracked_state brw_texture_surfaces = {
902 .dirty = {
903 .mesa = _NEW_TEXTURE,
904 .brw = BRW_NEW_BATCH |
905 BRW_NEW_FRAGMENT_PROGRAM |
906 BRW_NEW_FS_PROG_DATA |
907 BRW_NEW_GEOMETRY_PROGRAM |
908 BRW_NEW_GS_PROG_DATA |
909 BRW_NEW_TESS_PROGRAMS |
910 BRW_NEW_TCS_PROG_DATA |
911 BRW_NEW_TES_PROG_DATA |
912 BRW_NEW_TEXTURE_BUFFER |
913 BRW_NEW_VERTEX_PROGRAM |
914 BRW_NEW_VS_PROG_DATA,
915 },
916 .emit = brw_update_texture_surfaces,
917 };
918
919 static void
920 brw_update_cs_texture_surfaces(struct brw_context *brw)
921 {
922 /* BRW_NEW_COMPUTE_PROGRAM */
923 struct gl_program *cs = (struct gl_program *) brw->compute_program;
924
925 /* _NEW_TEXTURE */
926 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
927
928 /* Emit an alternate set of surface state for gather. This
929 * allows the surface format to be overridden for only the
930 * gather4 messages.
931 */
932 if (brw->gen < 8) {
933 if (cs && cs->UsesGather)
934 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
935 }
936
937 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
938 }
939
940 const struct brw_tracked_state brw_cs_texture_surfaces = {
941 .dirty = {
942 .mesa = _NEW_TEXTURE,
943 .brw = BRW_NEW_BATCH |
944 BRW_NEW_COMPUTE_PROGRAM,
945 },
946 .emit = brw_update_cs_texture_surfaces,
947 };
948
949
950 void
951 brw_upload_ubo_surfaces(struct brw_context *brw,
952 struct gl_shader *shader,
953 struct brw_stage_state *stage_state,
954 struct brw_stage_prog_data *prog_data)
955 {
956 struct gl_context *ctx = &brw->ctx;
957
958 if (!shader)
959 return;
960
961 uint32_t *ubo_surf_offsets =
962 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
963
964 for (int i = 0; i < shader->NumUniformBlocks; i++) {
965 struct gl_uniform_buffer_binding *binding =
966 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
967
968 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
969 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
970 } else {
971 struct intel_buffer_object *intel_bo =
972 intel_buffer_object(binding->BufferObject);
973 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
974 if (!binding->AutomaticSize)
975 size = MIN2(size, binding->Size);
976 drm_intel_bo *bo =
977 intel_bufferobj_buffer(brw, intel_bo,
978 binding->Offset,
979 size);
980 brw_create_constant_surface(brw, bo, binding->Offset,
981 size,
982 &ubo_surf_offsets[i]);
983 }
984 }
985
986 uint32_t *ssbo_surf_offsets =
987 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
988
989 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
990 struct gl_shader_storage_buffer_binding *binding =
991 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
992
993 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
994 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
995 } else {
996 struct intel_buffer_object *intel_bo =
997 intel_buffer_object(binding->BufferObject);
998 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
999 if (!binding->AutomaticSize)
1000 size = MIN2(size, binding->Size);
1001 drm_intel_bo *bo =
1002 intel_bufferobj_buffer(brw, intel_bo,
1003 binding->Offset,
1004 size);
1005 brw_create_buffer_surface(brw, bo, binding->Offset,
1006 size,
1007 &ssbo_surf_offsets[i]);
1008 }
1009 }
1010
1011 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1012 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1013 }
1014
1015 static void
1016 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1017 {
1018 struct gl_context *ctx = &brw->ctx;
1019 /* _NEW_PROGRAM */
1020 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1021
1022 if (!prog)
1023 return;
1024
1025 /* BRW_NEW_FS_PROG_DATA */
1026 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1027 &brw->wm.base, &brw->wm.prog_data->base);
1028 }
1029
1030 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1031 .dirty = {
1032 .mesa = _NEW_PROGRAM,
1033 .brw = BRW_NEW_BATCH |
1034 BRW_NEW_FS_PROG_DATA |
1035 BRW_NEW_UNIFORM_BUFFER,
1036 },
1037 .emit = brw_upload_wm_ubo_surfaces,
1038 };
1039
1040 static void
1041 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1042 {
1043 struct gl_context *ctx = &brw->ctx;
1044 /* _NEW_PROGRAM */
1045 struct gl_shader_program *prog =
1046 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1047
1048 if (!prog)
1049 return;
1050
1051 /* BRW_NEW_CS_PROG_DATA */
1052 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1053 &brw->cs.base, &brw->cs.prog_data->base);
1054 }
1055
1056 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1057 .dirty = {
1058 .mesa = _NEW_PROGRAM,
1059 .brw = BRW_NEW_BATCH |
1060 BRW_NEW_CS_PROG_DATA |
1061 BRW_NEW_UNIFORM_BUFFER,
1062 },
1063 .emit = brw_upload_cs_ubo_surfaces,
1064 };
1065
1066 void
1067 brw_upload_abo_surfaces(struct brw_context *brw,
1068 struct gl_shader *shader,
1069 struct brw_stage_state *stage_state,
1070 struct brw_stage_prog_data *prog_data)
1071 {
1072 struct gl_context *ctx = &brw->ctx;
1073 uint32_t *surf_offsets =
1074 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1075
1076 if (shader && shader->NumAtomicBuffers) {
1077 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1078 struct gl_atomic_buffer_binding *binding =
1079 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1080 struct intel_buffer_object *intel_bo =
1081 intel_buffer_object(binding->BufferObject);
1082 drm_intel_bo *bo = intel_bufferobj_buffer(
1083 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1084
1085 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1086 binding->Offset, BRW_SURFACEFORMAT_RAW,
1087 bo->size - binding->Offset, 1, true);
1088 }
1089
1090 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1091 }
1092 }
1093
1094 static void
1095 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1096 {
1097 struct gl_context *ctx = &brw->ctx;
1098 /* _NEW_PROGRAM */
1099 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1100
1101 if (prog) {
1102 /* BRW_NEW_FS_PROG_DATA */
1103 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1104 &brw->wm.base, &brw->wm.prog_data->base);
1105 }
1106 }
1107
1108 const struct brw_tracked_state brw_wm_abo_surfaces = {
1109 .dirty = {
1110 .mesa = _NEW_PROGRAM,
1111 .brw = BRW_NEW_ATOMIC_BUFFER |
1112 BRW_NEW_BATCH |
1113 BRW_NEW_FS_PROG_DATA,
1114 },
1115 .emit = brw_upload_wm_abo_surfaces,
1116 };
1117
1118 static void
1119 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1120 {
1121 struct gl_context *ctx = &brw->ctx;
1122 /* _NEW_PROGRAM */
1123 struct gl_shader_program *prog =
1124 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1125
1126 if (prog) {
1127 /* BRW_NEW_CS_PROG_DATA */
1128 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1129 &brw->cs.base, &brw->cs.prog_data->base);
1130 }
1131 }
1132
1133 const struct brw_tracked_state brw_cs_abo_surfaces = {
1134 .dirty = {
1135 .mesa = _NEW_PROGRAM,
1136 .brw = BRW_NEW_ATOMIC_BUFFER |
1137 BRW_NEW_BATCH |
1138 BRW_NEW_CS_PROG_DATA,
1139 },
1140 .emit = brw_upload_cs_abo_surfaces,
1141 };
1142
1143 static void
1144 brw_upload_cs_image_surfaces(struct brw_context *brw)
1145 {
1146 struct gl_context *ctx = &brw->ctx;
1147 /* _NEW_PROGRAM */
1148 struct gl_shader_program *prog =
1149 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1150
1151 if (prog) {
1152 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1153 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1154 &brw->cs.base, &brw->cs.prog_data->base);
1155 }
1156 }
1157
1158 const struct brw_tracked_state brw_cs_image_surfaces = {
1159 .dirty = {
1160 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1161 .brw = BRW_NEW_BATCH |
1162 BRW_NEW_CS_PROG_DATA |
1163 BRW_NEW_IMAGE_UNITS
1164 },
1165 .emit = brw_upload_cs_image_surfaces,
1166 };
1167
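/**
 * Pick the hardware surface format used to access a shader image.
 * Write-only access can use the format directly; read or read/write access
 * may have to fall back to a raw (untyped) surface where typed reads are
 * unsupported.
 */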
1168 static uint32_t
1169 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1170 {
1171 if (access == GL_WRITE_ONLY) {
1172 return brw_format_for_mesa_format(format);
1173 } else {
1174 /* Typed surface reads support a very limited subset of the shader
1175 * image formats. Translate the format into the closest one the
1176 * hardware supports.
1177 */
1178 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1179 (_mesa_get_format_bytes(format) >= 8 &&
1180 (brw->gen == 7 && !brw->is_haswell)))
1181 return BRW_SURFACEFORMAT_RAW;
1182 else
1183 return brw_format_for_mesa_format(
1184 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1185 }
1186 }
1187
1188 static void
1189 update_default_image_param(struct brw_context *brw,
1190 struct gl_image_unit *u,
1191 unsigned surface_idx,
1192 struct brw_image_param *param)
1193 {
1194 memset(param, 0, sizeof(*param));
1195 param->surface_idx = surface_idx;
1196 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1197 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1198 * detailed explanation of these parameters.
1199 */
1200 param->swizzling[0] = 0xff;
1201 param->swizzling[1] = 0xff;
1202 }
1203
1204 static void
1205 update_buffer_image_param(struct brw_context *brw,
1206 struct gl_image_unit *u,
1207 unsigned surface_idx,
1208 struct brw_image_param *param)
1209 {
1210 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1211
1212 update_default_image_param(brw, u, surface_idx, param);
1213
1214 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1215 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1216 }
1217
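/**
 * Fill in the brw_image_param metadata (dimensions, slice offsets, strides
 * and tiling parameters) for an image backed by a miptree level, as used by
 * the shader's address calculations.
 */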
1218 static void
1219 update_texture_image_param(struct brw_context *brw,
1220 struct gl_image_unit *u,
1221 unsigned surface_idx,
1222 struct brw_image_param *param)
1223 {
1224 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1225
1226 update_default_image_param(brw, u, surface_idx, param);
1227
1228 param->size[0] = minify(mt->logical_width0, u->Level);
1229 param->size[1] = minify(mt->logical_height0, u->Level);
1230 param->size[2] = (!u->Layered ? 1 :
1231 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1232 u->TexObj->Target == GL_TEXTURE_3D ?
1233 minify(mt->logical_depth0, u->Level) :
1234 mt->logical_depth0);
1235
1236 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1237 &param->offset[0],
1238 &param->offset[1]);
1239
1240 param->stride[0] = mt->cpp;
1241 param->stride[1] = mt->pitch / mt->cpp;
1242 param->stride[2] =
1243 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1244 param->stride[3] =
1245 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1246
1247 if (mt->tiling == I915_TILING_X) {
1248 /* An X tile is a rectangular block of 512x8 bytes. */
1249 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1250 param->tiling[1] = _mesa_logbase2(8);
1251
1252 if (brw->has_swizzling) {
1253 /* Right shifts required to swizzle bits 9 and 10 of the memory
1254 * address with bit 6.
1255 */
1256 param->swizzling[0] = 3;
1257 param->swizzling[1] = 4;
1258 }
1259 } else if (mt->tiling == I915_TILING_Y) {
1260 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1261 * different from the layout of an X-tiled surface; we simply pretend that
1262 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1263 * one arranged in X-major order just as is the case for X-tiling.
1264 */
1265 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1266 param->tiling[1] = _mesa_logbase2(32);
1267
1268 if (brw->has_swizzling) {
1269 /* Right shift required to swizzle bit 9 of the memory address with
1270 * bit 6.
1271 */
1272 param->swizzling[0] = 3;
1273 }
1274 }
1275
1276 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1277 * address calculation algorithm (emit_address_calculation() in
1278 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1279 * modulus equal to the LOD.
1280 */
1281 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1282 0);
1283 }
1284
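/**
 * Emit surface state and brw_image_param metadata for one shader image
 * unit, handling buffer images, raw access and ordinary texture images, or
 * a null surface when the unit is not in a valid state.
 */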
1285 static void
1286 update_image_surface(struct brw_context *brw,
1287 struct gl_image_unit *u,
1288 GLenum access,
1289 unsigned surface_idx,
1290 uint32_t *surf_offset,
1291 struct brw_image_param *param)
1292 {
1293 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1294 struct gl_texture_object *obj = u->TexObj;
1295 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1296
1297 if (obj->Target == GL_TEXTURE_BUFFER) {
1298 struct intel_buffer_object *intel_obj =
1299 intel_buffer_object(obj->BufferObject);
1300 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1301 _mesa_get_format_bytes(u->_ActualFormat));
1302
1303 brw->vtbl.emit_buffer_surface_state(
1304 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1305 format, intel_obj->Base.Size / texel_size, texel_size,
1306 access != GL_READ_ONLY);
1307
1308 update_buffer_image_param(brw, u, surface_idx, param);
1309
1310 } else {
1311 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1312 struct intel_mipmap_tree *mt = intel_obj->mt;
1313
1314 if (format == BRW_SURFACEFORMAT_RAW) {
1315 brw->vtbl.emit_buffer_surface_state(
1316 brw, surf_offset, mt->bo, mt->offset,
1317 format, mt->bo->size - mt->offset, 1 /* pitch */,
1318 access != GL_READ_ONLY);
1319
1320 } else {
1321 const unsigned min_layer = obj->MinLayer + u->_Layer;
1322 const unsigned min_level = obj->MinLevel + u->Level;
1323 const unsigned num_layers = (!u->Layered ? 1 :
1324 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1325 mt->logical_depth0);
1326 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1327 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1328 GL_TEXTURE_2D_ARRAY : obj->Target);
1329 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1330
1331 brw->vtbl.emit_texture_surface_state(
1332 brw, mt, target,
1333 min_layer, min_layer + num_layers,
1334 min_level, min_level + 1,
1335 format, SWIZZLE_XYZW,
1336 surf_offset, surf_index, access != GL_READ_ONLY, false);
1337 }
1338
1339 update_texture_image_param(brw, u, surface_idx, param);
1340 }
1341
1342 } else {
1343 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1344 update_default_image_param(brw, u, surface_idx, param);
1345 }
1346 }
1347
1348 void
1349 brw_upload_image_surfaces(struct brw_context *brw,
1350 struct gl_shader *shader,
1351 struct brw_stage_state *stage_state,
1352 struct brw_stage_prog_data *prog_data)
1353 {
1354 struct gl_context *ctx = &brw->ctx;
1355
1356 if (shader && shader->NumImages) {
1357 for (unsigned i = 0; i < shader->NumImages; i++) {
1358 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1359 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1360
1361 update_image_surface(brw, u, shader->ImageAccess[i],
1362 surf_idx,
1363 &stage_state->surf_offset[surf_idx],
1364 &prog_data->image_param[i]);
1365 }
1366
1367 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1368 /* This may have changed the image metadata that depends on the context
1369 * image unit state and is passed to the program as uniforms, so make
1370 * sure that push and pull constants are reuploaded.
1371 */
1372 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1373 }
1374 }
1375
1376 static void
1377 brw_upload_wm_image_surfaces(struct brw_context *brw)
1378 {
1379 struct gl_context *ctx = &brw->ctx;
1380 /* BRW_NEW_FRAGMENT_PROGRAM */
1381 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1382
1383 if (prog) {
1384 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1385 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1386 &brw->wm.base, &brw->wm.prog_data->base);
1387 }
1388 }
1389
1390 const struct brw_tracked_state brw_wm_image_surfaces = {
1391 .dirty = {
1392 .mesa = _NEW_TEXTURE,
1393 .brw = BRW_NEW_BATCH |
1394 BRW_NEW_FRAGMENT_PROGRAM |
1395 BRW_NEW_FS_PROG_DATA |
1396 BRW_NEW_IMAGE_UNITS
1397 },
1398 .emit = brw_upload_wm_image_surfaces,
1399 };
1400
1401 void
1402 gen4_init_vtable_surface_functions(struct brw_context *brw)
1403 {
1404 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1405 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1406 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1407 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1408 }
1409
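/**
 * Emit a raw buffer surface holding the number of work groups, so the
 * compute shader can read gl_NumWorkGroups.  If no buffer object with the
 * counts exists yet, the three GLuints are uploaded through the streaming
 * upload buffer first.
 */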
1410 static void
1411 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1412 {
1413 struct gl_context *ctx = &brw->ctx;
1414 /* _NEW_PROGRAM */
1415 struct gl_shader_program *prog =
1416 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1417
1418 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1419 const unsigned surf_idx =
1420 brw->cs.prog_data->binding_table.work_groups_start;
1421 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1422 drm_intel_bo *bo;
1423 uint32_t bo_offset;
1424
1425 if (brw->compute.num_work_groups_bo == NULL) {
1426 bo = NULL;
1427 intel_upload_data(brw,
1428 (void *)brw->compute.num_work_groups,
1429 3 * sizeof(GLuint),
1430 sizeof(GLuint),
1431 &bo,
1432 &bo_offset);
1433 } else {
1434 bo = brw->compute.num_work_groups_bo;
1435 bo_offset = brw->compute.num_work_groups_offset;
1436 }
1437
1438 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1439 bo, bo_offset,
1440 BRW_SURFACEFORMAT_RAW,
1441 3 * sizeof(GLuint), 1, true);
1442 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1443 }
1444 }
1445
1446 const struct brw_tracked_state brw_cs_work_groups_surface = {
1447 .dirty = {
1448 .brw = BRW_NEW_CS_WORK_GROUPS
1449 },
1450 .emit = brw_upload_cs_work_groups_surface,
1451 };