i965: remove trailing spaces in various files
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114    /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117     * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
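   /* Illustrative example (hypothetical values): a stencil miptree with
    * pitch 64 and a 64x128 total size spanning two slices comes out below
    * as a Y-tiled surface with pitch 128, width 128 and height 32, i.e.
    * twice as wide and half as tall per slice.
    */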
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
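   /* Compose the application-supplied texture swizzle (t->_Swizzle) with the
    * depth-mode/base-format swizzle derived above.
    */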
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
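   /* The element count is split across the surface Width (bits 6:0),
    * Height (bits 19:7) and Depth (bits 26:20) fields below.
    */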
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset,
405 bool dword_pitch)
406 {
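   /* dword_pitch selects a 4-byte element stride rather than the default
    * 16-byte (vec4) stride; elements is the buffer size in units of that
    * stride.
    */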
407 uint32_t stride = dword_pitch ? 4 : 16;
408 uint32_t elements = ALIGN(size, stride) / stride;
409
410 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
411 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
412 elements, stride, false);
413 }
414
415 /**
416 * Create the buffer surface. Shader buffer variables will be
417  * read from / written to this buffer with Data Port Read/Write
418 * instructions/messages.
419 */
420 void
421 brw_create_buffer_surface(struct brw_context *brw,
422 drm_intel_bo *bo,
423 uint32_t offset,
424 uint32_t size,
425 uint32_t *out_offset,
426 bool dword_pitch)
427 {
428 /* Use a raw surface so we can reuse existing untyped read/write/atomic
429 * messages. We need these specifically for the fragment shader since they
430     * include a pixel mask header that is needed to ensure correct behavior
431 * with helper invocations, which cannot write to the buffer.
432 */
433 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
434 BRW_SURFACEFORMAT_RAW,
435 size, 1, true);
436 }
437
438 /**
439 * Set up a binding table entry for use by stream output logic (transform
440 * feedback).
441 *
442 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
443 */
444 void
445 brw_update_sol_surface(struct brw_context *brw,
446 struct gl_buffer_object *buffer_obj,
447 uint32_t *out_offset, unsigned num_vector_components,
448 unsigned stride_dwords, unsigned offset_dwords)
449 {
450 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
451 uint32_t offset_bytes = 4 * offset_dwords;
452 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
453 offset_bytes,
454 buffer_obj->Size - offset_bytes);
455 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
456 out_offset);
457 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
458 size_t size_dwords = buffer_obj->Size / 4;
459 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
460
461 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
462 * too big to map using a single binding table entry?
463 */
464 assert((size_dwords - offset_dwords) / stride_dwords
465 <= BRW_MAX_NUM_BUFFER_ENTRIES);
466
467 if (size_dwords > offset_dwords + num_vector_components) {
468 /* There is room for at least 1 transform feedback output in the buffer.
469 * Compute the number of additional transform feedback outputs the
470 * buffer has room for.
471 */
472 buffer_size_minus_1 =
473 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
474 } else {
475 /* There isn't even room for a single transform feedback output in the
476 * buffer. We can't configure the binding table entry to prevent output
477 * entirely; we'll have to rely on the geometry shader to detect
478 * overflow. But to minimize the damage in case of a bug, set up the
479 * binding table entry to just allow a single output.
480 */
481 buffer_size_minus_1 = 0;
482 }
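   /* Split buffer_size_minus_1 across the Width, Height and Depth fields,
    * using the same bits-6:0 / 19:7 / 26:20 encoding as a buffer surface.
    */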
483 width = buffer_size_minus_1 & 0x7f;
484 height = (buffer_size_minus_1 & 0xfff80) >> 7;
485 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
486
487 switch (num_vector_components) {
488 case 1:
489 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
490 break;
491 case 2:
492 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
493 break;
494 case 3:
495 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
496 break;
497 case 4:
498 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
499 break;
500 default:
501 unreachable("Invalid vector size for transform feedback output");
502 }
503
504 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
505 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
506 surface_format << BRW_SURFACE_FORMAT_SHIFT |
507 BRW_SURFACE_RC_READ_WRITE;
508 surf[1] = bo->offset64 + offset_bytes; /* reloc */
509 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
510 height << BRW_SURFACE_HEIGHT_SHIFT);
511 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
512 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
513 surf[4] = 0;
514 surf[5] = 0;
515
516 /* Emit relocation to surface contents. */
517 drm_intel_bo_emit_reloc(brw->batch.bo,
518 *out_offset + 4,
519 bo, offset_bytes,
520 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
521 }
522
523 /* Creates a new WM constant buffer reflecting the current fragment program's
524 * constants, if needed by the fragment program.
525 *
526 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
527 * state atom.
528 */
529 static void
530 brw_upload_wm_pull_constants(struct brw_context *brw)
531 {
532 struct brw_stage_state *stage_state = &brw->wm.base;
533 /* BRW_NEW_FRAGMENT_PROGRAM */
534 struct brw_fragment_program *fp =
535 (struct brw_fragment_program *) brw->fragment_program;
536 /* BRW_NEW_FS_PROG_DATA */
537 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
538
539 /* _NEW_PROGRAM_CONSTANTS */
540 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
541 stage_state, prog_data, true);
542 }
543
544 const struct brw_tracked_state brw_wm_pull_constants = {
545 .dirty = {
546 .mesa = _NEW_PROGRAM_CONSTANTS,
547 .brw = BRW_NEW_BATCH |
548 BRW_NEW_FRAGMENT_PROGRAM |
549 BRW_NEW_FS_PROG_DATA,
550 },
551 .emit = brw_upload_wm_pull_constants,
552 };
553
554 /**
555 * Creates a null renderbuffer surface.
556 *
557 * This is used when the shader doesn't write to any color output. An FB
558 * write to target 0 will still be emitted, because that's how the thread is
559 * terminated (and computed depth is returned), so we need to have the
560  * hardware discard the target 0 color output.
561 */
562 static void
563 brw_emit_null_surface_state(struct brw_context *brw,
564 unsigned width,
565 unsigned height,
566 unsigned samples,
567 uint32_t *out_offset)
568 {
569 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
570 * Notes):
571 *
572 * A null surface will be used in instances where an actual surface is
573 * not bound. When a write message is generated to a null surface, no
574 * actual surface is written to. When a read message (including any
575 * sampling engine message) is generated to a null surface, the result
576 * is all zeros. Note that a null surface type is allowed to be used
577     *     with all messages, even if it is not specifically indicated as
578 * supported. All of the remaining fields in surface state are ignored
579 * for null surfaces, with the following exceptions:
580 *
581 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
582 * depth buffer’s corresponding state for all render target surfaces,
583 * including null.
584 *
585 * - Surface Format must be R8G8B8A8_UNORM.
586 */
587 unsigned surface_type = BRW_SURFACE_NULL;
588 drm_intel_bo *bo = NULL;
589 unsigned pitch_minus_1 = 0;
590 uint32_t multisampling_state = 0;
591 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
592 out_offset);
593
594 if (samples > 1) {
595 /* On Gen6, null render targets seem to cause GPU hangs when
596        * multisampling. So work around this problem by rendering into a dummy
597 * color buffer.
598 *
599 * To decrease the amount of memory needed by the workaround buffer, we
600 * set its pitch to 128 bytes (the width of a Y tile). This means that
601 * the amount of memory needed for the workaround buffer is
602 * (width_in_tiles + height_in_tiles - 1) tiles.
603 *
604 * Note that since the workaround buffer will be interpreted by the
605 * hardware as an interleaved multisampled buffer, we need to compute
606 * width_in_tiles and height_in_tiles by dividing the width and height
607 * by 16 rather than the normal Y-tile size of 32.
608 */
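      /* E.g. for a 1024x768 multisampled framebuffer this gives
       * width_in_tiles = 64 and height_in_tiles = 48, so the workaround BO
       * is (64 + 48 - 1) * 4096 bytes (444 KB) rather than a full-sized
       * dummy color buffer.
       */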
609 unsigned width_in_tiles = ALIGN(width, 16) / 16;
610 unsigned height_in_tiles = ALIGN(height, 16) / 16;
611 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
612 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
613 size_needed);
614 bo = brw->wm.multisampled_null_render_target_bo;
615 surface_type = BRW_SURFACE_2D;
616 pitch_minus_1 = 127;
617 multisampling_state = brw_get_surface_num_multisamples(samples);
618 }
619
620 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
621 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
622 if (brw->gen < 6) {
623 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
626 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
627 }
628 surf[1] = bo ? bo->offset64 : 0;
629 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
630 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
631
632 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
633 * Notes):
634 *
635 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
636 */
637 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
638 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
639 surf[4] = multisampling_state;
640 surf[5] = 0;
641
642 if (bo) {
643 drm_intel_bo_emit_reloc(brw->batch.bo,
644 *out_offset + 4,
645 bo, 0,
646 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
647 }
648 }
649
650 /**
651 * Sets up a surface state structure to point at the given region.
652 * While it is only used for the front/back buffer currently, it should be
653  * usable for further buffers when doing ARB_draw_buffers support.
654 */
655 static uint32_t
656 brw_update_renderbuffer_surface(struct brw_context *brw,
657 struct gl_renderbuffer *rb,
658 bool layered, unsigned unit,
659 uint32_t surf_index)
660 {
661 struct gl_context *ctx = &brw->ctx;
662 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
663 struct intel_mipmap_tree *mt = irb->mt;
664 uint32_t *surf;
665 uint32_t tile_x, tile_y;
666 uint32_t format = 0;
667 uint32_t offset;
668 /* _NEW_BUFFERS */
669 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
670 /* BRW_NEW_FS_PROG_DATA */
671
672 assert(!layered);
673
674 if (rb->TexImage && !brw->has_surface_tile_offset) {
675 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
676
677 if (tile_x != 0 || tile_y != 0) {
678 /* Original gen4 hardware couldn't draw to a non-tile-aligned
679 * destination in a miptree unless you actually setup your renderbuffer
680 * as a miptree and used the fragile lod/array_index/etc. controls to
681 * select the image. So, instead, we just make a new single-level
682 * miptree and render into that.
683 */
684 intel_renderbuffer_move_to_temp(brw, irb, false);
685 mt = irb->mt;
686 }
687 }
688
689 intel_miptree_used_for_rendering(irb->mt);
690
691 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
692
693 format = brw->render_target_format[rb_format];
694 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
695 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
696 __func__, _mesa_get_format_name(rb_format));
697 }
698
699 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
700 format << BRW_SURFACE_FORMAT_SHIFT);
701
702 /* reloc */
703 assert(mt->offset % mt->cpp == 0);
704 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
705 mt->bo->offset64 + mt->offset);
706
707 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
708 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
709
710 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
711 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
712
713 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
714
715 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
716    /* Note that the low bits of these offsets cannot be expressed in the
717     * surface state, hence the alignment asserts below.
718 */
719 assert(tile_x % 4 == 0);
720 assert(tile_y % 2 == 0);
721 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
722 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
723 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
724
725 if (brw->gen < 6) {
726 /* _NEW_COLOR */
727 if (!ctx->Color.ColorLogicOpEnabled &&
728 (ctx->Color.BlendEnabled & (1 << unit)))
729 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
730
731 if (!ctx->Color.ColorMask[unit][0])
732 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
733 if (!ctx->Color.ColorMask[unit][1])
734 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
735 if (!ctx->Color.ColorMask[unit][2])
736 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
737
738       /* Disable writes to the alpha component when the renderbuffer has no
739        * alpha bits (XRGB) or alpha writes are masked off.
740 */
741 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
742 !ctx->Color.ColorMask[unit][3]) {
743 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
744 }
745 }
746
747 drm_intel_bo_emit_reloc(brw->batch.bo,
748 offset + 4,
749 mt->bo,
750 surf[1] - mt->bo->offset64,
751 I915_GEM_DOMAIN_RENDER,
752 I915_GEM_DOMAIN_RENDER);
753
754 return offset;
755 }
756
757 /**
758 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
759 */
760 void
761 brw_update_renderbuffer_surfaces(struct brw_context *brw,
762 const struct gl_framebuffer *fb,
763 uint32_t render_target_start,
764 uint32_t *surf_offset)
765 {
766 GLuint i;
767 const unsigned int w = _mesa_geometric_width(fb);
768 const unsigned int h = _mesa_geometric_height(fb);
769 const unsigned int s = _mesa_geometric_samples(fb);
770
771 /* Update surfaces for drawing buffers */
772 if (fb->_NumColorDrawBuffers >= 1) {
773 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
774 const uint32_t surf_index = render_target_start + i;
775
776 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
777 surf_offset[surf_index] =
778 brw->vtbl.update_renderbuffer_surface(
779 brw, fb->_ColorDrawBuffers[i],
780 _mesa_geometric_layers(fb) > 0, i, surf_index);
781 } else {
782 brw->vtbl.emit_null_surface_state(brw, w, h, s,
783 &surf_offset[surf_index]);
784 }
785 }
786 } else {
787 const uint32_t surf_index = render_target_start;
788 brw->vtbl.emit_null_surface_state(brw, w, h, s,
789 &surf_offset[surf_index]);
790 }
791 }
792
793 static void
794 update_renderbuffer_surfaces(struct brw_context *brw)
795 {
796 const struct gl_context *ctx = &brw->ctx;
797
798 /* _NEW_BUFFERS | _NEW_COLOR */
799 const struct gl_framebuffer *fb = ctx->DrawBuffer;
800 brw_update_renderbuffer_surfaces(
801 brw, fb,
802 brw->wm.prog_data->binding_table.render_target_start,
803 brw->wm.base.surf_offset);
804 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
805 }
806
807 const struct brw_tracked_state brw_renderbuffer_surfaces = {
808 .dirty = {
809 .mesa = _NEW_BUFFERS |
810 _NEW_COLOR,
811 .brw = BRW_NEW_BATCH |
812 BRW_NEW_FS_PROG_DATA,
813 },
814 .emit = update_renderbuffer_surfaces,
815 };
816
817 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
818 .dirty = {
819 .mesa = _NEW_BUFFERS,
820 .brw = BRW_NEW_BATCH,
821 },
822 .emit = update_renderbuffer_surfaces,
823 };
824
825
826 static void
827 update_stage_texture_surfaces(struct brw_context *brw,
828 const struct gl_program *prog,
829 struct brw_stage_state *stage_state,
830 bool for_gather)
831 {
832 if (!prog)
833 return;
834
835 struct gl_context *ctx = &brw->ctx;
836
837 uint32_t *surf_offset = stage_state->surf_offset;
838
839 /* BRW_NEW_*_PROG_DATA */
840 if (for_gather)
841 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
842 else
843 surf_offset += stage_state->prog_data->binding_table.texture_start;
844
845 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
846 for (unsigned s = 0; s < num_samplers; s++) {
847 surf_offset[s] = 0;
848
849 if (prog->SamplersUsed & (1 << s)) {
850 const unsigned unit = prog->SamplerUnits[s];
851
852 /* _NEW_TEXTURE */
853 if (ctx->Texture.Unit[unit]._Current) {
854 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
855 }
856 }
857 }
858 }
859
860
861 /**
862 * Construct SURFACE_STATE objects for enabled textures.
863 */
864 static void
865 brw_update_texture_surfaces(struct brw_context *brw)
866 {
867 /* BRW_NEW_VERTEX_PROGRAM */
868 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
869
870 /* BRW_NEW_GEOMETRY_PROGRAM */
871 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
872
873 /* BRW_NEW_FRAGMENT_PROGRAM */
874 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
875
876 /* BRW_NEW_COMPUTE_PROGRAM */
877 struct gl_program *cs = (struct gl_program *) brw->compute_program;
878
879 /* _NEW_TEXTURE */
880 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
881 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
882 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
883 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
884
885    /* Emit an alternate set of surface states for gather. This
886     * allows the surface format to be overridden for only the
887     * gather4 messages. */
888 if (brw->gen < 8) {
889 if (vs && vs->UsesGather)
890 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
891 if (gs && gs->UsesGather)
892 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
893 if (fs && fs->UsesGather)
894 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
895 if (cs && cs->UsesGather)
896 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
897 }
898
899 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
900 }
901
902 const struct brw_tracked_state brw_texture_surfaces = {
903 .dirty = {
904 .mesa = _NEW_TEXTURE,
905 .brw = BRW_NEW_BATCH |
906 BRW_NEW_COMPUTE_PROGRAM |
907 BRW_NEW_FRAGMENT_PROGRAM |
908 BRW_NEW_FS_PROG_DATA |
909 BRW_NEW_GEOMETRY_PROGRAM |
910 BRW_NEW_GS_PROG_DATA |
911 BRW_NEW_TEXTURE_BUFFER |
912 BRW_NEW_VERTEX_PROGRAM |
913 BRW_NEW_VS_PROG_DATA,
914 },
915 .emit = brw_update_texture_surfaces,
916 };
917
918 void
919 brw_upload_ubo_surfaces(struct brw_context *brw,
920 struct gl_shader *shader,
921 struct brw_stage_state *stage_state,
922 struct brw_stage_prog_data *prog_data,
923 bool dword_pitch)
924 {
925 struct gl_context *ctx = &brw->ctx;
926
927 if (!shader)
928 return;
929
930 uint32_t *ubo_surf_offsets =
931 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
932
933 for (int i = 0; i < shader->NumUniformBlocks; i++) {
934 struct gl_uniform_buffer_binding *binding =
935 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
936
937 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
938 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
939 } else {
940 struct intel_buffer_object *intel_bo =
941 intel_buffer_object(binding->BufferObject);
942 drm_intel_bo *bo =
943 intel_bufferobj_buffer(brw, intel_bo,
944 binding->Offset,
945 binding->BufferObject->Size - binding->Offset);
946 brw_create_constant_surface(brw, bo, binding->Offset,
947 binding->BufferObject->Size - binding->Offset,
948 &ubo_surf_offsets[i],
949 dword_pitch);
950 }
951 }
952
953 uint32_t *ssbo_surf_offsets =
954 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
955
956 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
957 struct gl_shader_storage_buffer_binding *binding =
958 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
959
960 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
961 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
962 } else {
963 struct intel_buffer_object *intel_bo =
964 intel_buffer_object(binding->BufferObject);
965 drm_intel_bo *bo =
966 intel_bufferobj_buffer(brw, intel_bo,
967 binding->Offset,
968 binding->BufferObject->Size - binding->Offset);
969 brw_create_buffer_surface(brw, bo, binding->Offset,
970 binding->BufferObject->Size - binding->Offset,
971 &ssbo_surf_offsets[i],
972 dword_pitch);
973 }
974 }
975
976 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
977 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
978 }
979
980 static void
981 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
982 {
983 struct gl_context *ctx = &brw->ctx;
984 /* _NEW_PROGRAM */
985 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
986
987 if (!prog)
988 return;
989
990 /* BRW_NEW_FS_PROG_DATA */
991 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
992 &brw->wm.base, &brw->wm.prog_data->base, true);
993 }
994
995 const struct brw_tracked_state brw_wm_ubo_surfaces = {
996 .dirty = {
997 .mesa = _NEW_PROGRAM,
998 .brw = BRW_NEW_BATCH |
999 BRW_NEW_FS_PROG_DATA |
1000 BRW_NEW_UNIFORM_BUFFER,
1001 },
1002 .emit = brw_upload_wm_ubo_surfaces,
1003 };
1004
1005 static void
1006 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1007 {
1008 struct gl_context *ctx = &brw->ctx;
1009 /* _NEW_PROGRAM */
1010 struct gl_shader_program *prog =
1011 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1012
1013 if (!prog)
1014 return;
1015
1016 /* BRW_NEW_CS_PROG_DATA */
1017 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1018 &brw->cs.base, &brw->cs.prog_data->base, true);
1019 }
1020
1021 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1022 .dirty = {
1023 .mesa = _NEW_PROGRAM,
1024 .brw = BRW_NEW_BATCH |
1025 BRW_NEW_CS_PROG_DATA |
1026 BRW_NEW_UNIFORM_BUFFER,
1027 },
1028 .emit = brw_upload_cs_ubo_surfaces,
1029 };
1030
1031 void
1032 brw_upload_abo_surfaces(struct brw_context *brw,
1033 struct gl_shader *shader,
1034 struct brw_stage_state *stage_state,
1035 struct brw_stage_prog_data *prog_data)
1036 {
1037 struct gl_context *ctx = &brw->ctx;
1038 uint32_t *surf_offsets =
1039 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1040
1041 if (shader && shader->NumAtomicBuffers) {
1042 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1043 struct gl_atomic_buffer_binding *binding =
1044 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1045 struct intel_buffer_object *intel_bo =
1046 intel_buffer_object(binding->BufferObject);
1047 drm_intel_bo *bo = intel_bufferobj_buffer(
1048 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1049
1050 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1051 binding->Offset, BRW_SURFACEFORMAT_RAW,
1052 bo->size - binding->Offset, 1, true);
1053 }
1054
1055 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1056 }
1057 }
1058
1059 static void
1060 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1061 {
1062 struct gl_context *ctx = &brw->ctx;
1063 /* _NEW_PROGRAM */
1064 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1065
1066 if (prog) {
1067 /* BRW_NEW_FS_PROG_DATA */
1068 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1069 &brw->wm.base, &brw->wm.prog_data->base);
1070 }
1071 }
1072
1073 const struct brw_tracked_state brw_wm_abo_surfaces = {
1074 .dirty = {
1075 .mesa = _NEW_PROGRAM,
1076 .brw = BRW_NEW_ATOMIC_BUFFER |
1077 BRW_NEW_BATCH |
1078 BRW_NEW_FS_PROG_DATA,
1079 },
1080 .emit = brw_upload_wm_abo_surfaces,
1081 };
1082
1083 static void
1084 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1085 {
1086 struct gl_context *ctx = &brw->ctx;
1087 /* _NEW_PROGRAM */
1088 struct gl_shader_program *prog =
1089 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1090
1091 if (prog) {
1092 /* BRW_NEW_CS_PROG_DATA */
1093 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1094 &brw->cs.base, &brw->cs.prog_data->base);
1095 }
1096 }
1097
1098 const struct brw_tracked_state brw_cs_abo_surfaces = {
1099 .dirty = {
1100 .mesa = _NEW_PROGRAM,
1101 .brw = BRW_NEW_ATOMIC_BUFFER |
1102 BRW_NEW_BATCH |
1103 BRW_NEW_CS_PROG_DATA,
1104 },
1105 .emit = brw_upload_cs_abo_surfaces,
1106 };
1107
1108 static void
1109 brw_upload_cs_image_surfaces(struct brw_context *brw)
1110 {
1111 struct gl_context *ctx = &brw->ctx;
1112 /* _NEW_PROGRAM */
1113 struct gl_shader_program *prog =
1114 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1115
1116 if (prog) {
1117 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1118 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1119 &brw->cs.base, &brw->cs.prog_data->base);
1120 }
1121 }
1122
1123 const struct brw_tracked_state brw_cs_image_surfaces = {
1124 .dirty = {
1125 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1126 .brw = BRW_NEW_BATCH |
1127 BRW_NEW_CS_PROG_DATA |
1128 BRW_NEW_IMAGE_UNITS
1129 },
1130 .emit = brw_upload_cs_image_surfaces,
1131 };
1132
1133 static uint32_t
1134 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1135 {
1136 if (access == GL_WRITE_ONLY) {
1137 return brw_format_for_mesa_format(format);
1138 } else {
1139 /* Typed surface reads support a very limited subset of the shader
1140        * image formats. Translate the requested format into the closest one the
1141 * hardware supports.
1142 */
1143 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1144 (_mesa_get_format_bytes(format) >= 8 &&
1145 (brw->gen == 7 && !brw->is_haswell)))
1146 return BRW_SURFACEFORMAT_RAW;
1147 else
1148 return brw_format_for_mesa_format(
1149 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1150 }
1151 }
1152
1153 static void
1154 update_default_image_param(struct brw_context *brw,
1155 struct gl_image_unit *u,
1156 unsigned surface_idx,
1157 struct brw_image_param *param)
1158 {
1159 memset(param, 0, sizeof(*param));
1160 param->surface_idx = surface_idx;
1161 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1162 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1163 * detailed explanation of these parameters.
1164 */
1165 param->swizzling[0] = 0xff;
1166 param->swizzling[1] = 0xff;
1167 }
1168
1169 static void
1170 update_buffer_image_param(struct brw_context *brw,
1171 struct gl_image_unit *u,
1172 unsigned surface_idx,
1173 struct brw_image_param *param)
1174 {
1175 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1176
1177 update_default_image_param(brw, u, surface_idx, param);
1178
1179 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1180 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1181 }
1182
1183 static void
1184 update_texture_image_param(struct brw_context *brw,
1185 struct gl_image_unit *u,
1186 unsigned surface_idx,
1187 struct brw_image_param *param)
1188 {
1189 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1190
1191 update_default_image_param(brw, u, surface_idx, param);
1192
1193 param->size[0] = minify(mt->logical_width0, u->Level);
1194 param->size[1] = minify(mt->logical_height0, u->Level);
1195 param->size[2] = (!u->Layered ? 1 :
1196 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1197 u->TexObj->Target == GL_TEXTURE_3D ?
1198 minify(mt->logical_depth0, u->Level) :
1199 mt->logical_depth0);
1200
1201 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1202 &param->offset[0],
1203 &param->offset[1]);
1204
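   /* stride[0] is the texel size in bytes, stride[1] the row pitch in
    * texels, and stride[2]/stride[3] the horizontal/vertical slice pitches.
    */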
1205 param->stride[0] = mt->cpp;
1206 param->stride[1] = mt->pitch / mt->cpp;
1207 param->stride[2] =
1208 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1209 param->stride[3] =
1210 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1211
1212 if (mt->tiling == I915_TILING_X) {
1213 /* An X tile is a rectangular block of 512x8 bytes. */
1214 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1215 param->tiling[1] = _mesa_logbase2(8);
1216
1217 if (brw->has_swizzling) {
1218 /* Right shifts required to swizzle bits 9 and 10 of the memory
1219 * address with bit 6.
1220 */
1221 param->swizzling[0] = 3;
1222 param->swizzling[1] = 4;
1223 }
1224 } else if (mt->tiling == I915_TILING_Y) {
1225       /* The layout of a Y-tiled surface in memory isn't fundamentally
1226        * different from that of an X-tiled surface; we simply pretend that the
1227        * surface is broken up into a number of smaller 16Bx32 tiles, each one
1228        * arranged in X-major order just as in X-tiling.
1229 */
1230 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1231 param->tiling[1] = _mesa_logbase2(32);
1232
1233 if (brw->has_swizzling) {
1234 /* Right shift required to swizzle bit 9 of the memory address with
1235 * bit 6.
1236 */
1237 param->swizzling[0] = 3;
1238 }
1239 }
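   /* For example, with cpp = 4 the X-tiled case gives
    * tiling[] = { log2(128), log2(8) }, i.e. a 128x8-texel tile, and the
    * Y-tiled case gives { log2(4), log2(32) }, i.e. a 4x32-texel pseudo-tile.
    */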
1240
1241 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1242 * address calculation algorithm (emit_address_calculation() in
1243 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1244 * modulus equal to the LOD.
1245 */
1246 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1247 0);
1248 }
1249
1250 static void
1251 update_image_surface(struct brw_context *brw,
1252 struct gl_image_unit *u,
1253 GLenum access,
1254 unsigned surface_idx,
1255 uint32_t *surf_offset,
1256 struct brw_image_param *param)
1257 {
1258 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1259 struct gl_texture_object *obj = u->TexObj;
1260 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1261
1262 if (obj->Target == GL_TEXTURE_BUFFER) {
1263 struct intel_buffer_object *intel_obj =
1264 intel_buffer_object(obj->BufferObject);
1265 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1266 _mesa_get_format_bytes(u->_ActualFormat));
1267
1268 brw->vtbl.emit_buffer_surface_state(
1269 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1270 format, intel_obj->Base.Size / texel_size, texel_size,
1271 access != GL_READ_ONLY);
1272
1273 update_buffer_image_param(brw, u, surface_idx, param);
1274
1275 } else {
1276 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1277 struct intel_mipmap_tree *mt = intel_obj->mt;
1278
1279 if (format == BRW_SURFACEFORMAT_RAW) {
1280 brw->vtbl.emit_buffer_surface_state(
1281 brw, surf_offset, mt->bo, mt->offset,
1282 format, mt->bo->size - mt->offset, 1 /* pitch */,
1283 access != GL_READ_ONLY);
1284
1285 } else {
1286 const unsigned min_layer = obj->MinLayer + u->_Layer;
1287 const unsigned min_level = obj->MinLevel + u->Level;
1288 const unsigned num_layers = (!u->Layered ? 1 :
1289 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1290 mt->logical_depth0);
1291 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1292 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1293 GL_TEXTURE_2D_ARRAY : obj->Target);
1294
1295 brw->vtbl.emit_texture_surface_state(
1296 brw, mt, target,
1297 min_layer, min_layer + num_layers,
1298 min_level, min_level + 1,
1299 format, SWIZZLE_XYZW,
1300 surf_offset, access != GL_READ_ONLY, false);
1301 }
1302
1303 update_texture_image_param(brw, u, surface_idx, param);
1304 }
1305
1306 } else {
1307 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1308 update_default_image_param(brw, u, surface_idx, param);
1309 }
1310 }
1311
1312 void
1313 brw_upload_image_surfaces(struct brw_context *brw,
1314 struct gl_shader *shader,
1315 struct brw_stage_state *stage_state,
1316 struct brw_stage_prog_data *prog_data)
1317 {
1318 struct gl_context *ctx = &brw->ctx;
1319
1320 if (shader && shader->NumImages) {
1321 for (unsigned i = 0; i < shader->NumImages; i++) {
1322 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1323 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1324
1325 update_image_surface(brw, u, shader->ImageAccess[i],
1326 surf_idx,
1327 &stage_state->surf_offset[surf_idx],
1328 &prog_data->image_param[i]);
1329 }
1330
1331 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1332 }
1333 }
1334
1335 static void
1336 brw_upload_wm_image_surfaces(struct brw_context *brw)
1337 {
1338 struct gl_context *ctx = &brw->ctx;
1339 /* BRW_NEW_FRAGMENT_PROGRAM */
1340 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
1341
1342 if (prog) {
1343 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1344 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1345 &brw->wm.base, &brw->wm.prog_data->base);
1346 }
1347 }
1348
1349 const struct brw_tracked_state brw_wm_image_surfaces = {
1350 .dirty = {
1351 .mesa = _NEW_TEXTURE,
1352 .brw = BRW_NEW_BATCH |
1353 BRW_NEW_FRAGMENT_PROGRAM |
1354 BRW_NEW_FS_PROG_DATA |
1355 BRW_NEW_IMAGE_UNITS
1356 },
1357 .emit = brw_upload_wm_image_surfaces,
1358 };
1359
1360 void
1361 gen4_init_vtable_surface_functions(struct brw_context *brw)
1362 {
1363 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1364 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1365 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1366 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1367 }
1368
1369 static void
1370 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1371 {
1372 struct gl_context *ctx = &brw->ctx;
1373 /* _NEW_PROGRAM */
1374 struct gl_shader_program *prog =
1375 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1376
1377 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1378 const unsigned surf_idx =
1379 brw->cs.prog_data->binding_table.work_groups_start;
1380 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1381 drm_intel_bo *bo;
1382 uint32_t bo_offset;
1383
1384 if (brw->compute.num_work_groups_bo == NULL) {
1385 bo = NULL;
1386 intel_upload_data(brw,
1387 (void *)brw->compute.num_work_groups,
1388 3 * sizeof(GLuint),
1389 sizeof(GLuint),
1390 &bo,
1391 &bo_offset);
1392 } else {
1393 bo = brw->compute.num_work_groups_bo;
1394 bo_offset = brw->compute.num_work_groups_offset;
1395 }
1396
1397 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1398 bo, bo_offset,
1399 BRW_SURFACEFORMAT_RAW,
1400 3 * sizeof(GLuint), 1, true);
1401 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1402 }
1403 }
1404
1405 const struct brw_tracked_state brw_cs_work_groups_surface = {
1406 .dirty = {
1407 .brw = BRW_NEW_CS_WORK_GROUPS
1408 },
1409 .emit = brw_upload_cs_work_groups_surface,
1410 };