i965: use new subroutine index uploader.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42 #include "main/shaderapi.h"
43
44 #include "isl/isl.h"
45
46 #include "intel_mipmap_tree.h"
47 #include "intel_batchbuffer.h"
48 #include "intel_tex.h"
49 #include "intel_fbo.h"
50 #include "intel_buffer_objects.h"
51
52 #include "brw_context.h"
53 #include "brw_state.h"
54 #include "brw_defines.h"
55 #include "brw_wm.h"
56
57 struct surface_state_info {
58 unsigned num_dwords;
59 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
60    unsigned reloc_dw;     /* DWord containing the surface base address (gets a reloc) */
61    unsigned aux_reloc_dw; /* DWord containing the auxiliary (MCS) surface address */
62    unsigned tex_mocs;     /* MOCS value used for texture surfaces */
63    unsigned rb_mocs;      /* MOCS value used for render target surfaces */
64 };
65
66 static const struct surface_state_info surface_state_infos[] = {
67 [4] = {6, 32, 1, 0},
68 [5] = {6, 32, 1, 0},
69 [6] = {6, 32, 1, 0},
70 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
71 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
72 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
73 };
74
75 static void
76 brw_emit_surface_state(struct brw_context *brw,
77 struct intel_mipmap_tree *mt,
78 const struct isl_view *view,
79 uint32_t mocs, bool for_gather,
80 uint32_t *surf_offset, int surf_index,
81 unsigned read_domains, unsigned write_domains)
82 {
83 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
84
85 struct isl_surf surf;
86 intel_miptree_get_isl_surf(brw, mt, &surf);
87
88 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
89
90 struct isl_surf *aux_surf = NULL, aux_surf_s;
91 uint64_t aux_offset = 0;
92 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
93 if (mt->mcs_mt &&
94 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
95 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
96 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
97 aux_surf = &aux_surf_s;
98 assert(mt->mcs_mt->offset == 0);
99 aux_offset = mt->mcs_mt->bo->offset64;
100
101 /* We only really need a clear color if we also have an auxiliary
102        * surface.  Without one, it does nothing.
103 */
104 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
105 }
106
107 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
108 ss_info.num_dwords * 4, ss_info.ss_align,
109 surf_index, surf_offset);
110
111 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
112 .address = mt->bo->offset64 + mt->offset,
113 .aux_surf = aux_surf, .aux_usage = aux_usage,
114 .aux_address = aux_offset,
115 .mocs = mocs, .clear_color = clear_color);
116
117 drm_intel_bo_emit_reloc(brw->batch.bo,
118 *surf_offset + 4 * ss_info.reloc_dw,
119 mt->bo, mt->offset,
120 read_domains, write_domains);
121
122 if (aux_surf) {
123 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
124 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
125 * contain other control information. Since buffer addresses are always
126 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
127 * an ordinary reloc to do the necessary address translation.
128 */
129 assert((aux_offset & 0xfff) == 0);
130 drm_intel_bo_emit_reloc(brw->batch.bo,
131 *surf_offset + 4 * ss_info.aux_reloc_dw,
132 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
133 read_domains, write_domains);
134 }
135 }
136
137 uint32_t
138 brw_update_renderbuffer_surface(struct brw_context *brw,
139 struct gl_renderbuffer *rb,
140 bool layered, unsigned unit /* unused */,
141 uint32_t surf_index)
142 {
143 struct gl_context *ctx = &brw->ctx;
144 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
145 struct intel_mipmap_tree *mt = irb->mt;
146
147 assert(brw_render_target_supported(brw, rb));
148 intel_miptree_used_for_rendering(mt);
149
150 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
151 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
152 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
153 __func__, _mesa_get_format_name(rb_format));
154 }
155
156 const unsigned layer_multiplier =
157 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
158 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
159 MAX2(irb->mt->num_samples, 1) : 1;
160
161 struct isl_view view = {
162 .format = brw->render_target_format[rb_format],
163 .base_level = irb->mt_level - irb->mt->first_level,
164 .levels = 1,
165 .base_array_layer = irb->mt_layer / layer_multiplier,
166 .array_len = MAX2(irb->layer_count, 1),
167 .channel_select = {
168 ISL_CHANNEL_SELECT_RED,
169 ISL_CHANNEL_SELECT_GREEN,
170 ISL_CHANNEL_SELECT_BLUE,
171 ISL_CHANNEL_SELECT_ALPHA,
172 },
173 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
174 };
175
176 uint32_t offset;
177 brw_emit_surface_state(brw, mt, &view,
178 surface_state_infos[brw->gen].rb_mocs, false,
179 &offset, surf_index,
180 I915_GEM_DOMAIN_RENDER,
181 I915_GEM_DOMAIN_RENDER);
182 return offset;
183 }
184
185 GLuint
186 translate_tex_target(GLenum target)
187 {
188 switch (target) {
189 case GL_TEXTURE_1D:
190 case GL_TEXTURE_1D_ARRAY_EXT:
191 return BRW_SURFACE_1D;
192
193 case GL_TEXTURE_RECTANGLE_NV:
194 return BRW_SURFACE_2D;
195
196 case GL_TEXTURE_2D:
197 case GL_TEXTURE_2D_ARRAY_EXT:
198 case GL_TEXTURE_EXTERNAL_OES:
199 case GL_TEXTURE_2D_MULTISAMPLE:
200 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
201 return BRW_SURFACE_2D;
202
203 case GL_TEXTURE_3D:
204 return BRW_SURFACE_3D;
205
206 case GL_TEXTURE_CUBE_MAP:
207 case GL_TEXTURE_CUBE_MAP_ARRAY:
208 return BRW_SURFACE_CUBE;
209
210 default:
211 unreachable("not reached");
212 }
213 }
214
215 uint32_t
216 brw_get_surface_tiling_bits(uint32_t tiling)
217 {
218 switch (tiling) {
219 case I915_TILING_X:
220 return BRW_SURFACE_TILED;
221 case I915_TILING_Y:
222 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
223 default:
224 return 0;
225 }
226 }
227
228
229 uint32_t
230 brw_get_surface_num_multisamples(unsigned num_samples)
231 {
232 if (num_samples > 1)
233 return BRW_SURFACE_MULTISAMPLECOUNT_4;
234 else
235 return BRW_SURFACE_MULTISAMPLECOUNT_1;
236 }
237
238 /**
239 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
240 * swizzling.
241 */
242 int
243 brw_get_texture_swizzle(const struct gl_context *ctx,
244 const struct gl_texture_object *t)
245 {
246 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
247
248 int swizzles[SWIZZLE_NIL + 1] = {
249 SWIZZLE_X,
250 SWIZZLE_Y,
251 SWIZZLE_Z,
252 SWIZZLE_W,
253 SWIZZLE_ZERO,
254 SWIZZLE_ONE,
255 SWIZZLE_NIL
256 };
257
258 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
259 img->_BaseFormat == GL_DEPTH_STENCIL) {
260 GLenum depth_mode = t->DepthMode;
261
262 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
263 * with depth component data specified with a sized internal format.
264 * Otherwise, it's left at the old default, GL_LUMINANCE.
265 */
266 if (_mesa_is_gles3(ctx) &&
267 img->InternalFormat != GL_DEPTH_COMPONENT &&
268 img->InternalFormat != GL_DEPTH_STENCIL) {
269 depth_mode = GL_RED;
270 }
271
272 switch (depth_mode) {
273 case GL_ALPHA:
274 swizzles[0] = SWIZZLE_ZERO;
275 swizzles[1] = SWIZZLE_ZERO;
276 swizzles[2] = SWIZZLE_ZERO;
277 swizzles[3] = SWIZZLE_X;
278 break;
279 case GL_LUMINANCE:
280 swizzles[0] = SWIZZLE_X;
281 swizzles[1] = SWIZZLE_X;
282 swizzles[2] = SWIZZLE_X;
283 swizzles[3] = SWIZZLE_ONE;
284 break;
285 case GL_INTENSITY:
286 swizzles[0] = SWIZZLE_X;
287 swizzles[1] = SWIZZLE_X;
288 swizzles[2] = SWIZZLE_X;
289 swizzles[3] = SWIZZLE_X;
290 break;
291 case GL_RED:
292 swizzles[0] = SWIZZLE_X;
293 swizzles[1] = SWIZZLE_ZERO;
294 swizzles[2] = SWIZZLE_ZERO;
295 swizzles[3] = SWIZZLE_ONE;
296 break;
297 }
298 }
299
300 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
301
302 /* If the texture's format is alpha-only, force R, G, and B to
303 * 0.0. Similarly, if the texture's format has no alpha channel,
304 * force the alpha value read to 1.0. This allows for the
305 * implementation to use an RGBA texture for any of these formats
306 * without leaking any unexpected values.
307 */
308 switch (img->_BaseFormat) {
309 case GL_ALPHA:
310 swizzles[0] = SWIZZLE_ZERO;
311 swizzles[1] = SWIZZLE_ZERO;
312 swizzles[2] = SWIZZLE_ZERO;
313 break;
314 case GL_LUMINANCE:
315 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
316 swizzles[0] = SWIZZLE_X;
317 swizzles[1] = SWIZZLE_X;
318 swizzles[2] = SWIZZLE_X;
319 swizzles[3] = SWIZZLE_ONE;
320 }
321 break;
322 case GL_LUMINANCE_ALPHA:
323 if (datatype == GL_SIGNED_NORMALIZED) {
324 swizzles[0] = SWIZZLE_X;
325 swizzles[1] = SWIZZLE_X;
326 swizzles[2] = SWIZZLE_X;
327 swizzles[3] = SWIZZLE_W;
328 }
329 break;
330 case GL_INTENSITY:
331 if (datatype == GL_SIGNED_NORMALIZED) {
332 swizzles[0] = SWIZZLE_X;
333 swizzles[1] = SWIZZLE_X;
334 swizzles[2] = SWIZZLE_X;
335 swizzles[3] = SWIZZLE_X;
336 }
337 break;
338 case GL_RED:
339 case GL_RG:
340 case GL_RGB:
341 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
342 swizzles[3] = SWIZZLE_ONE;
343 break;
344 }
345
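   /* Compose the per-format swizzles computed above with the application's
    * texture swizzle state.  For example, with the identity swizzle and
    * DEPTH_TEXTURE_MODE GL_LUMINANCE this returns (X, X, X, ONE).
    */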
346 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
347 swizzles[GET_SWZ(t->_Swizzle, 1)],
348 swizzles[GET_SWZ(t->_Swizzle, 2)],
349 swizzles[GET_SWZ(t->_Swizzle, 3)]);
350 }
351
352 /**
353  * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
354 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
355 *
356 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
357 * 0 1 2 3 4 5
358 * 4 5 6 7 0 1
359 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
360 *
361 * which is simply adding 4 then modding by 8 (or anding with 7).
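 *
 * For example, SWIZZLE_X (0) maps to (0 + 4) & 7 = 4 = SCS_RED, and
 * SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0 = SCS_ZERO.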
362 *
363 * We then may need to apply workarounds for textureGather hardware bugs.
364 */
365 static unsigned
366 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
367 {
368 unsigned scs = (swizzle + 4) & 7;
369
370 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
371 }
372
373 void
374 brw_update_texture_surface(struct gl_context *ctx,
375 unsigned unit,
376 uint32_t *surf_offset,
377 bool for_gather,
378 uint32_t plane)
379 {
380 struct brw_context *brw = brw_context(ctx);
381 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
382
383 if (obj->Target == GL_TEXTURE_BUFFER) {
384 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
385
386 } else {
387 struct intel_texture_object *intel_obj = intel_texture_object(obj);
388 struct intel_mipmap_tree *mt = intel_obj->mt;
389
390 if (plane > 0) {
391 if (mt->plane[plane - 1] == NULL)
392 return;
393 mt = mt->plane[plane - 1];
394 }
395
396 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
397 /* If this is a view with restricted NumLayers, then our effective depth
398 * is not just the miptree depth.
399 */
400 const unsigned view_num_layers =
401 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
402 mt->logical_depth0;
403
404 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
405 * texturing functions that return a float, as our code generation always
406 * selects the .x channel (which would always be 0).
407 */
408 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
409 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
410 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
411 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
412 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
413 brw_get_texture_swizzle(&brw->ctx, obj));
414
415 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
416 unsigned format = translate_tex_format(brw, mesa_fmt,
417 sampler->sRGBDecode);
418
419 /* Implement gen6 and gen7 gather work-around */
420 bool need_green_to_blue = false;
421 if (for_gather) {
422 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
423 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
424 need_green_to_blue = brw->is_haswell;
425 } else if (brw->gen == 6) {
426 /* Sandybridge's gather4 message is broken for integer formats.
427 * To work around this, we pretend the surface is UNORM for
428 * 8 or 16-bit formats, and emit shader instructions to recover
429 * the real INT/UINT value. For 32-bit formats, we pretend
430 * the surface is FLOAT, and simply reinterpret the resulting
431 * bits.
432 */
433 switch (format) {
434 case BRW_SURFACEFORMAT_R8_SINT:
435 case BRW_SURFACEFORMAT_R8_UINT:
436 format = BRW_SURFACEFORMAT_R8_UNORM;
437 break;
438
439 case BRW_SURFACEFORMAT_R16_SINT:
440 case BRW_SURFACEFORMAT_R16_UINT:
441 format = BRW_SURFACEFORMAT_R16_UNORM;
442 break;
443
444 case BRW_SURFACEFORMAT_R32_SINT:
445 case BRW_SURFACEFORMAT_R32_UINT:
446 format = BRW_SURFACEFORMAT_R32_FLOAT;
447 break;
448
449 default:
450 break;
451 }
452 }
453 }
454
455 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
456 assert(brw->gen >= 8);
457 mt = mt->stencil_mt;
458 format = BRW_SURFACEFORMAT_R8_UINT;
459 }
460
461 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
462
463 struct isl_view view = {
464 .format = format,
465 .base_level = obj->MinLevel + obj->BaseLevel,
466 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
467 .base_array_layer = obj->MinLayer,
468 .array_len = view_num_layers,
469 .channel_select = {
470 swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
471 swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
472 swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
473 swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
474 },
475 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
476 };
477
478 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
479 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
480 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
481
482 brw_emit_surface_state(brw, mt, &view,
483 surface_state_infos[brw->gen].tex_mocs, for_gather,
484 surf_offset, surf_index,
485 I915_GEM_DOMAIN_SAMPLER, 0);
486 }
487 }
488
489 void
490 brw_emit_buffer_surface_state(struct brw_context *brw,
491 uint32_t *out_offset,
492 drm_intel_bo *bo,
493 unsigned buffer_offset,
494 unsigned surface_format,
495 unsigned buffer_size,
496 unsigned pitch,
497 bool rw)
498 {
499 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
500
501 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
502 ss_info.num_dwords * 4, ss_info.ss_align,
503 out_offset);
504
505 isl_buffer_fill_state(&brw->isl_dev, dw,
506 .address = (bo ? bo->offset64 : 0) + buffer_offset,
507 .size = buffer_size,
508 .format = surface_format,
509 .stride = pitch,
510 .mocs = ss_info.tex_mocs);
511
512 if (bo) {
513 drm_intel_bo_emit_reloc(brw->batch.bo,
514 *out_offset + 4 * ss_info.reloc_dw,
515 bo, buffer_offset,
516 I915_GEM_DOMAIN_SAMPLER,
517 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
518 }
519 }
520
521 void
522 brw_update_buffer_texture_surface(struct gl_context *ctx,
523 unsigned unit,
524 uint32_t *surf_offset)
525 {
526 struct brw_context *brw = brw_context(ctx);
527 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
528 struct intel_buffer_object *intel_obj =
529 intel_buffer_object(tObj->BufferObject);
530 uint32_t size = tObj->BufferSize;
531 drm_intel_bo *bo = NULL;
532 mesa_format format = tObj->_BufferObjectFormat;
533 uint32_t brw_format = brw_format_for_mesa_format(format);
534 int texel_size = _mesa_get_format_bytes(format);
535
536 if (intel_obj) {
537 size = MIN2(size, intel_obj->Base.Size);
538 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
539 }
540
541 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
542 _mesa_problem(NULL, "bad format %s for texture buffer\n",
543 _mesa_get_format_name(format));
544 }
545
546 brw_emit_buffer_surface_state(brw, surf_offset, bo,
547 tObj->BufferOffset,
548 brw_format,
549 size,
550 texel_size,
551 false /* rw */);
552 }
553
554 /**
555 * Create the constant buffer surface. Vertex/fragment shader constants will be
556 * read from this buffer with Data Port Read instructions/messages.
557 */
558 void
559 brw_create_constant_surface(struct brw_context *brw,
560 drm_intel_bo *bo,
561 uint32_t offset,
562 uint32_t size,
563 uint32_t *out_offset)
564 {
565 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
566 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
567 size, 1, false);
568 }
569
570 /**
571 * Create the buffer surface. Shader buffer variables will be
572  * read from / written to this buffer with Data Port Read/Write
573 * instructions/messages.
574 */
575 void
576 brw_create_buffer_surface(struct brw_context *brw,
577 drm_intel_bo *bo,
578 uint32_t offset,
579 uint32_t size,
580 uint32_t *out_offset)
581 {
582 /* Use a raw surface so we can reuse existing untyped read/write/atomic
583 * messages. We need these specifically for the fragment shader since they
584  * include a pixel mask header that we need in order to ensure correct behavior
585 * with helper invocations, which cannot write to the buffer.
586 */
587 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
588 BRW_SURFACEFORMAT_RAW,
589 size, 1, true);
590 }
591
592 /**
593 * Set up a binding table entry for use by stream output logic (transform
594 * feedback).
595 *
596 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
597 */
598 void
599 brw_update_sol_surface(struct brw_context *brw,
600 struct gl_buffer_object *buffer_obj,
601 uint32_t *out_offset, unsigned num_vector_components,
602 unsigned stride_dwords, unsigned offset_dwords)
603 {
604 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
605 uint32_t offset_bytes = 4 * offset_dwords;
606 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
607 offset_bytes,
608 buffer_obj->Size - offset_bytes);
609 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
610 out_offset);
611 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
612 size_t size_dwords = buffer_obj->Size / 4;
613 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
614
615 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
616 * too big to map using a single binding table entry?
617 */
618 assert((size_dwords - offset_dwords) / stride_dwords
619 <= BRW_MAX_NUM_BUFFER_ENTRIES);
620
621 if (size_dwords > offset_dwords + num_vector_components) {
622 /* There is room for at least 1 transform feedback output in the buffer.
623 * Compute the number of additional transform feedback outputs the
624 * buffer has room for.
625 */
626 buffer_size_minus_1 =
627 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
628 } else {
629 /* There isn't even room for a single transform feedback output in the
630 * buffer. We can't configure the binding table entry to prevent output
631 * entirely; we'll have to rely on the geometry shader to detect
632 * overflow. But to minimize the damage in case of a bug, set up the
633 * binding table entry to just allow a single output.
634 */
635 buffer_size_minus_1 = 0;
636 }
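   /* buffer_size_minus_1 is split across the Width (bits 6:0), Height
    * (bits 19:7) and Depth (bits 26:20) fields of the BUFFER surface state
    * below; the encoded value plus one is the number of outputs that fit.
    */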
637 width = buffer_size_minus_1 & 0x7f;
638 height = (buffer_size_minus_1 & 0xfff80) >> 7;
639 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
640
641 switch (num_vector_components) {
642 case 1:
643 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
644 break;
645 case 2:
646 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
647 break;
648 case 3:
649 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
650 break;
651 case 4:
652 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
653 break;
654 default:
655 unreachable("Invalid vector size for transform feedback output");
656 }
657
658 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
659 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
660 surface_format << BRW_SURFACE_FORMAT_SHIFT |
661 BRW_SURFACE_RC_READ_WRITE;
662 surf[1] = bo->offset64 + offset_bytes; /* reloc */
663 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
664 height << BRW_SURFACE_HEIGHT_SHIFT);
665 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
666 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
667 surf[4] = 0;
668 surf[5] = 0;
669
670 /* Emit relocation to surface contents. */
671 drm_intel_bo_emit_reloc(brw->batch.bo,
672 *out_offset + 4,
673 bo, offset_bytes,
674 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
675 }
676
677 /* Creates a new WM constant buffer reflecting the current fragment program's
678 * constants, if needed by the fragment program.
679 *
680 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
681 * state atom.
682 */
683 static void
684 brw_upload_wm_pull_constants(struct brw_context *brw)
685 {
686 struct brw_stage_state *stage_state = &brw->wm.base;
687 /* BRW_NEW_FRAGMENT_PROGRAM */
688 struct brw_fragment_program *fp =
689 (struct brw_fragment_program *) brw->fragment_program;
690 /* BRW_NEW_FS_PROG_DATA */
691 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
692
693 _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
694 /* _NEW_PROGRAM_CONSTANTS */
695 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
696 stage_state, prog_data);
697 }
698
699 const struct brw_tracked_state brw_wm_pull_constants = {
700 .dirty = {
701 .mesa = _NEW_PROGRAM_CONSTANTS,
702 .brw = BRW_NEW_BATCH |
703 BRW_NEW_BLORP |
704 BRW_NEW_FRAGMENT_PROGRAM |
705 BRW_NEW_FS_PROG_DATA,
706 },
707 .emit = brw_upload_wm_pull_constants,
708 };
709
710 /**
711 * Creates a null renderbuffer surface.
712 *
713 * This is used when the shader doesn't write to any color output. An FB
714 * write to target 0 will still be emitted, because that's how the thread is
715 * terminated (and computed depth is returned), so we need to have the
716  * hardware discard the target 0 color output.
717 */
718 static void
719 brw_emit_null_surface_state(struct brw_context *brw,
720 unsigned width,
721 unsigned height,
722 unsigned samples,
723 uint32_t *out_offset)
724 {
725    /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
726 * Notes):
727 *
728 * A null surface will be used in instances where an actual surface is
729 * not bound. When a write message is generated to a null surface, no
730 * actual surface is written to. When a read message (including any
731 * sampling engine message) is generated to a null surface, the result
732 * is all zeros. Note that a null surface type is allowed to be used
733     *     with all messages, even if it is not specifically indicated as
734 * supported. All of the remaining fields in surface state are ignored
735 * for null surfaces, with the following exceptions:
736 *
737 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
738 * depth buffer’s corresponding state for all render target surfaces,
739 * including null.
740 *
741 * - Surface Format must be R8G8B8A8_UNORM.
742 */
743 unsigned surface_type = BRW_SURFACE_NULL;
744 drm_intel_bo *bo = NULL;
745 unsigned pitch_minus_1 = 0;
746 uint32_t multisampling_state = 0;
747 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
748 out_offset);
749
750 if (samples > 1) {
751 /* On Gen6, null render targets seem to cause GPU hangs when
752        * multisampling.  So work around this problem by rendering into a dummy
753 * color buffer.
754 *
755 * To decrease the amount of memory needed by the workaround buffer, we
756 * set its pitch to 128 bytes (the width of a Y tile). This means that
757 * the amount of memory needed for the workaround buffer is
758 * (width_in_tiles + height_in_tiles - 1) tiles.
759 *
760 * Note that since the workaround buffer will be interpreted by the
761 * hardware as an interleaved multisampled buffer, we need to compute
762 * width_in_tiles and height_in_tiles by dividing the width and height
763 * by 16 rather than the normal Y-tile size of 32.
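       *
       * For example (illustrative numbers only): a 1920x1080 target gives
       * width_in_tiles = 120 and height_in_tiles = 68, so the workaround
       * buffer needs (120 + 68 - 1) * 4096 bytes, i.e. about 748 kB.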
764 */
765 unsigned width_in_tiles = ALIGN(width, 16) / 16;
766 unsigned height_in_tiles = ALIGN(height, 16) / 16;
767 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
768 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
769 size_needed);
770 bo = brw->wm.multisampled_null_render_target_bo;
771 surface_type = BRW_SURFACE_2D;
772 pitch_minus_1 = 127;
773 multisampling_state = brw_get_surface_num_multisamples(samples);
774 }
775
776 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
777 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
778 if (brw->gen < 6) {
779 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
780 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
781 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
782 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
783 }
784 surf[1] = bo ? bo->offset64 : 0;
785 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
786 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
787
788    /* From Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
789 * Notes):
790 *
791 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
792 */
793 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
794 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
795 surf[4] = multisampling_state;
796 surf[5] = 0;
797
798 if (bo) {
799 drm_intel_bo_emit_reloc(brw->batch.bo,
800 *out_offset + 4,
801 bo, 0,
802 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
803 }
804 }
805
806 /**
807 * Sets up a surface state structure to point at the given region.
808 * While it is only used for the front/back buffer currently, it should be
809  * usable for further buffers when doing ARB_draw_buffers support.
810 */
811 static uint32_t
812 gen4_update_renderbuffer_surface(struct brw_context *brw,
813 struct gl_renderbuffer *rb,
814 bool layered, unsigned unit,
815 uint32_t surf_index)
816 {
817 struct gl_context *ctx = &brw->ctx;
818 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
819 struct intel_mipmap_tree *mt = irb->mt;
820 uint32_t *surf;
821 uint32_t tile_x, tile_y;
822 uint32_t format = 0;
823 uint32_t offset;
824 /* _NEW_BUFFERS */
825 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
826 /* BRW_NEW_FS_PROG_DATA */
827
828 assert(!layered);
829
830 if (rb->TexImage && !brw->has_surface_tile_offset) {
831 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
832
833 if (tile_x != 0 || tile_y != 0) {
834 /* Original gen4 hardware couldn't draw to a non-tile-aligned
835        * destination in a miptree unless you actually set up your renderbuffer
836 * as a miptree and used the fragile lod/array_index/etc. controls to
837 * select the image. So, instead, we just make a new single-level
838 * miptree and render into that.
839 */
840 intel_renderbuffer_move_to_temp(brw, irb, false);
841 mt = irb->mt;
842 }
843 }
844
845 intel_miptree_used_for_rendering(irb->mt);
846
847 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
848
849 format = brw->render_target_format[rb_format];
850 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
851 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
852 __func__, _mesa_get_format_name(rb_format));
853 }
854
855 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
856 format << BRW_SURFACE_FORMAT_SHIFT);
857
858 /* reloc */
859 assert(mt->offset % mt->cpp == 0);
860 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
861 mt->bo->offset64 + mt->offset);
862
863 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
864 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
865
866 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
867 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
868
869 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
870
871 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
872 /* Note that the low bits of these fields are missing, so
873 * there's the possibility of getting in trouble.
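    * (The fields below hold tile_x / 4 and tile_y / 2, so offsets that are
    * not multiples of 4 pixels horizontally or 2 rows vertically cannot be
    * represented.)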
874 */
875 assert(tile_x % 4 == 0);
876 assert(tile_y % 2 == 0);
877 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
878 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
879 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
880
881 if (brw->gen < 6) {
882 /* _NEW_COLOR */
883 if (!ctx->Color.ColorLogicOpEnabled &&
884 (ctx->Color.BlendEnabled & (1 << unit)))
885 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
886
887 if (!ctx->Color.ColorMask[unit][0])
888 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
889 if (!ctx->Color.ColorMask[unit][1])
890 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
891 if (!ctx->Color.ColorMask[unit][2])
892 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
893
894       /* Disable writes to the alpha component when the
895 * renderbuffer is XRGB.
896 */
897 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
898 !ctx->Color.ColorMask[unit][3]) {
899 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
900 }
901 }
902
903 drm_intel_bo_emit_reloc(brw->batch.bo,
904 offset + 4,
905 mt->bo,
906 surf[1] - mt->bo->offset64,
907 I915_GEM_DOMAIN_RENDER,
908 I915_GEM_DOMAIN_RENDER);
909
910 return offset;
911 }
912
913 /**
914 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
915 */
916 void
917 brw_update_renderbuffer_surfaces(struct brw_context *brw,
918 const struct gl_framebuffer *fb,
919 uint32_t render_target_start,
920 uint32_t *surf_offset)
921 {
922 GLuint i;
923 const unsigned int w = _mesa_geometric_width(fb);
924 const unsigned int h = _mesa_geometric_height(fb);
925 const unsigned int s = _mesa_geometric_samples(fb);
926
927 /* Update surfaces for drawing buffers */
928 if (fb->_NumColorDrawBuffers >= 1) {
929 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
930 const uint32_t surf_index = render_target_start + i;
931
932 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
933 surf_offset[surf_index] =
934 brw->vtbl.update_renderbuffer_surface(
935 brw, fb->_ColorDrawBuffers[i],
936 _mesa_geometric_layers(fb) > 0, i, surf_index);
937 } else {
938 brw->vtbl.emit_null_surface_state(brw, w, h, s,
939 &surf_offset[surf_index]);
940 }
941 }
942 } else {
943 const uint32_t surf_index = render_target_start;
944 brw->vtbl.emit_null_surface_state(brw, w, h, s,
945 &surf_offset[surf_index]);
946 }
947 }
948
949 static void
950 update_renderbuffer_surfaces(struct brw_context *brw)
951 {
952 const struct gl_context *ctx = &brw->ctx;
953
954 /* _NEW_BUFFERS | _NEW_COLOR */
955 const struct gl_framebuffer *fb = ctx->DrawBuffer;
956 brw_update_renderbuffer_surfaces(
957 brw, fb,
958 brw->wm.prog_data->binding_table.render_target_start,
959 brw->wm.base.surf_offset);
960 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
961 }
962
963 const struct brw_tracked_state brw_renderbuffer_surfaces = {
964 .dirty = {
965 .mesa = _NEW_BUFFERS |
966 _NEW_COLOR,
967 .brw = BRW_NEW_BATCH |
968 BRW_NEW_BLORP |
969 BRW_NEW_FS_PROG_DATA,
970 },
971 .emit = update_renderbuffer_surfaces,
972 };
973
974 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
975 .dirty = {
976 .mesa = _NEW_BUFFERS,
977 .brw = BRW_NEW_BATCH |
978 BRW_NEW_BLORP,
979 },
980 .emit = update_renderbuffer_surfaces,
981 };
982
983
984 static void
985 update_stage_texture_surfaces(struct brw_context *brw,
986 const struct gl_program *prog,
987 struct brw_stage_state *stage_state,
988 bool for_gather, uint32_t plane)
989 {
990 if (!prog)
991 return;
992
993 struct gl_context *ctx = &brw->ctx;
994
995 uint32_t *surf_offset = stage_state->surf_offset;
996
997 /* BRW_NEW_*_PROG_DATA */
998 if (for_gather)
999 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1000 else
1001 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1002
1003 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1004 for (unsigned s = 0; s < num_samplers; s++) {
1005 surf_offset[s] = 0;
1006
1007 if (prog->SamplersUsed & (1 << s)) {
1008 const unsigned unit = prog->SamplerUnits[s];
1009
1010 /* _NEW_TEXTURE */
1011 if (ctx->Texture.Unit[unit]._Current) {
1012 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1013 }
1014 }
1015 }
1016 }
1017
1018
1019 /**
1020 * Construct SURFACE_STATE objects for enabled textures.
1021 */
1022 static void
1023 brw_update_texture_surfaces(struct brw_context *brw)
1024 {
1025 /* BRW_NEW_VERTEX_PROGRAM */
1026 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1027
1028 /* BRW_NEW_TESS_PROGRAMS */
1029 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1030 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1031
1032 /* BRW_NEW_GEOMETRY_PROGRAM */
1033 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1034
1035 /* BRW_NEW_FRAGMENT_PROGRAM */
1036 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1037
1038 /* _NEW_TEXTURE */
1039 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1040 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1041 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1042 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1043 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1044
1045    /* Emit an alternate set of surface state for gather.  This
1046     * allows the surface format to be overridden for only the
1047     * gather4 messages. */
1048 if (brw->gen < 8) {
1049 if (vs && vs->UsesGather)
1050 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1051 if (tcs && tcs->UsesGather)
1052 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1053 if (tes && tes->UsesGather)
1054 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1055 if (gs && gs->UsesGather)
1056 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1057 if (fs && fs->UsesGather)
1058 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1059 }
1060
1061 if (fs) {
1062 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1063 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1064 }
1065
1066 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1067 }
1068
1069 const struct brw_tracked_state brw_texture_surfaces = {
1070 .dirty = {
1071 .mesa = _NEW_TEXTURE,
1072 .brw = BRW_NEW_BATCH |
1073 BRW_NEW_BLORP |
1074 BRW_NEW_FRAGMENT_PROGRAM |
1075 BRW_NEW_FS_PROG_DATA |
1076 BRW_NEW_GEOMETRY_PROGRAM |
1077 BRW_NEW_GS_PROG_DATA |
1078 BRW_NEW_TESS_PROGRAMS |
1079 BRW_NEW_TCS_PROG_DATA |
1080 BRW_NEW_TES_PROG_DATA |
1081 BRW_NEW_TEXTURE_BUFFER |
1082 BRW_NEW_VERTEX_PROGRAM |
1083 BRW_NEW_VS_PROG_DATA,
1084 },
1085 .emit = brw_update_texture_surfaces,
1086 };
1087
1088 static void
1089 brw_update_cs_texture_surfaces(struct brw_context *brw)
1090 {
1091 /* BRW_NEW_COMPUTE_PROGRAM */
1092 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1093
1094 /* _NEW_TEXTURE */
1095 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1096
1097    /* Emit an alternate set of surface state for gather.  This
1098     * allows the surface format to be overridden for only the
1099     * gather4 messages.
1100 */
1101 if (brw->gen < 8) {
1102 if (cs && cs->UsesGather)
1103 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1104 }
1105
1106 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1107 }
1108
1109 const struct brw_tracked_state brw_cs_texture_surfaces = {
1110 .dirty = {
1111 .mesa = _NEW_TEXTURE,
1112 .brw = BRW_NEW_BATCH |
1113 BRW_NEW_BLORP |
1114 BRW_NEW_COMPUTE_PROGRAM,
1115 },
1116 .emit = brw_update_cs_texture_surfaces,
1117 };
1118
1119
1120 void
1121 brw_upload_ubo_surfaces(struct brw_context *brw,
1122 struct gl_linked_shader *shader,
1123 struct brw_stage_state *stage_state,
1124 struct brw_stage_prog_data *prog_data)
1125 {
1126 struct gl_context *ctx = &brw->ctx;
1127
1128 if (!shader)
1129 return;
1130
1131 uint32_t *ubo_surf_offsets =
1132 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1133
1134 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1135 struct gl_uniform_buffer_binding *binding =
1136 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1137
1138 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1139 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1140 } else {
1141 struct intel_buffer_object *intel_bo =
1142 intel_buffer_object(binding->BufferObject);
1143 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1144 if (!binding->AutomaticSize)
1145 size = MIN2(size, binding->Size);
1146 drm_intel_bo *bo =
1147 intel_bufferobj_buffer(brw, intel_bo,
1148 binding->Offset,
1149 size);
1150 brw_create_constant_surface(brw, bo, binding->Offset,
1151 size,
1152 &ubo_surf_offsets[i]);
1153 }
1154 }
1155
1156 uint32_t *ssbo_surf_offsets =
1157 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1158
1159 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1160 struct gl_shader_storage_buffer_binding *binding =
1161 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1162
1163 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1164 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1165 } else {
1166 struct intel_buffer_object *intel_bo =
1167 intel_buffer_object(binding->BufferObject);
1168 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1169 if (!binding->AutomaticSize)
1170 size = MIN2(size, binding->Size);
1171 drm_intel_bo *bo =
1172 intel_bufferobj_buffer(brw, intel_bo,
1173 binding->Offset,
1174 size);
1175 brw_create_buffer_surface(brw, bo, binding->Offset,
1176 size,
1177 &ssbo_surf_offsets[i]);
1178 }
1179 }
1180
1181 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1182 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1183 }
1184
1185 static void
1186 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1187 {
1188 struct gl_context *ctx = &brw->ctx;
1189 /* _NEW_PROGRAM */
1190 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1191
1192 if (!prog)
1193 return;
1194
1195 /* BRW_NEW_FS_PROG_DATA */
1196 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1197 &brw->wm.base, &brw->wm.prog_data->base);
1198 }
1199
1200 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1201 .dirty = {
1202 .mesa = _NEW_PROGRAM,
1203 .brw = BRW_NEW_BATCH |
1204 BRW_NEW_BLORP |
1205 BRW_NEW_FS_PROG_DATA |
1206 BRW_NEW_UNIFORM_BUFFER,
1207 },
1208 .emit = brw_upload_wm_ubo_surfaces,
1209 };
1210
1211 static void
1212 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1213 {
1214 struct gl_context *ctx = &brw->ctx;
1215 /* _NEW_PROGRAM */
1216 struct gl_shader_program *prog =
1217 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1218
1219 if (!prog)
1220 return;
1221
1222 /* BRW_NEW_CS_PROG_DATA */
1223 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1224 &brw->cs.base, &brw->cs.prog_data->base);
1225 }
1226
1227 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1228 .dirty = {
1229 .mesa = _NEW_PROGRAM,
1230 .brw = BRW_NEW_BATCH |
1231 BRW_NEW_BLORP |
1232 BRW_NEW_CS_PROG_DATA |
1233 BRW_NEW_UNIFORM_BUFFER,
1234 },
1235 .emit = brw_upload_cs_ubo_surfaces,
1236 };
1237
1238 void
1239 brw_upload_abo_surfaces(struct brw_context *brw,
1240 struct gl_linked_shader *shader,
1241 struct brw_stage_state *stage_state,
1242 struct brw_stage_prog_data *prog_data)
1243 {
1244 struct gl_context *ctx = &brw->ctx;
1245 uint32_t *surf_offsets =
1246 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1247
1248 if (shader && shader->NumAtomicBuffers) {
1249 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1250 struct gl_atomic_buffer_binding *binding =
1251 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1252 struct intel_buffer_object *intel_bo =
1253 intel_buffer_object(binding->BufferObject);
1254 drm_intel_bo *bo = intel_bufferobj_buffer(
1255 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1256
1257 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1258 binding->Offset, BRW_SURFACEFORMAT_RAW,
1259 bo->size - binding->Offset, 1, true);
1260 }
1261
1262 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1263 }
1264 }
1265
1266 static void
1267 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1268 {
1269 struct gl_context *ctx = &brw->ctx;
1270 /* _NEW_PROGRAM */
1271 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1272
1273 if (prog) {
1274 /* BRW_NEW_FS_PROG_DATA */
1275 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1276 &brw->wm.base, &brw->wm.prog_data->base);
1277 }
1278 }
1279
1280 const struct brw_tracked_state brw_wm_abo_surfaces = {
1281 .dirty = {
1282 .mesa = _NEW_PROGRAM,
1283 .brw = BRW_NEW_ATOMIC_BUFFER |
1284 BRW_NEW_BLORP |
1285 BRW_NEW_BATCH |
1286 BRW_NEW_FS_PROG_DATA,
1287 },
1288 .emit = brw_upload_wm_abo_surfaces,
1289 };
1290
1291 static void
1292 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1293 {
1294 struct gl_context *ctx = &brw->ctx;
1295 /* _NEW_PROGRAM */
1296 struct gl_shader_program *prog =
1297 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1298
1299 if (prog) {
1300 /* BRW_NEW_CS_PROG_DATA */
1301 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1302 &brw->cs.base, &brw->cs.prog_data->base);
1303 }
1304 }
1305
1306 const struct brw_tracked_state brw_cs_abo_surfaces = {
1307 .dirty = {
1308 .mesa = _NEW_PROGRAM,
1309 .brw = BRW_NEW_ATOMIC_BUFFER |
1310 BRW_NEW_BLORP |
1311 BRW_NEW_BATCH |
1312 BRW_NEW_CS_PROG_DATA,
1313 },
1314 .emit = brw_upload_cs_abo_surfaces,
1315 };
1316
1317 static void
1318 brw_upload_cs_image_surfaces(struct brw_context *brw)
1319 {
1320 struct gl_context *ctx = &brw->ctx;
1321 /* _NEW_PROGRAM */
1322 struct gl_shader_program *prog =
1323 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1324
1325 if (prog) {
1326 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1327 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1328 &brw->cs.base, &brw->cs.prog_data->base);
1329 }
1330 }
1331
1332 const struct brw_tracked_state brw_cs_image_surfaces = {
1333 .dirty = {
1334 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1335 .brw = BRW_NEW_BATCH |
1336 BRW_NEW_BLORP |
1337 BRW_NEW_CS_PROG_DATA |
1338 BRW_NEW_IMAGE_UNITS
1339 },
1340 .emit = brw_upload_cs_image_surfaces,
1341 };
1342
1343 static uint32_t
1344 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1345 {
1346 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1347 uint32_t hw_format = brw_format_for_mesa_format(format);
1348 if (access == GL_WRITE_ONLY) {
1349 return hw_format;
1350 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1351 /* Typed surface reads support a very limited subset of the shader
1352 * image formats. Translate it into the closest format the
1353 * hardware supports.
1354 */
1355 return isl_lower_storage_image_format(devinfo, hw_format);
1356 } else {
1357      /* The hardware doesn't actually support a typed format that we can use,
1358 * so we have to fall back to untyped read/write messages.
1359 */
1360 return BRW_SURFACEFORMAT_RAW;
1361 }
1362 }
1363
1364 static void
1365 update_default_image_param(struct brw_context *brw,
1366 struct gl_image_unit *u,
1367 unsigned surface_idx,
1368 struct brw_image_param *param)
1369 {
1370 memset(param, 0, sizeof(*param));
1371 param->surface_idx = surface_idx;
1372 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1373 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1374 * detailed explanation of these parameters.
1375 */
1376 param->swizzling[0] = 0xff;
1377 param->swizzling[1] = 0xff;
1378 }
1379
1380 static void
1381 update_buffer_image_param(struct brw_context *brw,
1382 struct gl_image_unit *u,
1383 unsigned surface_idx,
1384 struct brw_image_param *param)
1385 {
1386 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1387
1388 update_default_image_param(brw, u, surface_idx, param);
1389
1390 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1391 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1392 }
1393
1394 static void
1395 update_texture_image_param(struct brw_context *brw,
1396 struct gl_image_unit *u,
1397 unsigned surface_idx,
1398 struct brw_image_param *param)
1399 {
1400 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1401
1402 update_default_image_param(brw, u, surface_idx, param);
1403
1404 param->size[0] = minify(mt->logical_width0, u->Level);
1405 param->size[1] = minify(mt->logical_height0, u->Level);
1406 param->size[2] = (!u->Layered ? 1 :
1407 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1408 u->TexObj->Target == GL_TEXTURE_3D ?
1409 minify(mt->logical_depth0, u->Level) :
1410 mt->logical_depth0);
1411
1412 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1413 &param->offset[0],
1414 &param->offset[1]);
1415
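   /* stride[0] is the texel size in bytes; stride[1] is the miptree's row
    * pitch expressed in texels rather than bytes.
    */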
1416 param->stride[0] = mt->cpp;
1417 param->stride[1] = mt->pitch / mt->cpp;
1418 param->stride[2] =
1419 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1420 param->stride[3] =
1421 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1422
1423 if (mt->tiling == I915_TILING_X) {
1424 /* An X tile is a rectangular block of 512x8 bytes. */
1425 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1426 param->tiling[1] = _mesa_logbase2(8);
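      /* e.g. with a hypothetical 4-byte-per-texel format this gives
       * tiling[0] = log2(512 / 4) = 7 and tiling[1] = log2(8) = 3.
       */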
1427
1428 if (brw->has_swizzling) {
1429 /* Right shifts required to swizzle bits 9 and 10 of the memory
1430 * address with bit 6.
1431 */
1432 param->swizzling[0] = 3;
1433 param->swizzling[1] = 4;
1434 }
1435 } else if (mt->tiling == I915_TILING_Y) {
1436       /* The layout of a Y-tiled surface in memory isn't really fundamentally
1437        * different from the layout of an X-tiled surface; we simply pretend that
1438        * the surface is broken up into a number of smaller 16Bx32 tiles, each
1439        * one arranged in X-major order just as is the case for X-tiling.
1440 */
1441 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1442 param->tiling[1] = _mesa_logbase2(32);
1443
1444 if (brw->has_swizzling) {
1445 /* Right shift required to swizzle bit 9 of the memory address with
1446 * bit 6.
1447 */
1448 param->swizzling[0] = 3;
1449 }
1450 }
1451
1452 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1453 * address calculation algorithm (emit_address_calculation() in
1454 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1455 * modulus equal to the LOD.
1456 */
1457 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1458 0);
1459 }
1460
1461 static void
1462 update_image_surface(struct brw_context *brw,
1463 struct gl_image_unit *u,
1464 GLenum access,
1465 unsigned surface_idx,
1466 uint32_t *surf_offset,
1467 struct brw_image_param *param)
1468 {
1469 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1470 struct gl_texture_object *obj = u->TexObj;
1471 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1472
1473 if (obj->Target == GL_TEXTURE_BUFFER) {
1474 struct intel_buffer_object *intel_obj =
1475 intel_buffer_object(obj->BufferObject);
1476 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1477 _mesa_get_format_bytes(u->_ActualFormat));
1478
1479 brw_emit_buffer_surface_state(
1480 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1481 format, intel_obj->Base.Size, texel_size,
1482 access != GL_READ_ONLY);
1483
1484 update_buffer_image_param(brw, u, surface_idx, param);
1485
1486 } else {
1487 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1488 struct intel_mipmap_tree *mt = intel_obj->mt;
1489
1490 if (format == BRW_SURFACEFORMAT_RAW) {
1491 brw_emit_buffer_surface_state(
1492 brw, surf_offset, mt->bo, mt->offset,
1493 format, mt->bo->size - mt->offset, 1 /* pitch */,
1494 access != GL_READ_ONLY);
1495
1496 } else {
1497 const unsigned num_layers = (!u->Layered ? 1 :
1498 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1499 mt->logical_depth0);
1500
1501 struct isl_view view = {
1502 .format = format,
1503 .base_level = obj->MinLevel + u->Level,
1504 .levels = 1,
1505 .base_array_layer = obj->MinLayer + u->_Layer,
1506 .array_len = num_layers,
1507 .channel_select = {
1508 ISL_CHANNEL_SELECT_RED,
1509 ISL_CHANNEL_SELECT_GREEN,
1510 ISL_CHANNEL_SELECT_BLUE,
1511 ISL_CHANNEL_SELECT_ALPHA,
1512 },
1513 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1514 };
1515
1516 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1517
1518 brw_emit_surface_state(brw, mt, &view,
1519 surface_state_infos[brw->gen].tex_mocs, false,
1520 surf_offset, surf_index,
1521 I915_GEM_DOMAIN_SAMPLER,
1522 access == GL_READ_ONLY ? 0 :
1523 I915_GEM_DOMAIN_SAMPLER);
1524 }
1525
1526 update_texture_image_param(brw, u, surface_idx, param);
1527 }
1528
1529 } else {
1530 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1531 update_default_image_param(brw, u, surface_idx, param);
1532 }
1533 }
1534
1535 void
1536 brw_upload_image_surfaces(struct brw_context *brw,
1537 struct gl_linked_shader *shader,
1538 struct brw_stage_state *stage_state,
1539 struct brw_stage_prog_data *prog_data)
1540 {
1541 struct gl_context *ctx = &brw->ctx;
1542
1543 if (shader && shader->NumImages) {
1544 for (unsigned i = 0; i < shader->NumImages; i++) {
1545 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1546 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1547
1548 update_image_surface(brw, u, shader->ImageAccess[i],
1549 surf_idx,
1550 &stage_state->surf_offset[surf_idx],
1551 &prog_data->image_param[i]);
1552 }
1553
1554 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1555       /* This may have changed the image metadata that depends on the context
1556        * image unit state and is passed to the program as uniforms, so make sure
1557        * that push and pull constants are reuploaded.
1558 */
1559 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1560 }
1561 }
1562
1563 static void
1564 brw_upload_wm_image_surfaces(struct brw_context *brw)
1565 {
1566 struct gl_context *ctx = &brw->ctx;
1567 /* BRW_NEW_FRAGMENT_PROGRAM */
1568 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1569
1570 if (prog) {
1571 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1572 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1573 &brw->wm.base, &brw->wm.prog_data->base);
1574 }
1575 }
1576
1577 const struct brw_tracked_state brw_wm_image_surfaces = {
1578 .dirty = {
1579 .mesa = _NEW_TEXTURE,
1580 .brw = BRW_NEW_BATCH |
1581 BRW_NEW_BLORP |
1582 BRW_NEW_FRAGMENT_PROGRAM |
1583 BRW_NEW_FS_PROG_DATA |
1584 BRW_NEW_IMAGE_UNITS
1585 },
1586 .emit = brw_upload_wm_image_surfaces,
1587 };
1588
1589 void
1590 gen4_init_vtable_surface_functions(struct brw_context *brw)
1591 {
1592 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1593 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1594 }
1595
1596 void
1597 gen6_init_vtable_surface_functions(struct brw_context *brw)
1598 {
1599 gen4_init_vtable_surface_functions(brw);
1600 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1601 }
1602
1603 static void
1604 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1605 {
1606 struct gl_context *ctx = &brw->ctx;
1607 /* _NEW_PROGRAM */
1608 struct gl_shader_program *prog =
1609 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1610
1611 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1612 const unsigned surf_idx =
1613 brw->cs.prog_data->binding_table.work_groups_start;
1614 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1615 drm_intel_bo *bo;
1616 uint32_t bo_offset;
1617
1618 if (brw->compute.num_work_groups_bo == NULL) {
1619 bo = NULL;
1620 intel_upload_data(brw,
1621 (void *)brw->compute.num_work_groups,
1622 3 * sizeof(GLuint),
1623 sizeof(GLuint),
1624 &bo,
1625 &bo_offset);
1626 } else {
1627 bo = brw->compute.num_work_groups_bo;
1628 bo_offset = brw->compute.num_work_groups_offset;
1629 }
1630
1631 brw_emit_buffer_surface_state(brw, surf_offset,
1632 bo, bo_offset,
1633 BRW_SURFACEFORMAT_RAW,
1634 3 * sizeof(GLuint), 1, true);
1635 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1636 }
1637 }
1638
1639 const struct brw_tracked_state brw_cs_work_groups_surface = {
1640 .dirty = {
1641 .brw = BRW_NEW_BLORP |
1642 BRW_NEW_CS_WORK_GROUPS
1643 },
1644 .emit = brw_upload_cs_work_groups_surface,
1645 };