i965: Use tex_mocs instead of rb_mocs for GL images.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57 unsigned num_dwords;
58 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59 unsigned reloc_dw;
60 unsigned aux_reloc_dw;
61 unsigned tex_mocs;
62 unsigned rb_mocs;
63 };
64
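/* SURFACE_STATE layout information, indexed by hardware generation: the state
 * size and required alignment, which DWORDs hold the surface and auxiliary
 * surface relocations, and the default MOCS values used for sampling
 * (tex_mocs) and for render targets (rb_mocs).  Gens 4-6 leave the MOCS
 * fields zero.
 */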
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
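/* Allocate and fill a RENDER_SURFACE_STATE for the given miptree and view
 * using ISL, then emit relocations for the surface address and, if present,
 * the auxiliary (MCS) surface address.
 */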
74 void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101 * surface.  Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 uint32_t
137 brw_update_renderbuffer_surface(struct brw_context *brw,
138 struct gl_renderbuffer *rb,
139 bool layered, unsigned unit /* unused */,
140 uint32_t surf_index)
141 {
142 struct gl_context *ctx = &brw->ctx;
143 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
144 struct intel_mipmap_tree *mt = irb->mt;
145
146 assert(brw_render_target_supported(brw, rb));
147 intel_miptree_used_for_rendering(mt);
148
149 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
150 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
151 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
152 __func__, _mesa_get_format_name(rb_format));
153 }
154
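/* With UMS/CMS MSAA layouts each sample occupies its own array slice, so
 * irb->mt_layer is a physical slice index and has to be divided by the
 * sample count to recover the logical layer.
 */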
155 const unsigned layer_multiplier =
156 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
157 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
158 MAX2(irb->mt->num_samples, 1) : 1;
159
160 struct isl_view view = {
161 .format = brw->render_target_format[rb_format],
162 .base_level = irb->mt_level - irb->mt->first_level,
163 .levels = 1,
164 .base_array_layer = irb->mt_layer / layer_multiplier,
165 .array_len = MAX2(irb->layer_count, 1),
166 .channel_select = {
167 ISL_CHANNEL_SELECT_RED,
168 ISL_CHANNEL_SELECT_GREEN,
169 ISL_CHANNEL_SELECT_BLUE,
170 ISL_CHANNEL_SELECT_ALPHA,
171 },
172 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
173 };
174
175 uint32_t offset;
176 brw_emit_surface_state(brw, mt, &view,
177 surface_state_infos[brw->gen].rb_mocs, false,
178 &offset, surf_index,
179 I915_GEM_DOMAIN_RENDER,
180 I915_GEM_DOMAIN_RENDER);
181 return offset;
182 }
183
184 GLuint
185 translate_tex_target(GLenum target)
186 {
187 switch (target) {
188 case GL_TEXTURE_1D:
189 case GL_TEXTURE_1D_ARRAY_EXT:
190 return BRW_SURFACE_1D;
191
192 case GL_TEXTURE_RECTANGLE_NV:
193 return BRW_SURFACE_2D;
194
195 case GL_TEXTURE_2D:
196 case GL_TEXTURE_2D_ARRAY_EXT:
197 case GL_TEXTURE_EXTERNAL_OES:
198 case GL_TEXTURE_2D_MULTISAMPLE:
199 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
200 return BRW_SURFACE_2D;
201
202 case GL_TEXTURE_3D:
203 return BRW_SURFACE_3D;
204
205 case GL_TEXTURE_CUBE_MAP:
206 case GL_TEXTURE_CUBE_MAP_ARRAY:
207 return BRW_SURFACE_CUBE;
208
209 default:
210 unreachable("not reached");
211 }
212 }
213
214 uint32_t
215 brw_get_surface_tiling_bits(uint32_t tiling)
216 {
217 switch (tiling) {
218 case I915_TILING_X:
219 return BRW_SURFACE_TILED;
220 case I915_TILING_Y:
221 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
222 default:
223 return 0;
224 }
225 }
226
227
228 uint32_t
229 brw_get_surface_num_multisamples(unsigned num_samples)
230 {
231 if (num_samples > 1)
232 return BRW_SURFACE_MULTISAMPLECOUNT_4;
233 else
234 return BRW_SURFACE_MULTISAMPLECOUNT_1;
235 }
236
237 /**
238 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
239 * swizzling.
240 */
241 int
242 brw_get_texture_swizzle(const struct gl_context *ctx,
243 const struct gl_texture_object *t)
244 {
245 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
246
247 int swizzles[SWIZZLE_NIL + 1] = {
248 SWIZZLE_X,
249 SWIZZLE_Y,
250 SWIZZLE_Z,
251 SWIZZLE_W,
252 SWIZZLE_ZERO,
253 SWIZZLE_ONE,
254 SWIZZLE_NIL
255 };
256
257 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
258 img->_BaseFormat == GL_DEPTH_STENCIL) {
259 GLenum depth_mode = t->DepthMode;
260
261 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
262 * with depth component data specified with a sized internal format.
263 * Otherwise, it's left at the old default, GL_LUMINANCE.
264 */
265 if (_mesa_is_gles3(ctx) &&
266 img->InternalFormat != GL_DEPTH_COMPONENT &&
267 img->InternalFormat != GL_DEPTH_STENCIL) {
268 depth_mode = GL_RED;
269 }
270
271 switch (depth_mode) {
272 case GL_ALPHA:
273 swizzles[0] = SWIZZLE_ZERO;
274 swizzles[1] = SWIZZLE_ZERO;
275 swizzles[2] = SWIZZLE_ZERO;
276 swizzles[3] = SWIZZLE_X;
277 break;
278 case GL_LUMINANCE:
279 swizzles[0] = SWIZZLE_X;
280 swizzles[1] = SWIZZLE_X;
281 swizzles[2] = SWIZZLE_X;
282 swizzles[3] = SWIZZLE_ONE;
283 break;
284 case GL_INTENSITY:
285 swizzles[0] = SWIZZLE_X;
286 swizzles[1] = SWIZZLE_X;
287 swizzles[2] = SWIZZLE_X;
288 swizzles[3] = SWIZZLE_X;
289 break;
290 case GL_RED:
291 swizzles[0] = SWIZZLE_X;
292 swizzles[1] = SWIZZLE_ZERO;
293 swizzles[2] = SWIZZLE_ZERO;
294 swizzles[3] = SWIZZLE_ONE;
295 break;
296 }
297 }
298
299 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
300
301 /* If the texture's format is alpha-only, force R, G, and B to
302 * 0.0. Similarly, if the texture's format has no alpha channel,
303 * force the alpha value read to 1.0.  This allows the
304 * implementation to use an RGBA texture for any of these formats
305 * without leaking any unexpected values.
306 */
307 switch (img->_BaseFormat) {
308 case GL_ALPHA:
309 swizzles[0] = SWIZZLE_ZERO;
310 swizzles[1] = SWIZZLE_ZERO;
311 swizzles[2] = SWIZZLE_ZERO;
312 break;
313 case GL_LUMINANCE:
314 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
315 swizzles[0] = SWIZZLE_X;
316 swizzles[1] = SWIZZLE_X;
317 swizzles[2] = SWIZZLE_X;
318 swizzles[3] = SWIZZLE_ONE;
319 }
320 break;
321 case GL_LUMINANCE_ALPHA:
322 if (datatype == GL_SIGNED_NORMALIZED) {
323 swizzles[0] = SWIZZLE_X;
324 swizzles[1] = SWIZZLE_X;
325 swizzles[2] = SWIZZLE_X;
326 swizzles[3] = SWIZZLE_W;
327 }
328 break;
329 case GL_INTENSITY:
330 if (datatype == GL_SIGNED_NORMALIZED) {
331 swizzles[0] = SWIZZLE_X;
332 swizzles[1] = SWIZZLE_X;
333 swizzles[2] = SWIZZLE_X;
334 swizzles[3] = SWIZZLE_X;
335 }
336 break;
337 case GL_RED:
338 case GL_RG:
339 case GL_RGB:
340 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 }
344
345 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
346 swizzles[GET_SWZ(t->_Swizzle, 1)],
347 swizzles[GET_SWZ(t->_Swizzle, 2)],
348 swizzles[GET_SWZ(t->_Swizzle, 3)]);
349 }
350
351 /**
352 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
353 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
354 *
355 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
356 * 0 1 2 3 4 5
357 * 4 5 6 7 0 1
358 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
359 *
360 * which is simply adding 4 then modding by 8 (or anding with 7).
361 *
362 * We then may need to apply workarounds for textureGather hardware bugs.
363 */
364 static unsigned
365 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
366 {
367 unsigned scs = (swizzle + 4) & 7;
368
369 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
370 }
371
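/* Construct SURFACE_STATE for a single texture unit: buffer textures are
 * handled separately, otherwise we pick the right miptree plane, apply the
 * gen6/gen7 gather and stencil-sampling overrides, and emit the state with
 * the texture MOCS.
 */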
372 void
373 brw_update_texture_surface(struct gl_context *ctx,
374 unsigned unit,
375 uint32_t *surf_offset,
376 bool for_gather,
377 uint32_t plane)
378 {
379 struct brw_context *brw = brw_context(ctx);
380 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
381
382 if (obj->Target == GL_TEXTURE_BUFFER) {
383 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
384
385 } else {
386 struct intel_texture_object *intel_obj = intel_texture_object(obj);
387 struct intel_mipmap_tree *mt = intel_obj->mt;
388
389 if (plane > 0) {
390 if (mt->plane[plane - 1] == NULL)
391 return;
392 mt = mt->plane[plane - 1];
393 }
394
395 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
396 /* If this is a view with restricted NumLayers, then our effective depth
397 * is not just the miptree depth.
398 */
399 const unsigned mt_num_layers =
400 mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1);
401 const unsigned view_num_layers =
402 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
403 mt_num_layers;
404
405 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
406 * texturing functions that return a float, as our code generation always
407 * selects the .x channel (which would always be 0).
408 */
409 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
410 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
411 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
412 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
413 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
414 brw_get_texture_swizzle(&brw->ctx, obj));
415
416 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
417 unsigned format = translate_tex_format(brw, mesa_fmt,
418 sampler->sRGBDecode);
419
420 /* Implement gen6 and gen7 gather work-around */
421 bool need_green_to_blue = false;
422 if (for_gather) {
423 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
424 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
425 need_green_to_blue = brw->is_haswell;
426 } else if (brw->gen == 6) {
427 /* Sandybridge's gather4 message is broken for integer formats.
428 * To work around this, we pretend the surface is UNORM for
429 * 8 or 16-bit formats, and emit shader instructions to recover
430 * the real INT/UINT value. For 32-bit formats, we pretend
431 * the surface is FLOAT, and simply reinterpret the resulting
432 * bits.
433 */
434 switch (format) {
435 case BRW_SURFACEFORMAT_R8_SINT:
436 case BRW_SURFACEFORMAT_R8_UINT:
437 format = BRW_SURFACEFORMAT_R8_UNORM;
438 break;
439
440 case BRW_SURFACEFORMAT_R16_SINT:
441 case BRW_SURFACEFORMAT_R16_UINT:
442 format = BRW_SURFACEFORMAT_R16_UNORM;
443 break;
444
445 case BRW_SURFACEFORMAT_R32_SINT:
446 case BRW_SURFACEFORMAT_R32_UINT:
447 format = BRW_SURFACEFORMAT_R32_FLOAT;
448 break;
449
450 default:
451 break;
452 }
453 }
454 }
455
456 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
457 assert(brw->gen >= 8);
458 mt = mt->stencil_mt;
459 format = BRW_SURFACEFORMAT_R8_UINT;
460 }
461
462 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
463
464 struct isl_view view = {
465 .format = format,
466 .base_level = obj->MinLevel + obj->BaseLevel,
467 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
468 .base_array_layer = obj->MinLayer,
469 .array_len = view_num_layers,
470 .channel_select = {
471 swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
472 swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
473 swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
474 swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
475 },
476 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
477 };
478
479 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
480 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
481 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
482
483 brw_emit_surface_state(brw, mt, &view,
484 surface_state_infos[brw->gen].tex_mocs, for_gather,
485 surf_offset, surf_index,
486 I915_GEM_DOMAIN_SAMPLER, 0);
487 }
488 }
489
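/* Emit a buffer SURFACE_STATE (used for texture buffers, constant buffers,
 * SSBOs, atomic counters, etc.): fill the state with isl_buffer_fill_state()
 * and emit a relocation for the buffer address when a BO is provided.
 */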
490 void
491 brw_emit_buffer_surface_state(struct brw_context *brw,
492 uint32_t *out_offset,
493 drm_intel_bo *bo,
494 unsigned buffer_offset,
495 unsigned surface_format,
496 unsigned buffer_size,
497 unsigned pitch,
498 bool rw)
499 {
500 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
501
502 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
503 ss_info.num_dwords * 4, ss_info.ss_align,
504 out_offset);
505
506 isl_buffer_fill_state(&brw->isl_dev, dw,
507 .address = (bo ? bo->offset64 : 0) + buffer_offset,
508 .size = buffer_size,
509 .format = surface_format,
510 .stride = pitch,
511 .mocs = ss_info.tex_mocs);
512
513 if (bo) {
514 drm_intel_bo_emit_reloc(brw->batch.bo,
515 *out_offset + 4 * ss_info.reloc_dw,
516 bo, buffer_offset,
517 I915_GEM_DOMAIN_SAMPLER,
518 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
519 }
520 }
521
522 void
523 brw_update_buffer_texture_surface(struct gl_context *ctx,
524 unsigned unit,
525 uint32_t *surf_offset)
526 {
527 struct brw_context *brw = brw_context(ctx);
528 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
529 struct intel_buffer_object *intel_obj =
530 intel_buffer_object(tObj->BufferObject);
531 uint32_t size = tObj->BufferSize;
532 drm_intel_bo *bo = NULL;
533 mesa_format format = tObj->_BufferObjectFormat;
534 uint32_t brw_format = brw_format_for_mesa_format(format);
535 int texel_size = _mesa_get_format_bytes(format);
536
537 if (intel_obj) {
538 size = MIN2(size, intel_obj->Base.Size);
539 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
540 }
541
542 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
543 _mesa_problem(NULL, "bad format %s for texture buffer\n",
544 _mesa_get_format_name(format));
545 }
546
547 brw_emit_buffer_surface_state(brw, surf_offset, bo,
548 tObj->BufferOffset,
549 brw_format,
550 size,
551 texel_size,
552 false /* rw */);
553 }
554
555 /**
556 * Create the constant buffer surface. Vertex/fragment shader constants will be
557 * read from this buffer with Data Port Read instructions/messages.
558 */
559 void
560 brw_create_constant_surface(struct brw_context *brw,
561 drm_intel_bo *bo,
562 uint32_t offset,
563 uint32_t size,
564 uint32_t *out_offset)
565 {
566 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
567 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
568 size, 1, false);
569 }
570
571 /**
572 * Create the buffer surface. Shader buffer variables will be
573 * read from / written to this buffer with Data Port Read/Write
574 * instructions/messages.
575 */
576 void
577 brw_create_buffer_surface(struct brw_context *brw,
578 drm_intel_bo *bo,
579 uint32_t offset,
580 uint32_t size,
581 uint32_t *out_offset)
582 {
583 /* Use a raw surface so we can reuse existing untyped read/write/atomic
584 * messages. We need these specifically for the fragment shader since they
585 * include a pixel mask header that we need in order to ensure correct
586 * behavior with helper invocations, which cannot write to the buffer.
587 */
588 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
589 BRW_SURFACEFORMAT_RAW,
590 size, 1, true);
591 }
592
593 /**
594 * Set up a binding table entry for use by stream output logic (transform
595 * feedback).
596 *
597 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
598 */
599 void
600 brw_update_sol_surface(struct brw_context *brw,
601 struct gl_buffer_object *buffer_obj,
602 uint32_t *out_offset, unsigned num_vector_components,
603 unsigned stride_dwords, unsigned offset_dwords)
604 {
605 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
606 uint32_t offset_bytes = 4 * offset_dwords;
607 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
608 offset_bytes,
609 buffer_obj->Size - offset_bytes);
610 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
611 out_offset);
612 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
613 size_t size_dwords = buffer_obj->Size / 4;
614 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
615
616 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
617 * too big to map using a single binding table entry?
618 */
619 assert((size_dwords - offset_dwords) / stride_dwords
620 <= BRW_MAX_NUM_BUFFER_ENTRIES);
621
622 if (size_dwords > offset_dwords + num_vector_components) {
623 /* There is room for at least 1 transform feedback output in the buffer.
624 * Compute the number of additional transform feedback outputs the
625 * buffer has room for.
626 */
627 buffer_size_minus_1 =
628 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
629 } else {
630 /* There isn't even room for a single transform feedback output in the
631 * buffer. We can't configure the binding table entry to prevent output
632 * entirely; we'll have to rely on the geometry shader to detect
633 * overflow. But to minimize the damage in case of a bug, set up the
634 * binding table entry to just allow a single output.
635 */
636 buffer_size_minus_1 = 0;
637 }
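/* The legacy SURFACE_STATE encodes the buffer size as a 27-bit value split
 * across the Width (7 bits), Height (13 bits) and Depth (7 bits) fields.
 */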
638 width = buffer_size_minus_1 & 0x7f;
639 height = (buffer_size_minus_1 & 0xfff80) >> 7;
640 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
641
642 switch (num_vector_components) {
643 case 1:
644 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
645 break;
646 case 2:
647 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
648 break;
649 case 3:
650 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
651 break;
652 case 4:
653 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
654 break;
655 default:
656 unreachable("Invalid vector size for transform feedback output");
657 }
658
659 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
660 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
661 surface_format << BRW_SURFACE_FORMAT_SHIFT |
662 BRW_SURFACE_RC_READ_WRITE;
663 surf[1] = bo->offset64 + offset_bytes; /* reloc */
664 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
665 height << BRW_SURFACE_HEIGHT_SHIFT);
666 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
667 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
668 surf[4] = 0;
669 surf[5] = 0;
670
671 /* Emit relocation to surface contents. */
672 drm_intel_bo_emit_reloc(brw->batch.bo,
673 *out_offset + 4,
674 bo, offset_bytes,
675 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
676 }
677
678 /* Creates a new WM constant buffer reflecting the current fragment program's
679 * constants, if needed by the fragment program.
680 *
681 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
682 * state atom.
683 */
684 static void
685 brw_upload_wm_pull_constants(struct brw_context *brw)
686 {
687 struct brw_stage_state *stage_state = &brw->wm.base;
688 /* BRW_NEW_FRAGMENT_PROGRAM */
689 struct brw_fragment_program *fp =
690 (struct brw_fragment_program *) brw->fragment_program;
691 /* BRW_NEW_FS_PROG_DATA */
692 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
693
694 /* _NEW_PROGRAM_CONSTANTS */
695 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
696 stage_state, prog_data);
697 }
698
699 const struct brw_tracked_state brw_wm_pull_constants = {
700 .dirty = {
701 .mesa = _NEW_PROGRAM_CONSTANTS,
702 .brw = BRW_NEW_BATCH |
703 BRW_NEW_BLORP |
704 BRW_NEW_FRAGMENT_PROGRAM |
705 BRW_NEW_FS_PROG_DATA,
706 },
707 .emit = brw_upload_wm_pull_constants,
708 };
709
710 /**
711 * Creates a null renderbuffer surface.
712 *
713 * This is used when the shader doesn't write to any color output. An FB
714 * write to target 0 will still be emitted, because that's how the thread is
715 * terminated (and computed depth is returned), so we need to have the
716 * hardware discard the target 0 color output.
717 */
718 static void
719 brw_emit_null_surface_state(struct brw_context *brw,
720 unsigned width,
721 unsigned height,
722 unsigned samples,
723 uint32_t *out_offset)
724 {
725 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
726 * Notes):
727 *
728 * A null surface will be used in instances where an actual surface is
729 * not bound. When a write message is generated to a null surface, no
730 * actual surface is written to. When a read message (including any
731 * sampling engine message) is generated to a null surface, the result
732 * is all zeros. Note that a null surface type is allowed to be used
733 * with all messages, even if it is not specifically indicated as
734 * supported. All of the remaining fields in surface state are ignored
735 * for null surfaces, with the following exceptions:
736 *
737 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
738 * depth buffer’s corresponding state for all render target surfaces,
739 * including null.
740 *
741 * - Surface Format must be R8G8B8A8_UNORM.
742 */
743 unsigned surface_type = BRW_SURFACE_NULL;
744 drm_intel_bo *bo = NULL;
745 unsigned pitch_minus_1 = 0;
746 uint32_t multisampling_state = 0;
747 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
748 out_offset);
749
750 if (samples > 1) {
751 /* On Gen6, null render targets seem to cause GPU hangs when
752 * multisampling.  So work around this problem by rendering into a dummy
753 * color buffer.
754 *
755 * To decrease the amount of memory needed by the workaround buffer, we
756 * set its pitch to 128 bytes (the width of a Y tile). This means that
757 * the amount of memory needed for the workaround buffer is
758 * (width_in_tiles + height_in_tiles - 1) tiles.
759 *
760 * Note that since the workaround buffer will be interpreted by the
761 * hardware as an interleaved multisampled buffer, we need to compute
762 * width_in_tiles and height_in_tiles by dividing the width and height
763 * by 16 rather than the normal Y-tile size of 32.
764 */
765 unsigned width_in_tiles = ALIGN(width, 16) / 16;
766 unsigned height_in_tiles = ALIGN(height, 16) / 16;
767 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
768 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
769 size_needed);
770 bo = brw->wm.multisampled_null_render_target_bo;
771 surface_type = BRW_SURFACE_2D;
772 pitch_minus_1 = 127;
773 multisampling_state = brw_get_surface_num_multisamples(samples);
774 }
775
776 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
777 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
778 if (brw->gen < 6) {
779 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
780 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
781 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
782 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
783 }
784 surf[1] = bo ? bo->offset64 : 0;
785 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
786 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
787
788 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
789 * Notes):
790 *
791 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
792 */
793 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
794 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
795 surf[4] = multisampling_state;
796 surf[5] = 0;
797
798 if (bo) {
799 drm_intel_bo_emit_reloc(brw->batch.bo,
800 *out_offset + 4,
801 bo, 0,
802 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
803 }
804 }
805
806 /**
807 * Sets up a surface state structure to point at the given region.
808 * While it is only used for the front/back buffer currently, it should be
809 * usable for further buffers when doing ARB_draw_buffer support.
810 */
811 static uint32_t
812 gen4_update_renderbuffer_surface(struct brw_context *brw,
813 struct gl_renderbuffer *rb,
814 bool layered, unsigned unit,
815 uint32_t surf_index)
816 {
817 struct gl_context *ctx = &brw->ctx;
818 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
819 struct intel_mipmap_tree *mt = irb->mt;
820 uint32_t *surf;
821 uint32_t tile_x, tile_y;
822 uint32_t format = 0;
823 uint32_t offset;
824 /* _NEW_BUFFERS */
825 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
826 /* BRW_NEW_FS_PROG_DATA */
827
828 assert(!layered);
829
830 if (rb->TexImage && !brw->has_surface_tile_offset) {
831 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
832
833 if (tile_x != 0 || tile_y != 0) {
834 /* Original gen4 hardware couldn't draw to a non-tile-aligned
835 * destination in a miptree unless you actually set up your renderbuffer
836 * as a miptree and used the fragile lod/array_index/etc. controls to
837 * select the image. So, instead, we just make a new single-level
838 * miptree and render into that.
839 */
840 intel_renderbuffer_move_to_temp(brw, irb, false);
841 mt = irb->mt;
842 }
843 }
844
845 intel_miptree_used_for_rendering(irb->mt);
846
847 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
848
849 format = brw->render_target_format[rb_format];
850 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
851 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
852 __func__, _mesa_get_format_name(rb_format));
853 }
854
855 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
856 format << BRW_SURFACE_FORMAT_SHIFT);
857
858 /* reloc */
859 assert(mt->offset % mt->cpp == 0);
860 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
861 mt->bo->offset64 + mt->offset);
862
863 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
864 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
865
866 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
867 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
868
869 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
870
871 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
872 /* Note that the low bits of these fields are missing, so
873 * there's the possibility of getting in trouble.
874 */
875 assert(tile_x % 4 == 0);
876 assert(tile_y % 2 == 0);
877 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
878 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
879 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
880
881 if (brw->gen < 6) {
882 /* _NEW_COLOR */
883 if (!ctx->Color.ColorLogicOpEnabled &&
884 (ctx->Color.BlendEnabled & (1 << unit)))
885 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
886
887 if (!ctx->Color.ColorMask[unit][0])
888 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
889 if (!ctx->Color.ColorMask[unit][1])
890 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
891 if (!ctx->Color.ColorMask[unit][2])
892 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
893
894 /* Disable writes to the alpha component when the renderbuffer is
895 * XRGB.
896 */
897 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
898 !ctx->Color.ColorMask[unit][3]) {
899 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
900 }
901 }
902
903 drm_intel_bo_emit_reloc(brw->batch.bo,
904 offset + 4,
905 mt->bo,
906 surf[1] - mt->bo->offset64,
907 I915_GEM_DOMAIN_RENDER,
908 I915_GEM_DOMAIN_RENDER);
909
910 return offset;
911 }
912
913 /**
914 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
915 */
916 void
917 brw_update_renderbuffer_surfaces(struct brw_context *brw,
918 const struct gl_framebuffer *fb,
919 uint32_t render_target_start,
920 uint32_t *surf_offset)
921 {
922 GLuint i;
923 const unsigned int w = _mesa_geometric_width(fb);
924 const unsigned int h = _mesa_geometric_height(fb);
925 const unsigned int s = _mesa_geometric_samples(fb);
926
927 /* Update surfaces for drawing buffers */
928 if (fb->_NumColorDrawBuffers >= 1) {
929 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
930 const uint32_t surf_index = render_target_start + i;
931
932 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
933 surf_offset[surf_index] =
934 brw->vtbl.update_renderbuffer_surface(
935 brw, fb->_ColorDrawBuffers[i],
936 _mesa_geometric_layers(fb) > 0, i, surf_index);
937 } else {
938 brw->vtbl.emit_null_surface_state(brw, w, h, s,
939 &surf_offset[surf_index]);
940 }
941 }
942 } else {
943 const uint32_t surf_index = render_target_start;
944 brw->vtbl.emit_null_surface_state(brw, w, h, s,
945 &surf_offset[surf_index]);
946 }
947 }
948
949 static void
950 update_renderbuffer_surfaces(struct brw_context *brw)
951 {
952 const struct gl_context *ctx = &brw->ctx;
953
954 /* _NEW_BUFFERS | _NEW_COLOR */
955 const struct gl_framebuffer *fb = ctx->DrawBuffer;
956 brw_update_renderbuffer_surfaces(
957 brw, fb,
958 brw->wm.prog_data->binding_table.render_target_start,
959 brw->wm.base.surf_offset);
960 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
961 }
962
963 const struct brw_tracked_state brw_renderbuffer_surfaces = {
964 .dirty = {
965 .mesa = _NEW_BUFFERS |
966 _NEW_COLOR,
967 .brw = BRW_NEW_BATCH |
968 BRW_NEW_BLORP |
969 BRW_NEW_FS_PROG_DATA,
970 },
971 .emit = update_renderbuffer_surfaces,
972 };
973
974 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
975 .dirty = {
976 .mesa = _NEW_BUFFERS,
977 .brw = BRW_NEW_BATCH |
978 BRW_NEW_BLORP,
979 },
980 .emit = update_renderbuffer_surfaces,
981 };
982
983
984 static void
985 update_stage_texture_surfaces(struct brw_context *brw,
986 const struct gl_program *prog,
987 struct brw_stage_state *stage_state,
988 bool for_gather, uint32_t plane)
989 {
990 if (!prog)
991 return;
992
993 struct gl_context *ctx = &brw->ctx;
994
995 uint32_t *surf_offset = stage_state->surf_offset;
996
997 /* BRW_NEW_*_PROG_DATA */
998 if (for_gather)
999 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
1000 else
1001 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1002
1003 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
1004 for (unsigned s = 0; s < num_samplers; s++) {
1005 surf_offset[s] = 0;
1006
1007 if (prog->SamplersUsed & (1 << s)) {
1008 const unsigned unit = prog->SamplerUnits[s];
1009
1010 /* _NEW_TEXTURE */
1011 if (ctx->Texture.Unit[unit]._Current) {
1012 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1013 }
1014 }
1015 }
1016 }
1017
1018
1019 /**
1020 * Construct SURFACE_STATE objects for enabled textures.
1021 */
1022 static void
1023 brw_update_texture_surfaces(struct brw_context *brw)
1024 {
1025 /* BRW_NEW_VERTEX_PROGRAM */
1026 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1027
1028 /* BRW_NEW_TESS_PROGRAMS */
1029 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1030 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1031
1032 /* BRW_NEW_GEOMETRY_PROGRAM */
1033 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1034
1035 /* BRW_NEW_FRAGMENT_PROGRAM */
1036 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1037
1038 /* _NEW_TEXTURE */
1039 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1040 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1041 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1042 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1043 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1044
1045 /* Emit an alternate set of surface state for gather.  This
1046 * allows the surface format to be overridden for only the
1047 * gather4 messages. */
1048 if (brw->gen < 8) {
1049 if (vs && vs->UsesGather)
1050 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1051 if (tcs && tcs->UsesGather)
1052 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1053 if (tes && tes->UsesGather)
1054 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1055 if (gs && gs->UsesGather)
1056 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1057 if (fs && fs->UsesGather)
1058 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1059 }
1060
1061 if (fs) {
1062 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1063 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1064 }
1065
1066 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1067 }
1068
1069 const struct brw_tracked_state brw_texture_surfaces = {
1070 .dirty = {
1071 .mesa = _NEW_TEXTURE,
1072 .brw = BRW_NEW_BATCH |
1073 BRW_NEW_BLORP |
1074 BRW_NEW_FRAGMENT_PROGRAM |
1075 BRW_NEW_FS_PROG_DATA |
1076 BRW_NEW_GEOMETRY_PROGRAM |
1077 BRW_NEW_GS_PROG_DATA |
1078 BRW_NEW_TESS_PROGRAMS |
1079 BRW_NEW_TCS_PROG_DATA |
1080 BRW_NEW_TES_PROG_DATA |
1081 BRW_NEW_TEXTURE_BUFFER |
1082 BRW_NEW_VERTEX_PROGRAM |
1083 BRW_NEW_VS_PROG_DATA,
1084 },
1085 .emit = brw_update_texture_surfaces,
1086 };
1087
1088 static void
1089 brw_update_cs_texture_surfaces(struct brw_context *brw)
1090 {
1091 /* BRW_NEW_COMPUTE_PROGRAM */
1092 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1093
1094 /* _NEW_TEXTURE */
1095 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1096
1097 /* Emit an alternate set of surface state for gather.  This
1098 * allows the surface format to be overridden for only the
1099 * gather4 messages.
1100 */
1101 if (brw->gen < 8) {
1102 if (cs && cs->UsesGather)
1103 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1104 }
1105
1106 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1107 }
1108
1109 const struct brw_tracked_state brw_cs_texture_surfaces = {
1110 .dirty = {
1111 .mesa = _NEW_TEXTURE,
1112 .brw = BRW_NEW_BATCH |
1113 BRW_NEW_BLORP |
1114 BRW_NEW_COMPUTE_PROGRAM,
1115 },
1116 .emit = brw_update_cs_texture_surfaces,
1117 };
1118
1119
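/* Set up constant buffer surfaces for every UBO binding and raw buffer
 * surfaces for every SSBO binding used by the given linked shader stage,
 * emitting null surfaces for unbound binding points.
 */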
1120 void
1121 brw_upload_ubo_surfaces(struct brw_context *brw,
1122 struct gl_linked_shader *shader,
1123 struct brw_stage_state *stage_state,
1124 struct brw_stage_prog_data *prog_data)
1125 {
1126 struct gl_context *ctx = &brw->ctx;
1127
1128 if (!shader)
1129 return;
1130
1131 uint32_t *ubo_surf_offsets =
1132 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1133
1134 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1135 struct gl_uniform_buffer_binding *binding =
1136 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1137
1138 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1139 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1140 } else {
1141 struct intel_buffer_object *intel_bo =
1142 intel_buffer_object(binding->BufferObject);
1143 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1144 if (!binding->AutomaticSize)
1145 size = MIN2(size, binding->Size);
1146 drm_intel_bo *bo =
1147 intel_bufferobj_buffer(brw, intel_bo,
1148 binding->Offset,
1149 size);
1150 brw_create_constant_surface(brw, bo, binding->Offset,
1151 size,
1152 &ubo_surf_offsets[i]);
1153 }
1154 }
1155
1156 uint32_t *ssbo_surf_offsets =
1157 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1158
1159 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1160 struct gl_shader_storage_buffer_binding *binding =
1161 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1162
1163 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1164 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1165 } else {
1166 struct intel_buffer_object *intel_bo =
1167 intel_buffer_object(binding->BufferObject);
1168 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1169 if (!binding->AutomaticSize)
1170 size = MIN2(size, binding->Size);
1171 drm_intel_bo *bo =
1172 intel_bufferobj_buffer(brw, intel_bo,
1173 binding->Offset,
1174 size);
1175 brw_create_buffer_surface(brw, bo, binding->Offset,
1176 size,
1177 &ssbo_surf_offsets[i]);
1178 }
1179 }
1180
1181 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1182 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1183 }
1184
1185 static void
1186 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1187 {
1188 struct gl_context *ctx = &brw->ctx;
1189 /* _NEW_PROGRAM */
1190 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1191
1192 if (!prog)
1193 return;
1194
1195 /* BRW_NEW_FS_PROG_DATA */
1196 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1197 &brw->wm.base, &brw->wm.prog_data->base);
1198 }
1199
1200 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1201 .dirty = {
1202 .mesa = _NEW_PROGRAM,
1203 .brw = BRW_NEW_BATCH |
1204 BRW_NEW_BLORP |
1205 BRW_NEW_FS_PROG_DATA |
1206 BRW_NEW_UNIFORM_BUFFER,
1207 },
1208 .emit = brw_upload_wm_ubo_surfaces,
1209 };
1210
1211 static void
1212 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1213 {
1214 struct gl_context *ctx = &brw->ctx;
1215 /* _NEW_PROGRAM */
1216 struct gl_shader_program *prog =
1217 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1218
1219 if (!prog)
1220 return;
1221
1222 /* BRW_NEW_CS_PROG_DATA */
1223 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1224 &brw->cs.base, &brw->cs.prog_data->base);
1225 }
1226
1227 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1228 .dirty = {
1229 .mesa = _NEW_PROGRAM,
1230 .brw = BRW_NEW_BATCH |
1231 BRW_NEW_BLORP |
1232 BRW_NEW_CS_PROG_DATA |
1233 BRW_NEW_UNIFORM_BUFFER,
1234 },
1235 .emit = brw_upload_cs_ubo_surfaces,
1236 };
1237
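/* Emit raw (untyped) buffer surfaces for the atomic counter buffers bound to
 * the given linked shader stage.
 */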
1238 void
1239 brw_upload_abo_surfaces(struct brw_context *brw,
1240 struct gl_linked_shader *shader,
1241 struct brw_stage_state *stage_state,
1242 struct brw_stage_prog_data *prog_data)
1243 {
1244 struct gl_context *ctx = &brw->ctx;
1245 uint32_t *surf_offsets =
1246 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1247
1248 if (shader && shader->NumAtomicBuffers) {
1249 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1250 struct gl_atomic_buffer_binding *binding =
1251 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1252 struct intel_buffer_object *intel_bo =
1253 intel_buffer_object(binding->BufferObject);
1254 drm_intel_bo *bo = intel_bufferobj_buffer(
1255 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1256
1257 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1258 binding->Offset, BRW_SURFACEFORMAT_RAW,
1259 bo->size - binding->Offset, 1, true);
1260 }
1261
1262 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1263 }
1264 }
1265
1266 static void
1267 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1268 {
1269 struct gl_context *ctx = &brw->ctx;
1270 /* _NEW_PROGRAM */
1271 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1272
1273 if (prog) {
1274 /* BRW_NEW_FS_PROG_DATA */
1275 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1276 &brw->wm.base, &brw->wm.prog_data->base);
1277 }
1278 }
1279
1280 const struct brw_tracked_state brw_wm_abo_surfaces = {
1281 .dirty = {
1282 .mesa = _NEW_PROGRAM,
1283 .brw = BRW_NEW_ATOMIC_BUFFER |
1284 BRW_NEW_BLORP |
1285 BRW_NEW_BATCH |
1286 BRW_NEW_FS_PROG_DATA,
1287 },
1288 .emit = brw_upload_wm_abo_surfaces,
1289 };
1290
1291 static void
1292 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1293 {
1294 struct gl_context *ctx = &brw->ctx;
1295 /* _NEW_PROGRAM */
1296 struct gl_shader_program *prog =
1297 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1298
1299 if (prog) {
1300 /* BRW_NEW_CS_PROG_DATA */
1301 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1302 &brw->cs.base, &brw->cs.prog_data->base);
1303 }
1304 }
1305
1306 const struct brw_tracked_state brw_cs_abo_surfaces = {
1307 .dirty = {
1308 .mesa = _NEW_PROGRAM,
1309 .brw = BRW_NEW_ATOMIC_BUFFER |
1310 BRW_NEW_BLORP |
1311 BRW_NEW_BATCH |
1312 BRW_NEW_CS_PROG_DATA,
1313 },
1314 .emit = brw_upload_cs_abo_surfaces,
1315 };
1316
1317 static void
1318 brw_upload_cs_image_surfaces(struct brw_context *brw)
1319 {
1320 struct gl_context *ctx = &brw->ctx;
1321 /* _NEW_PROGRAM */
1322 struct gl_shader_program *prog =
1323 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1324
1325 if (prog) {
1326 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1327 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1328 &brw->cs.base, &brw->cs.prog_data->base);
1329 }
1330 }
1331
1332 const struct brw_tracked_state brw_cs_image_surfaces = {
1333 .dirty = {
1334 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1335 .brw = BRW_NEW_BATCH |
1336 BRW_NEW_BLORP |
1337 BRW_NEW_CS_PROG_DATA |
1338 BRW_NEW_IMAGE_UNITS
1339 },
1340 .emit = brw_upload_cs_image_surfaces,
1341 };
1342
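/* Choose the SURFACE_STATE format for a shader image unit: write-only access
 * can use the real format, reads are lowered to the closest typed format the
 * hardware supports, and anything else falls back to raw untyped access.
 */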
1343 static uint32_t
1344 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1345 {
1346 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1347 uint32_t hw_format = brw_format_for_mesa_format(format);
1348 if (access == GL_WRITE_ONLY) {
1349 return hw_format;
1350 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1351 /* Typed surface reads support a very limited subset of the shader
1352 * image formats. Translate it into the closest format the
1353 * hardware supports.
1354 */
1355 return isl_lower_storage_image_format(devinfo, hw_format);
1356 } else {
1357 /* The hardware doesn't actually support a typed format that we can use
1358 * so we have to fall back to untyped read/write messages.
1359 */
1360 return BRW_SURFACEFORMAT_RAW;
1361 }
1362 }
1363
1364 static void
1365 update_default_image_param(struct brw_context *brw,
1366 struct gl_image_unit *u,
1367 unsigned surface_idx,
1368 struct brw_image_param *param)
1369 {
1370 memset(param, 0, sizeof(*param));
1371 param->surface_idx = surface_idx;
1372 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1373 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1374 * detailed explanation of these parameters.
1375 */
1376 param->swizzling[0] = 0xff;
1377 param->swizzling[1] = 0xff;
1378 }
1379
1380 static void
1381 update_buffer_image_param(struct brw_context *brw,
1382 struct gl_image_unit *u,
1383 unsigned surface_idx,
1384 struct brw_image_param *param)
1385 {
1386 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1387
1388 update_default_image_param(brw, u, surface_idx, param);
1389
1390 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1391 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1392 }
1393
1394 static void
1395 update_texture_image_param(struct brw_context *brw,
1396 struct gl_image_unit *u,
1397 unsigned surface_idx,
1398 struct brw_image_param *param)
1399 {
1400 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1401
1402 update_default_image_param(brw, u, surface_idx, param);
1403
1404 param->size[0] = minify(mt->logical_width0, u->Level);
1405 param->size[1] = minify(mt->logical_height0, u->Level);
1406 param->size[2] = (!u->Layered ? 1 :
1407 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1408 u->TexObj->Target == GL_TEXTURE_3D ?
1409 minify(mt->logical_depth0, u->Level) :
1410 mt->logical_depth0);
1411
1412 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1413 &param->offset[0],
1414 &param->offset[1]);
1415
1416 param->stride[0] = mt->cpp;
1417 param->stride[1] = mt->pitch / mt->cpp;
1418 param->stride[2] =
1419 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1420 param->stride[3] =
1421 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1422
1423 if (mt->tiling == I915_TILING_X) {
1424 /* An X tile is a rectangular block of 512x8 bytes. */
1425 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1426 param->tiling[1] = _mesa_logbase2(8);
1427
1428 if (brw->has_swizzling) {
1429 /* Right shifts required to swizzle bits 9 and 10 of the memory
1430 * address with bit 6.
1431 */
1432 param->swizzling[0] = 3;
1433 param->swizzling[1] = 4;
1434 }
1435 } else if (mt->tiling == I915_TILING_Y) {
1436 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1437 * different from the layout of an X-tiled surface; we simply pretend that
1438 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1439 * one arranged in X-major order, just as is the case for X-tiling.
1440 */
1441 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1442 param->tiling[1] = _mesa_logbase2(32);
1443
1444 if (brw->has_swizzling) {
1445 /* Right shift required to swizzle bit 9 of the memory address with
1446 * bit 6.
1447 */
1448 param->swizzling[0] = 3;
1449 }
1450 }
1451
1452 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1453 * address calculation algorithm (emit_address_calculation() in
1454 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1455 * modulus equal to the LOD.
1456 */
1457 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1458 0);
1459 }
1460
1461 static void
1462 update_image_surface(struct brw_context *brw,
1463 struct gl_image_unit *u,
1464 GLenum access,
1465 unsigned surface_idx,
1466 uint32_t *surf_offset,
1467 struct brw_image_param *param)
1468 {
1469 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1470 struct gl_texture_object *obj = u->TexObj;
1471 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1472
1473 if (obj->Target == GL_TEXTURE_BUFFER) {
1474 struct intel_buffer_object *intel_obj =
1475 intel_buffer_object(obj->BufferObject);
1476 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1477 _mesa_get_format_bytes(u->_ActualFormat));
1478
1479 brw_emit_buffer_surface_state(
1480 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1481 format, intel_obj->Base.Size, texel_size,
1482 access != GL_READ_ONLY);
1483
1484 update_buffer_image_param(brw, u, surface_idx, param);
1485
1486 } else {
1487 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1488 struct intel_mipmap_tree *mt = intel_obj->mt;
1489
1490 if (format == BRW_SURFACEFORMAT_RAW) {
1491 brw_emit_buffer_surface_state(
1492 brw, surf_offset, mt->bo, mt->offset,
1493 format, mt->bo->size - mt->offset, 1 /* pitch */,
1494 access != GL_READ_ONLY);
1495
1496 } else {
1497 const unsigned num_layers = (!u->Layered ? 1 :
1498 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1499 mt->logical_depth0);
1500
1501 struct isl_view view = {
1502 .format = format,
1503 .base_level = obj->MinLevel + u->Level,
1504 .levels = 1,
1505 .base_array_layer = obj->MinLayer + u->_Layer,
1506 .array_len = num_layers,
1507 .channel_select = {
1508 ISL_CHANNEL_SELECT_RED,
1509 ISL_CHANNEL_SELECT_GREEN,
1510 ISL_CHANNEL_SELECT_BLUE,
1511 ISL_CHANNEL_SELECT_ALPHA,
1512 },
1513 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1514 };
1515
1516 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1517
1518 brw_emit_surface_state(brw, mt, &view,
1519 surface_state_infos[brw->gen].tex_mocs, false,
1520 surf_offset, surf_index,
1521 I915_GEM_DOMAIN_SAMPLER,
1522 access == GL_READ_ONLY ? 0 :
1523 I915_GEM_DOMAIN_SAMPLER);
1524 }
1525
1526 update_texture_image_param(brw, u, surface_idx, param);
1527 }
1528
1529 } else {
1530 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1531 update_default_image_param(brw, u, surface_idx, param);
1532 }
1533 }
1534
1535 void
1536 brw_upload_image_surfaces(struct brw_context *brw,
1537 struct gl_linked_shader *shader,
1538 struct brw_stage_state *stage_state,
1539 struct brw_stage_prog_data *prog_data)
1540 {
1541 struct gl_context *ctx = &brw->ctx;
1542
1543 if (shader && shader->NumImages) {
1544 for (unsigned i = 0; i < shader->NumImages; i++) {
1545 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1546 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1547
1548 update_image_surface(brw, u, shader->ImageAccess[i],
1549 surf_idx,
1550 &stage_state->surf_offset[surf_idx],
1551 &prog_data->image_param[i]);
1552 }
1553
1554 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1555 /* This may have changed the image metadata that depends on the context
1556 * image unit state and is passed to the program as uniforms, so make sure
1557 * that push and pull constants are re-uploaded.
1558 */
1559 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1560 }
1561 }
1562
1563 static void
1564 brw_upload_wm_image_surfaces(struct brw_context *brw)
1565 {
1566 struct gl_context *ctx = &brw->ctx;
1567 /* BRW_NEW_FRAGMENT_PROGRAM */
1568 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1569
1570 if (prog) {
1571 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1572 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1573 &brw->wm.base, &brw->wm.prog_data->base);
1574 }
1575 }
1576
1577 const struct brw_tracked_state brw_wm_image_surfaces = {
1578 .dirty = {
1579 .mesa = _NEW_TEXTURE,
1580 .brw = BRW_NEW_BATCH |
1581 BRW_NEW_BLORP |
1582 BRW_NEW_FRAGMENT_PROGRAM |
1583 BRW_NEW_FS_PROG_DATA |
1584 BRW_NEW_IMAGE_UNITS
1585 },
1586 .emit = brw_upload_wm_image_surfaces,
1587 };
1588
1589 void
1590 gen4_init_vtable_surface_functions(struct brw_context *brw)
1591 {
1592 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1593 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1594 }
1595
1596 void
1597 gen6_init_vtable_surface_functions(struct brw_context *brw)
1598 {
1599 gen4_init_vtable_surface_functions(brw);
1600 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1601 }
1602
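/* Emit the buffer surface that backs gl_NumWorkGroups: for indirect dispatch
 * the work group counts already live in a BO, otherwise the CPU-side counts
 * are uploaded first.
 */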
1603 static void
1604 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1605 {
1606 struct gl_context *ctx = &brw->ctx;
1607 /* _NEW_PROGRAM */
1608 struct gl_shader_program *prog =
1609 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1610
1611 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1612 const unsigned surf_idx =
1613 brw->cs.prog_data->binding_table.work_groups_start;
1614 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1615 drm_intel_bo *bo;
1616 uint32_t bo_offset;
1617
1618 if (brw->compute.num_work_groups_bo == NULL) {
1619 bo = NULL;
1620 intel_upload_data(brw,
1621 (void *)brw->compute.num_work_groups,
1622 3 * sizeof(GLuint),
1623 sizeof(GLuint),
1624 &bo,
1625 &bo_offset);
1626 } else {
1627 bo = brw->compute.num_work_groups_bo;
1628 bo_offset = brw->compute.num_work_groups_offset;
1629 }
1630
1631 brw_emit_buffer_surface_state(brw, surf_offset,
1632 bo, bo_offset,
1633 BRW_SURFACEFORMAT_RAW,
1634 3 * sizeof(GLuint), 1, true);
1635 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1636 }
1637 }
1638
1639 const struct brw_tracked_state brw_cs_work_groups_surface = {
1640 .dirty = {
1641 .brw = BRW_NEW_BLORP |
1642 BRW_NEW_CS_WORK_GROUPS
1643 },
1644 .emit = brw_upload_cs_work_groups_surface,
1645 };