i965/blorp: Use the generic surface state path for gen8 textures
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57 unsigned num_dwords;
58 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59    unsigned reloc_dw;       /* Dword index of the surface address relocation */
60    unsigned aux_reloc_dw;   /* Dword index of the auxiliary surface address */
61    unsigned tex_mocs;       /* MOCS value used for texture surfaces */
62    unsigned rb_mocs;        /* MOCS value used for render target surfaces */
63 };
64
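/* Indexed by hardware generation.  Gens 4-6 have no MOCS field in
 * SURFACE_STATE, so their tex_mocs/rb_mocs entries are simply left zero.
 */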
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
74 static void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
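   /* Set up the auxiliary (MCS) surface only when rendering, or when sampling
    * from a miptree whose fast-clear data has not been resolved yet; a fully
    * resolved surface can be sampled without it.
    */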
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101        * surface.  Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 uint32_t
137 brw_update_renderbuffer_surface(struct brw_context *brw,
138 struct gl_renderbuffer *rb,
139 bool layered, unsigned unit /* unused */,
140 uint32_t surf_index)
141 {
142 struct gl_context *ctx = &brw->ctx;
143 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
144 struct intel_mipmap_tree *mt = irb->mt;
145
146 assert(brw_render_target_supported(brw, rb));
147 intel_miptree_used_for_rendering(mt);
148
149 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
150 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
151 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
152 __func__, _mesa_get_format_name(rb_format));
153 }
154
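   /* In the UMS and CMS MSAA layouts each logical layer is stored as one
    * physical slice per sample, so convert the physical slice index in
    * mt_layer back into a logical layer index for the view.
    */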
155 const unsigned layer_multiplier =
156 (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
157 irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
158 MAX2(irb->mt->num_samples, 1) : 1;
159
160 struct isl_view view = {
161 .format = brw->render_target_format[rb_format],
162 .base_level = irb->mt_level - irb->mt->first_level,
163 .levels = 1,
164 .base_array_layer = irb->mt_layer / layer_multiplier,
165 .array_len = MAX2(irb->layer_count, 1),
166 .channel_select = {
167 ISL_CHANNEL_SELECT_RED,
168 ISL_CHANNEL_SELECT_GREEN,
169 ISL_CHANNEL_SELECT_BLUE,
170 ISL_CHANNEL_SELECT_ALPHA,
171 },
172 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
173 };
174
175 uint32_t offset;
176 brw_emit_surface_state(brw, mt, &view,
177 surface_state_infos[brw->gen].rb_mocs, false,
178 &offset, surf_index,
179 I915_GEM_DOMAIN_RENDER,
180 I915_GEM_DOMAIN_RENDER);
181 return offset;
182 }
183
184 GLuint
185 translate_tex_target(GLenum target)
186 {
187 switch (target) {
188 case GL_TEXTURE_1D:
189 case GL_TEXTURE_1D_ARRAY_EXT:
190 return BRW_SURFACE_1D;
191
192 case GL_TEXTURE_RECTANGLE_NV:
193 return BRW_SURFACE_2D;
194
195 case GL_TEXTURE_2D:
196 case GL_TEXTURE_2D_ARRAY_EXT:
197 case GL_TEXTURE_EXTERNAL_OES:
198 case GL_TEXTURE_2D_MULTISAMPLE:
199 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
200 return BRW_SURFACE_2D;
201
202 case GL_TEXTURE_3D:
203 return BRW_SURFACE_3D;
204
205 case GL_TEXTURE_CUBE_MAP:
206 case GL_TEXTURE_CUBE_MAP_ARRAY:
207 return BRW_SURFACE_CUBE;
208
209 default:
210 unreachable("not reached");
211 }
212 }
213
214 uint32_t
215 brw_get_surface_tiling_bits(uint32_t tiling)
216 {
217 switch (tiling) {
218 case I915_TILING_X:
219 return BRW_SURFACE_TILED;
220 case I915_TILING_Y:
221 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
222 default:
223 return 0;
224 }
225 }
226
227
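/**
 * Of the generations that use this legacy SURFACE_STATE layout, only gen6
 * supports multisampling, and it only supports 4x, so any sample count
 * greater than one is encoded as 4x.
 */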
228 uint32_t
229 brw_get_surface_num_multisamples(unsigned num_samples)
230 {
231 if (num_samples > 1)
232 return BRW_SURFACE_MULTISAMPLECOUNT_4;
233 else
234 return BRW_SURFACE_MULTISAMPLECOUNT_1;
235 }
236
237 /**
238 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
239 * swizzling.
240 */
241 int
242 brw_get_texture_swizzle(const struct gl_context *ctx,
243 const struct gl_texture_object *t)
244 {
245 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
246
247 int swizzles[SWIZZLE_NIL + 1] = {
248 SWIZZLE_X,
249 SWIZZLE_Y,
250 SWIZZLE_Z,
251 SWIZZLE_W,
252 SWIZZLE_ZERO,
253 SWIZZLE_ONE,
254 SWIZZLE_NIL
255 };
256
257 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
258 img->_BaseFormat == GL_DEPTH_STENCIL) {
259 GLenum depth_mode = t->DepthMode;
260
261 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
262 * with depth component data specified with a sized internal format.
263 * Otherwise, it's left at the old default, GL_LUMINANCE.
264 */
265 if (_mesa_is_gles3(ctx) &&
266 img->InternalFormat != GL_DEPTH_COMPONENT &&
267 img->InternalFormat != GL_DEPTH_STENCIL) {
268 depth_mode = GL_RED;
269 }
270
271 switch (depth_mode) {
272 case GL_ALPHA:
273 swizzles[0] = SWIZZLE_ZERO;
274 swizzles[1] = SWIZZLE_ZERO;
275 swizzles[2] = SWIZZLE_ZERO;
276 swizzles[3] = SWIZZLE_X;
277 break;
278 case GL_LUMINANCE:
279 swizzles[0] = SWIZZLE_X;
280 swizzles[1] = SWIZZLE_X;
281 swizzles[2] = SWIZZLE_X;
282 swizzles[3] = SWIZZLE_ONE;
283 break;
284 case GL_INTENSITY:
285 swizzles[0] = SWIZZLE_X;
286 swizzles[1] = SWIZZLE_X;
287 swizzles[2] = SWIZZLE_X;
288 swizzles[3] = SWIZZLE_X;
289 break;
290 case GL_RED:
291 swizzles[0] = SWIZZLE_X;
292 swizzles[1] = SWIZZLE_ZERO;
293 swizzles[2] = SWIZZLE_ZERO;
294 swizzles[3] = SWIZZLE_ONE;
295 break;
296 }
297 }
298
299 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
300
301 /* If the texture's format is alpha-only, force R, G, and B to
302 * 0.0. Similarly, if the texture's format has no alpha channel,
303 * force the alpha value read to 1.0. This allows for the
304 * implementation to use an RGBA texture for any of these formats
305 * without leaking any unexpected values.
306 */
307 switch (img->_BaseFormat) {
308 case GL_ALPHA:
309 swizzles[0] = SWIZZLE_ZERO;
310 swizzles[1] = SWIZZLE_ZERO;
311 swizzles[2] = SWIZZLE_ZERO;
312 break;
313 case GL_LUMINANCE:
314 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
315 swizzles[0] = SWIZZLE_X;
316 swizzles[1] = SWIZZLE_X;
317 swizzles[2] = SWIZZLE_X;
318 swizzles[3] = SWIZZLE_ONE;
319 }
320 break;
321 case GL_LUMINANCE_ALPHA:
322 if (datatype == GL_SIGNED_NORMALIZED) {
323 swizzles[0] = SWIZZLE_X;
324 swizzles[1] = SWIZZLE_X;
325 swizzles[2] = SWIZZLE_X;
326 swizzles[3] = SWIZZLE_W;
327 }
328 break;
329 case GL_INTENSITY:
330 if (datatype == GL_SIGNED_NORMALIZED) {
331 swizzles[0] = SWIZZLE_X;
332 swizzles[1] = SWIZZLE_X;
333 swizzles[2] = SWIZZLE_X;
334 swizzles[3] = SWIZZLE_X;
335 }
336 break;
337 case GL_RED:
338 case GL_RG:
339 case GL_RGB:
340 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
341 swizzles[3] = SWIZZLE_ONE;
342 break;
343 }
344
345 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
346 swizzles[GET_SWZ(t->_Swizzle, 1)],
347 swizzles[GET_SWZ(t->_Swizzle, 2)],
348 swizzles[GET_SWZ(t->_Swizzle, 3)]);
349 }
350
351 /**
352  * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
353  * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED).  The mappings are
354 *
355 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
356 * 0 1 2 3 4 5
357 * 4 5 6 7 0 1
358 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
359 *
360 * which is simply adding 4 then modding by 8 (or anding with 7).
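 * For example, SWIZZLE_Y (1) becomes (1 + 4) & 7 = 5, which is SCS_GREEN.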
361 *
362 * We then may need to apply workarounds for textureGather hardware bugs.
363 */
364 static unsigned
365 swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
366 {
367 unsigned scs = (swizzle + 4) & 7;
368
369 return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
370 }
371
372 void
373 brw_update_texture_surface(struct gl_context *ctx,
374 unsigned unit,
375 uint32_t *surf_offset,
376 bool for_gather,
377 uint32_t plane)
378 {
379 struct brw_context *brw = brw_context(ctx);
380 struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
381
382 if (obj->Target == GL_TEXTURE_BUFFER) {
383 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
384
385 } else {
386 struct intel_texture_object *intel_obj = intel_texture_object(obj);
387 struct intel_mipmap_tree *mt = intel_obj->mt;
388
389 if (plane > 0) {
390 if (mt->plane[plane - 1] == NULL)
391 return;
392 mt = mt->plane[plane - 1];
393 }
394
395 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
396 /* If this is a view with restricted NumLayers, then our effective depth
397 * is not just the miptree depth.
398 */
399 const unsigned view_num_layers =
400 (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
401 mt->logical_depth0;
402
403 /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
404 * texturing functions that return a float, as our code generation always
405 * selects the .x channel (which would always be 0).
406 */
407 struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
408 const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
409 (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
410 firstImage->_BaseFormat == GL_DEPTH_STENCIL);
411 const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
412 brw_get_texture_swizzle(&brw->ctx, obj));
413
414 mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
415 unsigned format = translate_tex_format(brw, mesa_fmt,
416 sampler->sRGBDecode);
417
418 /* Implement gen6 and gen7 gather work-around */
419 bool need_green_to_blue = false;
420 if (for_gather) {
421 if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
422 format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
423 need_green_to_blue = brw->is_haswell;
424 } else if (brw->gen == 6) {
425 /* Sandybridge's gather4 message is broken for integer formats.
426 * To work around this, we pretend the surface is UNORM for
427 * 8 or 16-bit formats, and emit shader instructions to recover
428 * the real INT/UINT value. For 32-bit formats, we pretend
429 * the surface is FLOAT, and simply reinterpret the resulting
430 * bits.
431 */
432 switch (format) {
433 case BRW_SURFACEFORMAT_R8_SINT:
434 case BRW_SURFACEFORMAT_R8_UINT:
435 format = BRW_SURFACEFORMAT_R8_UNORM;
436 break;
437
438 case BRW_SURFACEFORMAT_R16_SINT:
439 case BRW_SURFACEFORMAT_R16_UINT:
440 format = BRW_SURFACEFORMAT_R16_UNORM;
441 break;
442
443 case BRW_SURFACEFORMAT_R32_SINT:
444 case BRW_SURFACEFORMAT_R32_UINT:
445 format = BRW_SURFACEFORMAT_R32_FLOAT;
446 break;
447
448 default:
449 break;
450 }
451 }
452 }
453
454 if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
455 assert(brw->gen >= 8);
456 mt = mt->stencil_mt;
457 format = BRW_SURFACEFORMAT_R8_UINT;
458 }
459
460 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
461
462 struct isl_view view = {
463 .format = format,
464 .base_level = obj->MinLevel + obj->BaseLevel,
465 .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
466 .base_array_layer = obj->MinLayer,
467 .array_len = view_num_layers,
468 .channel_select = {
469 swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
470 swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
471 swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
472 swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
473 },
474 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
475 };
476
477 if (obj->Target == GL_TEXTURE_CUBE_MAP ||
478 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
479 view.usage |= ISL_SURF_USAGE_CUBE_BIT;
480
481 brw_emit_surface_state(brw, mt, &view,
482 surface_state_infos[brw->gen].tex_mocs, for_gather,
483 surf_offset, surf_index,
484 I915_GEM_DOMAIN_SAMPLER, 0);
485 }
486 }
487
488 void
489 brw_emit_buffer_surface_state(struct brw_context *brw,
490 uint32_t *out_offset,
491 drm_intel_bo *bo,
492 unsigned buffer_offset,
493 unsigned surface_format,
494 unsigned buffer_size,
495 unsigned pitch,
496 bool rw)
497 {
498 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
499
500 uint32_t *dw = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
501 ss_info.num_dwords * 4, ss_info.ss_align,
502 out_offset);
503
504 isl_buffer_fill_state(&brw->isl_dev, dw,
505 .address = (bo ? bo->offset64 : 0) + buffer_offset,
506 .size = buffer_size,
507 .format = surface_format,
508 .stride = pitch,
509 .mocs = ss_info.tex_mocs);
510
511 if (bo) {
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4 * ss_info.reloc_dw,
514 bo, buffer_offset,
515 I915_GEM_DOMAIN_SAMPLER,
516 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
517 }
518 }
519
520 void
521 brw_update_buffer_texture_surface(struct gl_context *ctx,
522 unsigned unit,
523 uint32_t *surf_offset)
524 {
525 struct brw_context *brw = brw_context(ctx);
526 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
527 struct intel_buffer_object *intel_obj =
528 intel_buffer_object(tObj->BufferObject);
529 uint32_t size = tObj->BufferSize;
530 drm_intel_bo *bo = NULL;
531 mesa_format format = tObj->_BufferObjectFormat;
532 uint32_t brw_format = brw_format_for_mesa_format(format);
533 int texel_size = _mesa_get_format_bytes(format);
534
535 if (intel_obj) {
536 size = MIN2(size, intel_obj->Base.Size);
537 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
538 }
539
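   /* brw_format_for_mesa_format() returns 0 for unsupported formats, but 0 is
    * also the valid encoding of R32G32B32A32_FLOAT, so exclude that one case
    * before complaining.
    */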
540 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
541 _mesa_problem(NULL, "bad format %s for texture buffer\n",
542 _mesa_get_format_name(format));
543 }
544
545 brw_emit_buffer_surface_state(brw, surf_offset, bo,
546 tObj->BufferOffset,
547 brw_format,
548 size,
549 texel_size,
550 false /* rw */);
551 }
552
553 /**
554 * Create the constant buffer surface. Vertex/fragment shader constants will be
555 * read from this buffer with Data Port Read instructions/messages.
556 */
557 void
558 brw_create_constant_surface(struct brw_context *brw,
559 drm_intel_bo *bo,
560 uint32_t offset,
561 uint32_t size,
562 uint32_t *out_offset)
563 {
564 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
565 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
566 size, 1, false);
567 }
568
569 /**
570 * Create the buffer surface. Shader buffer variables will be
571 * read from / write to this buffer with Data Port Read/Write
572 * instructions/messages.
573 */
574 void
575 brw_create_buffer_surface(struct brw_context *brw,
576 drm_intel_bo *bo,
577 uint32_t offset,
578 uint32_t size,
579 uint32_t *out_offset)
580 {
581 /* Use a raw surface so we can reuse existing untyped read/write/atomic
582 * messages. We need these specifically for the fragment shader since they
583  * include a pixel mask header that we need in order to ensure correct
584  * behavior with helper invocations, which cannot write to the buffer.
585 */
586 brw_emit_buffer_surface_state(brw, out_offset, bo, offset,
587 BRW_SURFACEFORMAT_RAW,
588 size, 1, true);
589 }
590
591 /**
592 * Set up a binding table entry for use by stream output logic (transform
593 * feedback).
594 *
595 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
596 */
597 void
598 brw_update_sol_surface(struct brw_context *brw,
599 struct gl_buffer_object *buffer_obj,
600 uint32_t *out_offset, unsigned num_vector_components,
601 unsigned stride_dwords, unsigned offset_dwords)
602 {
603 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
604 uint32_t offset_bytes = 4 * offset_dwords;
605 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
606 offset_bytes,
607 buffer_obj->Size - offset_bytes);
608 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
609 out_offset);
610 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
611 size_t size_dwords = buffer_obj->Size / 4;
612 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
613
614 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
615 * too big to map using a single binding table entry?
616 */
617 assert((size_dwords - offset_dwords) / stride_dwords
618 <= BRW_MAX_NUM_BUFFER_ENTRIES);
619
620 if (size_dwords > offset_dwords + num_vector_components) {
621 /* There is room for at least 1 transform feedback output in the buffer.
622 * Compute the number of additional transform feedback outputs the
623 * buffer has room for.
624 */
625 buffer_size_minus_1 =
626 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
627 } else {
628 /* There isn't even room for a single transform feedback output in the
629 * buffer. We can't configure the binding table entry to prevent output
630 * entirely; we'll have to rely on the geometry shader to detect
631 * overflow. But to minimize the damage in case of a bug, set up the
632 * binding table entry to just allow a single output.
633 */
634 buffer_size_minus_1 = 0;
635 }
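   /* The entry count is split across the Width (bits 6:0), Height (bits 19:7)
    * and Depth (bits 26:20) fields of the buffer SURFACE_STATE.
    */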
636 width = buffer_size_minus_1 & 0x7f;
637 height = (buffer_size_minus_1 & 0xfff80) >> 7;
638 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
639
640 switch (num_vector_components) {
641 case 1:
642 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
643 break;
644 case 2:
645 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
646 break;
647 case 3:
648 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
649 break;
650 case 4:
651 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
652 break;
653 default:
654 unreachable("Invalid vector size for transform feedback output");
655 }
656
657 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
658 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
659 surface_format << BRW_SURFACE_FORMAT_SHIFT |
660 BRW_SURFACE_RC_READ_WRITE;
661 surf[1] = bo->offset64 + offset_bytes; /* reloc */
662 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
663 height << BRW_SURFACE_HEIGHT_SHIFT);
664 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
665 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
666 surf[4] = 0;
667 surf[5] = 0;
668
669 /* Emit relocation to surface contents. */
670 drm_intel_bo_emit_reloc(brw->batch.bo,
671 *out_offset + 4,
672 bo, offset_bytes,
673 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
674 }
675
676 /* Creates a new WM constant buffer reflecting the current fragment program's
677 * constants, if needed by the fragment program.
678 *
679 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
680 * state atom.
681 */
682 static void
683 brw_upload_wm_pull_constants(struct brw_context *brw)
684 {
685 struct brw_stage_state *stage_state = &brw->wm.base;
686 /* BRW_NEW_FRAGMENT_PROGRAM */
687 struct brw_fragment_program *fp =
688 (struct brw_fragment_program *) brw->fragment_program;
689 /* BRW_NEW_FS_PROG_DATA */
690 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
691
692 /* _NEW_PROGRAM_CONSTANTS */
693 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
694 stage_state, prog_data);
695 }
696
697 const struct brw_tracked_state brw_wm_pull_constants = {
698 .dirty = {
699 .mesa = _NEW_PROGRAM_CONSTANTS,
700 .brw = BRW_NEW_BATCH |
701 BRW_NEW_BLORP |
702 BRW_NEW_FRAGMENT_PROGRAM |
703 BRW_NEW_FS_PROG_DATA,
704 },
705 .emit = brw_upload_wm_pull_constants,
706 };
707
708 /**
709 * Creates a null renderbuffer surface.
710 *
711 * This is used when the shader doesn't write to any color output. An FB
712 * write to target 0 will still be emitted, because that's how the thread is
713 * terminated (and computed depth is returned), so we need to have the
714  * hardware discard the target 0 color output.
715 */
716 static void
717 brw_emit_null_surface_state(struct brw_context *brw,
718 unsigned width,
719 unsigned height,
720 unsigned samples,
721 uint32_t *out_offset)
722 {
723 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
724 * Notes):
725 *
726 * A null surface will be used in instances where an actual surface is
727 * not bound. When a write message is generated to a null surface, no
728 * actual surface is written to. When a read message (including any
729 * sampling engine message) is generated to a null surface, the result
730 * is all zeros. Note that a null surface type is allowed to be used
731  *     with all messages, even if it is not specifically indicated as
732 * supported. All of the remaining fields in surface state are ignored
733 * for null surfaces, with the following exceptions:
734 *
735 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
736 * depth buffer’s corresponding state for all render target surfaces,
737 * including null.
738 *
739 * - Surface Format must be R8G8B8A8_UNORM.
740 */
741 unsigned surface_type = BRW_SURFACE_NULL;
742 drm_intel_bo *bo = NULL;
743 unsigned pitch_minus_1 = 0;
744 uint32_t multisampling_state = 0;
745 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
746 out_offset);
747
748 if (samples > 1) {
749 /* On Gen6, null render targets seem to cause GPU hangs when
750       * multisampling.  So work around this problem by rendering into a dummy
751 * color buffer.
752 *
753 * To decrease the amount of memory needed by the workaround buffer, we
754 * set its pitch to 128 bytes (the width of a Y tile). This means that
755 * the amount of memory needed for the workaround buffer is
756 * (width_in_tiles + height_in_tiles - 1) tiles.
757 *
758 * Note that since the workaround buffer will be interpreted by the
759 * hardware as an interleaved multisampled buffer, we need to compute
760 * width_in_tiles and height_in_tiles by dividing the width and height
761 * by 16 rather than the normal Y-tile size of 32.
762 */
763 unsigned width_in_tiles = ALIGN(width, 16) / 16;
764 unsigned height_in_tiles = ALIGN(height, 16) / 16;
765 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
766 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
767 size_needed);
768 bo = brw->wm.multisampled_null_render_target_bo;
769 surface_type = BRW_SURFACE_2D;
770 pitch_minus_1 = 127;
771 multisampling_state = brw_get_surface_num_multisamples(samples);
772 }
773
774 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
775 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
776 if (brw->gen < 6) {
777 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
778 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
779 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
780 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
781 }
782 surf[1] = bo ? bo->offset64 : 0;
783 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
784 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
785
786 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
787 * Notes):
788 *
789 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
790 */
791 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
792 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
793 surf[4] = multisampling_state;
794 surf[5] = 0;
795
796 if (bo) {
797 drm_intel_bo_emit_reloc(brw->batch.bo,
798 *out_offset + 4,
799 bo, 0,
800 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
801 }
802 }
803
804 /**
805 * Sets up a surface state structure to point at the given region.
806 * While it is only used for the front/back buffer currently, it should be
807  * usable for further buffers when doing ARB_draw_buffers support.
808 */
809 static uint32_t
810 gen4_update_renderbuffer_surface(struct brw_context *brw,
811 struct gl_renderbuffer *rb,
812 bool layered, unsigned unit,
813 uint32_t surf_index)
814 {
815 struct gl_context *ctx = &brw->ctx;
816 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
817 struct intel_mipmap_tree *mt = irb->mt;
818 uint32_t *surf;
819 uint32_t tile_x, tile_y;
820 uint32_t format = 0;
821 uint32_t offset;
822 /* _NEW_BUFFERS */
823 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
824 /* BRW_NEW_FS_PROG_DATA */
825
826 assert(!layered);
827
828 if (rb->TexImage && !brw->has_surface_tile_offset) {
829 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
830
831 if (tile_x != 0 || tile_y != 0) {
832 /* Original gen4 hardware couldn't draw to a non-tile-aligned
833       * destination in a miptree unless you actually set up your renderbuffer
834 * as a miptree and used the fragile lod/array_index/etc. controls to
835 * select the image. So, instead, we just make a new single-level
836 * miptree and render into that.
837 */
838 intel_renderbuffer_move_to_temp(brw, irb, false);
839 mt = irb->mt;
840 }
841 }
842
843 intel_miptree_used_for_rendering(irb->mt);
844
845 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
846
847 format = brw->render_target_format[rb_format];
848 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
849 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
850 __func__, _mesa_get_format_name(rb_format));
851 }
852
853 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
854 format << BRW_SURFACE_FORMAT_SHIFT);
855
856 /* reloc */
857 assert(mt->offset % mt->cpp == 0);
858 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
859 mt->bo->offset64 + mt->offset);
860
861 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
862 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
863
864 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
865 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
866
867 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
868
869 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
870   /* Note that these fields only hold tile_x / 4 and tile_y / 2; the low
871    * bits are missing, so there's the possibility of getting in trouble.
872 */
873 assert(tile_x % 4 == 0);
874 assert(tile_y % 2 == 0);
875 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
876 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
877 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
878
879 if (brw->gen < 6) {
880 /* _NEW_COLOR */
881 if (!ctx->Color.ColorLogicOpEnabled &&
882 (ctx->Color.BlendEnabled & (1 << unit)))
883 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
884
885 if (!ctx->Color.ColorMask[unit][0])
886 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
887 if (!ctx->Color.ColorMask[unit][1])
888 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
889 if (!ctx->Color.ColorMask[unit][2])
890 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
891
892       /* Disable writes to the alpha component when the renderbuffer is XRGB
893        * and so has no real alpha channel, or when alpha writes are masked off.
894 */
895 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
896 !ctx->Color.ColorMask[unit][3]) {
897 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
898 }
899 }
900
901 drm_intel_bo_emit_reloc(brw->batch.bo,
902 offset + 4,
903 mt->bo,
904 surf[1] - mt->bo->offset64,
905 I915_GEM_DOMAIN_RENDER,
906 I915_GEM_DOMAIN_RENDER);
907
908 return offset;
909 }
910
911 /**
912 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
913 */
914 void
915 brw_update_renderbuffer_surfaces(struct brw_context *brw,
916 const struct gl_framebuffer *fb,
917 uint32_t render_target_start,
918 uint32_t *surf_offset)
919 {
920 GLuint i;
921 const unsigned int w = _mesa_geometric_width(fb);
922 const unsigned int h = _mesa_geometric_height(fb);
923 const unsigned int s = _mesa_geometric_samples(fb);
924
925 /* Update surfaces for drawing buffers */
926 if (fb->_NumColorDrawBuffers >= 1) {
927 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
928 const uint32_t surf_index = render_target_start + i;
929
930 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
931 surf_offset[surf_index] =
932 brw->vtbl.update_renderbuffer_surface(
933 brw, fb->_ColorDrawBuffers[i],
934 _mesa_geometric_layers(fb) > 0, i, surf_index);
935 } else {
936 brw->vtbl.emit_null_surface_state(brw, w, h, s,
937 &surf_offset[surf_index]);
938 }
939 }
940 } else {
941 const uint32_t surf_index = render_target_start;
942 brw->vtbl.emit_null_surface_state(brw, w, h, s,
943 &surf_offset[surf_index]);
944 }
945 }
946
947 static void
948 update_renderbuffer_surfaces(struct brw_context *brw)
949 {
950 const struct gl_context *ctx = &brw->ctx;
951
952 /* _NEW_BUFFERS | _NEW_COLOR */
953 const struct gl_framebuffer *fb = ctx->DrawBuffer;
954 brw_update_renderbuffer_surfaces(
955 brw, fb,
956 brw->wm.prog_data->binding_table.render_target_start,
957 brw->wm.base.surf_offset);
958 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
959 }
960
961 const struct brw_tracked_state brw_renderbuffer_surfaces = {
962 .dirty = {
963 .mesa = _NEW_BUFFERS |
964 _NEW_COLOR,
965 .brw = BRW_NEW_BATCH |
966 BRW_NEW_BLORP |
967 BRW_NEW_FS_PROG_DATA,
968 },
969 .emit = update_renderbuffer_surfaces,
970 };
971
972 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
973 .dirty = {
974 .mesa = _NEW_BUFFERS,
975 .brw = BRW_NEW_BATCH |
976 BRW_NEW_BLORP,
977 },
978 .emit = update_renderbuffer_surfaces,
979 };
980
981
982 static void
983 update_stage_texture_surfaces(struct brw_context *brw,
984 const struct gl_program *prog,
985 struct brw_stage_state *stage_state,
986 bool for_gather, uint32_t plane)
987 {
988 if (!prog)
989 return;
990
991 struct gl_context *ctx = &brw->ctx;
992
993 uint32_t *surf_offset = stage_state->surf_offset;
994
995 /* BRW_NEW_*_PROG_DATA */
996 if (for_gather)
997 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
998 else
999 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
1000
1001 unsigned num_samplers = util_last_bit(prog->SamplersUsed);
1002 for (unsigned s = 0; s < num_samplers; s++) {
1003 surf_offset[s] = 0;
1004
1005 if (prog->SamplersUsed & (1 << s)) {
1006 const unsigned unit = prog->SamplerUnits[s];
1007
1008 /* _NEW_TEXTURE */
1009 if (ctx->Texture.Unit[unit]._Current) {
1010 brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
1011 }
1012 }
1013 }
1014 }
1015
1016
1017 /**
1018 * Construct SURFACE_STATE objects for enabled textures.
1019 */
1020 static void
1021 brw_update_texture_surfaces(struct brw_context *brw)
1022 {
1023 /* BRW_NEW_VERTEX_PROGRAM */
1024 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
1025
1026 /* BRW_NEW_TESS_PROGRAMS */
1027 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
1028 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
1029
1030 /* BRW_NEW_GEOMETRY_PROGRAM */
1031 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
1032
1033 /* BRW_NEW_FRAGMENT_PROGRAM */
1034 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
1035
1036 /* _NEW_TEXTURE */
1037 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
1038 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
1039 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
1040 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
1041 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
1042
1043   /* Emit an alternate set of surface state for gather.  This
1044    * allows the surface format to be overridden for only the
1045 * gather4 messages. */
1046 if (brw->gen < 8) {
1047 if (vs && vs->UsesGather)
1048 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
1049 if (tcs && tcs->UsesGather)
1050 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
1051 if (tes && tes->UsesGather)
1052 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
1053 if (gs && gs->UsesGather)
1054 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
1055 if (fs && fs->UsesGather)
1056 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
1057 }
1058
1059 if (fs) {
1060 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
1061 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
1062 }
1063
1064 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1065 }
1066
1067 const struct brw_tracked_state brw_texture_surfaces = {
1068 .dirty = {
1069 .mesa = _NEW_TEXTURE,
1070 .brw = BRW_NEW_BATCH |
1071 BRW_NEW_BLORP |
1072 BRW_NEW_FRAGMENT_PROGRAM |
1073 BRW_NEW_FS_PROG_DATA |
1074 BRW_NEW_GEOMETRY_PROGRAM |
1075 BRW_NEW_GS_PROG_DATA |
1076 BRW_NEW_TESS_PROGRAMS |
1077 BRW_NEW_TCS_PROG_DATA |
1078 BRW_NEW_TES_PROG_DATA |
1079 BRW_NEW_TEXTURE_BUFFER |
1080 BRW_NEW_VERTEX_PROGRAM |
1081 BRW_NEW_VS_PROG_DATA,
1082 },
1083 .emit = brw_update_texture_surfaces,
1084 };
1085
1086 static void
1087 brw_update_cs_texture_surfaces(struct brw_context *brw)
1088 {
1089 /* BRW_NEW_COMPUTE_PROGRAM */
1090 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1091
1092 /* _NEW_TEXTURE */
1093 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1094
1095   /* Emit an alternate set of surface state for gather.  This
1096    * allows the surface format to be overridden for only the
1097 * gather4 messages.
1098 */
1099 if (brw->gen < 8) {
1100 if (cs && cs->UsesGather)
1101 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1102 }
1103
1104 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1105 }
1106
1107 const struct brw_tracked_state brw_cs_texture_surfaces = {
1108 .dirty = {
1109 .mesa = _NEW_TEXTURE,
1110 .brw = BRW_NEW_BATCH |
1111 BRW_NEW_BLORP |
1112 BRW_NEW_COMPUTE_PROGRAM,
1113 },
1114 .emit = brw_update_cs_texture_surfaces,
1115 };
1116
1117
1118 void
1119 brw_upload_ubo_surfaces(struct brw_context *brw,
1120 struct gl_linked_shader *shader,
1121 struct brw_stage_state *stage_state,
1122 struct brw_stage_prog_data *prog_data)
1123 {
1124 struct gl_context *ctx = &brw->ctx;
1125
1126 if (!shader)
1127 return;
1128
1129 uint32_t *ubo_surf_offsets =
1130 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1131
1132 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1133 struct gl_uniform_buffer_binding *binding =
1134 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1135
1136 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1137 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1138 } else {
1139 struct intel_buffer_object *intel_bo =
1140 intel_buffer_object(binding->BufferObject);
1141 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1142 if (!binding->AutomaticSize)
1143 size = MIN2(size, binding->Size);
1144 drm_intel_bo *bo =
1145 intel_bufferobj_buffer(brw, intel_bo,
1146 binding->Offset,
1147 size);
1148 brw_create_constant_surface(brw, bo, binding->Offset,
1149 size,
1150 &ubo_surf_offsets[i]);
1151 }
1152 }
1153
1154 uint32_t *ssbo_surf_offsets =
1155 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1156
1157 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1158 struct gl_shader_storage_buffer_binding *binding =
1159 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1160
1161 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1162 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1163 } else {
1164 struct intel_buffer_object *intel_bo =
1165 intel_buffer_object(binding->BufferObject);
1166 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1167 if (!binding->AutomaticSize)
1168 size = MIN2(size, binding->Size);
1169 drm_intel_bo *bo =
1170 intel_bufferobj_buffer(brw, intel_bo,
1171 binding->Offset,
1172 size);
1173 brw_create_buffer_surface(brw, bo, binding->Offset,
1174 size,
1175 &ssbo_surf_offsets[i]);
1176 }
1177 }
1178
1179 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1180 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1181 }
1182
1183 static void
1184 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1185 {
1186 struct gl_context *ctx = &brw->ctx;
1187 /* _NEW_PROGRAM */
1188 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1189
1190 if (!prog)
1191 return;
1192
1193 /* BRW_NEW_FS_PROG_DATA */
1194 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1195 &brw->wm.base, &brw->wm.prog_data->base);
1196 }
1197
1198 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1199 .dirty = {
1200 .mesa = _NEW_PROGRAM,
1201 .brw = BRW_NEW_BATCH |
1202 BRW_NEW_BLORP |
1203 BRW_NEW_FS_PROG_DATA |
1204 BRW_NEW_UNIFORM_BUFFER,
1205 },
1206 .emit = brw_upload_wm_ubo_surfaces,
1207 };
1208
1209 static void
1210 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1211 {
1212 struct gl_context *ctx = &brw->ctx;
1213 /* _NEW_PROGRAM */
1214 struct gl_shader_program *prog =
1215 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1216
1217 if (!prog)
1218 return;
1219
1220 /* BRW_NEW_CS_PROG_DATA */
1221 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1222 &brw->cs.base, &brw->cs.prog_data->base);
1223 }
1224
1225 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1226 .dirty = {
1227 .mesa = _NEW_PROGRAM,
1228 .brw = BRW_NEW_BATCH |
1229 BRW_NEW_BLORP |
1230 BRW_NEW_CS_PROG_DATA |
1231 BRW_NEW_UNIFORM_BUFFER,
1232 },
1233 .emit = brw_upload_cs_ubo_surfaces,
1234 };
1235
1236 void
1237 brw_upload_abo_surfaces(struct brw_context *brw,
1238 struct gl_linked_shader *shader,
1239 struct brw_stage_state *stage_state,
1240 struct brw_stage_prog_data *prog_data)
1241 {
1242 struct gl_context *ctx = &brw->ctx;
1243 uint32_t *surf_offsets =
1244 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1245
1246 if (shader && shader->NumAtomicBuffers) {
1247 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1248 struct gl_atomic_buffer_binding *binding =
1249 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1250 struct intel_buffer_object *intel_bo =
1251 intel_buffer_object(binding->BufferObject);
1252 drm_intel_bo *bo = intel_bufferobj_buffer(
1253 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1254
1255 brw_emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1256 binding->Offset, BRW_SURFACEFORMAT_RAW,
1257 bo->size - binding->Offset, 1, true);
1258 }
1259
1260 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1261 }
1262 }
1263
1264 static void
1265 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1266 {
1267 struct gl_context *ctx = &brw->ctx;
1268 /* _NEW_PROGRAM */
1269 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1270
1271 if (prog) {
1272 /* BRW_NEW_FS_PROG_DATA */
1273 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1274 &brw->wm.base, &brw->wm.prog_data->base);
1275 }
1276 }
1277
1278 const struct brw_tracked_state brw_wm_abo_surfaces = {
1279 .dirty = {
1280 .mesa = _NEW_PROGRAM,
1281 .brw = BRW_NEW_ATOMIC_BUFFER |
1282 BRW_NEW_BLORP |
1283 BRW_NEW_BATCH |
1284 BRW_NEW_FS_PROG_DATA,
1285 },
1286 .emit = brw_upload_wm_abo_surfaces,
1287 };
1288
1289 static void
1290 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1291 {
1292 struct gl_context *ctx = &brw->ctx;
1293 /* _NEW_PROGRAM */
1294 struct gl_shader_program *prog =
1295 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1296
1297 if (prog) {
1298 /* BRW_NEW_CS_PROG_DATA */
1299 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1300 &brw->cs.base, &brw->cs.prog_data->base);
1301 }
1302 }
1303
1304 const struct brw_tracked_state brw_cs_abo_surfaces = {
1305 .dirty = {
1306 .mesa = _NEW_PROGRAM,
1307 .brw = BRW_NEW_ATOMIC_BUFFER |
1308 BRW_NEW_BLORP |
1309 BRW_NEW_BATCH |
1310 BRW_NEW_CS_PROG_DATA,
1311 },
1312 .emit = brw_upload_cs_abo_surfaces,
1313 };
1314
1315 static void
1316 brw_upload_cs_image_surfaces(struct brw_context *brw)
1317 {
1318 struct gl_context *ctx = &brw->ctx;
1319 /* _NEW_PROGRAM */
1320 struct gl_shader_program *prog =
1321 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1322
1323 if (prog) {
1324 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1325 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1326 &brw->cs.base, &brw->cs.prog_data->base);
1327 }
1328 }
1329
1330 const struct brw_tracked_state brw_cs_image_surfaces = {
1331 .dirty = {
1332 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1333 .brw = BRW_NEW_BATCH |
1334 BRW_NEW_BLORP |
1335 BRW_NEW_CS_PROG_DATA |
1336 BRW_NEW_IMAGE_UNITS
1337 },
1338 .emit = brw_upload_cs_image_surfaces,
1339 };
1340
1341 static uint32_t
1342 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1343 {
1344 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1345 uint32_t hw_format = brw_format_for_mesa_format(format);
1346 if (access == GL_WRITE_ONLY) {
1347 return hw_format;
1348 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1349 /* Typed surface reads support a very limited subset of the shader
1350 * image formats. Translate it into the closest format the
1351 * hardware supports.
1352 */
1353 return isl_lower_storage_image_format(devinfo, hw_format);
1354 } else {
1355 /* The hardware doesn't actually support a typed format that we can use
1356 * so we have to fall back to untyped read/write messages.
1357 */
1358 return BRW_SURFACEFORMAT_RAW;
1359 }
1360 }
1361
1362 static void
1363 update_default_image_param(struct brw_context *brw,
1364 struct gl_image_unit *u,
1365 unsigned surface_idx,
1366 struct brw_image_param *param)
1367 {
1368 memset(param, 0, sizeof(*param));
1369 param->surface_idx = surface_idx;
1370 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1371 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1372 * detailed explanation of these parameters.
1373 */
1374 param->swizzling[0] = 0xff;
1375 param->swizzling[1] = 0xff;
1376 }
1377
1378 static void
1379 update_buffer_image_param(struct brw_context *brw,
1380 struct gl_image_unit *u,
1381 unsigned surface_idx,
1382 struct brw_image_param *param)
1383 {
1384 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1385
1386 update_default_image_param(brw, u, surface_idx, param);
1387
1388 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1389 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1390 }
1391
1392 static void
1393 update_texture_image_param(struct brw_context *brw,
1394 struct gl_image_unit *u,
1395 unsigned surface_idx,
1396 struct brw_image_param *param)
1397 {
1398 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1399
1400 update_default_image_param(brw, u, surface_idx, param);
1401
1402 param->size[0] = minify(mt->logical_width0, u->Level);
1403 param->size[1] = minify(mt->logical_height0, u->Level);
1404 param->size[2] = (!u->Layered ? 1 :
1405 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1406 u->TexObj->Target == GL_TEXTURE_3D ?
1407 minify(mt->logical_depth0, u->Level) :
1408 mt->logical_depth0);
1409
1410 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1411 &param->offset[0],
1412 &param->offset[1]);
1413
1414 param->stride[0] = mt->cpp;
1415 param->stride[1] = mt->pitch / mt->cpp;
1416 param->stride[2] =
1417 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1418 param->stride[3] =
1419 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1420
1421 if (mt->tiling == I915_TILING_X) {
1422 /* An X tile is a rectangular block of 512x8 bytes. */
1423 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1424 param->tiling[1] = _mesa_logbase2(8);
1425
1426 if (brw->has_swizzling) {
1427 /* Right shifts required to swizzle bits 9 and 10 of the memory
1428 * address with bit 6.
1429 */
1430 param->swizzling[0] = 3;
1431 param->swizzling[1] = 4;
1432 }
1433 } else if (mt->tiling == I915_TILING_Y) {
1434      /* The layout of a Y-tiled surface in memory isn't really fundamentally
1435       * different from the layout of an X-tiled surface; we simply pretend that
1436       * the surface is broken up into a number of smaller 16Bx32 tiles, each
1437       * one arranged in X-major order just as is the case for X-tiling.
1438 */
1439 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1440 param->tiling[1] = _mesa_logbase2(32);
1441
1442 if (brw->has_swizzling) {
1443 /* Right shift required to swizzle bit 9 of the memory address with
1444 * bit 6.
1445 */
1446 param->swizzling[0] = 3;
1447 }
1448 }
1449
1450 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1451 * address calculation algorithm (emit_address_calculation() in
1452 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1453 * modulus equal to the LOD.
1454 */
1455 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1456 0);
1457 }
1458
1459 static void
1460 update_image_surface(struct brw_context *brw,
1461 struct gl_image_unit *u,
1462 GLenum access,
1463 unsigned surface_idx,
1464 uint32_t *surf_offset,
1465 struct brw_image_param *param)
1466 {
1467 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1468 struct gl_texture_object *obj = u->TexObj;
1469 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1470
1471 if (obj->Target == GL_TEXTURE_BUFFER) {
1472 struct intel_buffer_object *intel_obj =
1473 intel_buffer_object(obj->BufferObject);
1474 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1475 _mesa_get_format_bytes(u->_ActualFormat));
1476
1477 brw_emit_buffer_surface_state(
1478 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1479 format, intel_obj->Base.Size, texel_size,
1480 access != GL_READ_ONLY);
1481
1482 update_buffer_image_param(brw, u, surface_idx, param);
1483
1484 } else {
1485 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1486 struct intel_mipmap_tree *mt = intel_obj->mt;
1487
1488 if (format == BRW_SURFACEFORMAT_RAW) {
1489 brw_emit_buffer_surface_state(
1490 brw, surf_offset, mt->bo, mt->offset,
1491 format, mt->bo->size - mt->offset, 1 /* pitch */,
1492 access != GL_READ_ONLY);
1493
1494 } else {
1495 const unsigned num_layers = (!u->Layered ? 1 :
1496 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1497 mt->logical_depth0);
1498
1499 struct isl_view view = {
1500 .format = format,
1501 .base_level = obj->MinLevel + u->Level,
1502 .levels = 1,
1503 .base_array_layer = obj->MinLayer + u->_Layer,
1504 .array_len = num_layers,
1505 .channel_select = {
1506 ISL_CHANNEL_SELECT_RED,
1507 ISL_CHANNEL_SELECT_GREEN,
1508 ISL_CHANNEL_SELECT_BLUE,
1509 ISL_CHANNEL_SELECT_ALPHA,
1510 },
1511 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1512 };
1513
1514 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1515
1516 brw_emit_surface_state(brw, mt, &view,
1517 surface_state_infos[brw->gen].tex_mocs, false,
1518 surf_offset, surf_index,
1519 I915_GEM_DOMAIN_SAMPLER,
1520 access == GL_READ_ONLY ? 0 :
1521 I915_GEM_DOMAIN_SAMPLER);
1522 }
1523
1524 update_texture_image_param(brw, u, surface_idx, param);
1525 }
1526
1527 } else {
1528 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1529 update_default_image_param(brw, u, surface_idx, param);
1530 }
1531 }
1532
1533 void
1534 brw_upload_image_surfaces(struct brw_context *brw,
1535 struct gl_linked_shader *shader,
1536 struct brw_stage_state *stage_state,
1537 struct brw_stage_prog_data *prog_data)
1538 {
1539 struct gl_context *ctx = &brw->ctx;
1540
1541 if (shader && shader->NumImages) {
1542 for (unsigned i = 0; i < shader->NumImages; i++) {
1543 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1544 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1545
1546 update_image_surface(brw, u, shader->ImageAccess[i],
1547 surf_idx,
1548 &stage_state->surf_offset[surf_idx],
1549 &prog_data->image_param[i]);
1550 }
1551
1552 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1553      /* This may have changed the image metadata that depends on the context
1554       * image unit state and is passed to the program as uniforms, so make
1555       * sure that push and pull constants are reuploaded.
1556 */
1557 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1558 }
1559 }
1560
1561 static void
1562 brw_upload_wm_image_surfaces(struct brw_context *brw)
1563 {
1564 struct gl_context *ctx = &brw->ctx;
1565 /* BRW_NEW_FRAGMENT_PROGRAM */
1566 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1567
1568 if (prog) {
1569 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1570 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1571 &brw->wm.base, &brw->wm.prog_data->base);
1572 }
1573 }
1574
1575 const struct brw_tracked_state brw_wm_image_surfaces = {
1576 .dirty = {
1577 .mesa = _NEW_TEXTURE,
1578 .brw = BRW_NEW_BATCH |
1579 BRW_NEW_BLORP |
1580 BRW_NEW_FRAGMENT_PROGRAM |
1581 BRW_NEW_FS_PROG_DATA |
1582 BRW_NEW_IMAGE_UNITS
1583 },
1584 .emit = brw_upload_wm_image_surfaces,
1585 };
1586
1587 void
1588 gen4_init_vtable_surface_functions(struct brw_context *brw)
1589 {
1590 brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
1591 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1592 }
1593
1594 void
1595 gen6_init_vtable_surface_functions(struct brw_context *brw)
1596 {
1597 gen4_init_vtable_surface_functions(brw);
1598 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1599 }
1600
1601 static void
1602 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1603 {
1604 struct gl_context *ctx = &brw->ctx;
1605 /* _NEW_PROGRAM */
1606 struct gl_shader_program *prog =
1607 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1608
1609 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1610 const unsigned surf_idx =
1611 brw->cs.prog_data->binding_table.work_groups_start;
1612 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1613 drm_intel_bo *bo;
1614 uint32_t bo_offset;
1615
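      /* For a direct dispatch the work group count is only available on the
       * CPU, so upload it to a buffer; an indirect dispatch already has it in
       * a buffer object.
       */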
1616 if (brw->compute.num_work_groups_bo == NULL) {
1617 bo = NULL;
1618 intel_upload_data(brw,
1619 (void *)brw->compute.num_work_groups,
1620 3 * sizeof(GLuint),
1621 sizeof(GLuint),
1622 &bo,
1623 &bo_offset);
1624 } else {
1625 bo = brw->compute.num_work_groups_bo;
1626 bo_offset = brw->compute.num_work_groups_offset;
1627 }
1628
1629 brw_emit_buffer_surface_state(brw, surf_offset,
1630 bo, bo_offset,
1631 BRW_SURFACEFORMAT_RAW,
1632 3 * sizeof(GLuint), 1, true);
1633 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1634 }
1635 }
1636
1637 const struct brw_tracked_state brw_cs_work_groups_surface = {
1638 .dirty = {
1639 .brw = BRW_NEW_BLORP |
1640 BRW_NEW_CS_WORK_GROUPS
1641 },
1642 .emit = brw_upload_cs_work_groups_surface,
1643 };