i965: Move data from brw->vs into a base class if gs will also need it.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
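/* Note that the array targets (1D_ARRAY, 2D_ARRAY, CUBE_MAP_ARRAY) map to the
 * same surface type as their non-array counterparts; the number of layers is
 * expressed through the surface's Depth field instead.  Rectangle textures
 * likewise become plain 2D surfaces, with their non-normalized coordinates
 * handled elsewhere (in sampler state or the compiled shader, depending on
 * generation) rather than by the surface type.
 */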
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
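/* The pre-Gen7 SURFACE_STATE encoding used here only distinguishes 1x from 4x
 * sampling.  Gen6, the only hardware covered by this file that supports MSAA
 * at all, only offers 4x, so any num_samples > 1 collapses to
 * MULTISAMPLECOUNT_4.
 */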
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
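/* Example of how the two layers compose: a GL_DEPTH_COMPONENT texture with
 * DepthMode GL_ALPHA leaves swizzles[] as { ZERO, ZERO, ZERO, X, ZERO, ONE,
 * NIL }.  If the application also sets TEXTURE_SWIZZLE_RGBA to (ALPHA, ALPHA,
 * ALPHA, ALPHA), t->_Swizzle selects component 3 for every channel, so the
 * result is MAKE_SWIZZLE4(X, X, X, X) and the depth value is returned in all
 * four channels.
 */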
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *binding_table,
198 unsigned surf_index)
199 {
200 struct brw_context *brw = brw_context(ctx);
201 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
202 uint32_t *surf;
203 struct intel_buffer_object *intel_obj =
204 intel_buffer_object(tObj->BufferObject);
205 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
206 gl_format format = tObj->_BufferObjectFormat;
207 uint32_t brw_format = brw_format_for_mesa_format(format);
208 int texel_size = _mesa_get_format_bytes(format);
209
210 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
211 _mesa_problem(NULL, "bad format %s for texture buffer\n",
212 _mesa_get_format_name(format));
213 }
214
215 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
216 6 * 4, 32, &binding_table[surf_index]);
217
218 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
219                 (brw_format << BRW_SURFACE_FORMAT_SHIFT));
220
221 if (brw->gen >= 6)
222 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
223
224 if (bo) {
225 surf[1] = bo->offset; /* reloc */
226
227 /* Emit relocation to surface contents. */
228 drm_intel_bo_emit_reloc(brw->batch.bo,
229 binding_table[surf_index] + 4,
230 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
231
232 int w = intel_obj->Base.Size / texel_size;
233 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
234 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
235 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
236 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
237 } else {
238 surf[1] = 0;
239 surf[2] = 0;
240 surf[3] = 0;
241 }
242
243 surf[4] = 0;
244 surf[5] = 0;
245 }
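/* The texel count w computed above is spread across the Width (bits 6:0 of w),
 * Height (bits 19:7) and Depth (bits 26:20) surface fields, for a limit of
 * 2^27 texels.  For example, w = 100000 encodes as Width = 32, Height = 781,
 * Depth = 0, since 781 * 128 + 32 = 100000.
 */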
246
247 static void
248 brw_update_texture_surface(struct gl_context *ctx,
249 unsigned unit,
250 uint32_t *binding_table,
251 unsigned surf_index)
252 {
253 struct brw_context *brw = brw_context(ctx);
254 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
255 struct intel_texture_object *intelObj = intel_texture_object(tObj);
256 struct intel_mipmap_tree *mt = intelObj->mt;
257 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
258 struct intel_texture_image *intel_image = intel_texture_image(firstImage);
259 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
260 uint32_t *surf;
261
262 if (tObj->Target == GL_TEXTURE_BUFFER) {
263 brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
264 return;
265 }
266
267 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
268 6 * 4, 32, &binding_table[surf_index]);
269
270 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
271 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
272 BRW_SURFACE_CUBEFACE_ENABLES |
273 (translate_tex_format(brw,
274 mt->format,
275 tObj->DepthMode,
276 sampler->sRGBDecode) <<
277 BRW_SURFACE_FORMAT_SHIFT));
278
279 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
280
281 surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
282 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
283 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
284
285 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
286 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
287 (intelObj->mt->region->pitch - 1) <<
288 BRW_SURFACE_PITCH_SHIFT);
289
290 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
291
292 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
293
294 /* Emit relocation to surface contents */
295 drm_intel_bo_emit_reloc(brw->batch.bo,
296 binding_table[surf_index] + 4,
297 intelObj->mt->region->bo,
298 surf[1] - intelObj->mt->region->bo->offset,
299 I915_GEM_DOMAIN_SAMPLER, 0);
300 }
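/* The relocation pattern above recurs throughout this file: surf[1] is written
 * with the bo's presumed graphics address plus the offset into it, and
 * drm_intel_bo_emit_reloc() records that the dword at
 * binding_table[surf_index] + 4 points into that bo with delta
 * surf[1] - bo->offset, so the kernel can patch the address if the buffer gets
 * relocated at execbuffer time.
 */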
301
302 /**
303 * Create the constant buffer surface. Vertex/fragment shader constants will be
304 * read from this buffer with Data Port Read instructions/messages.
305 */
306 static void
307 brw_create_constant_surface(struct brw_context *brw,
308 drm_intel_bo *bo,
309 uint32_t offset,
310 uint32_t size,
311 uint32_t *out_offset,
312 bool dword_pitch)
313 {
314 uint32_t stride = dword_pitch ? 4 : 16;
315 uint32_t elements = ALIGN(size, stride) / stride;
316 const GLint w = elements - 1;
317 uint32_t *surf;
318
319 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
320 6 * 4, 32, out_offset);
321
322 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
323 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
324 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
325
326 if (brw->gen >= 6)
327 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
328
329 surf[1] = bo->offset + offset; /* reloc */
330
331 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
332 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
333
334 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
335 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
336
337 surf[4] = 0;
338 surf[5] = 0;
339
340 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
341 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
342 * physical cache. It is mapped in hardware to the sampler cache."
343 */
344 drm_intel_bo_emit_reloc(brw->batch.bo,
345 *out_offset + 4,
346 bo, offset,
347 I915_GEM_DOMAIN_SAMPLER, 0);
348 }
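/* dword_pitch controls the element size of the constant buffer: a 4-byte pitch
 * makes each element a single dword (used for fragment shader constants),
 * while a 16-byte pitch makes each element a full vec4 (used by the vec4
 * backends).  The choice has to match how the compiled shader computes its
 * pull-constant and UBO offsets; the callers in this file pass true for
 * fragment shaders and false otherwise.
 */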
349
350 /**
351 * Set up a binding table entry for use by stream output logic (transform
352 * feedback).
353 *
354  * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
355 */
356 void
357 brw_update_sol_surface(struct brw_context *brw,
358 struct gl_buffer_object *buffer_obj,
359 uint32_t *out_offset, unsigned num_vector_components,
360 unsigned stride_dwords, unsigned offset_dwords)
361 {
362 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
363 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
364 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
365 out_offset);
366 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
367 uint32_t offset_bytes = 4 * offset_dwords;
368 size_t size_dwords = buffer_obj->Size / 4;
369 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
370
371 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
372 * too big to map using a single binding table entry?
373 */
374 assert((size_dwords - offset_dwords) / stride_dwords
375 <= BRW_MAX_NUM_BUFFER_ENTRIES);
376
377 if (size_dwords > offset_dwords + num_vector_components) {
378 /* There is room for at least 1 transform feedback output in the buffer.
379 * Compute the number of additional transform feedback outputs the
380 * buffer has room for.
381 */
382 buffer_size_minus_1 =
383 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
384 } else {
385 /* There isn't even room for a single transform feedback output in the
386 * buffer. We can't configure the binding table entry to prevent output
387 * entirely; we'll have to rely on the geometry shader to detect
388 * overflow. But to minimize the damage in case of a bug, set up the
389 * binding table entry to just allow a single output.
390 */
391 buffer_size_minus_1 = 0;
392 }
393 width = buffer_size_minus_1 & 0x7f;
394 height = (buffer_size_minus_1 & 0xfff80) >> 7;
395 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
396
397 switch (num_vector_components) {
398 case 1:
399 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
400 break;
401 case 2:
402 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
403 break;
404 case 3:
405 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
406 break;
407 case 4:
408 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
409 break;
410 default:
411 assert(!"Invalid vector size for transform feedback output");
412 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
413 break;
414 }
415
416 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
417 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
418 surface_format << BRW_SURFACE_FORMAT_SHIFT |
419 BRW_SURFACE_RC_READ_WRITE;
420 surf[1] = bo->offset + offset_bytes; /* reloc */
421 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
422 height << BRW_SURFACE_HEIGHT_SHIFT);
423 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
424 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
425 surf[4] = 0;
426 surf[5] = 0;
427
428 /* Emit relocation to surface contents. */
429 drm_intel_bo_emit_reloc(brw->batch.bo,
430 *out_offset + 4,
431 bo, offset_bytes,
432 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
433 }
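/* Worked example: a 4096-byte buffer (size_dwords = 1024) bound at offset 0,
 * written with a 16-dword stride and a vec4 (4-component) output, gives
 * buffer_size_minus_1 = (1024 - 0 - 4) / 16 = 63, i.e. entries 0..63 are
 * allowed, exactly the 64 complete vertices that fit in the buffer.  The value
 * is then split into the Width/Height/Depth fields just like the other buffer
 * surfaces in this file.
 */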
434
435 /* Creates a new WM constant buffer reflecting the current fragment program's
436 * constants, if needed by the fragment program.
437 *
438 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
439 * state atom.
440 */
441 static void
442 brw_upload_wm_pull_constants(struct brw_context *brw)
443 {
444 struct gl_context *ctx = &brw->ctx;
445 /* BRW_NEW_FRAGMENT_PROGRAM */
446 struct brw_fragment_program *fp =
447 (struct brw_fragment_program *) brw->fragment_program;
448 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
449 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
450 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
451 float *constants;
452 unsigned int i;
453
454 _mesa_load_state_parameters(ctx, params);
455
456 /* CACHE_NEW_WM_PROG */
457 if (brw->wm.prog_data->nr_pull_params == 0) {
458 if (brw->wm.const_bo) {
459 drm_intel_bo_unreference(brw->wm.const_bo);
460 brw->wm.const_bo = NULL;
461 brw->wm.surf_offset[surf_index] = 0;
462 brw->state.dirty.brw |= BRW_NEW_SURFACES;
463 }
464 return;
465 }
466
467 drm_intel_bo_unreference(brw->wm.const_bo);
468 brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
469 size, 64);
470
471 /* _NEW_PROGRAM_CONSTANTS */
472 drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
473 constants = brw->wm.const_bo->virtual;
474 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
475 constants[i] = *brw->wm.prog_data->pull_param[i];
476 }
477 drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
478
479 brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
480 &brw->wm.surf_offset[surf_index],
481 true);
482
483 brw->state.dirty.brw |= BRW_NEW_SURFACES;
484 }
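/* A fresh const_bo is allocated on every upload rather than rewriting the old
 * one, presumably because the previous buffer may still be referenced by
 * commands already queued in the batch; dropping our reference lets it be
 * freed once the GPU is done with it.
 */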
485
486 const struct brw_tracked_state brw_wm_pull_constants = {
487 .dirty = {
488 .mesa = (_NEW_PROGRAM_CONSTANTS),
489 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
490 .cache = CACHE_NEW_WM_PROG,
491 },
492 .emit = brw_upload_wm_pull_constants,
493 };
494
495 static void
496 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
497 {
498    /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
499 * Notes):
500 *
501 * A null surface will be used in instances where an actual surface is
502 * not bound. When a write message is generated to a null surface, no
503 * actual surface is written to. When a read message (including any
504 * sampling engine message) is generated to a null surface, the result
505 * is all zeros. Note that a null surface type is allowed to be used
506     *     with all messages, even if it is not specifically indicated as
507 * supported. All of the remaining fields in surface state are ignored
508 * for null surfaces, with the following exceptions:
509 *
510 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
511 * depth buffer’s corresponding state for all render target surfaces,
512 * including null.
513 *
514 * - Surface Format must be R8G8B8A8_UNORM.
515 */
516 struct gl_context *ctx = &brw->ctx;
517 uint32_t *surf;
518 unsigned surface_type = BRW_SURFACE_NULL;
519 drm_intel_bo *bo = NULL;
520 unsigned pitch_minus_1 = 0;
521 uint32_t multisampling_state = 0;
522
523 /* _NEW_BUFFERS */
524 const struct gl_framebuffer *fb = ctx->DrawBuffer;
525
526 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
527 &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
528
529 if (fb->Visual.samples > 1) {
530 /* On Gen6, null render targets seem to cause GPU hangs when
531        * multisampling. So work around this problem by rendering into a dummy
532 * color buffer.
533 *
534 * To decrease the amount of memory needed by the workaround buffer, we
535 * set its pitch to 128 bytes (the width of a Y tile). This means that
536 * the amount of memory needed for the workaround buffer is
537 * (width_in_tiles + height_in_tiles - 1) tiles.
538 *
539 * Note that since the workaround buffer will be interpreted by the
540 * hardware as an interleaved multisampled buffer, we need to compute
541 * width_in_tiles and height_in_tiles by dividing the width and height
542 * by 16 rather than the normal Y-tile size of 32.
543 */
544 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
545 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
546 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
547 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
548 size_needed);
549 bo = brw->wm.multisampled_null_render_target_bo;
550 surface_type = BRW_SURFACE_2D;
551 pitch_minus_1 = 127;
552 multisampling_state =
553 brw_get_surface_num_multisamples(fb->Visual.samples);
554 }
555
556 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
557 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
558 if (brw->gen < 6) {
559 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
560 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
561 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
562 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
563 }
564 surf[1] = bo ? bo->offset : 0;
565 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
566 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
567
568    /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
569 * Notes):
570 *
571 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
572 */
573 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
574 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
575 surf[4] = multisampling_state;
576 surf[5] = 0;
577
578 if (bo) {
579 drm_intel_bo_emit_reloc(brw->batch.bo,
580 brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
581 bo, 0,
582 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
583 }
584 }
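/* To put numbers on the multisampled workaround buffer above: for a 1920x1080
 * framebuffer, width_in_tiles = 120 and height_in_tiles = 68, so size_needed
 * is (120 + 68 - 1) * 4096 = 765952 bytes, far less than allocating a real
 * dummy color buffer of that size would cost.
 */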
585
586 /**
587 * Sets up a surface state structure to point at the given region.
588 * While it is only used for the front/back buffer currently, it should be
589  * usable for other buffers when doing ARB_draw_buffers support.
590 */
591 static void
592 brw_update_renderbuffer_surface(struct brw_context *brw,
593 struct gl_renderbuffer *rb,
594 bool layered,
595 unsigned int unit)
596 {
597 struct gl_context *ctx = &brw->ctx;
598 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
599 struct intel_mipmap_tree *mt = irb->mt;
600 struct intel_region *region;
601 uint32_t *surf;
602 uint32_t tile_x, tile_y;
603 uint32_t format = 0;
604 /* _NEW_BUFFERS */
605 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
606
607 assert(!layered);
608
609 if (rb->TexImage && !brw->has_surface_tile_offset) {
610 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
611
612 if (tile_x != 0 || tile_y != 0) {
613 /* Original gen4 hardware couldn't draw to a non-tile-aligned
614        * destination in a miptree unless you actually set up your renderbuffer
615 * as a miptree and used the fragile lod/array_index/etc. controls to
616 * select the image. So, instead, we just make a new single-level
617 * miptree and render into that.
618 */
619 intel_renderbuffer_move_to_temp(brw, irb, false);
620 mt = irb->mt;
621 }
622 }
623
624 intel_miptree_used_for_rendering(irb->mt);
625
626 region = irb->mt->region;
627
628 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
629 &brw->wm.surf_offset[SURF_INDEX_DRAW(unit)]);
630
631 format = brw->render_target_format[rb_format];
632 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
633 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
634 __FUNCTION__, _mesa_get_format_name(rb_format));
635 }
636
637 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
638 format << BRW_SURFACE_FORMAT_SHIFT);
639
640 /* reloc */
641 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
642 region->bo->offset);
643
644 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
645 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
646
647 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
648 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
649
650 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
651
652 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
653 /* Note that the low bits of these fields are missing, so
654 * there's the possibility of getting in trouble.
655 */
656 assert(tile_x % 4 == 0);
657 assert(tile_y % 2 == 0);
658 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
659 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
660 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
661
662 if (brw->gen < 6) {
663 /* _NEW_COLOR */
664 if (!ctx->Color.ColorLogicOpEnabled &&
665 (ctx->Color.BlendEnabled & (1 << unit)))
666 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
667
668 if (!ctx->Color.ColorMask[unit][0])
669 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
670 if (!ctx->Color.ColorMask[unit][1])
671 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
672 if (!ctx->Color.ColorMask[unit][2])
673 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
674
675       /* Disable writes to the alpha component when the renderbuffer has no
676        * alpha channel (XRGB) or the alpha channel is masked off.
677 */
678 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
679 !ctx->Color.ColorMask[unit][3]) {
680 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
681 }
682 }
683
684 drm_intel_bo_emit_reloc(brw->batch.bo,
685 brw->wm.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
686 region->bo,
687 surf[1] - region->bo->offset,
688 I915_GEM_DOMAIN_RENDER,
689 I915_GEM_DOMAIN_RENDER);
690 }
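/* The X/Y offsets written to surf[5] are stored in units of 4 pixels and 2
 * rows, which is why the asserts above require tile_x and tile_y to be
 * multiples of 4 and 2: an image whose intra-tile offset is not aligned that
 * way cannot be addressed exactly (the "low bits ... missing" noted above),
 * and hardware without these offset fields at all takes the move-to-temp path
 * at the top of the function instead.
 */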
691
692 /**
693 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
694 */
695 static void
696 brw_update_renderbuffer_surfaces(struct brw_context *brw)
697 {
698 struct gl_context *ctx = &brw->ctx;
699 GLuint i;
700
701 /* _NEW_BUFFERS | _NEW_COLOR */
702 /* Update surfaces for drawing buffers */
703 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
704 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
705 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
706 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
707 ctx->DrawBuffer->Layered, i);
708 } else {
709 brw->vtbl.update_null_renderbuffer_surface(brw, i);
710 }
711 }
712 } else {
713 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
714 }
715 brw->state.dirty.brw |= BRW_NEW_SURFACES;
716 }
717
718 const struct brw_tracked_state brw_renderbuffer_surfaces = {
719 .dirty = {
720 .mesa = (_NEW_COLOR |
721 _NEW_BUFFERS),
722 .brw = BRW_NEW_BATCH,
723 .cache = 0
724 },
725 .emit = brw_update_renderbuffer_surfaces,
726 };
727
728 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
729 .dirty = {
730 .mesa = _NEW_BUFFERS,
731 .brw = BRW_NEW_BATCH,
732 .cache = 0
733 },
734 .emit = brw_update_renderbuffer_surfaces,
735 };
736
737 /**
738 * Construct SURFACE_STATE objects for enabled textures.
739 */
740 static void
741 brw_update_texture_surfaces(struct brw_context *brw)
742 {
743 struct gl_context *ctx = &brw->ctx;
744
745 /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
746 * Unfortunately, we're stuck using the gl_program structs until the
747 * ARB_fragment_program front-end gets converted to GLSL IR. These
748 * have the downside that SamplerUnits is split and only contains the
749 * mappings for samplers active in that stage.
750 */
751 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
752 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
753
754 unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
755
756 for (unsigned s = 0; s < num_samplers; s++) {
757 brw->vs.base.surf_offset[SURF_INDEX_VEC4_TEXTURE(s)] = 0;
758 brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
759
760 if (vs->SamplersUsed & (1 << s)) {
761 const unsigned unit = vs->SamplerUnits[s];
762
763 /* _NEW_TEXTURE */
764 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
765 brw->vtbl.update_texture_surface(ctx, unit,
766 brw->vs.base.surf_offset,
767 SURF_INDEX_VEC4_TEXTURE(s));
768 }
769 }
770
771 if (fs->SamplersUsed & (1 << s)) {
772 const unsigned unit = fs->SamplerUnits[s];
773
774 /* _NEW_TEXTURE */
775 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
776 brw->vtbl.update_texture_surface(ctx, unit,
777 brw->wm.surf_offset,
778 SURF_INDEX_TEXTURE(s));
779 }
780 }
781 }
782
783 brw->state.dirty.brw |= BRW_NEW_SURFACES;
784 }
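/* Note that the two stages record their SURFACE_STATE offsets in different
 * places: vertex shader textures land in brw->vs.base.surf_offset, the
 * per-stage base-class storage the commit above moves data into so a geometry
 * shader stage can share it, while fragment shader textures use
 * brw->wm.surf_offset.  The binding table indices differ as well, via
 * SURF_INDEX_VEC4_TEXTURE() versus SURF_INDEX_TEXTURE().
 */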
785
786 const struct brw_tracked_state brw_texture_surfaces = {
787 .dirty = {
788 .mesa = _NEW_TEXTURE,
789 .brw = BRW_NEW_BATCH |
790 BRW_NEW_VERTEX_PROGRAM |
791 BRW_NEW_FRAGMENT_PROGRAM,
792 .cache = 0
793 },
794 .emit = brw_update_texture_surfaces,
795 };
796
797 void
798 brw_upload_ubo_surfaces(struct brw_context *brw,
799 struct gl_shader *shader,
800 uint32_t *surf_offsets)
801 {
802 struct gl_context *ctx = &brw->ctx;
803
804 if (!shader)
805 return;
806
807 for (int i = 0; i < shader->NumUniformBlocks; i++) {
808 struct gl_uniform_buffer_binding *binding;
809 struct intel_buffer_object *intel_bo;
810
811 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
812 intel_bo = intel_buffer_object(binding->BufferObject);
813 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
814
815 /* Because behavior for referencing outside of the binding's size in the
816 * glBindBufferRange case is undefined, we can just bind the whole buffer
817        * (as glBindBufferBase would) and still be a correct implementation.
818 */
819 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
820 bo->size - binding->Offset,
821 &surf_offsets[i],
822 shader->Type == GL_FRAGMENT_SHADER);
823 }
824
825 if (shader->NumUniformBlocks)
826 brw->state.dirty.brw |= BRW_NEW_SURFACES;
827 }
828
829 static void
830 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
831 {
832 struct gl_context *ctx = &brw->ctx;
833 /* _NEW_PROGRAM */
834 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
835
836 if (!prog)
837 return;
838
839 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
840 &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
841 }
842
843 const struct brw_tracked_state brw_wm_ubo_surfaces = {
844 .dirty = {
845 .mesa = _NEW_PROGRAM,
846 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
847 .cache = 0,
848 },
849 .emit = brw_upload_wm_ubo_surfaces,
850 };
851
852 /**
853 * Constructs the binding table for the WM surface state, which maps unit
854 * numbers to surface state objects.
855 */
856 static void
857 brw_upload_wm_binding_table(struct brw_context *brw)
858 {
859 uint32_t *bind;
860 int i;
861
862 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
863 gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
864 }
865
866 /* CACHE_NEW_WM_PROG */
867 unsigned entries = brw->wm.prog_data->binding_table_size;
868 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
869 sizeof(uint32_t) * entries,
870 32, &brw->wm.bind_bo_offset);
871
872 /* BRW_NEW_SURFACES */
873 for (i = 0; i < entries; i++) {
874 bind[i] = brw->wm.surf_offset[i];
875 }
876
877 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
878 }
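/* The binding table itself is just an array of 32-bit offsets into the batch's
 * surface-state space, indexed by the binding table index baked into the
 * compiled shader's sampler and data-port messages.  binding_table_size comes
 * from the WM program compile, so only as many entries as the shader can
 * actually reference get uploaded.
 */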
879
880 const struct brw_tracked_state brw_wm_binding_table = {
881 .dirty = {
882 .mesa = 0,
883 .brw = (BRW_NEW_BATCH |
884 BRW_NEW_SURFACES),
885 .cache = CACHE_NEW_WM_PROG
886 },
887 .emit = brw_upload_wm_binding_table,
888 };
889
890 void
891 gen4_init_vtable_surface_functions(struct brw_context *brw)
892 {
893 brw->vtbl.update_texture_surface = brw_update_texture_surface;
894 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
895 brw->vtbl.update_null_renderbuffer_surface =
896 brw_update_null_renderbuffer_surface;
897 brw->vtbl.create_constant_surface = brw_create_constant_surface;
898 }
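/* These vtable hooks let the generation-independent state atoms above (the
 * texture, renderbuffer, UBO and pull-constant uploads) stay shared while
 * newer generations plug in their own SURFACE_STATE emission; the functions in
 * this file provide the Gen4-6 layouts.
 */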