i965: Split surface format code into a new file (brw_surface_formats.c).
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
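   /* There is no dedicated rectangle surface type on this hardware, so
    * rectangle textures are set up as plain 2D surfaces; the unnormalized
    * coordinate handling happens outside of SURFACE_STATE.
    */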
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
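/* Of the parts driven by this file, only gen6 supports multisampling, and it
 * only supports 4x, so any sample count above 1 is reported as 4x here.
 */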
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169  * force the alpha value read to 1.0. This allows the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
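   /* Compose the depth-mode/base-format swizzle built above with the
    * application's EXT_texture_swizzle state.  For example, with
    * DEPTH_TEXTURE_MODE = GL_RED the table is (X, 0, 0, 1), so an
    * application swizzle of (A, A, A, R) yields (1, 1, 1, X).
    */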
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *binding_table,
198 unsigned surf_index)
199 {
200 struct brw_context *brw = brw_context(ctx);
201 struct intel_context *intel = &brw->intel;
202 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
203 uint32_t *surf;
204 struct intel_buffer_object *intel_obj =
205 intel_buffer_object(tObj->BufferObject);
206 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
207 gl_format format = tObj->_BufferObjectFormat;
208 uint32_t brw_format = brw_format_for_mesa_format(format);
209 int texel_size = _mesa_get_format_bytes(format);
210
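   /* brw_format_for_mesa_format() returns 0 for unsupported formats, but 0 is
    * also the valid encoding of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT (the
    * format MESA_FORMAT_RGBA_FLOAT32 maps to), hence the extra check below.
    */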
211 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
212 _mesa_problem(NULL, "bad format %s for texture buffer\n",
213 _mesa_get_format_name(format));
214 }
215
216 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
217 6 * 4, 32, &binding_table[surf_index]);
218
219 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
220 	      (brw_format << BRW_SURFACE_FORMAT_SHIFT));
221
222 if (intel->gen >= 6)
223 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
224
225 if (bo) {
226 surf[1] = bo->offset; /* reloc */
227
228 /* Emit relocation to surface contents. */
229 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
230 binding_table[surf_index] + 4,
231 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
232
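      /* Buffer surfaces describe their size in elements: the element count is
       * split across the 7-bit Width, 13-bit Height and 7-bit Depth fields
       * (low bits to high bits), and the element size goes in Pitch.
       */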
233 int w = intel_obj->Base.Size / texel_size;
234 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
235 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
236 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
237 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
238 } else {
239 surf[1] = 0;
240 surf[2] = 0;
241 surf[3] = 0;
242 }
243
244 surf[4] = 0;
245 surf[5] = 0;
246 }
247
248 static void
249 brw_update_texture_surface(struct gl_context *ctx,
250 unsigned unit,
251 uint32_t *binding_table,
252 unsigned surf_index)
253 {
254 struct intel_context *intel = intel_context(ctx);
255 struct brw_context *brw = brw_context(ctx);
256 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
257 struct intel_texture_object *intelObj = intel_texture_object(tObj);
258 struct intel_mipmap_tree *mt = intelObj->mt;
259 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
260 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
261 uint32_t *surf;
262 uint32_t tile_x, tile_y;
263
264 if (tObj->Target == GL_TEXTURE_BUFFER) {
265 brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
266 return;
267 }
268
269 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
270 6 * 4, 32, &binding_table[surf_index]);
271
272 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
273 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
274 BRW_SURFACE_CUBEFACE_ENABLES |
275 (translate_tex_format(intel,
276 mt->format,
277 tObj->DepthMode,
278 sampler->sRGBDecode) <<
279 BRW_SURFACE_FORMAT_SHIFT));
280
281 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
282 surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
283 &tile_x, &tile_y);
284
285 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
286 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
287 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
288
289 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
290 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
291 (intelObj->mt->region->pitch - 1) <<
292 BRW_SURFACE_PITCH_SHIFT);
293
294 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
295
296 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
297 /* Note that these offset fields only hold multiples of 4 (X) and 2 (Y),
298 * so tile offsets with the low bits set can't be represented.
299 */
300 assert(tile_x % 4 == 0);
301 assert(tile_y % 2 == 0);
302 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
303 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
304 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
305
306 /* Emit relocation to surface contents */
307 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
308 binding_table[surf_index] + 4,
309 intelObj->mt->region->bo,
310 surf[1] - intelObj->mt->region->bo->offset,
311 I915_GEM_DOMAIN_SAMPLER, 0);
312 }
313
314 /**
315 * Create the constant buffer surface. Vertex/fragment shader constants will be
316 * read from this buffer with Data Port Read instructions/messages.
317 */
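/* dword_pitch selects between a tightly packed dword layout (stride 4) and a
 * vec4 layout (stride 16); it only affects the element count and the Pitch
 * field programmed below.
 */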
318 static void
319 brw_create_constant_surface(struct brw_context *brw,
320 drm_intel_bo *bo,
321 uint32_t offset,
322 uint32_t size,
323 uint32_t *out_offset,
324 bool dword_pitch)
325 {
326 struct intel_context *intel = &brw->intel;
327 uint32_t stride = dword_pitch ? 4 : 16;
328 uint32_t elements = ALIGN(size, stride) / stride;
329 const GLint w = elements - 1;
330 uint32_t *surf;
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, out_offset);
334
335 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
336 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
337 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
338
339 if (intel->gen >= 6)
340 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
341
342 surf[1] = bo->offset + offset; /* reloc */
343
344 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
345 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
346
347 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
348 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
349
350 surf[4] = 0;
351 surf[5] = 0;
352
353 /* Emit relocation to surface contents. Section 5.1.1 of the gen4
354 * bspec ("Data Cache") says that the data cache does not exist as
355 * a separate cache and is just the sampler cache.
356 */
357 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
358 *out_offset + 4,
359 bo, offset,
360 I915_GEM_DOMAIN_SAMPLER, 0);
361 }
362
363 /**
364 * Set up a binding table entry for use by stream output logic (transform
365 * feedback).
366 *
367 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
368 */
369 void
370 brw_update_sol_surface(struct brw_context *brw,
371 struct gl_buffer_object *buffer_obj,
372 uint32_t *out_offset, unsigned num_vector_components,
373 unsigned stride_dwords, unsigned offset_dwords)
374 {
375 struct intel_context *intel = &brw->intel;
376 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
377 drm_intel_bo *bo =
378 intel_bufferobj_buffer(intel, intel_bo, INTEL_WRITE_PART);
379 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
380 out_offset);
381 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
382 uint32_t offset_bytes = 4 * offset_dwords;
383 size_t size_dwords = buffer_obj->Size / 4;
384 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
385
386 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
387 * too big to map using a single binding table entry?
388 */
389 assert((size_dwords - offset_dwords) / stride_dwords
390 <= BRW_MAX_NUM_BUFFER_ENTRIES);
391
392 if (size_dwords > offset_dwords + num_vector_components) {
393 /* There is room for at least 1 transform feedback output in the buffer.
394 * Compute the number of additional transform feedback outputs the
395 * buffer has room for.
396 */
397 buffer_size_minus_1 =
398 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
399 } else {
400 /* There isn't even room for a single transform feedback output in the
401 * buffer. We can't configure the binding table entry to prevent output
402 * entirely; we'll have to rely on the geometry shader to detect
403 * overflow. But to minimize the damage in case of a bug, set up the
404 * binding table entry to just allow a single output.
405 */
406 buffer_size_minus_1 = 0;
407 }
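   /* As with other buffer surfaces, the entry count is split across the 7-bit
    * Width, 13-bit Height and 7-bit Depth fields (27 bits total, which is
    * presumably where the BRW_MAX_NUM_BUFFER_ENTRIES limit comes from).
    */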
408 width = buffer_size_minus_1 & 0x7f;
409 height = (buffer_size_minus_1 & 0xfff80) >> 7;
410 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
411
412 switch (num_vector_components) {
413 case 1:
414 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
415 break;
416 case 2:
417 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
418 break;
419 case 3:
420 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
421 break;
422 case 4:
423 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
424 break;
425 default:
426 assert(!"Invalid vector size for transform feedback output");
427 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
428 break;
429 }
430
431 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
432 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
433 surface_format << BRW_SURFACE_FORMAT_SHIFT |
434 BRW_SURFACE_RC_READ_WRITE;
435 surf[1] = bo->offset + offset_bytes; /* reloc */
436 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
437 height << BRW_SURFACE_HEIGHT_SHIFT);
438 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
439 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
440 surf[4] = 0;
441 surf[5] = 0;
442
443 /* Emit relocation to surface contents. */
444 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
445 *out_offset + 4,
446 bo, offset_bytes,
447 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
448 }
449
450 /* Creates a new WM constant buffer reflecting the current fragment program's
451 * constants, if needed by the fragment program.
452 *
453 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
454 * state atom.
455 */
456 static void
457 brw_upload_wm_pull_constants(struct brw_context *brw)
458 {
459 struct gl_context *ctx = &brw->intel.ctx;
460 struct intel_context *intel = &brw->intel;
461 /* BRW_NEW_FRAGMENT_PROGRAM */
462 struct brw_fragment_program *fp =
463 (struct brw_fragment_program *) brw->fragment_program;
464 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
465 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
466 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
467 float *constants;
468 unsigned int i;
469
470 _mesa_load_state_parameters(ctx, params);
471
472 /* CACHE_NEW_WM_PROG */
473 if (brw->wm.prog_data->nr_pull_params == 0) {
474 if (brw->wm.const_bo) {
475 drm_intel_bo_unreference(brw->wm.const_bo);
476 brw->wm.const_bo = NULL;
477 brw->wm.surf_offset[surf_index] = 0;
478 brw->state.dirty.brw |= BRW_NEW_SURFACES;
479 }
480 return;
481 }
482
483 drm_intel_bo_unreference(brw->wm.const_bo);
484 brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "WM const bo",
485 size, 64);
486
487 /* _NEW_PROGRAM_CONSTANTS */
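   /* pull_param[] holds pointers into the parameter storage that
    * _mesa_load_state_parameters() refreshed above, so dereferencing each
    * entry copies the current uniform value into the constant buffer.
    */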
488 drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
489 constants = brw->wm.const_bo->virtual;
490 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
491 constants[i] = *brw->wm.prog_data->pull_param[i];
492 }
493 drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
494
495 intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
496 &brw->wm.surf_offset[surf_index],
497 true);
498
499 brw->state.dirty.brw |= BRW_NEW_SURFACES;
500 }
501
502 const struct brw_tracked_state brw_wm_pull_constants = {
503 .dirty = {
504 .mesa = (_NEW_PROGRAM_CONSTANTS),
505 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
506 .cache = CACHE_NEW_WM_PROG,
507 },
508 .emit = brw_upload_wm_pull_constants,
509 };
510
511 static void
512 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
513 {
514 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
515 * Notes):
516 *
517 * A null surface will be used in instances where an actual surface is
518 * not bound. When a write message is generated to a null surface, no
519 * actual surface is written to. When a read message (including any
520 * sampling engine message) is generated to a null surface, the result
521 * is all zeros. Note that a null surface type is allowed to be used
522 * with all messages, even if it is not specifically indicated as
523 * supported. All of the remaining fields in surface state are ignored
524 * for null surfaces, with the following exceptions:
525 *
526 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
527 * depth buffer’s corresponding state for all render target surfaces,
528 * including null.
529 *
530 * - Surface Format must be R8G8B8A8_UNORM.
531 */
532 struct intel_context *intel = &brw->intel;
533 struct gl_context *ctx = &intel->ctx;
534 uint32_t *surf;
535 unsigned surface_type = BRW_SURFACE_NULL;
536 drm_intel_bo *bo = NULL;
537 unsigned pitch_minus_1 = 0;
538 uint32_t multisampling_state = 0;
539
540 /* _NEW_BUFFERS */
541 const struct gl_framebuffer *fb = ctx->DrawBuffer;
542
543 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
544 6 * 4, 32, &brw->wm.surf_offset[unit]);
545
546 if (fb->Visual.samples > 1) {
547 /* On Gen6, null render targets seem to cause GPU hangs when
548 * multisampling. So work around this problem by rendering into a dummy
549 * color buffer.
550 *
551 * To decrease the amount of memory needed by the workaround buffer, we
552 * set its pitch to 128 bytes (the width of a Y tile). This means that
553 * the amount of memory needed for the workaround buffer is
554 * (width_in_tiles + height_in_tiles - 1) tiles.
555 *
556 * Note that since the workaround buffer will be interpreted by the
557 * hardware as an interleaved multisampled buffer, we need to compute
558 * width_in_tiles and height_in_tiles by dividing the width and height
559 * by 16 rather than the normal Y-tile size of 32.
560 */
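      /* For example, a 1920x1080 multisampled framebuffer gives
       * width_in_tiles = 120 and height_in_tiles = 68, so the workaround
       * buffer is (120 + 68 - 1) * 4096 bytes, roughly 748 kB.
       */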
561 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
562 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
563 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
564 brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo,
565 size_needed);
566 bo = brw->wm.multisampled_null_render_target_bo;
567 surface_type = BRW_SURFACE_2D;
568 pitch_minus_1 = 127;
569 multisampling_state =
570 brw_get_surface_num_multisamples(fb->Visual.samples);
571 }
572
573 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
574 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
575 if (intel->gen < 6) {
576 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
577 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
578 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
579 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
580 }
581 surf[1] = bo ? bo->offset : 0;
582 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
583 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
584
585 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
586 * Notes):
587 *
588 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
589 */
590 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
591 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
592 surf[4] = multisampling_state;
593 surf[5] = 0;
594
595 if (bo) {
596 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
597 brw->wm.surf_offset[unit] + 4,
598 bo, 0,
599 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
600 }
601 }
602
603 /**
604 * Sets up a surface state structure to point at the given region.
605 * While it is only used for the front/back buffer currently, it should be
606 * usable for further buffers when doing ARB_draw_buffers support.
607 */
608 static void
609 brw_update_renderbuffer_surface(struct brw_context *brw,
610 struct gl_renderbuffer *rb,
611 bool layered,
612 unsigned int unit)
613 {
614 struct intel_context *intel = &brw->intel;
615 struct gl_context *ctx = &intel->ctx;
616 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
617 struct intel_mipmap_tree *mt = irb->mt;
618 struct intel_region *region;
619 uint32_t *surf;
620 uint32_t tile_x, tile_y;
621 uint32_t format = 0;
622 /* _NEW_BUFFERS */
623 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
624
625 assert(!layered);
626
627 if (rb->TexImage && !brw->has_surface_tile_offset) {
628 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
629
630 if (tile_x != 0 || tile_y != 0) {
631 /* Original gen4 hardware couldn't draw to a non-tile-aligned
632 * destination in a miptree unless you actually setup your renderbuffer
633 * as a miptree and used the fragile lod/array_index/etc. controls to
634 * select the image. So, instead, we just make a new single-level
635 * miptree and render into that.
636 */
637 intel_renderbuffer_move_to_temp(intel, irb, false);
638 mt = irb->mt;
639 }
640 }
641
642 intel_miptree_used_for_rendering(irb->mt);
643
644 region = irb->mt->region;
645
646 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
647 6 * 4, 32, &brw->wm.surf_offset[unit]);
648
649 format = brw->render_target_format[rb_format];
650 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
651 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
652 __FUNCTION__, _mesa_get_format_name(rb_format));
653 }
654
655 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
656 format << BRW_SURFACE_FORMAT_SHIFT);
657
658 /* reloc */
659 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
660 region->bo->offset);
661
662 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
663 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
664
665 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
666 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
667
668 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
669
670 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
671 /* Note that these offset fields only hold multiples of 4 (X) and 2 (Y),
672 * so tile offsets with the low bits set can't be represented.
673 */
674 assert(tile_x % 4 == 0);
675 assert(tile_y % 2 == 0);
676 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
677 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
678 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
679
680 if (intel->gen < 6) {
681 /* _NEW_COLOR */
682 if (!ctx->Color.ColorLogicOpEnabled &&
683 (ctx->Color.BlendEnabled & (1 << unit)))
684 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
685
686 if (!ctx->Color.ColorMask[unit][0])
687 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
688 if (!ctx->Color.ColorMask[unit][1])
689 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
690 if (!ctx->Color.ColorMask[unit][2])
691 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
692
693 /* Disable writes to the alpha component when the renderbuffer is XRGB,
694 * since the destination has no meaningful alpha channel.
695 */
696 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
697 !ctx->Color.ColorMask[unit][3]) {
698 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
699 }
700 }
701
702 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
703 brw->wm.surf_offset[unit] + 4,
704 region->bo,
705 surf[1] - region->bo->offset,
706 I915_GEM_DOMAIN_RENDER,
707 I915_GEM_DOMAIN_RENDER);
708 }
709
710 /**
711 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
712 */
713 static void
714 brw_update_renderbuffer_surfaces(struct brw_context *brw)
715 {
716 struct intel_context *intel = &brw->intel;
717 struct gl_context *ctx = &brw->intel.ctx;
718 GLuint i;
719
720 /* _NEW_BUFFERS | _NEW_COLOR */
721 /* Update surfaces for drawing buffers */
722 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
723 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
724 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
725 intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
726 ctx->DrawBuffer->Layered, i);
727 } else {
728 intel->vtbl.update_null_renderbuffer_surface(brw, i);
729 }
730 }
731 } else {
732 intel->vtbl.update_null_renderbuffer_surface(brw, 0);
733 }
734 brw->state.dirty.brw |= BRW_NEW_SURFACES;
735 }
736
737 const struct brw_tracked_state brw_renderbuffer_surfaces = {
738 .dirty = {
739 .mesa = (_NEW_COLOR |
740 _NEW_BUFFERS),
741 .brw = BRW_NEW_BATCH,
742 .cache = 0
743 },
744 .emit = brw_update_renderbuffer_surfaces,
745 };
746
747 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
748 .dirty = {
749 .mesa = _NEW_BUFFERS,
750 .brw = BRW_NEW_BATCH,
751 .cache = 0
752 },
753 .emit = brw_update_renderbuffer_surfaces,
754 };
755
756 /**
757 * Construct SURFACE_STATE objects for enabled textures.
758 */
759 static void
760 brw_update_texture_surfaces(struct brw_context *brw)
761 {
762 struct intel_context *intel = &brw->intel;
763 struct gl_context *ctx = &intel->ctx;
764
765 /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
766 * Unfortunately, we're stuck using the gl_program structs until the
767 * ARB_fragment_program front-end gets converted to GLSL IR. These
768 * have the downside that SamplerUnits is split and only contains the
769 * mappings for samplers active in that stage.
770 */
771 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
772 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
773
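   /* _mesa_fls() returns one plus the index of the highest set bit (or 0 if
    * no bits are set), i.e. the number of sampler slots we have to walk.
    */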
774 unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
775
776 for (unsigned s = 0; s < num_samplers; s++) {
777 brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
778 brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
779
780 if (vs->SamplersUsed & (1 << s)) {
781 const unsigned unit = vs->SamplerUnits[s];
782
783 /* _NEW_TEXTURE */
784 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
785 intel->vtbl.update_texture_surface(ctx, unit,
786 brw->vs.surf_offset,
787 SURF_INDEX_VS_TEXTURE(s));
788 }
789 }
790
791 if (fs->SamplersUsed & (1 << s)) {
792 const unsigned unit = fs->SamplerUnits[s];
793
794 /* _NEW_TEXTURE */
795 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
796 intel->vtbl.update_texture_surface(ctx, unit,
797 brw->wm.surf_offset,
798 SURF_INDEX_TEXTURE(s));
799 }
800 }
801 }
802
803 brw->state.dirty.brw |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_texture_surfaces = {
807 .dirty = {
808 .mesa = _NEW_TEXTURE,
809 .brw = BRW_NEW_BATCH |
810 BRW_NEW_VERTEX_PROGRAM |
811 BRW_NEW_FRAGMENT_PROGRAM,
812 .cache = 0
813 },
814 .emit = brw_update_texture_surfaces,
815 };
816
817 void
818 brw_upload_ubo_surfaces(struct brw_context *brw,
819 struct gl_shader *shader,
820 uint32_t *surf_offsets)
821 {
822 struct gl_context *ctx = &brw->intel.ctx;
823 struct intel_context *intel = &brw->intel;
824
825 if (!shader)
826 return;
827
828 for (int i = 0; i < shader->NumUniformBlocks; i++) {
829 struct gl_uniform_buffer_binding *binding;
830 struct intel_buffer_object *intel_bo;
831
832 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
833 intel_bo = intel_buffer_object(binding->BufferObject);
834 drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_bo, INTEL_READ);
835
836 /* Because behavior when referencing outside of the binding's size in the
837 * glBindBufferRange case is undefined, we can just bind the whole buffer
838 * (which is what glBindBufferBase wants anyway) and be a correct implementation.
839 */
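      /* The fragment shader backend reads pull constants with a dword pitch
       * (see brw_upload_wm_pull_constants above), while the other stages use
       * the vec4 layout, hence the GL_FRAGMENT_SHADER check here.
       */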
840 intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
841 bo->size - binding->Offset,
842 &surf_offsets[i],
843 shader->Type == GL_FRAGMENT_SHADER);
844 }
845
846 if (shader->NumUniformBlocks)
847 brw->state.dirty.brw |= BRW_NEW_SURFACES;
848 }
849
850 static void
851 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
852 {
853 struct gl_context *ctx = &brw->intel.ctx;
854 /* _NEW_PROGRAM */
855 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
856
857 if (!prog)
858 return;
859
860 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
861 &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
862 }
863
864 const struct brw_tracked_state brw_wm_ubo_surfaces = {
865 .dirty = {
866 .mesa = _NEW_PROGRAM,
867 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
868 .cache = 0,
869 },
870 .emit = brw_upload_wm_ubo_surfaces,
871 };
872
873 /**
874 * Constructs the binding table for the WM surface state, which maps unit
875 * numbers to surface state objects.
876 */
877 static void
878 brw_upload_wm_binding_table(struct brw_context *brw)
879 {
880 uint32_t *bind;
881 int i;
882
883 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
884 gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
885 }
886
887 /* Might want to calculate nr_surfaces first, to avoid taking up so much
888 * space for the binding table.
889 */
890 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
891 sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
892 32, &brw->wm.bind_bo_offset);
893
894 /* BRW_NEW_SURFACES */
895 for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
896 bind[i] = brw->wm.surf_offset[i];
897 }
898
899 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
900 }
901
902 const struct brw_tracked_state brw_wm_binding_table = {
903 .dirty = {
904 .mesa = 0,
905 .brw = (BRW_NEW_BATCH |
906 BRW_NEW_SURFACES),
907 .cache = 0
908 },
909 .emit = brw_upload_wm_binding_table,
910 };
911
912 void
913 gen4_init_vtable_surface_functions(struct brw_context *brw)
914 {
915 struct intel_context *intel = &brw->intel;
916
917 intel->vtbl.update_texture_surface = brw_update_texture_surface;
918 intel->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
919 intel->vtbl.update_null_renderbuffer_surface =
920 brw_update_null_renderbuffer_surface;
921 intel->vtbl.create_constant_surface = brw_create_constant_surface;
922 }