i965/gen6: Initial implementation of MSAA.
[mesa.git] src/mesa/drivers/dri/i965/brw_misc_state.c
/*
 Copyright (C) Intel Corp. 2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

#include "main/fbobject.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

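   /* Illustrative example (values assumed, not taken from this code): for a
    * 1024x768 drawable, DWord 2 below packs xmax = 1023 (0x3ff) into bits
    * 15:0 and ymax = 767 (0x2ff) into bits 31:16, i.e. 0x02ff03ff; the
    * hardware discards rendering outside this rectangle.
    */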
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the gen6 binding table pointers, which point to each stage's array
 * of surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
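   /* For the GS and clip units, bit 0 of the state pointer doubles as the
    * unit's enable bit, hence the "| 1" on the offsets below.
    */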
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;
   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
        intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (intel->gen >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gen5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above, Bit
          * 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
          *     If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT.
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(intel_rb_format(drb)));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *stencil_mt = NULL;
   struct intel_region *hiz_region = NULL;
   unsigned int len;
   bool separate_stencil = false;

   /* Amount by which drawing should be offset in order to draw to the
    * appropriate miplevel/zoffset/cubeface.  We will extract these values
    * from depth_irb or stencil_irb once we determine which is present.
    */
   uint32_t draw_x = 0, draw_y = 0;

   /* Masks used to determine how much of the draw_x and draw_y offsets should
    * be performed using the fine adjustment of "depth coordinate offset X/Y"
    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
    * performed by changing the base addresses of the buffers.
    *
    * Since the HiZ, depth, and stencil buffers all use the same "depth
    * coordinate offset X/Y" values, we need to make sure that the coarse
    * adjustment will be possible to apply to all three buffers.  Since coarse
    * adjustment can only be applied in multiples of the tile size, we will OR
    * together the tile masks of all the buffers to determine which offsets to
    * perform as fine adjustments.
    */
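   /* Illustrative example (tile dimensions assumed here, not derived from
    * this code): a Y-tiled depth buffer with cpp 4 spans 32x32 pixels per
    * tile, giving masks of 31/31, while a W-tiled separate stencil buffer
    * contributes 63/63; the OR is 63/63, so only offsets in multiples of 64
    * can be folded into the buffer base addresses.
    */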
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_irb) {
      intel_region_get_tile_masks(depth_irb->mt->region,
                                  &tile_mask_x, &tile_mask_y);
   }

   if (depth_irb &&
       depth_irb->mt &&
       depth_irb->mt->hiz_mt) {
      hiz_region = depth_irb->mt->hiz_mt->region;

      uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
      intel_region_get_tile_masks(hiz_region,
                                  &hiz_tile_mask_x, &hiz_tile_mask_y);

      /* Each HiZ row represents 2 rows of pixels */
      hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
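      /* e.g. a HiZ tile mask of 31 (in HiZ rows) becomes 63 in pixel rows,
       * since each HiZ row covers two rows of pixels.
       */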

      tile_mask_x |= hiz_tile_mask_x;
      tile_mask_y |= hiz_tile_mask_y;
   }

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* Find the real separate stencil mt if present. */
   if (stencil_irb) {
      stencil_mt = stencil_irb->mt;
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8) {
         separate_stencil = true;

         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_region_get_tile_masks(stencil_mt->region,
                                     &stencil_tile_mask_x,
                                     &stencil_tile_mask_y);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil)
      depth_irb = stencil_irb;

   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   if (!depth_irb && !separate_stencil) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && separate_stencil) {
      uint32_t tile_x, tile_y;

      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value.  From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable.
       *
       * The tiled bit must be set.  From the Sandybridge PRM, Volume 2,
       * Part 1, Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 "Tiled
       * Surface":
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      draw_x = stencil_irb->draw_x;
      draw_y = stencil_irb->draw_y;
      tile_x = draw_x & tile_mask_x;
      tile_y = draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *     "The 3 LSBs of both offsets must be zero to ensure correct
       *     alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      tile_x &= ~7;
      tile_y &= ~7;
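      /* Illustrative: with tile_mask_y = 63 and draw_y = 12, tile_y = 12 is
       * rounded down to 8, so the offset actually applied can differ from
       * draw_y by up to 7 rows for unaligned miplevels.
       */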

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (1 << 21) | /* separate stencil enable */
                (1 << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                (1 << 27) | /* tiled surface */
                (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
                ((stencil_irb->Base.Base.Height + tile_y - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_irb->mt->region;
      uint32_t tile_x, tile_y, offset;

      /* If using separate stencil, hiz must be enabled. */
      assert(!separate_stencil || hiz_region);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_region || region->tiling == I915_TILING_Y);

      draw_x = depth_irb->draw_x;
      draw_y = depth_irb->draw_y;
      tile_x = draw_x & tile_mask_x;
      tile_y = draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *     "The 3 LSBs of both offsets must be zero to ensure correct
       *     alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      tile_x &= ~7;
      tile_y &= ~7;

      offset = intel_region_get_aligned_offset(region,
                                               draw_x & ~tile_mask_x,
                                               draw_y & ~tile_mask_y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (brw_depthbuffer_format(brw) << 18) |
                ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                ((region->tiling != I915_TILING_NONE) << 27) |
                (BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
                (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
                (((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }


   if (hiz_region || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set.  Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER.  Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_region) {
         uint32_t hiz_offset =
            intel_region_get_aligned_offset(hiz_region,
                                            draw_x & ~tile_mask_x,
                                            (draw_y & ~tile_mask_y) / 2);

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
         OUT_RELOC(hiz_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   hiz_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;

         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled; in fact it's W tiled.
          */
         uint32_t stencil_offset =
            (draw_y & ~tile_mask_y) * region->pitch +
            (draw_x & ~tile_mask_x) * 64;
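         /* Illustrative (assuming cpp == 1 for S8): a W tile is 64x64
          * bytes, so a 64-pixel step in x advances one tile column, i.e.
          * 64*64 bytes, which is what the "* 64" above computes, while a
          * 64-row step in y advances 64 * pitch bytes.
          */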

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *     The pitch must be set to 2x the value computed based on
          *     width, as the stencil buffer is stored with two rows
          *     interleaved.
          */
         OUT_BATCH(2 * region->pitch * region->cpp - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   stencil_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety.  If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *     packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_region) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};


/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first.  If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout.  But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window we have to invert the Y axis
    * in order to match the OpenGL pixel coordinate system, and our
    * offset must be matched to the window position.  If we're drawing
    * to a user-created FBO then our native pixel coordinate system
    * works just fine, and there's no window system to worry about.
    */
   if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
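   /* Illustrative (window height assumed): a 300-pixel-tall window gives
    * (32 - (300 & 31)) & 31 = 20, shifting the 32-row pattern so it stays
    * anchored to the window under the Y-axis inversion.
    */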
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1<<13);
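   /* Illustrative: StippleFactor = 2 yields tmp = 0.5 and tmpi = 4096,
    * i.e. 0.5 with 13 fractional bits, so the DWord below is 0x10000002.
    */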
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invariant_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen == 6) {
      int i;

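      /* Initialize each of the four streamed vertex buffer index (SVBI)
       * registers to 0, with a maximum index of 0xffffffff (presumably so
       * writes for transform feedback are never clamped by stale state).
       */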
      for (i = 0; i < 4; i++) {
         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
         OUT_BATCH(i << SVB_INDEX_SHIFT);
         OUT_BATCH(0);
         OUT_BATCH(0xffffffff);
         ADVANCE_BATCH();
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invariant_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
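      /* In each address DWord of STATE_BASE_ADDRESS, bit 0 is the "modify
       * enable" flag and the addresses themselves are page-aligned, so
       * emitting 1 means "base address 0, and do update this field"; the
       * OUT_RELOCs below pass a delta of 1 for the same reason.
       */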
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};