src/mesa/drivers/dri/i965/brw_misc_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33
  34 #include "intel_batchbuffer.h"
  35 #include "intel_fbo.h"
  36 #include "intel_mipmap_tree.h"
  37 #include "intel_regions.h"
  38
  39 #include "brw_context.h"
  40 #include "brw_state.h"
  41 #include "brw_defines.h"
  42
  43 #include "main/fbobject.h"
  44
  45 /* Constant single cliprect for framebuffer object or DRI2 drawing */
  46 static void upload_drawing_rect(struct brw_context *brw)
  47 {
  48    struct intel_context *intel = &brw->intel;
  49    struct gl_context *ctx = &intel->ctx;
  50
  51    BEGIN_BATCH(4);
  52    OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
  53    OUT_BATCH(0); /* xmin, ymin */
  54    OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
  55             ((ctx->DrawBuffer->Height - 1) << 16));
  56    OUT_BATCH(0);
  57    ADVANCE_BATCH();
  58 }
  59
  60 const struct brw_tracked_state brw_drawing_rect = {
  61    .dirty = {
  62       .mesa = _NEW_BUFFERS,
  63       .brw = BRW_NEW_CONTEXT,
  64       .cache = 0
  65    },
  66    .emit = upload_drawing_rect
  67 };
  68
  69 /**
  70  * Upload the binding table pointers, which point each stage's array of surface
  71  * state pointers.
  72  *
  73  * The binding table pointers are relative to the surface state base address,
  74  * which points at the batchbuffer containing the streamed batch state.
  75  */
  76 static void upload_binding_table_pointers(struct brw_context *brw)
  77 {
  78    struct intel_context *intel = &brw->intel;
  79
  80    BEGIN_BATCH(6);
  81    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
  82    OUT_BATCH(brw->vs.bind_bo_offset);
  83    OUT_BATCH(0); /* gs */
  84    OUT_BATCH(0); /* clip */
  85    OUT_BATCH(0); /* sf */
  86    OUT_BATCH(brw->wm.bind_bo_offset);
  87    ADVANCE_BATCH();
  88 }
  89
  90 const struct brw_tracked_state brw_binding_table_pointers = {
  91    .dirty = {
  92       .mesa = 0,
  93       .brw = (BRW_NEW_BATCH |
  94               BRW_NEW_STATE_BASE_ADDRESS |
  95               BRW_NEW_VS_BINDING_TABLE |
  96               BRW_NEW_GS_BINDING_TABLE |
  97               BRW_NEW_PS_BINDING_TABLE),
  98       .cache = 0,
  99    },
 100    .emit = upload_binding_table_pointers,
 101 };
 102
 103 /**
 104  * Upload the binding table pointers, which point each stage's array of surface
 105  * state pointers.
 106  *
 107  * The binding table pointers are relative to the surface state base address,
 108  * which points at the batchbuffer containing the streamed batch state.
 109  */
 110 static void upload_gen6_binding_table_pointers(struct brw_context *brw)
 111 {
 112    struct intel_context *intel = &brw->intel;
 113
 114    BEGIN_BATCH(4);
 115    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
 116              GEN6_BINDING_TABLE_MODIFY_VS |
 117              GEN6_BINDING_TABLE_MODIFY_GS |
 118              GEN6_BINDING_TABLE_MODIFY_PS |
 119              (4 - 2));
 120    OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
 121    OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
 122    OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
 123    ADVANCE_BATCH();
 124 }
 125
 126 const struct brw_tracked_state gen6_binding_table_pointers = {
 127    .dirty = {
 128       .mesa = 0,
 129       .brw = (BRW_NEW_BATCH |
 130               BRW_NEW_STATE_BASE_ADDRESS |
 131               BRW_NEW_VS_BINDING_TABLE |
 132               BRW_NEW_GS_BINDING_TABLE |
 133               BRW_NEW_PS_BINDING_TABLE),
 134       .cache = 0,
 135    },
 136    .emit = upload_gen6_binding_table_pointers,
 137 };
 138
 139 /**
 140  * Upload pointers to the per-stage state.
 141  *
 142  * The state pointers in this packet are all relative to the general state
 143  * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 144  */
 145 static void upload_pipelined_state_pointers(struct brw_context *brw )
 146 {
 147    struct intel_context *intel = &brw->intel;
 148
 149    if (intel->gen == 5) {
 150       /* Need to flush before changing clip max threads for errata. */
 151       BEGIN_BATCH(1);
 152       OUT_BATCH(MI_FLUSH);
 153       ADVANCE_BATCH();
 154    }
 155
 156    BEGIN_BATCH(7);
 157    OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
 158    OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 159              brw->vs.state_offset);
 160    if (brw->gs.prog_active)
 161       OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 162                 brw->gs.state_offset | 1);
 163    else
 164       OUT_BATCH(0);
 165    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 166              brw->clip.state_offset | 1);
 167    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 168              brw->sf.state_offset);
 169    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 170              brw->wm.state_offset);
 171    OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 172              brw->cc.state_offset);
 173    ADVANCE_BATCH();
 174
 175    brw->state.dirty.brw |= BRW_NEW_PSP;
 176 }
 177
 178 static void upload_psp_urb_cbs(struct brw_context *brw )
 179 {
 180    upload_pipelined_state_pointers(brw);
 181    brw_upload_urb_fence(brw);
 182    brw_upload_cs_urb_state(brw);
 183 }
 184
 185 const struct brw_tracked_state brw_psp_urb_cbs = {
 186    .dirty = {
 187       .mesa = 0,
 188       .brw = (BRW_NEW_URB_FENCE |
 189               BRW_NEW_BATCH |
 190               BRW_NEW_STATE_BASE_ADDRESS),
 191       .cache = (CACHE_NEW_VS_UNIT |
 192                 CACHE_NEW_GS_UNIT |
 193                 CACHE_NEW_GS_PROG |
 194                 CACHE_NEW_CLIP_UNIT |
 195                 CACHE_NEW_SF_UNIT |
 196                 CACHE_NEW_WM_UNIT |
 197                 CACHE_NEW_CC_UNIT)
 198    },
 199    .emit = upload_psp_urb_cbs,
 200 };
 201
 202 uint32_t
 203 brw_depthbuffer_format(struct brw_context *brw)
 204 {
 205    struct intel_context *intel = &brw->intel;
 206    struct gl_context *ctx = &intel->ctx;
 207    struct gl_framebuffer *fb = ctx->DrawBuffer;
 208    struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
 209    struct intel_renderbuffer *srb;
 210
 211    if (!drb &&
 212        (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
 213        !srb->mt->stencil_mt &&
 214        (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
 215         intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
 216       drb = srb;
 217    }
 218
 219    if (!drb)
 220       return BRW_DEPTHFORMAT_D32_FLOAT;
 221
 222    switch (drb->mt->format) {
 223    case MESA_FORMAT_Z16:
 224       return BRW_DEPTHFORMAT_D16_UNORM;
 225    case MESA_FORMAT_Z32_FLOAT:
 226       return BRW_DEPTHFORMAT_D32_FLOAT;
 227    case MESA_FORMAT_X8_Z24:
 228       if (intel->gen >= 6) {
 229          return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
 230       } else {
 231          /* Use D24_UNORM_S8, not D24_UNORM_X8.
 232           *
 233           * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
 234           * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
 235           * 3DSTATE_DEPTH_BUFFER.Surface_Format).
 236           *
 237           * However, on Gen5, D24_UNORM_X8 may be used only if separate
 238           * stencil is enabled, and we never enable it. From the Ironlake PRM,
 239           * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
 240           *     If this field is disabled, the Surface Format of the depth
 241           *     buffer cannot be D24_UNORM_X8_UINT.
 242           */
 243          return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
 244       }
 245    case MESA_FORMAT_S8_Z24:
 246       return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
 247    case MESA_FORMAT_Z32_FLOAT_X24S8:
 248       return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
 249    default:
 250       _mesa_problem(ctx, "Unexpected depth format %s\n",
 251                     _mesa_get_format_name(intel_rb_format(drb)));
 252       return BRW_DEPTHFORMAT_D16_UNORM;
 253    }
 254 }
 255
 256 static void emit_depthbuffer(struct brw_context *brw)
 257 {
 258    struct intel_context *intel = &brw->intel;
 259    struct gl_context *ctx = &intel->ctx;
 260    struct gl_framebuffer *fb = ctx->DrawBuffer;
 261    /* _NEW_BUFFERS */
 262    struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
 263    struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 264    struct intel_mipmap_tree *stencil_mt = NULL;
 265    struct intel_region *hiz_region = NULL;
 266    unsigned int len;
 267    bool separate_stencil = false;
 268
 269    /* Amount by which drawing should be offset in order to draw to the
 270     * appropriate miplevel/zoffset/cubeface.  We will extract these values
 271     * from depth_irb or stencil_irb once we determine which is present.
 272     */
 273    uint32_t draw_x = 0, draw_y = 0;
 274
 275    /* Masks used to determine how much of the draw_x and draw_y offsets should
 276     * be performed using the fine adjustment of "depth coordinate offset X/Y"
 277     * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
 278     * performed by changing the base addresses of the buffers.
 279     *
 280     * Since the HiZ, depth, and stencil buffers all use the same "depth
 281     * coordinate offset X/Y" values, we need to make sure that the coarse
 282     * adjustment will be possible to apply to all three buffers.  Since coarse
 283     * adjustment can only be applied in multiples of the tile size, we will OR
 284     * together the tile masks of all the buffers to determine which offsets to
 285     * perform as fine adjustments.
 286     */
 287    uint32_t tile_mask_x = 0, tile_mask_y = 0;
 288
 289    if (depth_irb) {
 290       intel_region_get_tile_masks(depth_irb->mt->region,
 291                                   &tile_mask_x, &tile_mask_y);
 292    }
 293
 294    if (depth_irb &&
 295        depth_irb->mt &&
 296        depth_irb->mt->hiz_mt) {
 297       hiz_region = depth_irb->mt->hiz_mt->region;
 298
 299       uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
 300       intel_region_get_tile_masks(hiz_region,
 301                                   &hiz_tile_mask_x, &hiz_tile_mask_y);
 302
 303       /* Each HiZ row represents 2 rows of pixels */
 304       hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
 305
 306       tile_mask_x |= hiz_tile_mask_x;
 307       tile_mask_y |= hiz_tile_mask_y;
 308    }
 309
 310    /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
 311     * non-pipelined state that will need the PIPE_CONTROL workaround.
 312     */
 313    if (intel->gen == 6) {
 314       intel_emit_post_sync_nonzero_flush(intel);
 315       intel_emit_depth_stall_flushes(intel);
 316    }
 317
 318    /* Find the real separate stencil mt if present. */
 319    if (stencil_irb) {
 320       stencil_mt = stencil_irb->mt;
 321       if (stencil_mt->stencil_mt)
 322          stencil_mt = stencil_mt->stencil_mt;
 323
 324       if (stencil_mt->format == MESA_FORMAT_S8) {
 325          separate_stencil = true;
 326
 327          /* Separate stencil buffer uses 64x64 tiles. */
 328          tile_mask_x |= 63;
 329          tile_mask_y |= 63;
 330       } else {
 331          uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
 332          intel_region_get_tile_masks(stencil_mt->region,
 333                                      &stencil_tile_mask_x,
 334                                      &stencil_tile_mask_y);
 335
 336          tile_mask_x |= stencil_tile_mask_x;
 337          tile_mask_y |= stencil_tile_mask_y;
 338       }
 339    }
 340
 341    /* If there's a packed depth/stencil bound to stencil only, we need to
 342     * emit the packed depth/stencil buffer packet.
 343     */
 344    if (!depth_irb && stencil_irb && !separate_stencil)
 345       depth_irb = stencil_irb;
 346
 347    if (intel->gen >= 6)
 348       len = 7;
 349    else if (intel->is_g4x || intel->gen == 5)
 350       len = 6;
 351    else
 352       len = 5;
 353
 354    if (!depth_irb && !separate_stencil) {
 355       BEGIN_BATCH(len);
 356       OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
 357       OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
 358                 (BRW_SURFACE_NULL << 29));
 359       OUT_BATCH(0);
 360       OUT_BATCH(0);
 361       OUT_BATCH(0);
 362
 363       if (intel->is_g4x || intel->gen >= 5)
 364          OUT_BATCH(0);
 365
 366       if (intel->gen >= 6)
 367          OUT_BATCH(0);
 368
 369       ADVANCE_BATCH();
 370
 371    } else if (!depth_irb && separate_stencil) {
 372       uint32_t tile_x, tile_y;
 373
 374       /*
 375        * There exists a separate stencil buffer but no depth buffer.
 376        *
 377        * The stencil buffer inherits most of its fields from
 378        * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
 379        * height.
 380        *
 381        * Enable the hiz bit because it and the separate stencil bit must have
 382        * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
 383        * 1.21 "Separate Stencil Enable":
 384        *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
 385        *     Enable must also be enabled.
 386        *
 387        *     [DevGT]: This field must be set to the same value (enabled or
 388        *     disabled) as Hierarchical Depth Buffer Enable
 389        *
 390        * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
 391        * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
 392        *     [DevGT+]: This field must be set to TRUE.
 393        */
 394       assert(intel->has_separate_stencil);
 395
 396       draw_x = stencil_irb->draw_x;
 397       draw_y = stencil_irb->draw_y;
 398       tile_x = draw_x & tile_mask_x;
 399       tile_y = draw_y & tile_mask_y;
 400
 401       BEGIN_BATCH(len);
 402       OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
 403       OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
 404                 (1 << 21) | /* separate stencil enable */
 405                 (1 << 22) | /* hiz enable */
 406                 (BRW_TILEWALK_YMAJOR << 26) |
 407                 (1 << 27) | /* tiled surface */
 408                 (BRW_SURFACE_2D << 29));
 409       OUT_BATCH(0);
 410       OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
 411                  (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
 412       OUT_BATCH(0);
 413
 414       if (intel->is_g4x || intel->gen >= 5)
 415          OUT_BATCH(tile_x | (tile_y << 16));
 416       else
 417          assert(tile_x == 0 && tile_y == 0);
 418
 419       if (intel->gen >= 6)
 420          OUT_BATCH(0);
 421
 422       ADVANCE_BATCH();
 423
 424    } else {
 425       struct intel_region *region = depth_irb->mt->region;
 426       uint32_t tile_x, tile_y, offset;
 427
 428       /* If using separate stencil, hiz must be enabled. */
 429       assert(!separate_stencil || hiz_region);
 430
 431       assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
 432       assert(!hiz_region || region->tiling == I915_TILING_Y);
 433
 434       draw_x = depth_irb->draw_x;
 435       draw_y = depth_irb->draw_y;
 436       tile_x = draw_x & tile_mask_x;
 437       tile_y = draw_y & tile_mask_y;
 438
 439       offset = intel_region_get_aligned_offset(region,
 440                                                draw_x & ~tile_mask_x,
 441                                                draw_y & ~tile_mask_y);
 442
 443       BEGIN_BATCH(len);
 444       OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
 445       OUT_BATCH(((region->pitch * region->cpp) - 1) |
 446                 (brw_depthbuffer_format(brw) << 18) |
 447                 ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
 448                 ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
 449                 (BRW_TILEWALK_YMAJOR << 26) |
 450                 ((region->tiling != I915_TILING_NONE) << 27) |
 451                 (BRW_SURFACE_2D << 29));
 452       OUT_RELOC(region->bo,
 453                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 454                 offset);
 455       OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
 456                 (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
 457                 (((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
 458       OUT_BATCH(0);
 459
 460       if (intel->is_g4x || intel->gen >= 5)
 461          OUT_BATCH(tile_x | (tile_y << 16));
 462       else
 463          assert(tile_x == 0 && tile_y == 0);
 464
 465       if (intel->gen >= 6)
 466          OUT_BATCH(0);
 467
 468       ADVANCE_BATCH();
 469    }
 470
 471    if (hiz_region || separate_stencil) {
 472       /*
 473        * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
 474        * stencil enable' and 'hiz enable' bits were set. Therefore we must
 475        * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
 476        * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
 477        * failure to do so causes hangs on gen5 and a stall on gen6.
 478        */
 479
 480       /* Emit hiz buffer. */
 481       if (hiz_region) {
 482          uint32_t hiz_offset =
 483             intel_region_get_aligned_offset(hiz_region,
 484                                             draw_x & ~tile_mask_x,
 485                                             (draw_y & ~tile_mask_y) / 2);
 486
 487          BEGIN_BATCH(3);
 488          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
 489          OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
 490          OUT_RELOC(hiz_region->bo,
 491                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 492                    hiz_offset);
 493          ADVANCE_BATCH();
 494       } else {
 495          BEGIN_BATCH(3);
 496          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
 497          OUT_BATCH(0);
 498          OUT_BATCH(0);
 499          ADVANCE_BATCH();
 500       }
 501
 502       /* Emit stencil buffer. */
 503       if (separate_stencil) {
 504          struct intel_region *region = stencil_mt->region;
 505
 506          /* Note: we can't compute the stencil offset using
 507           * intel_region_get_aligned_offset(), because stencil_region claims
 508           * that the region is untiled; in fact it's W tiled.
 509           */
 510          uint32_t stencil_offset =
 511             (draw_y & ~tile_mask_y) * region->pitch +
 512             (draw_x & ~tile_mask_x) * 64;
 513
 514          BEGIN_BATCH(3);
 515          OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
 516          /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
 517           * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
 518           *    The pitch must be set to 2x the value computed based on width, as
 519           *    the stencil buffer is stored with two rows interleaved.
 520           */
 521          OUT_BATCH(2 * region->pitch * region->cpp - 1);
 522          OUT_RELOC(region->bo,
 523                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 524                    stencil_offset);
 525          ADVANCE_BATCH();
 526       } else {
 527          BEGIN_BATCH(3);
 528          OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
 529          OUT_BATCH(0);
 530          OUT_BATCH(0);
 531          ADVANCE_BATCH();
 532       }
 533    }
 534
 535    /*
 536     * On Gen >= 6, emit clear params for safety. If using hiz, then clear
 537     * params must be emitted.
 538     *
 539     * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
 540     *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
 541     *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
 542     */
 543    if (intel->gen >= 6 || hiz_region) {
 544       if (intel->gen == 6)
 545          intel_emit_post_sync_nonzero_flush(intel);
 546
 547       BEGIN_BATCH(2);
 548       OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
 549       OUT_BATCH(0);
 550       ADVANCE_BATCH();
 551    }
 552 }
 553
 554 const struct brw_tracked_state brw_depthbuffer = {
 555    .dirty = {
 556       .mesa = _NEW_BUFFERS,
 557       .brw = BRW_NEW_BATCH,
 558       .cache = 0,
 559    },
 560    .emit = emit_depthbuffer,
 561 };
 562
 563
 564
 565 /***********************************************************************
 566  * Polygon stipple packet
 567  */
 568
 569 static void upload_polygon_stipple(struct brw_context *brw)
 570 {
 571    struct intel_context *intel = &brw->intel;
 572    struct gl_context *ctx = &brw->intel.ctx;
 573    GLuint i;
 574
 575    /* _NEW_POLYGON */
 576    if (!ctx->Polygon.StippleFlag)
 577       return;
 578
 579    if (intel->gen == 6)
 580       intel_emit_post_sync_nonzero_flush(intel);
 581
 582    BEGIN_BATCH(33);
 583    OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
 584
 585    /* Polygon stipple is provided in OpenGL order, i.e. bottom
 586     * row first.  If we're rendering to a window (i.e. the
 587     * default frame buffer object, 0), then we need to invert
 588     * it to match our pixel layout.  But if we're rendering
 589     * to a FBO (i.e. any named frame buffer object), we *don't*
 590     * need to invert - we already match the layout.
 591     */
 592    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 593       for (i = 0; i < 32; i++)
 594           OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
 595    }
 596    else {
 597       for (i = 0; i < 32; i++)
 598          OUT_BATCH(ctx->PolygonStipple[i]);
 599    }
 600    CACHED_BATCH();
 601 }
 602
 603 const struct brw_tracked_state brw_polygon_stipple = {
 604    .dirty = {
 605       .mesa = (_NEW_POLYGONSTIPPLE |
 606                _NEW_POLYGON),
 607       .brw = BRW_NEW_CONTEXT,
 608       .cache = 0
 609    },
 610    .emit = upload_polygon_stipple
 611 };
 612
 613
 614 /***********************************************************************
 615  * Polygon stipple offset packet
 616  */
 617
 618 static void upload_polygon_stipple_offset(struct brw_context *brw)
 619 {
 620    struct intel_context *intel = &brw->intel;
 621    struct gl_context *ctx = &brw->intel.ctx;
 622
 623    /* _NEW_POLYGON */
 624    if (!ctx->Polygon.StippleFlag)
 625       return;
 626
 627    if (intel->gen == 6)
 628       intel_emit_post_sync_nonzero_flush(intel);
 629
 630    BEGIN_BATCH(2);
 631    OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
 632
 633    /* _NEW_BUFFERS
 634     *
 635     * If we're drawing to a system window we have to invert the Y axis
 636     * in order to match the OpenGL pixel coordinate system, and our
 637     * offset must be matched to the window position.  If we're drawing
 638     * to a user-created FBO then our native pixel coordinate system
 639     * works just fine, and there's no window system to worry about.
 640     */
 641    if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
 642       OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
 643    else
 644       OUT_BATCH(0);
 645    CACHED_BATCH();
 646 }
 647
 648 const struct brw_tracked_state brw_polygon_stipple_offset = {
 649    .dirty = {
 650       .mesa = (_NEW_BUFFERS |
 651                _NEW_POLYGON),
 652       .brw = BRW_NEW_CONTEXT,
 653       .cache = 0
 654    },
 655    .emit = upload_polygon_stipple_offset
 656 };
 657
 658 /**********************************************************************
 659  * AA Line parameters
 660  */
 661 static void upload_aa_line_parameters(struct brw_context *brw)
 662 {
 663    struct intel_context *intel = &brw->intel;
 664    struct gl_context *ctx = &brw->intel.ctx;
 665
 666    if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
 667       return;
 668
 669    if (intel->gen == 6)
 670       intel_emit_post_sync_nonzero_flush(intel);
 671
 672    OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
 673    /* use legacy aa line coverage computation */
 674    OUT_BATCH(0);
 675    OUT_BATCH(0);
 676    CACHED_BATCH();
 677 }
 678
 679 const struct brw_tracked_state brw_aa_line_parameters = {
 680    .dirty = {
 681       .mesa = _NEW_LINE,
 682       .brw = BRW_NEW_CONTEXT,
 683       .cache = 0
 684    },
 685    .emit = upload_aa_line_parameters
 686 };
 687
 688 /***********************************************************************
 689  * Line stipple packet
 690  */
 691
 692 static void upload_line_stipple(struct brw_context *brw)
 693 {
 694    struct intel_context *intel = &brw->intel;
 695    struct gl_context *ctx = &brw->intel.ctx;
 696    GLfloat tmp;
 697    GLint tmpi;
 698
 699    if (!ctx->Line.StippleFlag)
 700       return;
 701
 702    if (intel->gen == 6)
 703       intel_emit_post_sync_nonzero_flush(intel);
 704
 705    BEGIN_BATCH(3);
 706    OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
 707    OUT_BATCH(ctx->Line.StipplePattern);
 708    tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
 709    tmpi = tmp * (1<<13);
 710    OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
 711    CACHED_BATCH();
 712 }
 713
 714 const struct brw_tracked_state brw_line_stipple = {
 715    .dirty = {
 716       .mesa = _NEW_LINE,
 717       .brw = BRW_NEW_CONTEXT,
 718       .cache = 0
 719    },
 720    .emit = upload_line_stipple
 721 };
 722
 723
 724 /***********************************************************************
 725  * Misc invariant state packets
 726  */
 727
 728 static void upload_invariant_state( struct brw_context *brw )
 729 {
 730    struct intel_context *intel = &brw->intel;
 731
 732    /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
 733    if (intel->gen == 6)
 734       intel_emit_post_sync_nonzero_flush(intel);
 735
 736    /* Select the 3D pipeline (as opposed to media) */
 737    BEGIN_BATCH(1);
 738    OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
 739    ADVANCE_BATCH();
 740
 741    if (intel->gen < 6) {
 742       /* Disable depth offset clamping. */
 743       BEGIN_BATCH(2);
 744       OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
 745       OUT_BATCH_F(0.0);
 746       ADVANCE_BATCH();
 747    }
 748
 749    if (intel->gen >= 6) {
 750       int i;
 751       int len = intel->gen >= 7 ? 4 : 3;
 752
 753       BEGIN_BATCH(len);
 754       OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
 755       OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
 756                 MS_NUMSAMPLES_1);
 757       OUT_BATCH(0); /* positions for 4/8-sample */
 758       if (intel->gen >= 7)
 759          OUT_BATCH(0);
 760       ADVANCE_BATCH();
 761
 762       BEGIN_BATCH(2);
 763       OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
 764       OUT_BATCH(1);
 765       ADVANCE_BATCH();
 766
 767       if (intel->gen < 7) {
 768          for (i = 0; i < 4; i++) {
 769             BEGIN_BATCH(4);
 770             OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
 771             OUT_BATCH(i << SVB_INDEX_SHIFT);
 772             OUT_BATCH(0);
 773             OUT_BATCH(0xffffffff);
 774             ADVANCE_BATCH();
 775          }
 776       }
 777    }
 778
 779    BEGIN_BATCH(2);
 780    OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
 781    OUT_BATCH(0);
 782    ADVANCE_BATCH();
 783
 784    BEGIN_BATCH(1);
 785    OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
 786              (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
 787    ADVANCE_BATCH();
 788 }
 789
 790 const struct brw_tracked_state brw_invariant_state = {
 791    .dirty = {
 792       .mesa = 0,
 793       .brw = BRW_NEW_CONTEXT,
 794       .cache = 0
 795    },
 796    .emit = upload_invariant_state
 797 };
 798
 799 /**
 800  * Define the base addresses which some state is referenced from.
 801  *
 802  * This allows us to avoid having to emit relocations for the objects,
 803  * and is actually required for binding table pointers on gen6.
 804  *
 805  * Surface state base address covers binding table pointers and
 806  * surface state objects, but not the surfaces that the surface state
 807  * objects point to.
 808  */
 809 static void upload_state_base_address( struct brw_context *brw )
 810 {
 811    struct intel_context *intel = &brw->intel;
 812
 813    /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
 814     * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
 815     * programmed prior to STATE_BASE_ADDRESS.
 816     *
 817     * However, given that the instruction SBA (general state base
 818     * address) on this chipset is always set to 0 across X and GL,
 819     * maybe this isn't required for us in particular.
 820     */
 821
 822    if (intel->gen >= 6) {
 823       if (intel->gen == 6)
 824          intel_emit_post_sync_nonzero_flush(intel);
 825
 826        BEGIN_BATCH(10);
 827        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
 828        /* General state base address: stateless DP read/write requests */
 829        OUT_BATCH(1);
 830        /* Surface state base address:
 831         * BINDING_TABLE_STATE
 832         * SURFACE_STATE
 833         */
 834        OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
 835         /* Dynamic state base address:
 836          * SAMPLER_STATE
 837          * SAMPLER_BORDER_COLOR_STATE
 838          * CLIP, SF, WM/CC viewport state
 839          * COLOR_CALC_STATE
 840          * DEPTH_STENCIL_STATE
 841          * BLEND_STATE
 842          * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
 843          * Disable is clear, which we rely on)
 844          */
 845        OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
 846                                    I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
 847
 848        OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
 849        OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 850                  1); /* Instruction base address: shader kernels (incl. SIP) */
 851
 852        OUT_BATCH(1); /* General state upper bound */
 853        /* Dynamic state upper bound.  Although the documentation says that
 854         * programming it to zero will cause it to be ignored, that is a lie.
 855         * If this isn't programmed to a real bound, the sampler border color
 856         * pointer is rejected, causing border color to mysteriously fail.
 857         */
 858        OUT_BATCH(0xfffff001);
 859        OUT_BATCH(1); /* Indirect object upper bound */
 860        OUT_BATCH(1); /* Instruction access upper bound */
 861        ADVANCE_BATCH();
 862    } else if (intel->gen == 5) {
 863        BEGIN_BATCH(8);
 864        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
 865        OUT_BATCH(1); /* General state base address */
 866        OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
 867                  1); /* Surface state base address */
 868        OUT_BATCH(1); /* Indirect object base address */
 869        OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 870                  1); /* Instruction base address */
 871        OUT_BATCH(0xfffff001); /* General state upper bound */
 872        OUT_BATCH(1); /* Indirect object upper bound */
 873        OUT_BATCH(1); /* Instruction access upper bound */
 874        ADVANCE_BATCH();
 875    } else {
 876        BEGIN_BATCH(6);
 877        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
 878        OUT_BATCH(1); /* General state base address */
 879        OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
 880                  1); /* Surface state base address */
 881        OUT_BATCH(1); /* Indirect object base address */
 882        OUT_BATCH(1); /* General state upper bound */
 883        OUT_BATCH(1); /* Indirect object upper bound */
 884        ADVANCE_BATCH();
 885    }
 886
 887    /* According to section 3.6.1 of VOL1 of the 965 PRM,
 888     * STATE_BASE_ADDRESS updates require a reissue of:
 889     *
 890     * 3DSTATE_PIPELINE_POINTERS
 891     * 3DSTATE_BINDING_TABLE_POINTERS
 892     * MEDIA_STATE_POINTERS
 893     *
 894     * and this continues through Ironlake.  The Sandy Bridge PRM, vol
 895     * 1 part 1 says that the folowing packets must be reissued:
 896     *
 897     * 3DSTATE_CC_POINTERS
 898     * 3DSTATE_BINDING_TABLE_POINTERS
 899     * 3DSTATE_SAMPLER_STATE_POINTERS
 900     * 3DSTATE_VIEWPORT_STATE_POINTERS
 901     * MEDIA_STATE_POINTERS
 902     *
 903     * Those are always reissued following SBA updates anyway (new
 904     * batch time), except in the case of the program cache BO
 905     * changing.  Having a separate state flag makes the sequence more
 906     * obvious.
 907     */
 908
 909    brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
 910 }
 911
 912 const struct brw_tracked_state brw_state_base_address = {
 913    .dirty = {
 914       .mesa = 0,
 915       .brw = (BRW_NEW_BATCH |
 916               BRW_NEW_PROGRAM_CACHE),
 917       .cache = 0,
 918    },
 919    .emit = upload_state_base_address
 920 };