i965: Fix border color on Ironlake.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

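   /* DWord 2 packs the inclusive "max" corner of the rectangle: (width - 1)
    * in bits 15:0 and (height - 1) in bits 31:16.  For a 640x480 drawbuffer
    * that is (479 << 16) | 639 = 0x01DF027F.  DWord 3 is the drawing
    * rectangle origin, left at (0, 0) here.
    */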
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};

/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

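   /* The GEN6_BINDING_TABLE_MODIFY_* bits in the header select which of the
    * three pointers the hardware actually updates; here all of them are
    * rewritten, pointing every stage at the same binding table.
    */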
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->bind.bo_offset); /* vs */
   OUT_BATCH(brw->bind.bo_offset); /* gs */
   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}

const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

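   /* Bit 0 of the GS and CLIP unit state pointers is that unit's enable
    * bit: the clip unit is always enabled, while the GS unit is only
    * enabled when a GS program is active.  The remaining stages are always
    * enabled, so their pointers carry no enable bit.
    */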
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}

static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};

uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;

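   /* With no depth attachment, a packed depth/stencil format bound to the
    * stencil attachment alone (and not split into a separate stencil
    * miptree) still determines the depth buffer format below.
    */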
   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
        intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (intel->gen >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gen5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
          *     If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT.
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(intel_rb_format(drb)));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}

static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *stencil_mt = NULL;
   struct intel_region *hiz_region = NULL;
   unsigned int len;
   bool separate_stencil = false;

   if (depth_irb &&
       depth_irb->mt &&
       depth_irb->mt->hiz_mt) {
      hiz_region = depth_irb->mt->hiz_mt->region;
   }

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* Find the real separate stencil mt if present. */
   if (stencil_irb) {
      stencil_mt = stencil_irb->mt;
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8)
         separate_stencil = true;
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil)
      depth_irb = stencil_irb;

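   /* The packet length varies by generation: 5 DWords on the original 965,
    * 6 on G4X/Ironlake (which add the draw offset DWord written below), and
    * 7 on Sandybridge and later (one more DWord, written as zero here).
    */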
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   if (!depth_irb && !separate_stencil) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Since the stencil buffer has quirky pitch requirements, its region
       * was allocated with half height and double cpp. So we need
       * a multiplier of 2 to obtain the surface's real height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
                (1 << 21) | /* separate stencil enable */
                (1 << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                (1 << 27) | /* tiled surface */
                (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((stencil_irb->Base.Base.Width - 1) << 6) |
                (stencil_irb->Base.Base.Height - 1) << 19);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_irb->mt->region;
      uint32_t tile_x, tile_y, offset;

      /* If using separate stencil, hiz must be enabled. */
      assert(!separate_stencil || hiz_region);

      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_region || region->tiling == I915_TILING_Y);

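      /* DWord 1 takes the surface pitch in bytes minus one; region->pitch
       * is in pixels, hence the multiply by cpp.  The relocated base
       * address points at a tile boundary, so the intra-tile offsets
       * tile_x/tile_y are folded into the width/height fields and, on
       * G4X/gen5+, reported in the draw offset DWord; older parts cannot
       * express a nonzero offset, hence the assert below.
       */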
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (brw_depthbuffer_format(brw) << 18) |
                ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                (BRW_TILEWALK_YMAJOR << 26) |
                ((region->tiling != I915_TILING_NONE) << 27) |
                (BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
                (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
                (((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
         assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
         OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   if (hiz_region || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_region) {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
         OUT_RELOC(hiz_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(region->pitch * region->cpp - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   0);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
    *     packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_region) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = emit_depthbuffer,
};


/***********************************************************************
 * Polygon stipple packet
 */

static void upload_polygon_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLuint i;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(33);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));

   /* Polygon stipple is provided in OpenGL order, i.e. bottom
    * row first. If we're rendering to a window (i.e. the
    * default frame buffer object, 0), then we need to invert
    * it to match our pixel layout. But if we're rendering
    * to a FBO (i.e. any named frame buffer object), we *don't*
    * need to invert - we already match the layout.
    */
   if (ctx->DrawBuffer->Name == 0) {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
   } else {
      for (i = 0; i < 32; i++)
         OUT_BATCH(ctx->PolygonStipple[i]);
   }
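   /* CACHED_BATCH() closes the packet like ADVANCE_BATCH() but, roughly,
    * also remembers the packet so that an identical copy emitted later can
    * be dropped (see intel_batchbuffer.h).
    */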
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};


/***********************************************************************
 * Polygon stipple offset packet
 */

static void upload_polygon_stipple_offset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   /* _NEW_POLYGON */
   if (!ctx->Polygon.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2 - 2));

   /* _NEW_BUFFERS
    *
    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
    * we have to invert the Y axis in order to match the OpenGL
    * pixel coordinate system, and our offset must be matched
    * to the window position. If we're drawing to a FBO
    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
    * system works just fine, and there's no window system to
    * worry about.
    */
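   /* The offset is the Y distance, mod 32, that lines the 32x32 stipple
    * pattern up with the window origin after the Y flip: e.g. a drawbuffer
    * height of 100 gives (32 - (100 & 31)) & 31 = 28.
    */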
   if (brw->intel.ctx.DrawBuffer->Name == 0)
      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
   else
      OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};

/**********************************************************************
 * AA Line parameters
 */
static void upload_aa_line_parameters(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;

   if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
   /* use legacy aa line coverage computation */
   OUT_BATCH(0);
   OUT_BATCH(0);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};

/***********************************************************************
 * Line stipple packet
 */

static void upload_line_stipple(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &brw->intel.ctx;
   GLfloat tmp;
   GLint tmpi;

   if (!ctx->Line.StippleFlag)
      return;

   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
   OUT_BATCH(ctx->Line.StipplePattern);
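   /* DWord 2 carries the repeat factor and its reciprocal in fixed point,
    * scaled by 2^13: e.g. a factor of 2 gives tmpi = 0.5 * 8192 = 0x1000,
    * so the DWord is (0x1000 << 16) | 2 = 0x10000002.
    */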
   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
   tmpi = tmp * (1 << 13);
   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
   CACHED_BATCH();
}

const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};


/***********************************************************************
 * Misc invariant state packets
 */

static void upload_invariant_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (intel->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   if (intel->gen >= 6) {
      int i;
      int len = intel->gen >= 7 ? 4 : 3;

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
                MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
      if (intel->gen >= 7)
         OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
      OUT_BATCH(1);
      ADVANCE_BATCH();

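      /* _3DSTATE_GS_SVB_INDEX programs the streamed-vertex-buffer index
       * registers; initializing all four with index 0 and maximum index
       * 0xffffffff gives the gen6 GS (which uses them when writing
       * transform feedback output) a sane starting state.
       */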
      if (intel->gen < 7) {
         for (i = 0; i < 4; i++) {
            BEGIN_BATCH(4);
            OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
            OUT_BATCH(i << SVB_INDEX_SHIFT);
            OUT_BATCH(0);
            OUT_BATCH(0xffffffff);
            ADVANCE_BATCH();
         }
      }
   }

   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}

const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_invariant_state
};

/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
         intel_emit_post_sync_nonzero_flush(intel);

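      /* Bit 0 of each address DWord is its "modify enable" bit; without it
       * the hardware leaves that base address unchanged.  So OUT_BATCH(1)
       * programs a base address of zero, and the relocation deltas of 1
       * below likewise tag the buffer-object addresses as valid.
       */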
      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                intel->batch.bo->size | 1);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (intel->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}

const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};