#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-
-
-
-
-/***********************************************************************
- * Blend color
- */
-
-static void upload_blend_constant_color(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- struct brw_blend_constant_color bcc;
-
- memset(&bcc, 0, sizeof(bcc));
- bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
- bcc.header.length = sizeof(bcc)/4-2;
- bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
- bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
- bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
- bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
- BRW_CACHED_BATCH_STRUCT(brw, &bcc);
-}
-
-
-const struct brw_tracked_state brw_blend_constant_color = {
- .dirty = {
- .mesa = _NEW_COLOR,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
- },
- .emit = upload_blend_constant_color
-};
-
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
struct gl_context *ctx = &intel->ctx;
BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
((ctx->DrawBuffer->Height - 1) << 16));
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(6);
- OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_BATCH(brw->wm.bind_bo_offset);
+ OUT_BATCH(brw->bind.bo_offset);
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_STATE_BASE_ADDRESS |
+ BRW_NEW_VS_BINDING_TABLE |
+ BRW_NEW_GS_BINDING_TABLE |
+ BRW_NEW_PS_BINDING_TABLE),
.cache = 0,
},
.emit = upload_binding_table_pointers,
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GEN6_BINDING_TABLE_MODIFY_VS |
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
- OUT_BATCH(0); /* gs */
- OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
+ OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
+ OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
const struct brw_tracked_state gen6_binding_table_pointers = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_STATE_BASE_ADDRESS |
+ BRW_NEW_VS_BINDING_TABLE |
+ BRW_NEW_GS_BINDING_TABLE |
+ BRW_NEW_PS_BINDING_TABLE),
.cache = 0,
},
.emit = upload_gen6_binding_table_pointers,
}
BEGIN_BATCH(7);
- OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
- OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->vs.state_offset);
if (brw->gs.prog_active)
- OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->gs.state_offset | 1);
else
OUT_BATCH(0);
- OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
- OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->clip.state_offset | 1);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->sf.state_offset);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->wm.state_offset);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset);
ADVANCE_BATCH();
brw->state.dirty.brw |= BRW_NEW_PSP;
}
-
-static void prepare_psp_urb_cbs(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->vs.state_bo);
- brw_add_validated_bo(brw, brw->gs.state_bo);
- brw_add_validated_bo(brw, brw->clip.state_bo);
- brw_add_validated_bo(brw, brw->sf.state_bo);
- brw_add_validated_bo(brw, brw->wm.state_bo);
-}
-
static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+ .brw = (BRW_NEW_URB_FENCE |
+ BRW_NEW_BATCH |
+ BRW_NEW_STATE_BASE_ADDRESS),
.cache = (CACHE_NEW_VS_UNIT |
CACHE_NEW_GS_UNIT |
CACHE_NEW_GS_PROG |
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
- .prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
-static void prepare_depthbuffer(struct brw_context *brw)
+uint32_t
+brw_depthbuffer_format(struct brw_context *brw)
{
- struct intel_region *region = brw->state.depth_region;
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+ struct intel_renderbuffer *srb;
+
+ if (!drb &&
+ (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
+ !srb->mt->stencil_mt &&
+ (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
+ intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
+ drb = srb;
+ }
- if (region != NULL)
- brw_add_validated_bo(brw, region->buffer);
+ if (!drb)
+ return BRW_DEPTHFORMAT_D32_FLOAT;
+
+ switch (drb->mt->format) {
+ case MESA_FORMAT_Z16:
+ return BRW_DEPTHFORMAT_D16_UNORM;
+ case MESA_FORMAT_Z32_FLOAT:
+ return BRW_DEPTHFORMAT_D32_FLOAT;
+ case MESA_FORMAT_X8_Z24:
+ if (intel->gen >= 6) {
+ return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+ } else {
+ /* Use D24_UNORM_S8, not D24_UNORM_X8.
+ *
+ * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
+ * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
+ * 3DSTATE_DEPTH_BUFFER.Surface_Format).
+ *
+ * However, on Gen5, D24_UNORM_X8 may be used only if separate
+ * stencil is enabled, and we never enable it. From the Ironlake PRM,
+ * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
+ * If this field is disabled, the Surface Format of the depth
+ * buffer cannot be D24_UNORM_X8_UINT.
+ */
+ return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ }
+ case MESA_FORMAT_S8_Z24:
+ return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ case MESA_FORMAT_Z32_FLOAT_X24S8:
+ return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
+ default:
+ _mesa_problem(ctx, "Unexpected depth format %s\n",
+ _mesa_get_format_name(intel_rb_format(drb)));
+ return BRW_DEPTHFORMAT_D16_UNORM;
+ }
}
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct intel_region *region = brw->state.depth_region;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ /* _NEW_BUFFERS */
+ struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+ struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+ struct intel_mipmap_tree *stencil_mt = NULL;
+ struct intel_region *hiz_region = NULL;
unsigned int len;
+ bool separate_stencil = false;
+
+ if (depth_irb &&
+ depth_irb->mt &&
+ depth_irb->mt->hiz_mt) {
+ hiz_region = depth_irb->mt->hiz_mt->region;
+ }
+
+ /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
+ * non-pipelined state that will need the PIPE_CONTROL workaround.
+ */
+ if (intel->gen == 6) {
+ intel_emit_post_sync_nonzero_flush(intel);
+ intel_emit_depth_stall_flushes(intel);
+ }
+
+ /* Find the real separate stencil mt if present. */
+ if (stencil_irb) {
+ stencil_mt = stencil_irb->mt;
+ if (stencil_mt->stencil_mt)
+ stencil_mt = stencil_mt->stencil_mt;
+
+ if (stencil_mt->format == MESA_FORMAT_S8)
+ separate_stencil = true;
+ }
+
+ /* If there's a packed depth/stencil bound to stencil only, we need to
+ * emit the packed depth/stencil buffer packet.
+ */
+ if (!depth_irb && stencil_irb && !separate_stencil)
+ depth_irb = stencil_irb;
if (intel->gen >= 6)
len = 7;
else
len = 5;
- if (region == NULL) {
+ if (!depth_irb && !separate_stencil) {
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
- } else {
- unsigned int format;
-
- switch (region->cpp) {
- case 2:
- format = BRW_DEPTHFORMAT_D16_UNORM;
- break;
- case 4:
- if (intel->depth_buffer_is_float)
- format = BRW_DEPTHFORMAT_D32_FLOAT;
- else
- format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
- break;
- default:
- assert(0);
- return;
- }
- assert(region->tiling != I915_TILING_X);
+ } else if (!depth_irb && separate_stencil) {
+ /*
+ * There exists a separate stencil buffer but no depth buffer.
+ *
+ * The stencil buffer inherits most of its fields from
+ * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
+ * height.
+ *
+ * Since the stencil buffer has quirky pitch requirements, its region
+ * was allocated with half height and double cpp. So we need
+ * a multiplier of 2 to obtain the surface's real height.
+ *
+ * Enable the hiz bit because it and the separate stencil bit must have
+ * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
+ * 1.21 "Separate Stencil Enable":
+ * [DevIL]: If this field is enabled, Hierarchical Depth Buffer
+ * Enable must also be enabled.
+ *
+ * [DevGT]: This field must be set to the same value (enabled or
+ * disabled) as Hierarchical Depth Buffer Enable
+ *
+ * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
+ * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
+ * [DevGT+]: This field must be set to TRUE.
+ */
+ assert(intel->has_separate_stencil);
+
+ BEGIN_BATCH(len);
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+ (1 << 21) | /* separate stencil enable */
+ (1 << 22) | /* hiz enable */
+ (BRW_TILEWALK_YMAJOR << 26) |
+ (1 << 27) | /* tiled surface */
+ (BRW_SURFACE_2D << 29));
+ OUT_BATCH(0);
+ OUT_BATCH(((stencil_irb->Base.Base.Width - 1) << 6) |
+ (stencil_irb->Base.Base.Height - 1) << 19);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
if (intel->gen >= 6)
- assert(region->tiling != I915_TILING_NONE);
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+
+ } else {
+ struct intel_region *region = depth_irb->mt->region;
+ uint32_t tile_x, tile_y, offset;
+
+ /* If using separate stencil, hiz must be enabled. */
+ assert(!separate_stencil || hiz_region);
+
+ offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
+
+ assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
+ assert(!hiz_region || region->tiling == I915_TILING_Y);
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
- (format << 18) |
+ (brw_depthbuffer_format(brw) << 18) |
+ ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
+ ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
(BRW_TILEWALK_YMAJOR << 26) |
((region->tiling != I915_TILING_NONE) << 27) |
(BRW_SURFACE_2D << 29));
- OUT_RELOC(region->buffer,
+ OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- 0);
+ offset);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
- ((region->width - 1) << 6) |
- ((region->height - 1) << 19));
+ (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
+ (((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
OUT_BATCH(0);
if (intel->is_g4x || intel->gen >= 5)
- OUT_BATCH(0);
+ OUT_BATCH(tile_x | (tile_y << 16));
+ else
+ assert(tile_x == 0 && tile_y == 0);
if (intel->gen >= 6)
OUT_BATCH(0);
ADVANCE_BATCH();
}
- /* Initialize it for safety. */
- if (intel->gen >= 6) {
+ if (hiz_region || separate_stencil) {
+ /*
+ * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
+ * stencil enable' and 'hiz enable' bits were set. Therefore we must
+ * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
+ * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
+ * failure to do so causes hangs on gen5 and a stall on gen6.
+ */
+
+ /* Emit hiz buffer. */
+ if (hiz_region) {
+ BEGIN_BATCH(3);
+ OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+ OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+ OUT_RELOC(hiz_region->bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(3);
+ OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+
+ /* Emit stencil buffer. */
+ if (separate_stencil) {
+ struct intel_region *region = stencil_mt->region;
+ BEGIN_BATCH(3);
+ OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+ OUT_BATCH(region->pitch * region->cpp - 1);
+ OUT_RELOC(region->bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(3);
+ OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+ }
+
+ /*
+ * On Gen >= 6, emit clear params for safety. If using hiz, then clear
+ * params must be emitted.
+ *
+ * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
+ * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
+ * when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+ */
+ if (intel->gen >= 6 || hiz_region) {
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
- .mesa = 0,
- .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+ .mesa = _NEW_BUFFERS,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
- .prepare = prepare_depthbuffer,
.emit = emit_depthbuffer,
};
static void upload_polygon_stipple(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_polygon_stipple bps;
GLuint i;
- memset(&bps, 0, sizeof(bps));
- bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
- bps.header.length = sizeof(bps)/4-2;
+ /* _NEW_POLYGON */
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+ BEGIN_BATCH(33);
+ OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
/* Polygon stipple is provided in OpenGL order, i.e. bottom
* row first. If we're rendering to a window (i.e. the
*/
if (ctx->DrawBuffer->Name == 0) {
for (i = 0; i < 32; i++)
- bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+ OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
}
else {
for (i = 0; i < 32; i++)
- bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+ OUT_BATCH(ctx->PolygonStipple[i]);
}
-
- BRW_CACHED_BATCH_STRUCT(brw, &bps);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
- .mesa = _NEW_POLYGONSTIPPLE,
+ .mesa = (_NEW_POLYGONSTIPPLE |
+ _NEW_POLYGON),
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_polygon_stipple_offset bpso;
- memset(&bpso, 0, sizeof(bpso));
- bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
- bpso.header.length = sizeof(bpso)/4-2;
+ /* _NEW_POLYGON */
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
- /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
+
+ /* _NEW_BUFFERS
+ *
+ * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
* we have to invert the Y axis in order to match the OpenGL
* pixel coordinate system, and our offset must be matched
* to the window position. If we're drawing to a FBO
* system works just fine, and there's no window system to
* worry about.
*/
- if (brw->intel.ctx.DrawBuffer->Name == 0) {
- bpso.bits0.x_offset = 0;
- bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
- }
- else {
- bpso.bits0.y_offset = 0;
- bpso.bits0.x_offset = 0;
- }
-
- BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+ if (brw->intel.ctx.DrawBuffer->Name == 0)
+ OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+ else
+ OUT_BATCH(0);
+ CACHED_BATCH();
}
-#define _NEW_WINDOW_POS 0x40000000
-
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
- .mesa = _NEW_WINDOW_POS,
+ .mesa = (_NEW_BUFFERS |
+ _NEW_POLYGON),
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
- struct brw_aa_line_parameters balp;
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &brw->intel.ctx;
- if (!brw->has_aa_line_parameters)
+ if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
+ OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
- memset(&balp, 0, sizeof(balp));
- balp.header.opcode = CMD_AA_LINE_PARAMETERS;
- balp.header.length = sizeof(balp) / 4 - 2;
-
- BRW_CACHED_BATCH_STRUCT(brw, &balp);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_aa_line_parameters = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
static void upload_line_stipple(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_line_stipple bls;
GLfloat tmp;
GLint tmpi;
- memset(&bls, 0, sizeof(bls));
- bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
- bls.header.length = sizeof(bls)/4 - 2;
+ if (!ctx->Line.StippleFlag)
+ return;
- bls.bits0.pattern = ctx->Line.StipplePattern;
- bls.bits1.repeat_count = ctx->Line.StippleFactor;
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+ BEGIN_BATCH(3);
+ OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
+ OUT_BATCH(ctx->Line.StipplePattern);
tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
-
-
- bls.bits1.inverse_repeat_count = tmpi;
-
- BRW_CACHED_BATCH_STRUCT(brw, &bls);
+ OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_line_stipple = {
/***********************************************************************
- * Misc invarient state packets
+ * Misc invariant state packets
*/
-static void upload_invarient_state( struct brw_context *brw )
+static void upload_invariant_state( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
- {
- /* 0x61040000 Pipeline Select */
- /* PipelineSelect : 0 */
- struct brw_pipeline_select ps;
+ /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
- memset(&ps, 0, sizeof(ps));
- ps.header.opcode = brw->CMD_PIPELINE_SELECT;
- ps.header.pipeline_select = 0;
- BRW_BATCH_STRUCT(brw, &ps);
- }
+ /* Select the 3D pipeline (as opposed to media) */
+ BEGIN_BATCH(1);
+ OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
+ ADVANCE_BATCH();
if (intel->gen < 6) {
- struct brw_global_depth_offset_clamp gdo;
- memset(&gdo, 0, sizeof(gdo));
-
- /* Disable depth offset clamping.
- */
- gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
- gdo.header.length = sizeof(gdo)/4 - 2;
- gdo.depth_offset_clamp = 0.0;
-
- BRW_BATCH_STRUCT(brw, &gdo);
+ /* Disable depth offset clamping. */
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
+ OUT_BATCH_F(0.0);
+ ADVANCE_BATCH();
}
if (intel->gen >= 6) {
int i;
+ int len = intel->gen >= 7 ? 4 : 3;
- BEGIN_BATCH(3);
- OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+ BEGIN_BATCH(len);
+ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0); /* positions for 4/8-sample */
+ if (intel->gen >= 7)
+ OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
- for (i = 0; i < 4; i++) {
- BEGIN_BATCH(4);
- OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
- OUT_BATCH(i << SVB_INDEX_SHIFT);
- OUT_BATCH(0);
- OUT_BATCH(0xffffffff);
- ADVANCE_BATCH();
+ if (intel->gen < 7) {
+ for (i = 0; i < 4; i++) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(i << SVB_INDEX_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(0xffffffff);
+ ADVANCE_BATCH();
+ }
}
}
- /* 0x61020000 State Instruction Pointer */
- {
- struct brw_system_instruction_pointer sip;
- memset(&sip, 0, sizeof(sip));
-
- sip.header.opcode = CMD_STATE_INSN_POINTER;
- sip.header.length = 0;
- sip.bits0.pad = 0;
- sip.bits0.system_instruction_pointer = 0;
- BRW_BATCH_STRUCT(brw, &sip);
- }
-
-
- {
- struct brw_vf_statistics vfs;
- memset(&vfs, 0, sizeof(vfs));
-
- vfs.opcode = brw->CMD_VF_STATISTICS;
- if (INTEL_DEBUG & DEBUG_STATS)
- vfs.statistics_enable = 1;
+ BEGIN_BATCH(2);
+ OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
- BRW_BATCH_STRUCT(brw, &vfs);
- }
+ BEGIN_BATCH(1);
+ OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
+ (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
+ ADVANCE_BATCH();
}
-const struct brw_tracked_state brw_invarient_state = {
+const struct brw_tracked_state brw_invariant_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .emit = upload_invarient_state
+ .emit = upload_invariant_state
};
/**
{
struct intel_context *intel = &brw->intel;
+ /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
+ * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
+ * programmed prior to STATE_BASE_ADDRESS.
+ *
+ * However, given that the instruction SBA (general state base
+ * address) on this chipset is always set to 0 across X and GL,
+ * maybe this isn't required for us in particular.
+ */
+
if (intel->gen >= 6) {
+ if (intel->gen == 6)
+ intel_emit_post_sync_nonzero_flush(intel);
+
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
- 1); /* Surface state base address */
- OUT_BATCH(1); /* Dynamic state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* Instruction base address */
+ /* General state base address: stateless DP read/write requests */
+ OUT_BATCH(1);
+ /* Surface state base address:
+ * BINDING_TABLE_STATE
+ * SURFACE_STATE
+ */
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+ /* Dynamic state base address:
+ * SAMPLER_STATE
+ * SAMPLER_BORDER_COLOR_STATE
+ * CLIP, SF, WM/CC viewport state
+ * COLOR_CALC_STATE
+ * DEPTH_STENCIL_STATE
+ * BLEND_STATE
+ * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
+ * Disable is clear, which we rely on)
+ */
+ OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+ I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+
+ OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
+ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1); /* Instruction base address: shader kernels (incl. SIP) */
+
OUT_BATCH(1); /* General state upper bound */
- OUT_BATCH(1); /* Dynamic state upper bound */
+ /* Dynamic state upper bound. Although the documentation says that
+ * programming it to zero will cause it to be ignored, that is a lie.
+ * If this isn't programmed to a real bound, the sampler border color
+ * pointer is rejected, causing border color to mysteriously fail.
+ */
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ intel->batch.bo->size | 1);
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* Instruction base address */
- OUT_BATCH(1); /* General state upper bound */
+ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1); /* Instruction base address */
+ OUT_BATCH(0xfffff001); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
ADVANCE_BATCH();
}
+
+ /* According to section 3.6.1 of VOL1 of the 965 PRM,
+ * STATE_BASE_ADDRESS updates require a reissue of:
+ *
+ * 3DSTATE_PIPELINE_POINTERS
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ * MEDIA_STATE_POINTERS
+ *
+ * and this continues through Ironlake. The Sandy Bridge PRM, vol
+ * 1 part 1 says that the folowing packets must be reissued:
+ *
+ * 3DSTATE_CC_POINTERS
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ * 3DSTATE_SAMPLER_STATE_POINTERS
+ * 3DSTATE_VIEWPORT_STATE_POINTERS
+ * MEDIA_STATE_POINTERS
+ *
+ * Those are always reissued following SBA updates anyway (new
+ * batch time), except in the case of the program cache BO
+ * changing. Having a separate state flag makes the sequence more
+ * obvious.
+ */
+
+ brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE),
.cache = 0,
},
.emit = upload_state_base_address