#include "brw_state.h"
#include "brw_defines.h"
+#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/glformats.h"
/* Constant single cliprect for framebuffer object or DRI2 drawing.
 *
 * Emits a 4-dword 3DSTATE_DRAWING_RECTANGLE packet whose size field is
 * derived from the geometric width/height of ctx->DrawBuffer.
 */
-static void upload_drawing_rect(struct brw_context *brw)
+static void
+upload_drawing_rect(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
-
- /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
/* The size is queried through the _mesa_geometric_*() helpers instead of
 * reading the framebuffer's Width/Height fields directly.
 */
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+ const unsigned int fb_width = _mesa_geometric_width(fb);
+ const unsigned int fb_height = _mesa_geometric_height(fb);
BEGIN_BATCH(4);
/* Header dword: opcode in the upper half, (length - 2) in the low bits. */
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
- OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
- ((ctx->DrawBuffer->Height - 1) << 16));
/* (width - 1) goes in bits 15:0 and (height - 1) in bits 31:16.
 * NOTE(review): both values are unsigned, so a zero width or height would
 * wrap in the subtraction -- confirm this is never reached with an empty
 * framebuffer.
 */
+ OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* Tracked-state entry for the drawing rectangle: names
 * upload_drawing_rect() as the emit callback and lists _NEW_BUFFERS (mesa)
 * and BRW_NEW_BLORP | BRW_NEW_CONTEXT (brw) as its dirty flags.
 * NOTE(review): presumably the state-upload code calls .emit whenever one
 * of the listed flags is set -- confirm against the state upload loop.
 */
const struct brw_tracked_state brw_drawing_rect = {
.dirty = {
.mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
+ .brw = BRW_NEW_BLORP |
+ BRW_NEW_CONTEXT,
},
.emit = upload_drawing_rect
};
* The state pointers in this packet are all relative to the general state
* base address set by CMD_STATE_BASE_ADDRESS, which is 0.
*/
-static void upload_pipelined_state_pointers(struct brw_context *brw )
+static void
+upload_pipelined_state_pointers(struct brw_context *brw)
{
if (brw->gen == 5) {
/* Need to flush before changing clip max threads for errata. */
brw->cc.state_offset);
ADVANCE_BATCH();
- brw->state.dirty.brw |= BRW_NEW_PSP;
+ brw->ctx.NewDriverState |= BRW_NEW_PSP;
}
-static void upload_psp_urb_cbs(struct brw_context *brw )
+static void
+upload_psp_urb_cbs(struct brw_context *brw)
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
/* Tracked-state entry whose emit callback is upload_psp_urb_cbs().
 * It has no mesa dirty flags; everything that triggers it is expressed as
 * brw flags. The per-unit CACHE_NEW_* bits that used to live in a separate
 * .cache field are no longer listed; the brw mask now carries
 * BRW_NEW_FF_GS_PROG_DATA and BRW_NEW_GEN4_UNIT_STATE instead.
 * NOTE(review): presumably BRW_NEW_GEN4_UNIT_STATE stands in for the
 * removed per-unit CACHE_NEW_*_UNIT bits -- confirm against the flag
 * definitions.
 */
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
- .brw = (BRW_NEW_URB_FENCE |
- BRW_NEW_BATCH |
- BRW_NEW_STATE_BASE_ADDRESS),
- .cache = (CACHE_NEW_VS_UNIT |
- CACHE_NEW_FF_GS_UNIT |
- CACHE_NEW_FF_GS_PROG |
- CACHE_NEW_CLIP_UNIT |
- CACHE_NEW_SF_UNIT |
- CACHE_NEW_WM_UNIT |
- CACHE_NEW_CC_UNIT)
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_FF_GS_PROG_DATA |
+ BRW_NEW_GEN4_UNIT_STATE |
+ BRW_NEW_STATE_BASE_ADDRESS |
+ BRW_NEW_URB_FENCE,
},
.emit = upload_psp_urb_cbs,
};
uint32_t tile_mask_x = 0, tile_mask_y = 0;
if (depth_mt) {
- intel_miptree_get_tile_masks(depth_mt, &tile_mask_x, &tile_mask_y, false);
+ intel_get_tile_masks(depth_mt->tiling, depth_mt->tr_mode,
+ depth_mt->cpp, false,
+ &tile_mask_x, &tile_mask_y);
if (intel_miptree_level_has_hiz(depth_mt, depth_level)) {
uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
- intel_miptree_get_tile_masks(depth_mt->hiz_mt,
- &hiz_tile_mask_x, &hiz_tile_mask_y,
- false);
+ intel_get_tile_masks(depth_mt->hiz_buf->mt->tiling,
+ depth_mt->hiz_buf->mt->tr_mode,
+ depth_mt->hiz_buf->mt->cpp,
+ false, &hiz_tile_mask_x,
+ &hiz_tile_mask_y);
/* Each HiZ row represents 2 rows of pixels */
hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
tile_mask_y |= 63;
} else {
uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
- intel_miptree_get_tile_masks(stencil_mt,
- &stencil_tile_mask_x,
- &stencil_tile_mask_y, false);
+ intel_get_tile_masks(stencil_mt->tiling,
+ stencil_mt->tr_mode,
+ stencil_mt->cpp,
+ false, &stencil_tile_mask_x,
+ &stencil_tile_mask_y);
tile_mask_x |= stencil_tile_mask_x;
tile_mask_y |= stencil_tile_mask_y;
* non-pipelined state that will need the PIPE_CONTROL workaround.
*/
if (brw->gen == 6) {
- intel_emit_post_sync_nonzero_flush(brw);
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
}
unsigned int len;
/* Emit hiz buffer. */
if (hiz) {
- struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+ assert(depth_mt);
+ struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_mt->pitch - 1);
* when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
if (brw->gen >= 6 || hiz) {
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
GEN5_DEPTH_CLEAR_VALID |
/* Tracked-state entry for the depth buffer: names brw_emit_depthbuffer()
 * as the emit callback, with _NEW_BUFFERS as the mesa dirty flag and
 * BRW_NEW_BATCH | BRW_NEW_BLORP as the brw dirty flags.
 */
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
.mesa = _NEW_BUFFERS,
- .brw = BRW_NEW_BATCH,
- .cache = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP,
},
.emit = brw_emit_depthbuffer,
};
-
-
-/***********************************************************************
+/**
* Polygon stipple packet
*/
-
-static void upload_polygon_stipple(struct brw_context *brw)
+static void
+upload_polygon_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLuint i;
if (!ctx->Polygon.StippleFlag)
return;
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
BEGIN_BATCH(33);
OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
- }
- else {
+ } else {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[i]);
}
/* Tracked-state entry for the polygon stipple pattern: names
 * upload_polygon_stipple() as the emit callback, with
 * _NEW_POLYGON | _NEW_POLYGONSTIPPLE as mesa dirty flags and
 * BRW_NEW_CONTEXT as the brw dirty flag.
 */
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
- .mesa = (_NEW_POLYGONSTIPPLE |
- _NEW_POLYGON),
+ .mesa = _NEW_POLYGON |
+ _NEW_POLYGONSTIPPLE,
.brw = BRW_NEW_CONTEXT,
- .cache = 0
},
.emit = upload_polygon_stipple
};
-
-/***********************************************************************
+/**
* Polygon stipple offset packet
*/
-
-static void upload_polygon_stipple_offset(struct brw_context *brw)
+static void
+upload_polygon_stipple_offset(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
if (!ctx->Polygon.StippleFlag)
return;
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
* works just fine, and there's no window system to worry about.
*/
if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
- OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+ OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
else
OUT_BATCH(0);
ADVANCE_BATCH();
/* Tracked-state entry for the polygon stipple offset: names
 * upload_polygon_stipple_offset() as the emit callback, with
 * _NEW_BUFFERS | _NEW_POLYGON as mesa dirty flags and BRW_NEW_CONTEXT as
 * the brw dirty flag.
 */
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
- .mesa = (_NEW_BUFFERS |
- _NEW_POLYGON),
+ .mesa = _NEW_BUFFERS |
+ _NEW_POLYGON,
.brw = BRW_NEW_CONTEXT,
- .cache = 0
},
.emit = upload_polygon_stipple_offset
};
-/**********************************************************************
+/**
* AA Line parameters
*/
-static void upload_aa_line_parameters(struct brw_context *brw)
+static void
+upload_aa_line_parameters(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
if (brw->gen == 4 && !brw->is_g4x)
return;
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
.dirty = {
.mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
- .cache = 0
},
.emit = upload_aa_line_parameters
};
-/***********************************************************************
+/**
* Line stipple packet
*/
-
-static void upload_line_stipple(struct brw_context *brw)
+static void
+upload_line_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLfloat tmp;
if (!ctx->Line.StippleFlag)
return;
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
OUT_BATCH(ctx->Line.StipplePattern);
if (brw->gen >= 7) {
/* in U1.16 */
- tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+ tmp = 1.0f / ctx->Line.StippleFactor;
tmpi = tmp * (1<<16);
OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
- }
- else {
+ } else {
/* in U1.13 */
- tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+ tmp = 1.0f / ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
}
.dirty = {
.mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
- .cache = 0
},
.emit = upload_line_stipple
};
-
-/***********************************************************************
- * Misc invariant state packets
- */
-
/**
 * Emit a PIPELINE_SELECT command for the requested pipeline.
 *
 * Around the PIPELINE_SELECT dword this also emits, depending on brw->gen,
 * the flush or state-clearing commands called for by the hardware-doc
 * excerpts quoted in the comments below. When brw->use_resource_streamer
 * is set, the resource streamer and hardware binding tables are turned off
 * before selecting a non-render pipeline and turned back on after
 * selecting the render pipeline.
 *
 * brw is the driver context the commands are emitted for; pipeline is
 * compared against BRW_RENDER_PIPELINE and BRW_COMPUTE_PIPELINE.
 */
void
-brw_upload_invariant_state(struct brw_context *brw)
+brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
const bool is_965 = brw->gen == 4 && !brw->is_g4x;
-
- /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
-
- /* Select the 3D pipeline (as opposed to media) */
/* The PIPELINE_SELECT opcode differs between original 965 and later parts. */
const uint32_t _3DSTATE_PIPELINE_SELECT =
is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
+
+ if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
+ /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+ * PIPELINE_SELECT [DevBWR+]":
+ *
+ * Project: HSW, BDW, CHV, SKL, BXT
+ *
+ * Hardware Binding Tables are only supported for 3D
+ * workloads. Resource streamer must be enabled only for 3D
+ * workloads. Resource streamer must be disabled for Media and GPGPU
+ * workloads.
+ */
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_RS_CONTROL | 0);
+ ADVANCE_BATCH();
+
+ gen7_disable_hw_binding_tables(brw);
+
+ /* XXX - Disable gather constant pool too when we start using it. */
+ }
+
+ if (brw->gen >= 8 && brw->gen < 10) {
+ /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
+ *
+ * Software must clear the COLOR_CALC_STATE Valid field in
+ * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
+ * with Pipeline Select set to GPGPU.
+ *
+ * The internal hardware docs recommend the same workaround for Gen9
+ * hardware too.
+ */
+ if (pipeline == BRW_COMPUTE_PIPELINE) {
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
/* Flag CC state dirty after zeroing the pointer dword above. */
+ brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
+ }
+
+ } else if (brw->gen >= 6) {
/* NOTE(review): because this is an else-if of the gen 8-9 test above,
 * gen 8 and gen 9 never emit the two PIPE_CONTROL flushes below, even
 * though the quoted requirement is marked DEVSNB+ -- confirm that
 * skipping them on those generations is intended.
 */
+ /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+ * PIPELINE_SELECT [DevBWR+]":
+ *
+ * Project: DEVSNB+
+ *
+ * Software must ensure all the write caches are flushed through a
+ * stalling PIPE_CONTROL command followed by another PIPE_CONTROL
+ * command to invalidate read only caches prior to programming
+ * MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
+ */
/* The data-cache flush bit is only added on gen >= 7. */
+ const unsigned dc_flush =
+ brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
+
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ dc_flush |
+ PIPE_CONTROL_NO_WRITE |
+ PIPE_CONTROL_CS_STALL);
+
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_NO_WRITE);
+
+ } else {
+ /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+ * PIPELINE_SELECT [DevBWR+]":
+ *
+ * Project: PRE-DEVSNB
+ *
+ * Software must ensure the current pipeline is flushed via an
+ * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
+ */
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_FLUSH);
+ ADVANCE_BATCH();
+ }
+
+ /* Select the pipeline */
BEGIN_BATCH(1);
- OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | 0);
/* Low bits OR'ed into the command dword: (3 << 8) on gen >= 9, and 2 when
 * the compute pipeline is requested (0 otherwise).
 * NOTE(review): presumably (3 << 8) is a write mask for the select field
 * and 2 is the GPGPU encoding -- confirm against the PRM.
 */
+ OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
+ (brw->gen >= 9 ? (3 << 8) : 0) |
+ (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
ADVANCE_BATCH();
- if (brw->gen < 6) {
- /* Disable depth offset clamping. */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
- OUT_BATCH_F(0.0);
+ if (brw->gen == 7 && !brw->is_haswell &&
+ pipeline == BRW_RENDER_PIPELINE) {
+ /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+ * PIPELINE_SELECT [DevBWR+]":
+ *
+ * Project: DEVIVB, DEVHSW:GT3:A0
+ *
+ * Software must send a pipe_control with a CS stall and a post sync
+ * operation and then a dummy DRAW after every MI_SET_CONTEXT and
+ * after any PIPELINE_SELECT that is enabling 3D mode.
+ */
+ gen7_emit_cs_stall_flush(brw);
+
/* The dummy draw: a 7-dword CMD_3D_PRIM with _3DPRIM_POINTLIST and every
 * remaining dword zero.
 */
+ BEGIN_BATCH(7);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+ OUT_BATCH(_3DPRIM_POINTLIST);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
ADVANCE_BATCH();
}
+ if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
+ /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+ * PIPELINE_SELECT [DevBWR+]":
+ *
+ * Project: HSW, BDW, CHV, SKL, BXT
+ *
+ * Hardware Binding Tables are only supported for 3D
+ * workloads. Resource streamer must be enabled only for 3D
+ * workloads. Resource streamer must be disabled for Media and GPGPU
+ * workloads.
+ */
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_RS_CONTROL | 1);
+ ADVANCE_BATCH();
+
+ gen7_enable_hw_binding_tables(brw);
+
+ /* XXX - Re-enable gather constant pool here. */
+ }
+}
+
+/**
+ * Misc invariant state packets
+ */
+void
+brw_upload_invariant_state(struct brw_context *brw)
+{
+ const bool is_965 = brw->gen == 4 && !brw->is_g4x;
+
+ brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+ brw->last_pipeline = BRW_RENDER_PIPELINE;
+
if (brw->gen >= 8) {
BEGIN_BATCH(3);
OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
const uint32_t _3DSTATE_VF_STATISTICS =
is_965 ? GEN4_3DSTATE_VF_STATISTICS : GM45_3DSTATE_VF_STATISTICS;
BEGIN_BATCH(1);
- OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 |
- (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
+ OUT_BATCH(_3DSTATE_VF_STATISTICS << 16 | 1);
ADVANCE_BATCH();
}
/* Tracked-state entry for the invariant state packets: names
 * brw_upload_invariant_state() as the emit callback. It has no mesa dirty
 * flags and lists BRW_NEW_BLORP | BRW_NEW_CONTEXT as brw dirty flags.
 */
const struct brw_tracked_state brw_invariant_state = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
+ .brw = BRW_NEW_BLORP |
+ BRW_NEW_CONTEXT,
},
.emit = brw_upload_invariant_state
};
* surface state objects, but not the surfaces that the surface state
* objects point to.
*/
-static void upload_state_base_address( struct brw_context *brw )
+void
+brw_upload_state_base_address(struct brw_context *brw)
{
+ if (brw->batch.state_base_address_emitted)
+ return;
+
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
* programmed prior to STATE_BASE_ADDRESS.
* maybe this isn't required for us in particular.
*/
- if (brw->gen >= 6) {
- uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
+ if (brw->gen >= 8) {
+ uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+ int pkt_len = brw->gen >= 9 ? 19 : 16;
- if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
+ BEGIN_BATCH(pkt_len);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
+ /* General state base address: stateless DP read/write requests */
+ OUT_BATCH(mocs_wb << 4 | 1);
+ OUT_BATCH(0);
+ OUT_BATCH(mocs_wb << 16);
+ /* Surface state base address: */
+ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ mocs_wb << 4 | 1);
+ /* Dynamic state base address: */
+ OUT_RELOC64(brw->batch.bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+ mocs_wb << 4 | 1);
+ /* Indirect object base address: MEDIA_OBJECT data */
+ OUT_BATCH(mocs_wb << 4 | 1);
+ OUT_BATCH(0);
+ /* Instruction base address: shader kernels (incl. SIP) */
+ OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ mocs_wb << 4 | 1);
+
+ /* General state buffer size */
+ OUT_BATCH(0xfffff001);
+ /* Dynamic state buffer size */
+ OUT_BATCH(ALIGN(brw->batch.bo->size, 4096) | 1);
+ /* Indirect object upper bound */
+ OUT_BATCH(0xfffff001);
+ /* Instruction access upper bound */
+ OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
+ if (brw->gen >= 9) {
+ OUT_BATCH(1);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+ } else if (brw->gen >= 6) {
+ uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
* obvious.
*/
- brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
+ brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
+ brw->batch.state_base_address_emitted = true;
}
-
-const struct brw_tracked_state brw_state_base_address = {
- .dirty = {
- .mesa = 0,
- .brw = (BRW_NEW_BATCH |
- BRW_NEW_PROGRAM_CACHE),
- .cache = 0,
- },
- .emit = upload_state_base_address
-};