UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
* to a FBO (i.e. any named frame buffer object), we *don't*
* need to invert - we already match the layout.
*/
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (ctx->DrawBuffer->FlipY) {
for (unsigned i = 0; i < 32; i++)
poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
} else {
* to a user-created FBO then our native pixel coordinate system
* works just fine, and there's no window system to worry about.
*/
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (ctx->DrawBuffer->FlipY) {
poly.PolygonStippleYOffset =
(32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31;
}
}
}
+/* Return the upper address bits (above bit 32) of a soft-pinned BO's
+ * assigned GTT offset, or 0 if the BO is not pinned (with relocations we
+ * don't know the final address, so there is nothing meaningful to report).
+ */
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+ return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers. So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround. Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+/* Walk all bound vertex buffers and emit a VF cache invalidate if any VB
+ * slot's upper address bits differ from the ones used by the previous draw
+ * (see the VF cache 32-bit-key explanation above). Also records the new
+ * high bits in brw->vb.last_bo_high_bits for the next comparison.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ bool need_invalidate = false;
+ unsigned i;
+
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+ /* A mismatch means the cache key's low-32-bit address could alias a
+ * stale entry from the previously bound buffer in this slot.
+ */
+ if (high_bits != brw->vb.last_bo_high_bits[i]) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[i] = high_bits;
+ }
+ }
+
+ /* Don't bother with draw parameter buffers - those are generated by
+ * the driver so we can select a consistent memory zone.
+ */
+
+ /* One flush covers every slot that changed this draw. */
+ if (need_invalidate) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ }
+#endif
+}
+
+/* Same workaround as vf_invalidate_for_vb_48bit_transitions, but for the
+ * single index buffer: invalidate the VF cache when the IB's upper address
+ * bits change between draws, and remember the new bits for next time.
+ */
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+ if (high_bits != brw->ib.last_bo_high_bits) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw->ib.last_bo_high_bits = high_bits;
+ }
+#endif
+}
+
static void
genX(emit_vertices)(struct brw_context *brw)
{
const unsigned nr_buffers = brw->vb.nr_buffers +
uses_draw_params + uses_derived_draw_params;
+ vf_invalidate_for_vb_48bit_transitions(brw);
+
if (nr_buffers) {
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
.mesa = _NEW_POLYGON,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
+ BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VERTICES |
BRW_NEW_VS_PROG_DATA,
},
if (index_buffer == NULL)
return;
+ vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
clip.ScreenSpaceViewportYMax = 1;
clip.ViewportXYClipTestEnable = true;
- clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
+ clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar);
/* _NEW_TRANSFORM */
if (GEN_GEN == 5 || GEN_IS_G4X) {
#endif
#if GEN_GEN == 7
- clip.FrontWinding = brw->polygon_front_bit == _mesa_is_user_fbo(fb);
+ clip.FrontWinding = brw->polygon_front_bit != fb->FlipY;
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
clip.UserClipDistanceCullTestEnableBitmask =
brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;
- clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
+ clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar);
#endif
/* _NEW_LIGHT */
#if GEN_GEN <= 7
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ bool flip_y = ctx->DrawBuffer->FlipY;
UNUSED const bool multisampled_fbo =
_mesa_geometric_samples(ctx->DrawBuffer) > 1;
#endif
#if GEN_GEN <= 7
/* _NEW_POLYGON */
- sf.FrontWinding = brw->polygon_front_bit == render_to_fbo;
+ sf.FrontWinding = brw->polygon_front_bit != flip_y;
#if GEN_GEN >= 6
sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted, too.
*/
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) {
sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
} else {
sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
/* Pointer to the WM constant buffer. Covered by the set of
* state flags from gen6_upload_wm_push_constants.
*/
- wmcp.PointertoPSConstantBuffer0 = stage_state->push_const_offset;
- wmcp.PSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ wmcp.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ wmcp.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
- wm.LineAntialiasingRegionWidth = _10pixels;
- wm.LineEndCapAntialiasingRegionWidth = _05pixels;
-
- wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
- wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
#else
ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
- if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
- /* These two fields should be the same pre-gen6, which is why we
- * only have one hardware field to program for both dispatch
- * widths.
- */
- assert(wm_prog_data->base.dispatch_grf_start_reg ==
- wm_prog_data->dispatch_grf_start_reg_2);
+#endif
+
+#if GEN_GEN <= 6
+ wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
+ wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
+ wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
+#endif
+
+#if GEN_GEN == 4
+ /* On gen4, we only have one shader kernel */
+ if (brw_wm_state_has_ksp(wm, 0)) {
+ assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0);
+ wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
}
+#elif GEN_GEN == 5
+ /* On gen5, we have multiple shader kernels but only one GRF start
+ * register for all kernels
+ */
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
+ wm.KernelStartPointer2 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
- if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16)
- wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
+ wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
+ wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
- if (stage_state->sampler_count)
- wm.SamplerStatePointer =
- ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
-#if GEN_GEN == 5
- if (wm_prog_data->prog_offset_2)
- wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ wm_prog_data->base.dispatch_grf_start_reg;
+
+ /* Dispatch GRF Start should be the same for all shaders on gen5 */
+ if (brw_wm_state_has_ksp(wm, 1)) {
+ assert(wm_prog_data->base.dispatch_grf_start_reg ==
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1));
+ }
+ if (brw_wm_state_has_ksp(wm, 2)) {
+ assert(wm_prog_data->base.dispatch_grf_start_reg ==
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
+ }
+#elif GEN_GEN == 6
+ /* On gen6, we have multiple shader kernels and we no longer specify a
+ * register count for each one.
+ */
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
+ wm.KernelStartPointer2 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
+
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
+ wm.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
#endif
- wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
+#if GEN_GEN <= 5
wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
+ wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
+ wm.SetupURBEntryReadOffset = 0;
wm.EarlyDepthTestEnable = true;
+#endif
+
+#if GEN_GEN >= 6
+ wm.LineAntialiasingRegionWidth = _10pixels;
+ wm.LineEndCapAntialiasingRegionWidth = _05pixels;
+
+ wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+ wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
+#else
+ if (stage_state->sampler_count)
+ wm.SamplerStatePointer =
+ ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
+
wm.LineAntialiasingRegionWidth = _05pixels;
wm.LineEndCapAntialiasingRegionWidth = _10pixels;
wm.BindingTableEntryCount =
wm_prog_data->base.binding_table.size_bytes / 4;
wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
- wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
- wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
- if (GEN_GEN == 6 ||
- wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
- wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
- }
-
-#if GEN_GEN >= 5
- if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) {
- wm.KernelStartPointer2 =
- KSP(brw, stage_state->prog_offset + wm_prog_data->prog_offset_2);
- }
-#endif
#if GEN_GEN == 6
wm.DualSourceBlendEnable =
wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
else
wm.PositionXYOffsetSelect = POSOFFSET_NONE;
-
- wm.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
#endif
if (wm_prog_data->base.total_scratch) {
pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
pkt.SamplerCount = \
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to \
+ * disable prefetching of binding tables in A0 and B0 steppings. \
+ * TODO: Revisit this WA on C0 stepping. \
+ */ \
pkt.BindingTableEntryCount = \
+ GEN_GEN == 11 ? \
+ 0 : \
stage_prog_data->binding_table.size_bytes / 4; \
pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
\
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {
if (stage_state->push_const_size != 0) {
cvs.Buffer0Valid = true;
- cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset;
- cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ cvs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ cvs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
for (unsigned i = 0; i < viewport_count; i++) {
/* _NEW_VIEWPORT | _NEW_TRANSFORM */
const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i];
- if (ctx->Transform.DepthClamp) {
+ if (ctx->Transform.DepthClampNear && ctx->Transform.DepthClampFar) {
+ ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
+ ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
+ } else if (ctx->Transform.DepthClampNear) {
ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
+ ccv.MaximumDepth = 0.0;
+ } else if (ctx->Transform.DepthClampFar) {
+ ccv.MinimumDepth = 0.0;
ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
} else {
ccv.MinimumDepth = 0.0;
static void
set_scissor_bits(const struct gl_context *ctx, int i,
- bool render_to_fbo, unsigned fb_width, unsigned fb_height,
+ bool flip_y, unsigned fb_width, unsigned fb_height,
struct GENX(SCISSOR_RECT) *sc)
{
int bbox[4];
sc->ScissorRectangleXMax = 0;
sc->ScissorRectangleYMin = 1;
sc->ScissorRectangleYMax = 0;
- } else if (render_to_fbo) {
+ } else if (!flip_y) {
/* texmemory: Y=0=bottom */
sc->ScissorRectangleXMin = bbox[0];
sc->ScissorRectangleXMax = bbox[1] - 1;
genX(upload_scissor_state)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
struct GENX(SCISSOR_RECT) scissor;
uint32_t scissor_state_offset;
const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
* inclusive but max is exclusive.
*/
for (unsigned i = 0; i < viewport_count; i++) {
- set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, &scissor);
+ set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, &scissor);
GENX(SCISSOR_RECT_pack)(
NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
}
*/
const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
+ /* Workaround: prevent gpu hangs on SandyBridge
+ * by disabling guardband clipping for odd dimensions.
+ */
+ if (GEN_GEN == 6 && (fb_width & 1 || fb_height & 1)) {
+ *xmin = -1.0f;
+ *xmax = 1.0f;
+ *ymin = -1.0f;
+ *ymax = 1.0f;
+ return;
+ }
+
if (m00 != 0 && m11 != 0) {
/* First, we compute the screen-space render area */
const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
const unsigned viewport_count = brw->clip.viewport_count;
/* _NEW_BUFFERS */
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer);
const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
#endif
/* _NEW_BUFFERS */
- if (render_to_fbo) {
- y_scale = 1.0;
- y_bias = 0;
- } else {
+ if (flip_y) {
y_scale = -1.0;
y_bias = (float)fb_height;
+ } else {
+ y_scale = 1.0;
+ y_bias = 0;
}
for (unsigned i = 0; i < brw->clip.viewport_count; i++) {
clv.YMaxClipGuardband = gb_ymax;
#if GEN_GEN < 6
- set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height,
+ set_scissor_bits(ctx, i, flip_y, fb_width, fb_height,
&sfv.ScissorRectangle);
#elif GEN_GEN >= 8
/* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
const float viewport_Ymax =
MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height);
- if (render_to_fbo) {
+ if (flip_y) {
sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = viewport_Ymin;
- sfv.YMaxViewPort = viewport_Ymax - 1;
+ sfv.YMinViewPort = fb_height - viewport_Ymax;
+ sfv.YMaxViewPort = fb_height - viewport_Ymin - 1;
} else {
sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = fb_height - viewport_Ymax;
- sfv.YMaxViewPort = fb_height - viewport_Ymin - 1;
+ sfv.YMinViewPort = viewport_Ymin;
+ sfv.YMaxViewPort = viewport_Ymax - 1;
}
#endif
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
if (active && stage_state->push_const_size != 0) {
cgs.Buffer0Valid = true;
- cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset;
- cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ cgs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ cgs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
#if GEN_GEN < 7
gs.SOStatisticsEnable = true;
if (gs_prog->info.has_transform_feedback_varyings)
- gs.SVBIPayloadEnable = true;
+ gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
/* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
* was previously done for gen6.
sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ bool flip_y = ctx->DrawBuffer->FlipY;
/* _NEW_POINT
*
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted.
*/
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
else
sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
for (int i = 0; i < 4; i++) {
struct intel_buffer_object *bufferobj =
intel_buffer_object(xfb_obj->Buffers[i]);
+ uint32_t start = xfb_obj->Offset[i];
+ uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
+ uint32_t const size = end - start;
- if (!bufferobj) {
+ if (!bufferobj || !size) {
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = i;
}
continue;
}
- uint32_t start = xfb_obj->Offset[i];
assert(start % 4 == 0);
- uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
struct brw_bo *bo =
- intel_bufferobj_buffer(brw, bufferobj, start, end - start, true);
+ intel_bufferobj_buffer(brw, bufferobj, start, size, true);
assert(end <= bo->size);
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
/* BRW_NEW_FS_PROG_DATA */
- ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to disable
+ * prefetching of binding tables in A0 and B0 steppings.
+ * TODO: Revisit this workaround on C0 stepping.
+ */
+ ps.BindingTableEntryCount = GEN_GEN == 11 ?
+ 0 :
+ prog_data->base.binding_table.size_bytes / 4;
if (prog_data->base.use_alt_mode)
ps.FloatingPointMode = Alternate;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps._32PixelDispatchEnable = prog_data->dispatch_32;
+
+ /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+ *
+ * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+ * Dispatch must not be enabled for PER_PIXEL dispatch mode."
+ *
+ * Since 16x MSAA is first introduced on SKL, we don't need to apply
+ * the workaround on any older hardware.
+ *
+ * BRW_NEW_NUM_SAMPLES
+ */
+ if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+ brw->num_samples == 16) {
+ assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+ ps._32PixelDispatchEnable = false;
+ }
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
- ps.KernelStartPointer0 = stage_state->prog_offset;
+ ps.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = stage_state->prog_offset +
- prog_data->prog_offset_2;
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
if (prog_data->base.total_scratch) {
ps.ScratchSpaceBasePointer =
: 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA,
+ BRW_NEW_FS_PROG_DATA |
+ (GEN_GEN >= 9 ? BRW_NEW_NUM_SAMPLES : 0),
},
.emit = genX(upload_ps),
};
const struct gl_context *ctx = &brw->ctx;
/* _NEW_BUFFERS */
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
/* _NEW_POLYGON */
const struct gl_polygon_attrib *polygon = &ctx->Polygon;
const struct gl_point_attrib *point = &ctx->Point;
brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
- if (brw->polygon_front_bit == render_to_fbo)
+ if (brw->polygon_front_bit != flip_y)
raster.FrontWinding = CounterClockwise;
if (polygon->CullFlag) {
raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
/* _NEW_TRANSFORM */
- if (!ctx->Transform.DepthClamp) {
+#if GEN_GEN < 9
+ if (!(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar))
+ raster.ViewportZClipTestEnable = true;
+#endif
+
#if GEN_GEN >= 9
- raster.ViewportZFarClipTestEnable = true;
+ if (!ctx->Transform.DepthClampNear)
raster.ViewportZNearClipTestEnable = true;
-#else
- raster.ViewportZClipTestEnable = true;
+
+ if (!ctx->Transform.DepthClampFar)
+ raster.ViewportZFarClipTestEnable = true;
#endif
- }
/* BRW_NEW_CONSERVATIVE_RASTERIZATION */
#if GEN_GEN >= 9