#include "main/state.h"
#include "brw_context.h"
-#if GEN_GEN == 6
-#include "brw_defines.h"
-#endif
#include "brw_draw.h"
#include "brw_multisample_state.h"
#include "brw_state.h"
uint32_t offset;
};
-static uint64_t
-emit_reloc(struct brw_context *brw,
- void *location, struct brw_address address, uint32_t delta)
-{
- uint32_t offset = (char *) location - (char *) brw->batch.map;
-
- return brw_emit_reloc(&brw->batch, offset, address.bo,
- address.offset + delta,
- address.reloc_flags);
-}
-
#define __gen_address_type struct brw_address
#define __gen_user_data struct brw_context
__gen_combine_address(struct brw_context *brw, void *location,
struct brw_address address, uint32_t delta)
{
+ struct intel_batchbuffer *batch = &brw->batch;
+ uint32_t offset;
+
if (address.bo == NULL) {
return address.offset + delta;
} else {
- return emit_reloc(brw, location, address, delta);
+ if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
+ offset = (char *) location - (char *) brw->batch.state.map;
+ return brw_state_reloc(batch, offset, address.bo,
+ address.offset + delta,
+ address.reloc_flags);
+ }
+
+ assert(!brw_ptr_in_state_buffer(batch, location));
+
+ offset = (char *) location - (char *) brw->batch.batch.map;
+ return brw_batch_reloc(batch, offset, address.bo,
+ address.offset + delta,
+ address.reloc_flags);
}
}
-static struct brw_address
+UNUSED static struct brw_address
rw_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
}
static struct brw_address
+rw_32_bo(struct brw_bo *bo, uint32_t offset)
+{ /* Wraps (bo, offset) in a brw_address whose relocation flags are
+ * RELOC_WRITE | RELOC_32BIT. Callers in this file use it for scratch
+ * space base pointers, which are restricted to the bottom 32 bits of
+ * the address space.
+ * NOTE(review): the exact semantics of RELOC_32BIT are defined outside
+ * this file; confirm against its definition.
+ */
+ return (struct brw_address) {
+ .bo = bo,
+ .offset = offset,
+ .reloc_flags = RELOC_WRITE | RELOC_32BIT,
+ };
+}
+
+static struct brw_address
+ro_32_bo(struct brw_bo *bo, uint32_t offset)
+{ /* Wraps (bo, offset) in a brw_address whose only relocation flag is
+ * RELOC_32BIT (no RELOC_WRITE). Callers in this file use it for vertex
+ * and index buffer start addresses, which are restricted to the low
+ * 32 bits of the address space because of the VF cache limitation.
+ * NOTE(review): the exact semantics of RELOC_32BIT are defined outside
+ * this file; confirm against its definition.
+ */
+ return (struct brw_address) {
+ .bo = bo,
+ .offset = offset,
+ .reloc_flags = RELOC_32BIT,
+ };
+}
+
+UNUSED static struct brw_address
ggtt_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
struct GENX(VERTEX_BUFFER_STATE) buf_state = {
.VertexBufferIndex = buffer_nr,
.BufferPitch = stride,
- .BufferStartingAddress = ro_bo(bo, start_offset),
+
+ /* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses. If you happen
+ * to have two vertex buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions. To work
+ * around this problem, we restrict vertex buffers to the low 32 bits of
+ * the address space.
+ */
+ .BufferStartingAddress = ro_32_bo(bo, start_offset),
#if GEN_GEN >= 8
.BufferSize = end_offset - start_offset,
#endif
#endif
#endif
-#if GEN_GEN == 10
+#if GEN_GEN == 11
+ .VertexBufferMOCS = ICL_MOCS_WB,
+#elif GEN_GEN == 10
.VertexBufferMOCS = CNL_MOCS_WB,
#elif GEN_GEN == 9
.VertexBufferMOCS = SKL_MOCS_WB,
}
UNUSED static int
-uploads_needed(uint32_t format)
+uploads_needed(uint32_t format,
+ bool is_dual_slot)
{
if (!is_passthru_format(format))
return 1;
+ if (is_dual_slot)
+ return 2;
+
switch (format) {
case ISL_FORMAT_R64_PASSTHRU:
case ISL_FORMAT_R64G64_PASSTHRU:
if (!is_passthru_format(format))
return format;
+ /* ISL_FORMAT_R64_PASSTHRU or ISL_FORMAT_R64G64_PASSTHRU with upload == 1
+ * means that we have been forced to do 2 uploads for a size <= 2. This
+ * happens with gen < 8 and dvec3 or dvec4 vertex shader input
+ * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
+ * flagging that we want to fill this second forced upload with zeroes.
+ */
switch (format) {
case ISL_FORMAT_R64_PASSTHRU:
- return ISL_FORMAT_R32G32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
+ : ISL_FORMAT_R32_FLOAT;
case ISL_FORMAT_R64G64_PASSTHRU:
- return ISL_FORMAT_R32G32B32A32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
+ : ISL_FORMAT_R32_FLOAT;
case ISL_FORMAT_R64G64B64_PASSTHRU:
- return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
- : ISL_FORMAT_R32G32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
+ : ISL_FORMAT_R32G32_FLOAT;
case ISL_FORMAT_R64G64B64A64_PASSTHRU:
return ISL_FORMAT_R32G32B32A32_FLOAT;
default:
upload_format_size(uint32_t upload_format)
{
switch (upload_format) {
+ case ISL_FORMAT_R32_FLOAT:
+
+ /* downsized_format has returned this one in order to flag that we are
+ * performing a second upload which we want to have filled with
+ * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4
+ * vertex shader input variables.
+ */
+
+ return 0;
case ISL_FORMAT_R32G32_FLOAT:
return 2;
case ISL_FORMAT_R32G32B32A32_FLOAT:
static void
genX(emit_vertices)(struct brw_context *brw)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint32_t *dw;
brw_prepare_vertices(brw);
} else {
brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
}
+#endif
- /* Normally we don't need an element for the SGVS attribute because the
- * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
- * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
- * we're using draw parameters then we need an element for the those
- * values. Additionally if there is an edge flag element then the SGVS
- * can't be inserted past that so we need a dummy element to ensure that
- * the edge flag is the last one.
- */
- const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance ||
- ((vs_prog_data->uses_instanceid ||
- vs_prog_data->uses_vertexid)
- && uses_edge_flag));
-#else
const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_vertexid);
-#endif
+
unsigned nr_elements =
brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
- if (uploads_needed(format) > 1)
+ if (uploads_needed(format, input->is_dual_slot) > 1)
nr_elements++;
}
#endif
* vertex element may poke over the end of the buffer by 2 bytes.
*/
const unsigned padding =
- (GEN_GEN <= 7 && !GEN_IS_HASWELL && !brw->is_baytrail) * 2;
+ (GEN_GEN <= 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail) * 2;
const unsigned end = buffer->offset + buffer->size + padding;
dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
buffer->offset,
uint32_t comp1 = VFCOMP_STORE_SRC;
uint32_t comp2 = VFCOMP_STORE_SRC;
uint32_t comp3 = VFCOMP_STORE_SRC;
- const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;
+ const unsigned num_uploads = GEN_GEN < 8 ?
+ uploads_needed(format, input->is_dual_slot) : 1;
#if GEN_GEN >= 8
/* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
#endif
ib.IndexFormat = brw_get_index_type(index_buffer->index_size);
- ib.BufferStartingAddress = ro_bo(brw->ib.bo, 0);
+
+ /* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses. If you happen
+ * to have two index buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions. To work
+ * around this problem, we restrict index buffers to the low 32 bits of
+ * the address space.
+ */
+ ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0);
#if GEN_GEN >= 8
ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
ib.BufferSize = brw->ib.size;
/* _NEW_POINT */
const struct gl_point_attrib *point = &ctx->Point;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
+
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
*point_sprite_enables = 0;
- /* BRW_NEW_FRAGMENT_PROGRAM
- *
- * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
- * the full vertex header. Otherwise, we can program the SF to start
- * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
- * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
- * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
- */
-
- bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
- (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+ int first_slot =
+ brw_compute_first_urb_slot_required(fp->info.inputs_read,
+ &brw->vue_map_geom_out);
- *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+ /* Each URB offset packs two varying slots */
+ assert(first_slot % 2 == 0);
+ *urb_entry_read_offset = first_slot / 2;
/* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
* description of dw10 Point Sprite Texture Coordinate Enable:
clip.GuardbandClipTestEnable = true;
clip.ClipperViewportStatePointer =
- ro_bo(brw->batch.bo, brw->clip.vp_offset);
+ ro_bo(brw->batch.state.bo, brw->clip.vp_offset);
clip.ScreenSpaceViewportXMin = -1;
clip.ScreenSpaceViewportXMax = 1;
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION
* domain.
*/
- sf.SetupViewportStateOffset = ro_bo(brw->batch.bo, brw->sf.vp_offset);
+ sf.SetupViewportStateOffset =
+ ro_bo(brw->batch.state.bo, brw->sf.vp_offset);
sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
/* _NEW_LINE */
#if GEN_GEN == 8
- if (brw->is_cherryview)
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+ if (devinfo->is_cherryview)
sf.CHVLineWidth = brw_get_line_width(brw);
else
sf.LineWidth = brw_get_line_width(brw);
sf.SmoothPointEnable = true;
#endif
+#if GEN_GEN == 10
+ /* _NEW_BUFFERS
+ * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+ */
+ const bool multisampled_fbo =
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ if (multisampled_fbo)
+ sf.SmoothPointEnable = false;
+#endif
+
#if GEN_IS_G4X || GEN_GEN >= 5
sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
#endif
_NEW_POINT |
_NEW_PROGRAM |
(GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
- (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
+ (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) |
+ (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_VUE_MAP_GEOM_OUT |
(GEN_GEN <= 5 ? BRW_NEW_BATCH |
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
- const struct gl_program *fp = brw->fragment_program;
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
unsigned i;
/* _NEW_BUFFERS */
/* _NEW_COLOR */
if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
- (ctx->Color.ColorMask[i][0] ||
- ctx->Color.ColorMask[i][1] ||
- ctx->Color.ColorMask[i][2] ||
- ctx->Color.ColorMask[i][3])) {
+ GET_COLORMASK(ctx->Color.ColorMask, i)) {
return true;
}
}
if (stage_state->sampler_count)
wm.SamplerStatePointer =
- ro_bo(brw->batch.bo, stage_state->sampler_offset);
+ ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
#if GEN_GEN == 5
if (wm_prog_data->prog_offset_2)
wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
#endif
if (wm_prog_data->base.total_scratch) {
- wm.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
+ wm.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
wm.PerThreadScratchSpace =
ffs(stage_state->per_thread_scratch) - 11;
}
/* ---------------------------------------------------------------------- */
+/* We restrict scratch buffers to the bottom 32 bits of the address space
+ * by using rw_32_bo().
+ *
+ * General State Base Address is a bit broken. If the address + size as
+ * seen by STATE_BASE_ADDRESS overflows 48 bits, the GPU appears to treat
+ * all accesses to the buffer as being out of bounds and returns zero.
+ */
+
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
pkt.SamplerCount = \
pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
\
if (stage_prog_data->total_scratch) { \
- pkt.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0); \
+ pkt.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); \
pkt.PerThreadScratchSpace = \
ffs(stage_state->per_thread_scratch) - 11; \
} \
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
+ assert(GEN_GEN < 11 ||
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
#if GEN_GEN == 6
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
vs.StatisticsEnable = false;
vs.SamplerStatePointer =
- ro_bo(brw->batch.bo, stage_state->sampler_offset);
+ ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
#endif
#if GEN_GEN == 5
#elif GEN_GEN >= 8
/* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
* The hardware will take the intersection of the drawing rectangle,
- * scissor rectangle, and the viewport extents. We don't need to be
- * smart, and can therefore just program the viewport extents.
+ * scissor rectangle, and the viewport extents. However, emitting
+ * 3DSTATE_DRAWING_RECTANGLE is expensive since it requires a full
+ * pipeline stall so we're better off just being a little more clever
+ * with our viewport so we can emit it once at context creation time.
*/
+ const float viewport_Xmin = MAX2(ctx->ViewportArray[i].X, 0);
+ const float viewport_Ymin = MAX2(ctx->ViewportArray[i].Y, 0);
const float viewport_Xmax =
- ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width;
+ MIN2(ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width, fb_width);
const float viewport_Ymax =
- ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height;
+ MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height);
if (render_to_fbo) {
- sfv.XMinViewPort = ctx->ViewportArray[i].X;
+ sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = ctx->ViewportArray[i].Y;
+ sfv.YMinViewPort = viewport_Ymin;
sfv.YMaxViewPort = viewport_Ymax - 1;
} else {
- sfv.XMinViewPort = ctx->ViewportArray[i].X;
+ sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
sfv.YMinViewPort = fb_height - viewport_Ymax;
- sfv.YMaxViewPort = fb_height - ctx->ViewportArray[i].Y - 1;
+ sfv.YMaxViewPort = fb_height - viewport_Ymin - 1;
}
#endif
UNUSED struct gl_context *ctx = &brw->ctx;
UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->gs.base;
+ const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY];
/* BRW_NEW_GEOMETRY_PROGRAM */
- bool active = GEN_GEN >= 6 && brw->geometry_program;
+ bool active = GEN_GEN >= 6 && gs_prog;
/* BRW_NEW_GS_PROG_DATA */
struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
- if (brw->gt == 2 && brw->gs.enabled != active)
+ if (devinfo->gt == 2 && brw->gs.enabled != active)
gen7_emit_cs_stall_flush(brw);
#endif
#if GEN_GEN < 7
gs.SOStatisticsEnable = true;
- if (brw->geometry_program->info.has_transform_feedback_varyings)
+ if (gs_prog->info.has_transform_feedback_varyings)
gs.SVBIPayloadEnable = true;
/* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
_mesa_enum_to_string(rb_type));
if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
entry->LogicOpEnable = true;
- entry->LogicOpFunction =
- intel_translate_logic_op(ctx->Color.LogicOp);
+ entry->LogicOpFunction = ctx->Color._LogicOp;
}
} else if (blend_enabled && !ctx->Color._AdvancedBlendMode
&& (GEN_GEN <= 5 || !integer)) {
entry.PostBlendColorClampEnable = true;
entry.ColorClampRange = COLORCLAMP_RTFORMAT;
- entry.WriteDisableRed = !ctx->Color.ColorMask[i][0];
- entry.WriteDisableGreen = !ctx->Color.ColorMask[i][1];
- entry.WriteDisableBlue = !ctx->Color.ColorMask[i][2];
- entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];
+ entry.WriteDisableRed = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 0);
+ entry.WriteDisableGreen = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 1);
+ entry.WriteDisableBlue = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 2);
+ entry.WriteDisableAlpha = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 3);
#if GEN_GEN >= 8
GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
static void
genX(upload_push_constant_packets)(struct brw_context *brw)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
&brw->wm.base,
};
- if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail &&
+ if (GEN_GEN == 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail &&
stage_states[MESA_SHADER_VERTEX]->push_constants_dirty)
gen7_emit_vs_workaround_flush(brw);
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
struct brw_stage_state *stage_state = stage_states[stage];
- struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
+ UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
if (!stage_state->push_constants_dirty)
continue;
const struct gl_uniform_block *block =
prog->sh.UniformBlocks[range->block];
- const struct gl_uniform_buffer_binding *binding =
+ const struct gl_buffer_binding *binding =
&ctx->UniformBufferBindings[block->Binding];
if (binding->BufferObject == ctx->Shared->NullBufferObj) {
}
stage_state->push_constants_dirty = false;
+ brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
}
-
- brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
}
const struct brw_tracked_state genX(push_constant_packets) = {
{
struct brw_stage_state *stage_state = &brw->vs.base;
- /* _BRW_NEW_VERTEX_PROGRAM */
- const struct brw_program *vp = brw_program_const(brw->vertex_program);
+ /* BRW_NEW_VERTEX_PROGRAM */
+ const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
/* BRW_NEW_VS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
- gen6_upload_push_constants(brw, &vp->program, prog_data, stage_state);
+ gen6_upload_push_constants(brw, vp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(vs_push_constants) = {
struct brw_stage_state *stage_state = &brw->gs.base;
/* BRW_NEW_GEOMETRY_PROGRAM */
- const struct brw_program *gp = brw_program_const(brw->geometry_program);
+ const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
- if (gp) {
- /* BRW_NEW_GS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
+ /* BRW_NEW_GS_PROG_DATA */
+ struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
- gen6_upload_push_constants(brw, &gp->program, prog_data, stage_state);
- }
+ gen6_upload_push_constants(brw, gp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(gs_push_constants) = {
{
struct brw_stage_state *stage_state = &brw->wm.base;
/* BRW_NEW_FRAGMENT_PROGRAM */
- const struct brw_program *fp = brw_program_const(brw->fragment_program);
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
/* BRW_NEW_FS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
-
- gen6_upload_push_constants(brw, &fp->program, prog_data, stage_state);
+ gen6_upload_push_constants(brw, fp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(wm_push_constants) = {
static const struct brw_tracked_state genX(multisample_state) = {
.dirty = {
- .mesa = _NEW_MULTISAMPLE,
+ .mesa = _NEW_MULTISAMPLE |
+ (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
BRW_NEW_NUM_SAMPLES,
cc.StatisticsEnable = brw->stats_wm;
- cc.CCViewportStatePointer = ro_bo(brw->batch.bo, brw->cc.vp_offset);
+ cc.CCViewportStatePointer =
+ ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
#else
/* _NEW_COLOR */
cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
genX(upload_sbe)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
#if GEN_GEN >= 9
/* prepare the active component dwords */
- int input_index = 0;
- for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
- if (!(brw->fragment_program->info.inputs_read &
- BITFIELD64_BIT(attr))) {
- continue;
- }
-
- assert(input_index < 32);
-
- sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
- ++input_index;
- }
+ for (int i = 0; i < 32; i++)
+ sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
#endif
}
ps.SampleMask = genX(determine_sample_mask(brw));
#endif
- /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
- * it implicitly scales for different GT levels (which have some # of
- * PSDs).
+ /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
+ * before Gen11 and 128 on Gen11+. On Gen11+, a programmed value of k
+ * implies 2(k+1) threads. It implicitly scales for different GT
+ * levels (which have some # of PSDs).
*
- * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
+ * In Gen8 the format is U8-2 whereas in Gen9+ it is U9-1.
*/
#if GEN_GEN >= 9
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
else
ps.PositionXYOffsetSelect = POSOFFSET_NONE;
- ps.RenderTargetFastClearEnable = brw->wm.fast_clear_op;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
ps.DispatchGRFStartRegisterForConstantSetupData0 =
if (prog_data->base.total_scratch) {
ps.ScratchSpaceBasePointer =
- rw_bo(stage_state->scratch_bo,
- ffs(stage_state->per_thread_scratch) - 11);
+ rw_32_bo(stage_state->scratch_bo,
+ ffs(stage_state->per_thread_scratch) - 11);
}
}
}
if (!tes_prog_data) {
brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
} else {
+ assert(GEN_GEN < 11 ||
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
+
brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
upload_te_state(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- bool active = brw->tess_eval_program;
+ bool active = brw->programs[MESA_SHADER_TESS_EVAL];
/* BRW_NEW_TES_PROG_DATA */
const struct brw_tes_prog_data *tes_prog_data =
{
struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
- const struct brw_program *tep = brw_program_const(brw->tess_eval_program);
+ const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
- if (tep) {
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
- gen6_upload_push_constants(brw, &tep->program, prog_data, stage_state);
- }
+ /* BRW_NEW_TES_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
+ gen6_upload_push_constants(brw, tep, prog_data, stage_state);
}
static const struct brw_tracked_state genX(tes_push_constants) = {
{
struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
- const struct brw_program *tcp = brw_program_const(brw->tess_ctrl_program);
- bool active = brw->tess_eval_program;
+ const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
- if (active) {
- /* BRW_NEW_TCS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
+ /* BRW_NEW_TCS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
- gen6_upload_push_constants(brw, &tcp->program, prog_data, stage_state);
- }
+ gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(tcs_push_constants) = {
/* ---------------------------------------------------------------------- */
#if GEN_GEN >= 7
+static void
+genX(upload_cs_push_constants)(struct brw_context *brw)
+{ /* Emit hook of the cs_push_constants state atom. When a compute program
+ * is bound, calls _mesa_shader_write_subroutine_indices() for the
+ * compute stage and then brw_upload_cs_push_constants() with the
+ * program, its CS prog data and the CS stage state. Does nothing when
+ * no compute program is bound.
+ */
+ struct brw_stage_state *stage_state = &brw->cs.base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM
+ *
+ * Currently bound compute program; may be NULL, hence the check below.
+ */
+ const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];
+
+ if (cp) {
+ /* BRW_NEW_CS_PROG_DATA
+ *
+ * The stage's prog_data, obtained through brw_cs_prog_data() as the
+ * compute-specific struct brw_cs_prog_data.
+ */
+ struct brw_cs_prog_data *cs_prog_data =
+ brw_cs_prog_data(brw->cs.base.prog_data);
+
+ _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+ brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state);
+ }
+}
+
+const struct brw_tracked_state genX(cs_push_constants) = {
+ .dirty = { /* Core-Mesa (.mesa) and driver (.brw) dirty bits tied to this
+ * atom. The driver bits include the two that the emit hook
+ * annotates its inputs with (BRW_NEW_COMPUTE_PROGRAM and
+ * BRW_NEW_CS_PROG_DATA).
+ * NOTE(review): presumably the atom is re-emitted when any
+ * of these bits is flagged; confirm in the state upload loop.
+ */
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = genX(upload_cs_push_constants), /* function that performs the upload */
+};
+
+/**
+ * Creates a new CS constant buffer reflecting the current CS program's
+ * constants, if needed by the CS program.
+ *
+ * Emit hook of the cs_pull_constants state atom. Calls
+ * _mesa_shader_write_subroutine_indices() for the compute stage and then
+ * brw_upload_pull_constants() with the compute program, the CS stage state
+ * and the CS prog data.
+ *
+ * NOTE(review): unlike genX(upload_cs_push_constants), this function does
+ * not check the compute program pointer for NULL before using it; confirm
+ * that the atom only runs while a compute program is bound.
+ */
+static void
+genX(upload_cs_pull_constants)(struct brw_context *brw)
+{
+ struct brw_stage_state *stage_state = &brw->cs.base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM
+ *
+ * The bound compute program, cast to struct brw_program; the
+ * gl_program is recovered below as &cp->program.
+ */
+ struct brw_program *cp =
+ (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
+
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data;
+
+ _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+ /* _NEW_PROGRAM_CONSTANTS
+ *
+ * NOTE(review): BRW_NEW_SURFACES is presumably the driver flag the helper
+ * raises when it changes surface state; confirm in the helper.
+ */
+ brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program,
+ stage_state, prog_data);
+}
+
+const struct brw_tracked_state genX(cs_pull_constants) = {
+ .dirty = { /* Core-Mesa (.mesa) and driver (.brw) dirty bits tied to this
+ * atom; the same set as used by genX(cs_push_constants).
+ * NOTE(review): presumably the atom is re-emitted when any
+ * of these bits is flagged; confirm in the state upload loop.
+ */
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = genX(upload_cs_pull_constants), /* function that performs the upload */
+};
+
static void
genX(upload_cs_state)(struct brw_context *brw)
{
uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ /* The MEDIA_VFE_STATE documentation for Gen8+ says:
+ *
+ * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+ * the only bits that are changed are scoreboard related: Scoreboard
+ * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+ * these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
+ *
+ * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
+ * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+
brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
- uint32_t bo_offset;
+ uint32_t per_thread_scratch_value;
if (GEN_GEN >= 8) {
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
- bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+ per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
} else if (GEN_IS_HASWELL) {
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
*/
- bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+ per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
} else {
/* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
*/
- bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+ per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
}
- vfe.ScratchSpaceBasePointer =
- rw_bo(stage_state->scratch_bo, bo_offset);
+ vfe.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
+ vfe.PerThreadScratchSpace = per_thread_scratch_value;
}
+ /* If brw->screen->subslice_total is greater than one, then
+ * devinfo->max_cs_threads stores the number of threads per sub-slice;
+ * thus we need to multiply that number by subslices to get the
+ * actual maximum number of threads. The -1 is because the HW
+ * has a bias of 1 (it would not make sense to say the maximum number
+ * of threads is 0).
+ */
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
+#if GEN_GEN < 11
vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp;
+#endif
#if GEN_GEN < 9
vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
#endif
const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
.KernelStartPointer = brw->cs.base.prog_offset,
.SamplerStatePointer = stage_state->sampler_offset,
- .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
+ .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
.BindingTablePointer = stage_state->bind_bo_offset,
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
static void
genX(upload_raster)(struct brw_context *brw)
{
- struct gl_context *ctx = &brw->ctx;
+ const struct gl_context *ctx = &brw->ctx;
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
/* _NEW_POLYGON */
- struct gl_polygon_attrib *polygon = &ctx->Polygon;
+ const struct gl_polygon_attrib *polygon = &ctx->Polygon;
/* _NEW_POINT */
- struct gl_point_attrib *point = &ctx->Point;
+ const struct gl_point_attrib *point = &ctx->Point;
brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
if (brw->polygon_front_bit == render_to_fbo)
raster.CullMode = CULLMODE_NONE;
}
- point->SmoothFlag = raster.SmoothPointEnable;
+ raster.SmoothPointEnable = point->SmoothFlag;
raster.DXMultisampleRasterizationEnable =
_mesa_is_multisample_enabled(ctx);
/* _NEW_LINE */
raster.AntialiasingEnable = ctx->Line.SmoothFlag;
+#if GEN_GEN == 10
+ /* _NEW_BUFFERS
+ * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+ */
+ const bool multisampled_fbo =
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ if (multisampled_fbo)
+ raster.AntialiasingEnable = false;
+#endif
+
/* _NEW_SCISSOR */
raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
texObj->StencilSampling,
&border_color_offset);
}
- if (GEN_GEN < 6) {
+#if GEN_GEN < 6
samp_st.BorderColorPointer =
- brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
- brw->batch.bo, border_color_offset, 0);
- } else {
+ ro_bo(brw->batch.state.bo, border_color_offset);
+#else
samp_st.BorderColorPointer = border_color_offset;
- }
+#endif
#if GEN_GEN >= 8
samp_st.LODPreClampMode = CLAMP_MODE_OGL;
genX(upload_fs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+ struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];
genX(upload_sampler_state_table)(brw, fs, &brw->wm.base);
}
genX(upload_vs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+ struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];
genX(upload_sampler_state_table)(brw, vs, &brw->vs.base);
}
genX(upload_gs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_GEOMETRY_PROGRAM */
- struct gl_program *gs = (struct gl_program *) brw->geometry_program;
+ struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];
if (!gs)
return;
genX(upload_tcs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
+ struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
if (!tcs)
return;
genX(upload_tes_samplers)(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
+ struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];
if (!tes)
return;
genX(upload_cs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = (struct gl_program *) brw->compute_program;
+ struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];
if (!cs)
return;
/* Command packets:
*/
- &brw_invariant_state,
-
&brw_binding_table_pointers,
&genX(blend_constant_color),
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
&brw_tcs_pull_constants,
&brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
&brw_tes_pull_constants,
&brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
&brw_gs_pull_constants,
&brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
&brw_tcs_pull_constants,
&brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
&brw_tes_pull_constants,
&brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
&brw_gs_pull_constants,
&brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
{
&gen7_l3_state,
&brw_cs_image_surfaces,
- &gen7_cs_push_constants,
- &brw_cs_pull_constants,
+ &genX(cs_push_constants),
+ &genX(cs_pull_constants),
&brw_cs_ubo_surfaces,
- &brw_cs_abo_surfaces,
&brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&genX(cs_samplers),