#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
+#include "main/state.h"
#include "brw_context.h"
#if GEN_GEN == 6
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/glformats.h"
+#include "main/samplerobj.h"
#include "main/shaderapi.h"
#include "main/stencil.h"
#include "main/transformfeedback.h"
#include "main/varray.h"
#include "main/viewport.h"
+#include "util/half_float.h"
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
};
}
+static inline struct brw_address
+instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
+{
+ return (struct brw_address) {
+ .bo = bo,
+ .offset = offset,
+ .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+ .write_domain = 0,
+ };
+}
+
static inline struct brw_address
vertex_bo(struct brw_bo *bo, uint32_t offset)
{
};
}
+#if GEN_GEN == 4
+static inline struct brw_address
+KSP(struct brw_context *brw, uint32_t offset)
+{
+ return instruction_bo(brw->cache.bo, offset);
+}
+
+static inline struct brw_address
+KSP_ro(struct brw_context *brw, uint32_t offset)
+{
+ return instruction_ro_bo(brw->cache.bo, offset);
+}
+#else
+static inline uint32_t
+KSP(struct brw_context *brw, uint32_t offset)
+{
+ return offset;
+}
+
+#define KSP_ro KSP
+
+#endif
+
#include "genxml/genX_pack.h"
#define _brw_cmd_length(cmd) cmd ## _length
#endif
#endif
-#if GEN_GEN == 9
+#if GEN_GEN == 10
+ .VertexBufferMOCS = CNL_MOCS_WB,
+#elif GEN_GEN == 9
.VertexBufferMOCS = SKL_MOCS_WB,
#elif GEN_GEN == 8
.VertexBufferMOCS = BDW_MOCS_WB,
}
UNUSED static int
-genX(uploads_needed)(uint32_t format)
+uploads_needed(uint32_t format)
{
if (!is_passthru_format(format))
return 1;
#if GEN_GEN >= 8
struct gl_context *ctx = &brw->ctx;
- bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL);
+ const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
unsigned vue = brw->vb.nr_enabled;
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
- if (genX(uploads_needed(format)) > 1)
+ if (uploads_needed(format) > 1)
nr_elements++;
}
#endif
* a VE loads from them.
*/
if (nr_elements == 0) {
- dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 1 + GENX(VERTEX_ELEMENT_STATE_length));
+ dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
+ 1 + GENX(VERTEX_ELEMENT_STATE_length));
struct GENX(VERTEX_ELEMENT_STATE) elem = {
.Valid = true,
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
uses_draw_params + vs_prog_data->uses_drawid;
if (nr_buffers) {
-#if GEN_GEN >= 6
- assert(nr_buffers <= 33);
-#else
- assert(nr_buffers <= 17);
-#endif
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
* half-float and 8 and 16-bit integer formats. This means that the
* vertex element may poke over the end of the buffer by 2 bytes.
*/
- unsigned padding =
+ const unsigned padding =
(GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
+ const unsigned end = buffer->offset + buffer->size + padding;
dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
buffer->offset,
- buffer->offset + buffer->size + padding,
+ end,
buffer->stride,
buffer->step_rate);
}
*/
#if GEN_GEN >= 6
assert(nr_elements <= 34);
- struct brw_vertex_element *gen6_edgeflag_input = NULL;
+ const struct brw_vertex_element *gen6_edgeflag_input = NULL;
#else
assert(nr_elements <= 18);
#endif
1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
- struct brw_vertex_element *input = brw->vb.enabled[i];
+ const struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
uint32_t comp0 = VFCOMP_STORE_SRC;
uint32_t comp1 = VFCOMP_STORE_SRC;
uint32_t comp2 = VFCOMP_STORE_SRC;
uint32_t comp3 = VFCOMP_STORE_SRC;
- unsigned num_uploads = 1;
+ const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;
#if GEN_GEN >= 8
/* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
}
#endif
-#if GEN_GEN < 8
- num_uploads = genX(uploads_needed(format));
-#endif
-
for (unsigned c = 0; c < num_uploads; c++) {
- uint32_t upload_format = GEN_GEN >= 8 ? format :
+ const uint32_t upload_format = GEN_GEN >= 8 ? format :
downsize_format_if_needed(format, c);
/* If we need more that one upload, the offset stride would be 128
* bits (16 bytes), as for previous uploads we are using the full
* entry. */
- unsigned int offset = input->offset + c * 16;
- int size = input->glarray->Size;
+ const unsigned offset = input->offset + c * 16;
- if (GEN_GEN < 8 && is_passthru_format(format))
- size = upload_format_size(upload_format);
+ const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
+ upload_format_size(upload_format) : input->glarray->Size;
switch (size) {
case 0: comp0 = VFCOMP_STORE_0;
#if GEN_GEN >= 6
if (gen6_edgeflag_input) {
- uint32_t format =
+ const uint32_t format =
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.emit = genX(emit_vertices),
};
+static void
+genX(emit_index_buffer)(struct brw_context *brw)
+{
+ const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+
+ if (index_buffer == NULL)
+ return;
+
+ brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
+#if GEN_GEN < 8 && !GEN_IS_HASWELL
+ ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
+#endif
+ ib.IndexFormat = brw_get_index_type(index_buffer->index_size);
+ ib.BufferStartingAddress = vertex_bo(brw->ib.bo, 0);
+#if GEN_GEN >= 8
+ ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+ ib.BufferSize = brw->ib.size;
+#else
+ ib.BufferEndingAddress = vertex_bo(brw->ib.bo, brw->ib.size - 1);
+#endif
+ }
+}
+
+static const struct brw_tracked_state genX(index_buffer) = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_INDEX_BUFFER,
+ },
+ .emit = genX(emit_index_buffer),
+};
+
#if GEN_IS_HASWELL || GEN_GEN >= 8
static void
genX(upload_cut_index)(struct brw_context *brw)
genX(get_attr_override)(&attribute,
&brw->vue_map_geom_out,
*urb_entry_read_offset, attr,
- brw->ctx.VertexProgram._TwoSideEnabled,
+ _mesa_vertex_program_two_side_enabled(ctx),
&max_source_attr);
}
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
-static void
-genX(upload_depth_stencil_state)(struct brw_context *brw)
+#if GEN_GEN >= 8
+typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
+#elif GEN_GEN >= 6
+typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;
+#endif
+
+static inline void
+set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds)
{
struct gl_context *ctx = &brw->ctx;
struct gl_stencil_attrib *stencil = &ctx->Stencil;
const int b = stencil->_BackFace;
+ if (depth->Test && depth_irb) {
+ ds->DepthTestEnable = true;
+ ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
+ ds->DepthTestFunction = intel_translate_compare_func(depth->Func);
+ }
+
+ if (brw->stencil_enabled) {
+ ds->StencilTestEnable = true;
+ ds->StencilWriteMask = stencil->WriteMask[0] & 0xff;
+ ds->StencilTestMask = stencil->ValueMask[0] & 0xff;
+
+ ds->StencilTestFunction =
+ intel_translate_compare_func(stencil->Function[0]);
+ ds->StencilFailOp =
+ intel_translate_stencil_op(stencil->FailFunc[0]);
+ ds->StencilPassDepthPassOp =
+ intel_translate_stencil_op(stencil->ZPassFunc[0]);
+ ds->StencilPassDepthFailOp =
+ intel_translate_stencil_op(stencil->ZFailFunc[0]);
+
+ ds->StencilBufferWriteEnable = brw->stencil_write_enabled;
+
+ if (brw->stencil_two_sided) {
+ ds->DoubleSidedStencilEnable = true;
+ ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
+ ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
+
+ ds->BackfaceStencilTestFunction =
+ intel_translate_compare_func(stencil->Function[b]);
+ ds->BackfaceStencilFailOp =
+ intel_translate_stencil_op(stencil->FailFunc[b]);
+ ds->BackfaceStencilPassDepthPassOp =
+ intel_translate_stencil_op(stencil->ZPassFunc[b]);
+ ds->BackfaceStencilPassDepthFailOp =
+ intel_translate_stencil_op(stencil->ZFailFunc[b]);
+ }
+
+#if GEN_GEN <= 5 || GEN_GEN >= 9
+ ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
+ ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
+#endif
+ }
+}
+
+#if GEN_GEN >= 6
+static void
+genX(upload_depth_stencil_state)(struct brw_context *brw)
+{
#if GEN_GEN >= 8
brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
+ set_depth_stencil_bits(brw, &wmds);
+ }
#else
uint32_t ds_offset;
- brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
-#endif
- if (depth->Test && depth_irb) {
- wmds.DepthTestEnable = true;
- wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
- wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
- }
-
- if (stencil->_Enabled) {
- wmds.StencilTestEnable = true;
- wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
- wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;
-
- wmds.StencilTestFunction =
- intel_translate_compare_func(stencil->Function[0]);
- wmds.StencilFailOp =
- intel_translate_stencil_op(stencil->FailFunc[0]);
- wmds.StencilPassDepthPassOp =
- intel_translate_stencil_op(stencil->ZPassFunc[0]);
- wmds.StencilPassDepthFailOp =
- intel_translate_stencil_op(stencil->ZFailFunc[0]);
-
- wmds.StencilBufferWriteEnable = stencil->_WriteEnabled;
-
- if (stencil->_TestTwoSide) {
- wmds.DoubleSidedStencilEnable = true;
- wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
- wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
-
- wmds.BackfaceStencilTestFunction =
- intel_translate_compare_func(stencil->Function[b]);
- wmds.BackfaceStencilFailOp =
- intel_translate_stencil_op(stencil->FailFunc[b]);
- wmds.BackfaceStencilPassDepthPassOp =
- intel_translate_stencil_op(stencil->ZPassFunc[b]);
- wmds.BackfaceStencilPassDepthFailOp =
- intel_translate_stencil_op(stencil->ZFailFunc[b]);
- }
-
-#if GEN_GEN >= 9
- wmds.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
- wmds.BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
-#endif
- }
+ brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) {
+ set_depth_stencil_bits(brw, &ds);
}
+ /* Now upload a pointer to the indirect state */
#if GEN_GEN == 6
brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
ptr.DEPTH_STENCIL_STATEChange = true;
}
-#elif GEN_GEN == 7
+#else
brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
}
#endif
+#endif
}
static const struct brw_tracked_state genX(depth_stencil_state) = {
#endif
#if GEN_GEN == 7
- clip.FrontWinding = ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb);
+ clip.FrontWinding = brw->polygon_front_bit == _mesa_is_user_fbo(fb);
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
clip.ClipMode = CLIPMODE_NORMAL;
}
- clip.ClipEnable = brw->primitive != _3DPRIM_RECTLIST;
+ clip.ClipEnable = true;
/* _NEW_POLYGON,
* BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
static void
genX(upload_sf)(struct brw_context *brw)
{
#if GEN_GEN <= 7
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ UNUSED const bool multisampled_fbo =
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1;
#endif
+#if GEN_GEN < 6
+ const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data;
+
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+
+ brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
+ sf.KernelStartPointer = KSP_ro(brw, brw->sf.prog_offset);
+ sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+ sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
+ sf.DispatchGRFStartRegisterForURBData = 3;
+ sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
+ sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
+ sf.NumberofURBEntries = brw->urb.nr_sf_entries;
+ sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
+
+ /* STATE_PREFETCH command description describes this state as being
+ * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
+ * domain.
+ */
+ sf.SetupViewportStateOffset =
+ instruction_ro_bo(brw->batch.bo, brw->sf.vp_offset);
+
+ sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+
+ /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */
+ /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */
+
+ sf.MaximumNumberofThreads =
+ MIN2(GEN_GEN == 5 ? 48 : 24, brw->urb.nr_sf_entries) - 1;
+
+ sf.SpritePointEnable = ctx->Point.PointSprite;
+
+ sf.DestinationOriginHorizontalBias = 0.5;
+ sf.DestinationOriginVerticalBias = 0.5;
+#else
brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
sf.StatisticsEnable = true;
- sf.ViewportTransformEnable = brw->sf.viewport_transform_enable;
+#endif
+ sf.ViewportTransformEnable = true;
#if GEN_GEN == 7
/* _NEW_BUFFERS */
#if GEN_GEN <= 7
/* _NEW_POLYGON */
- sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
+ sf.FrontWinding = brw->polygon_front_bit == render_to_fbo;
+#if GEN_GEN >= 6
sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
unreachable("not reached");
}
+ if (multisampled_fbo && ctx->Multisample.Enabled)
+ sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
+
+ sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+ sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+ sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+#endif
+
sf.ScissorRectangleEnable = true;
if (ctx->Polygon.CullFlag) {
sf.LineStippleEnable = ctx->Line.StippleFlag;
#endif
- if (multisampled_fbo && ctx->Multisample.Enabled)
- sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
-
- sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
- sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
- sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
#endif
/* _NEW_LINE */
sf.SmoothPointEnable = true;
#endif
+#if GEN_IS_G4X || GEN_GEN >= 5
sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
+#endif
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
.dirty = {
.mesa = _NEW_LIGHT |
_NEW_LINE |
- _NEW_MULTISAMPLE |
_NEW_POINT |
_NEW_PROGRAM |
+ (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
(GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
.brw = BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
BRW_NEW_VUE_MAP_GEOM_OUT |
- (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
+ (GEN_GEN <= 5 ? BRW_NEW_BATCH |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_SF_PROG_DATA |
+ BRW_NEW_SF_VP |
+ BRW_NEW_URB_FENCE
+ : 0) |
+ (GEN_GEN >= 6 ? BRW_NEW_CONTEXT : 0) |
+ (GEN_GEN >= 6 && GEN_GEN <= 7 ?
+ BRW_NEW_GS_PROG_DATA |
BRW_NEW_PRIMITIVE |
BRW_NEW_TES_PROG_DATA
: 0) |
},
.emit = genX(upload_sf),
};
-#endif
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
- pkt.KernelStartPointer = stage_state->prog_offset; \
+ pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
pkt.SamplerCount = \
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
pkt.BindingTableEntryCount = \
pkt.StatisticsEnable = true; \
pkt.Enable = true;
-#if GEN_GEN >= 6
static void
genX(upload_vs_state)(struct brw_context *brw)
{
+ UNUSED struct gl_context *ctx = &brw->ctx;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
- const struct brw_stage_state *stage_state = &brw->vs.base;
+ struct brw_stage_state *stage_state = &brw->vs.base;
/* BRW_NEW_VS_PROG_DATA */
const struct brw_vue_prog_data *vue_prog_data =
if (GEN_GEN == 7 && devinfo->is_ivybridge)
gen7_emit_vs_workaround_flush(brw);
+#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
+#else
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+ brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
+#endif
INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
+#if GEN_GEN < 6
+ vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
+ vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
+ vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
+
+ vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
+ vs.URBEntryAllocationSize = brw->urb.vsize - 1;
+
+ vs.MaximumNumberofThreads =
+ CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
+
+ vs.StatisticsEnable = false;
+ vs.SamplerStatePointer =
+ instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
+#endif
+
+#if GEN_GEN == 5
+ /* Force single program flow on Ironlake. We cannot reliably get
+ * all applications working without it. See:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=29172
+ *
+ * The most notable and reliably failing application is the Humus
+ * demo "CelShading"
+ */
+ vs.SingleProgramFlow = true;
+ vs.SamplerCount = 0; /* hardware requirement */
+#endif
+
#if GEN_GEN >= 8
vs.SIMD8DispatchEnable =
vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
BRW_NEW_VS_PROG_DATA |
- (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0),
+ (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
+ (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_SAMPLER_STATE_TABLE |
+ BRW_NEW_URB_FENCE
+ : 0),
},
.emit = genX(upload_vs_state),
};
+
+/* ---------------------------------------------------------------------- */
+
+static void
+genX(upload_cc_viewport)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ /* BRW_NEW_VIEWPORT_COUNT */
+ const unsigned viewport_count = brw->clip.viewport_count;
+
+ struct GENX(CC_VIEWPORT) ccv;
+ uint32_t cc_vp_offset;
+ uint32_t *cc_map =
+ brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count,
+ 32, &cc_vp_offset);
+
+ for (unsigned i = 0; i < viewport_count; i++) {
+ /* _NEW_VIEWPORT | _NEW_TRANSFORM */
+ const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i];
+ if (ctx->Transform.DepthClamp) {
+ ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
+ ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
+ } else {
+ ccv.MinimumDepth = 0.0;
+ ccv.MaximumDepth = 1.0;
+ }
+ GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv);
+ cc_map += GENX(CC_VIEWPORT_length);
+ }
+
+#if GEN_GEN >= 7
+ brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
+ ptr.CCViewportPointer = cc_vp_offset;
+ }
+#elif GEN_GEN == 6
+ brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
+ vp.CCViewportStateChange = 1;
+ vp.PointertoCC_VIEWPORT = cc_vp_offset;
+ }
+#else
+ brw->cc.vp_offset = cc_vp_offset;
+ ctx->NewDriverState |= BRW_NEW_CC_VP;
#endif
+}
+
+const struct brw_tracked_state genX(cc_vp) = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM |
+ _NEW_VIEWPORT,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_VIEWPORT_COUNT,
+ },
+ .emit = genX(upload_cc_viewport)
+};
/* ---------------------------------------------------------------------- */
+static inline void
+set_scissor_bits(const struct gl_context *ctx, int i,
+ bool render_to_fbo, unsigned fb_width, unsigned fb_height,
+ struct GENX(SCISSOR_RECT) *sc)
+{
+ int bbox[4];
+
+ bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
+ bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
+ bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
+ bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
+ _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
+
+ if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
+ /* If the scissor was out of bounds and got clamped to 0 width/height
+ * at the bounds, the subtraction of 1 from maximums could produce a
+ * negative number and thus not clip anything. Instead, just provide
+ * a min > max scissor inside the bounds, which produces the expected
+ * no rendering.
+ */
+ sc->ScissorRectangleXMin = 1;
+ sc->ScissorRectangleXMax = 0;
+ sc->ScissorRectangleYMin = 1;
+ sc->ScissorRectangleYMax = 0;
+ } else if (render_to_fbo) {
+ /* texmemory: Y=0=bottom */
+ sc->ScissorRectangleXMin = bbox[0];
+ sc->ScissorRectangleXMax = bbox[1] - 1;
+ sc->ScissorRectangleYMin = bbox[2];
+ sc->ScissorRectangleYMax = bbox[3] - 1;
+ } else {
+ /* memory: Y=0=top */
+ sc->ScissorRectangleXMin = bbox[0];
+ sc->ScissorRectangleXMax = bbox[1] - 1;
+ sc->ScissorRectangleYMin = fb_height - bbox[3];
+ sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
+ }
+}
+
#if GEN_GEN >= 6
static void
-brw_calculate_guardband_size(const struct gen_device_info *devinfo,
- uint32_t fb_width, uint32_t fb_height,
+genX(upload_scissor_state)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ struct GENX(SCISSOR_RECT) scissor;
+ uint32_t scissor_state_offset;
+ const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
+ const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
+ uint32_t *scissor_map;
+
+ /* BRW_NEW_VIEWPORT_COUNT */
+ const unsigned viewport_count = brw->clip.viewport_count;
+
+ scissor_map = brw_state_batch(
+ brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
+ 32, &scissor_state_offset);
+
+ /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
+
+ /* The scissor only needs to handle the intersection of drawable and
+ * scissor rect. Clipping to the boundaries of static shared buffers
+ * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+ *
+ * Note that the hardware's coordinates are inclusive, while Mesa's min is
+ * inclusive but max is exclusive.
+ */
+ for (unsigned i = 0; i < viewport_count; i++) {
+ set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, &scissor);
+ GENX(SCISSOR_RECT_pack)(
+ NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
+ }
+
+ brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
+ ptr.ScissorRectPointer = scissor_state_offset;
+ }
+}
+
+static const struct brw_tracked_state genX(scissor_state) = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS |
+ _NEW_SCISSOR |
+ _NEW_VIEWPORT,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_VIEWPORT_COUNT,
+ },
+ .emit = genX(upload_scissor_state),
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+static void
+brw_calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
float m00, float m11, float m30, float m31,
float *xmin, float *xmax,
float *ymin, float *ymax)
*
* So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
*/
- const float gb_size = devinfo->gen >= 7 ? 16384.0f : 8192.0f;
+ const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
if (m00 != 0 && m11 != 0) {
/* First, we compute the screen-space render area */
{
struct gl_context *ctx = &brw->ctx;
float y_scale, y_bias;
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
/* BRW_NEW_VIEWPORT_COUNT */
const unsigned viewport_count = brw->clip.viewport_count;
#define clv sfv
struct GENX(SF_CLIP_VIEWPORT) sfv;
uint32_t sf_clip_vp_offset;
- uint32_t *sf_clip_map = brw_state_batch(brw, 16 * 4 * viewport_count,
- 64, &sf_clip_vp_offset);
+ uint32_t *sf_clip_map =
+ brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count,
+ 64, &sf_clip_vp_offset);
#else
struct GENX(SF_VIEWPORT) sfv;
struct GENX(CLIP_VIEWPORT) clv;
- uint32_t *sf_map = brw_state_batch(brw, 8 * 4 * viewport_count,
- 32, &brw->sf.vp_offset);
- uint32_t *clip_map = brw_state_batch(brw, 4 * 4 * viewport_count,
- 32, &brw->clip.vp_offset);
+ uint32_t sf_vp_offset, clip_vp_offset;
+ uint32_t *sf_map =
+ brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count,
+ 32, &sf_vp_offset);
+ uint32_t *clip_map =
+ brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count,
+ 32, &clip_vp_offset);
#endif
/* _NEW_BUFFERS */
sfv.ViewportMatrixElementm30 = translate[0],
sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias,
sfv.ViewportMatrixElementm32 = translate[2],
- brw_calculate_guardband_size(devinfo, fb_width, fb_height,
+ brw_calculate_guardband_size(fb_width, fb_height,
sfv.ViewportMatrixElementm00,
sfv.ViewportMatrixElementm11,
sfv.ViewportMatrixElementm30,
clv.YMinClipGuardband = gb_ymin;
clv.YMaxClipGuardband = gb_ymax;
-#if GEN_GEN >= 8
+#if GEN_GEN < 6
+ set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height,
+ &sfv.ScissorRectangle);
+#elif GEN_GEN >= 8
/* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
* The hardware will take the intersection of the drawing rectangle,
* scissor rectangle, and the viewport extents. We don't need to be
#if GEN_GEN >= 7
GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv);
- sf_clip_map += 16;
+ sf_clip_map += GENX(SF_CLIP_VIEWPORT_length);
#else
GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv);
GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv);
- sf_map += 8;
- clip_map += 4;
+ sf_map += GENX(SF_VIEWPORT_length);
+ clip_map += GENX(CLIP_VIEWPORT_length);
#endif
}
brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
ptr.SFClipViewportPointer = sf_clip_vp_offset;
}
+#elif GEN_GEN == 6
+ brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
+ vp.SFViewportStateChange = 1;
+ vp.CLIPViewportStateChange = 1;
+ vp.PointertoCLIP_VIEWPORT = clip_vp_offset;
+ vp.PointertoSF_VIEWPORT = sf_vp_offset;
+ }
#else
+ brw->sf.vp_offset = sf_vp_offset;
+ brw->clip.vp_offset = clip_vp_offset;
brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP;
#endif
}
static const struct brw_tracked_state genX(sf_clip_viewport) = {
.dirty = {
.mesa = _NEW_BUFFERS |
- _NEW_VIEWPORT,
+ _NEW_VIEWPORT |
+ (GEN_GEN <= 5 ? _NEW_SCISSOR : 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_VIEWPORT_COUNT,
},
.emit = genX(upload_sf_clip_viewport),
};
-#endif
/* ---------------------------------------------------------------------- */
brw_gs_prog_data(stage_prog_data);
#endif
-#if GEN_GEN < 7
+#if GEN_GEN == 6
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
if (active && stage_state->push_const_size != 0) {
cgs.Buffer0Valid = true;
gen7_emit_cs_stall_flush(brw);
#endif
- if (active) {
- brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+ brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+ if (active) {
INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
#if GEN_GEN >= 7
gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
#endif
- }
#if GEN_GEN < 7
- } else if (brw->ff_gs.prog_active) {
- /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
- * program. This function provides the needed 3DSTATE_GS for this.
- */
- upload_gs_state_for_tf(brw);
+ } else if (brw->ff_gs.prog_active) {
+ /* In gen6, transform feedback for the VS stage is done with an
+ * ad-hoc GS program. This function provides the needed 3DSTATE_GS
+ * for this.
+ */
+ gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset);
+ gs.SingleProgramFlow = true;
+ gs.VectorMaskEnable = true;
+ gs.DispatchGRFStartRegisterForURBData = 2;
+ gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length;
+ gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
+ gs.StatisticsEnable = true;
+ gs.SOStatisticsEnable = true;
+ gs.RenderingEnabled = true;
+ gs.SVBIPayloadEnable = true;
+ gs.SVBIPostIncrementEnable = true;
+ gs.SVBIPostIncrementValue =
+ brw->ff_gs.prog_data->svbi_postincrement_value;
+ gs.Enable = true;
#endif
- } else {
- brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+ } else {
gs.StatisticsEnable = true;
#if GEN_GEN < 7
gs.RenderingEnabled = true;
#endif
}
}
-#if GEN_GEN < 7
+
+#if GEN_GEN == 6
brw->gs.enabled = active;
#endif
}
/* ---------------------------------------------------------------------- */
+UNUSED static GLenum
+fix_dual_blend_alpha_to_one(GLenum function)
+{
+ switch (function) {
+ case GL_SRC1_ALPHA:
+ return GL_ONE;
+
+ case GL_ONE_MINUS_SRC1_ALPHA:
+ return GL_ZERO;
+ }
+
+ return function;
+}
+
#define blend_factor(x) brw_translate_blend_factor(x)
#define blend_eqn(x) brw_translate_blend_equation(x)
-#if GEN_GEN >= 6
-static void
-genX(upload_blend_state)(struct brw_context *brw)
+/**
+ * Modify blend function to force destination alpha to 1.0
+ *
+ * If \c function specifies a blend function that uses destination alpha,
+ * replace it with a function that hard-wires destination alpha to 1.0. This
+ * is used when rendering to xRGB targets.
+ */
+static GLenum
+brw_fix_xRGB_alpha(GLenum function)
+{
+ switch (function) {
+ case GL_DST_ALPHA:
+ return GL_ONE;
+
+ case GL_ONE_MINUS_DST_ALPHA:
+ case GL_SRC_ALPHA_SATURATE:
+ return GL_ZERO;
+ }
+
+ return function;
+}
+
+#if GEN_GEN >= 6
+typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
+#endif
+
+UNUSED static bool
+set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i,
+ bool alpha_to_one)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ /* _NEW_BUFFERS */
+ const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+ bool independent_alpha_blend = false;
+
+ /* Used for implementing the following bit of GL_EXT_texture_integer:
+ * "Per-fragment operations that require floating-point color
+ * components, including multisample alpha operations, alpha test,
+ * blending, and dithering, have no effect when the corresponding
+ * colors are written to an integer color buffer."
+ */
+ const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
+
+ const unsigned blend_enabled = GEN_GEN >= 6 ?
+ ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled;
+
+ /* _NEW_COLOR */
+ if (ctx->Color.ColorLogicOpEnabled) {
+ GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
+ : GL_UNSIGNED_NORMALIZED;
+ WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
+ rb_type != GL_UNSIGNED_NORMALIZED &&
+ rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
+ "renderbuffer\n",
+ _mesa_enum_to_string(ctx->Color.LogicOp),
+ _mesa_enum_to_string(rb_type));
+ if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
+ entry->LogicOpEnable = true;
+ entry->LogicOpFunction =
+ intel_translate_logic_op(ctx->Color.LogicOp);
+ }
+ } else if (blend_enabled && !ctx->Color._AdvancedBlendMode
+ && (GEN_GEN <= 5 || !integer)) {
+ GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
+ GLenum eqA = ctx->Color.Blend[i].EquationA;
+ GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
+ GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
+ GLenum srcA = ctx->Color.Blend[i].SrcA;
+ GLenum dstA = ctx->Color.Blend[i].DstA;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX)
+ srcRGB = dstRGB = GL_ONE;
+
+ if (eqA == GL_MIN || eqA == GL_MAX)
+ srcA = dstA = GL_ONE;
+
+ /* Due to hardware limitations, the destination may have information
+ * in an alpha channel even when the format specifies no alpha
+ * channel. In order to avoid getting any incorrect blending due to
+ * that alpha channel, coerce the blend factors to values that will
+ * not read the alpha channel, but will instead use the correct
+ * implicit value for alpha.
+ */
+ if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
+ GL_TEXTURE_ALPHA_TYPE)) {
+ srcRGB = brw_fix_xRGB_alpha(srcRGB);
+ srcA = brw_fix_xRGB_alpha(srcA);
+ dstRGB = brw_fix_xRGB_alpha(dstRGB);
+ dstA = brw_fix_xRGB_alpha(dstA);
+ }
+
+ /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+ * "If Dual Source Blending is enabled, this bit must be disabled."
+ *
+ * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
+ * and leave it enabled anyway.
+ */
+ if (GEN_GEN >= 6 && ctx->Color.Blend[i]._UsesDualSrc && alpha_to_one) {
+ srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+ srcA = fix_dual_blend_alpha_to_one(srcA);
+ dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+ dstA = fix_dual_blend_alpha_to_one(dstA);
+ }
+
+ entry->ColorBufferBlendEnable = true;
+ entry->DestinationBlendFactor = blend_factor(dstRGB);
+ entry->SourceBlendFactor = blend_factor(srcRGB);
+ entry->DestinationAlphaBlendFactor = blend_factor(dstA);
+ entry->SourceAlphaBlendFactor = blend_factor(srcA);
+ entry->ColorBlendFunction = blend_eqn(eqRGB);
+ entry->AlphaBlendFunction = blend_eqn(eqA);
+
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
+ independent_alpha_blend = true;
+ }
+
+ return independent_alpha_blend;
+}
+
+#if GEN_GEN >= 6
+static void
+genX(upload_blend_state)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
int size;
#else
{
#endif
-
- /* _NEW_BUFFERS */
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
-
- /* Used for implementing the following bit of GL_EXT_texture_integer:
- * "Per-fragment operations that require floating-point color
- * components, including multisample alpha operations, alpha test,
- * blending, and dithering, have no effect when the corresponding
- * colors are written to an integer color buffer."
- */
- bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
-
- /* _NEW_COLOR */
- if (ctx->Color.ColorLogicOpEnabled) {
- GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
- : GL_UNSIGNED_NORMALIZED;
- WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
- rb_type != GL_UNSIGNED_NORMALIZED &&
- rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
- "renderbuffer\n",
- _mesa_enum_to_string(ctx->Color.LogicOp),
- _mesa_enum_to_string(rb_type));
- if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
- entry.LogicOpEnable = true;
- entry.LogicOpFunction =
- intel_translate_logic_op(ctx->Color.LogicOp);
- }
- } else if (ctx->Color.BlendEnabled & (1 << i) && !integer &&
- !ctx->Color._AdvancedBlendMode) {
- GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
- GLenum eqA = ctx->Color.Blend[i].EquationA;
- GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
- GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
- GLenum srcA = ctx->Color.Blend[i].SrcA;
- GLenum dstA = ctx->Color.Blend[i].DstA;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX)
- srcRGB = dstRGB = GL_ONE;
-
- if (eqA == GL_MIN || eqA == GL_MAX)
- srcA = dstA = GL_ONE;
-
- /* Due to hardware limitations, the destination may have information
- * in an alpha channel even when the format specifies no alpha
- * channel. In order to avoid getting any incorrect blending due to
- * that alpha channel, coerce the blend factors to values that will
- * not read the alpha channel, but will instead use the correct
- * implicit value for alpha.
- */
- if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
- GL_TEXTURE_ALPHA_TYPE)) {
- srcRGB = brw_fix_xRGB_alpha(srcRGB);
- srcA = brw_fix_xRGB_alpha(srcA);
- dstRGB = brw_fix_xRGB_alpha(dstRGB);
- dstA = brw_fix_xRGB_alpha(dstA);
- }
-
- entry.ColorBufferBlendEnable = true;
- entry.DestinationBlendFactor = blend_factor(dstRGB);
- entry.SourceBlendFactor = blend_factor(srcRGB);
- entry.DestinationAlphaBlendFactor = blend_factor(dstA);
- entry.SourceAlphaBlendFactor = blend_factor(srcA);
- entry.ColorBlendFunction = blend_eqn(eqRGB);
- entry.AlphaBlendFunction = blend_eqn(eqA);
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
- blend.IndependentAlphaBlendEnable = true;
- }
+ blend.IndependentAlphaBlendEnable =
+ set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) ||
+ blend.IndependentAlphaBlendEnable;
/* See section 8.1.6 "Pre-Blend Color Clamping" of the
* SandyBridge PRM Volume 2 Part 1 for HW requirements.
entry.WriteDisableBlue = !ctx->Color.ColorMask[i][2];
entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];
- /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
- * "If Dual Source Blending is enabled, this bit must be disabled."
- */
- WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
- _mesa_is_multisample_enabled(ctx) &&
- ctx->Multisample.SampleAlphaToOne,
- "HW workaround: disabling alpha to one with dual src "
- "blending\n");
- if (ctx->Color.Blend[i]._UsesDualSrc)
- blend.AlphaToOneEnable = false;
#if GEN_GEN >= 8
GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
#else
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
-static void
-genX(upload_scissor_state)(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->ctx;
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
- struct GENX(SCISSOR_RECT) scissor;
- uint32_t scissor_state_offset;
- const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
- const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
- uint32_t *scissor_map;
-
- /* BRW_NEW_VIEWPORT_COUNT */
- const unsigned viewport_count = brw->clip.viewport_count;
-
- scissor_map = brw_state_batch(
- brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
- 32, &scissor_state_offset);
-
- /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
-
- /* The scissor only needs to handle the intersection of drawable and
- * scissor rect. Clipping to the boundaries of static shared buffers
- * for front/back/depth is covered by looping over cliprects in brw_draw.c.
- *
- * Note that the hardware's coordinates are inclusive, while Mesa's min is
- * inclusive but max is exclusive.
- */
- for (unsigned i = 0; i < viewport_count; i++) {
- int bbox[4];
-
- bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
- bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
- bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
- bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
- _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
-
- if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
- /* If the scissor was out of bounds and got clamped to 0 width/height
- * at the bounds, the subtraction of 1 from maximums could produce a
- * negative number and thus not clip anything. Instead, just provide
- * a min > max scissor inside the bounds, which produces the expected
- * no rendering.
- */
- scissor.ScissorRectangleXMin = 1;
- scissor.ScissorRectangleXMax = 0;
- scissor.ScissorRectangleYMin = 1;
- scissor.ScissorRectangleYMax = 0;
- } else if (render_to_fbo) {
- /* texmemory: Y=0=bottom */
- scissor.ScissorRectangleXMin = bbox[0];
- scissor.ScissorRectangleXMax = bbox[1] - 1;
- scissor.ScissorRectangleYMin = bbox[2];
- scissor.ScissorRectangleYMax = bbox[3] - 1;
- } else {
- /* memory: Y=0=top */
- scissor.ScissorRectangleXMin = bbox[0];
- scissor.ScissorRectangleXMax = bbox[1] - 1;
- scissor.ScissorRectangleYMin = fb_height - bbox[3];
- scissor.ScissorRectangleYMax = fb_height - bbox[2] - 1;
- }
-
- GENX(SCISSOR_RECT_pack)(
- NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
- }
-
- brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
- ptr.ScissorRectPointer = scissor_state_offset;
- }
-}
-
-static const struct brw_tracked_state genX(scissor_state) = {
- .dirty = {
- .mesa = _NEW_BUFFERS |
- _NEW_SCISSOR |
- _NEW_VIEWPORT,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_VIEWPORT_COUNT,
- },
- .emit = genX(upload_scissor_state),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
#if GEN_GEN >= 7
UNUSED static const uint32_t push_constant_opcodes[] = {
[MESA_SHADER_VERTEX] = 21,
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
if (active) {
-#if GEN_GEN >= 9
- pkt.ConstantBody.ConstantBuffer2ReadLength =
- stage_state->push_const_size;
- pkt.ConstantBody.PointerToConstantBuffer2 =
- render_ro_bo(brw->batch.bo, stage_state->push_const_offset);
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+ pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
+ pkt.ConstantBody.Buffer[2] =
+ render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
#else
- pkt.ConstantBody.ConstantBuffer0ReadLength =
- stage_state->push_const_size;
- pkt.ConstantBody.PointerToConstantBuffer0.offset =
+ pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
+ pkt.ConstantBody.Buffer[0].offset =
stage_state->push_const_offset | mocs;
#endif
}
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
static void
genX(upload_color_calc_state)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
+#if GEN_GEN <= 5
+ cc.IndependentAlphaBlendEnable =
+ set_blend_entry_bits(brw, &cc, 0, false);
+ set_depth_stencil_bits(brw, &cc);
+
+ if (ctx->Color.AlphaEnabled &&
+ ctx->DrawBuffer->_NumColorDrawBuffers <= 1) {
+ cc.AlphaTestEnable = true;
+ cc.AlphaTestFunction =
+ intel_translate_compare_func(ctx->Color.AlphaFunc);
+ }
+
+ cc.ColorDitherEnable = ctx->Color.DitherFlag;
+
+ cc.StatisticsEnable = brw->stats_wm;
+
+ cc.CCViewportStatePointer =
+ instruction_ro_bo(brw->batch.bo, brw->cc.vp_offset);
+#else
/* _NEW_COLOR */
- cc.AlphaTestFormat = ALPHATEST_UNORM8;
- UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
- ctx->Color.AlphaRef);
+ cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
+ cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
+ cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
+ cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
#if GEN_GEN < 9
/* _NEW_STENCIL */
cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
cc.BackfaceStencilReferenceValue =
_mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
+#endif
+
#endif
/* _NEW_COLOR */
- cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
- cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
- cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
- cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
+ ctx->Color.AlphaRef);
}
+#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
ptr.ColorCalcStatePointer = brw->cc.state_offset;
#if GEN_GEN != 7
ptr.ColorCalcStatePointerValid = true;
#endif
}
+#else
+ brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+#endif
}
static const struct brw_tracked_state genX(color_calc_state) = {
.dirty = {
.mesa = _NEW_COLOR |
- _NEW_STENCIL,
+ _NEW_STENCIL |
+ (GEN_GEN <= 5 ? _NEW_BUFFERS |
+ _NEW_DEPTH
+ : 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_CC_STATE |
- BRW_NEW_STATE_BASE_ADDRESS,
+ (GEN_GEN <= 5 ? BRW_NEW_CC_VP |
+ BRW_NEW_STATS_WM
+ : BRW_NEW_CC_STATE |
+ BRW_NEW_STATE_BASE_ADDRESS),
},
.emit = genX(upload_color_calc_state),
};
-#endif
/* ---------------------------------------------------------------------- */
* command feels strange -- each dword pair contains a SO_DECL per stream.
*/
for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
- int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
- struct GENX(SO_DECL) decl = {0};
- int varying = linked_xfb_info->Outputs[i].OutputRegister;
- const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
- unsigned component_mask = (1 << components) - 1;
- unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
- unsigned decl_buffer_slot = buffer;
+ const struct gl_transform_feedback_output *output =
+ &linked_xfb_info->Outputs[i];
+ const int buffer = output->OutputBuffer;
+ const int varying = output->OutputRegister;
+ const unsigned stream_id = output->StreamId;
assert(stream_id < MAX_VERTEX_STREAMS);
- /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
- * gl_Layer is stored in VARYING_SLOT_PSIZ.y
- * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
- */
- if (varying == VARYING_SLOT_PSIZ) {
- assert(components == 1);
- component_mask <<= 3;
- } else if (varying == VARYING_SLOT_LAYER) {
- assert(components == 1);
- component_mask <<= 1;
- } else if (varying == VARYING_SLOT_VIEWPORT) {
- assert(components == 1);
- component_mask <<= 2;
- } else {
- component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
- }
-
buffer_mask[stream_id] |= 1 << buffer;
- decl.OutputBufferSlot = decl_buffer_slot;
- if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
- decl.RegisterIndex = vue_map->varying_to_slot[VARYING_SLOT_PSIZ];
- } else {
- assert(vue_map->varying_to_slot[varying] >= 0);
- decl.RegisterIndex = vue_map->varying_to_slot[varying];
- }
- decl.ComponentMask = component_mask;
+ assert(vue_map->varying_to_slot[varying] >= 0);
/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
* array. Instead, it simply increments DstOffset for the following
* program as many size = 4 holes as we can, then a final hole to
* accommodate the final 1, 2, or 3 remaining.
*/
- int skip_components =
- linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
+ int skip_components = output->DstOffset - next_offset[buffer];
- next_offset[buffer] += skip_components;
-
- while (skip_components >= 4) {
- struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
- d->HoleFlag = 1;
- d->OutputBufferSlot = decl_buffer_slot;
- d->ComponentMask = 0xf;
+ while (skip_components > 0) {
+ so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+ .HoleFlag = 1,
+ .OutputBufferSlot = output->OutputBuffer,
+ .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
+ };
skip_components -= 4;
}
- if (skip_components > 0) {
- struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
- d->HoleFlag = 1;
- d->OutputBufferSlot = decl_buffer_slot;
- d->ComponentMask = (1 << skip_components) - 1;
- }
-
- assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
+ next_offset[buffer] = output->DstOffset + output->NumComponents;
- next_offset[buffer] += components;
-
- so_decl[stream_id][decls[stream_id]++] = decl;
+ so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+ .OutputBufferSlot = output->OutputBuffer,
+ .RegisterIndex = vue_map->varying_to_slot[varying],
+ .ComponentMask =
+ ((1 << output->NumComponents) - 1) << output->ComponentOffset,
+ };
if (decls[stream_id] > max_decls)
max_decls = decls[stream_id];
#else
struct brw_transform_feedback_object *brw_obj =
(struct brw_transform_feedback_object *) xfb_obj;
- uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+ uint32_t mocs_wb = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
#endif
/* Set up the up to 4 output buffers. These are the ranges defined in the
},
.emit = genX(upload_tcs_push_constants),
};
+
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+#if GEN_GEN >= 7
+/* Upload the compute-shader pipeline state: MEDIA_VFE_STATE, the push
+ * constant CURBE, the binding table, and the INTERFACE_DESCRIPTOR_DATA
+ * that ties the CS kernel together.  No-op when no compute program is
+ * bound (brw->cs.base.prog_data == NULL).
+ */
+static void
+genX(upload_cs_state)(struct brw_context *brw)
+{
+ if (!brw->cs.base.prog_data)
+ return;
+
+ /* Reserve 64-byte-aligned batch space for the interface descriptor;
+ * its batch offset is handed to MEDIA_INTERFACE_DESCRIPTOR_LOAD below.
+ */
+ uint32_t offset;
+ uint32_t *desc = (uint32_t*) brw_state_batch(
+ brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64,
+ &offset);
+
+ struct brw_stage_state *stage_state = &brw->cs.base;
+ struct brw_stage_prog_data *prog_data = stage_state->prog_data;
+ struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+ /* INTEL_DEBUG=shader_time: publish the shader-time buffer as a RAW
+ * surface so the kernel can write timing records into it.
+ */
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ brw_emit_buffer_surface_state(
+ brw, &stage_state->surf_offset[
+ prog_data->binding_table.shader_time_start],
+ brw->shader_time.bo, 0, ISL_FORMAT_RAW,
+ brw->shader_time.bo->size, 1, true);
+ }
+
+ uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
+ 32, &stage_state->bind_bo_offset);
+
+ brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
+ if (prog_data->total_scratch) {
+ uint32_t bo_offset;
+
+ /* The per-thread scratch size is a power of two, so ffs() of it
+ * recovers log2; each generation encodes it differently.
+ */
+ if (GEN_GEN >= 8) {
+ /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
+ * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
+ */
+ bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+ } else if (GEN_IS_HASWELL) {
+ /* Haswell's Per Thread Scratch Space is in the range [0, 10]
+ * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
+ */
+ bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+ } else {
+ /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
+ * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
+ */
+ bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+ }
+ vfe.ScratchSpaceBasePointer =
+ render_bo(stage_state->scratch_bo, bo_offset);
+ }
+
+ /* NOTE(review): subslice_total can apparently be 0 on some
+ * configurations, hence the MAX2 clamp — confirm against
+ * brw_screen setup.
+ */
+ const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
+ vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
+ vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
+ vfe.ResetGatewayTimer =
+ Resettingrelativetimerandlatchingtheglobaltimestamp;
+#if GEN_GEN < 9
+ vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
+#endif
+#if GEN_GEN == 7
+ vfe.GPGPUMode = 1;
+#endif
+
+ /* We are uploading duplicated copies of push constant uniforms for each
+ * thread. Although the local id data needs to vary per thread, it won't
+ * change for other uniform data. Unfortunately this duplication is
+ * required for gen7. As of Haswell, this duplication can be avoided,
+ * but this older mechanism with duplicated data continues to work.
+ *
+ * FINISHME: As of Haswell, we could make use of the
+ * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length"
+ * field to only store one copy of uniform data.
+ *
+ * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
+ * which is described in the GPGPU_WALKER command and in the Broadwell
+ * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
+ * Operations => GPGPU Mode => Indirect Payload Storage.
+ *
+ * Note: The constant data is built in brw_upload_cs_push_constants
+ * below.
+ */
+ vfe.URBEntryAllocationSize = GEN_GEN >= 8 ? 2 : 0;
+
+ const uint32_t vfe_curbe_allocation =
+ ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+ cs_prog_data->push.cross_thread.regs, 2);
+ vfe.CURBEAllocationSize = vfe_curbe_allocation;
+ }
+
+ /* Point the hardware at the push-constant data previously uploaded by
+ * brw_upload_cs_push_constants (skipped when there are none).
+ */
+ if (cs_prog_data->push.total.size > 0) {
+ brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
+ curbe.CURBETotalDataLength =
+ ALIGN(cs_prog_data->push.total.size, 64);
+ curbe.CURBEDataStartAddress = stage_state->push_const_offset;
+ }
+ }
+
+ /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+ memcpy(bind, stage_state->surf_offset,
+ prog_data->binding_table.size_bytes);
+ const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
+ .KernelStartPointer = brw->cs.base.prog_offset,
+ .SamplerStatePointer = stage_state->sampler_offset,
+ .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
+ .BindingTablePointer = stage_state->bind_bo_offset,
+ .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
+ .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
+ .SharedLocalMemorySize = encode_slm_size(devinfo->gen,
+ prog_data->total_shared),
+ .BarrierEnable = cs_prog_data->uses_barrier,
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+ .CrossThreadConstantDataReadLength =
+ cs_prog_data->push.cross_thread.regs,
+#endif
+ };
+
+ GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd);
+
+ brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
+ load.InterfaceDescriptorTotalLength =
+ GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
+ load.InterfaceDescriptorDataStartAddress = offset;
+ }
+}
+
+/* Re-emit CS state whenever the program, its constants, surfaces, or
+ * sampler tables change.
+ */
+static const struct brw_tracked_state genX(cs_state) = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_CS_PROG_DATA |
+ BRW_NEW_SAMPLER_STATE_TABLE |
+ BRW_NEW_SURFACES,
+ },
+ .emit = genX(upload_cs_state)
+};
+
#endif
/* ---------------------------------------------------------------------- */
struct gl_point_attrib *point = &ctx->Point;
brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
- if (polygon->_FrontBit == render_to_fbo)
+ if (brw->polygon_front_bit == render_to_fbo)
raster.FrontWinding = CounterClockwise;
if (polygon->CullFlag) {
/* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);
+ bool alpha_to_one = false;
+
if (!buffer0_is_integer) {
/* _NEW_MULTISAMPLE */
- pb.AlphaToCoverageEnable =
- _mesa_is_multisample_enabled(ctx) &&
- ctx->Multisample.SampleAlphaToCoverage;
+
+ if (_mesa_is_multisample_enabled(ctx)) {
+ pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
+ alpha_to_one = ctx->Multisample.SampleAlphaToOne;
+ }
pb.AlphaTestEnable = color->AlphaEnabled;
}
dstA = brw_fix_xRGB_alpha(dstA);
}
+ /* Alpha to One doesn't work with Dual Color Blending. Override
+ * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
+ */
+ if (alpha_to_one && color->Blend[0]._UsesDualSrc) {
+ srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+ srcA = fix_dual_blend_alpha_to_one(srcA);
+ dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+ dstA = fix_dual_blend_alpha_to_one(dstA);
+ }
+
pb.ColorBufferBlendEnable = true;
pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
},
.emit = genX(upload_ps_blend)
};
+#endif
+/* ---------------------------------------------------------------------- */
+
+#if GEN_GEN >= 8
+/* Emit 3DSTATE_VF_TOPOLOGY (Gen8+), which moves the primitive topology
+ * out of 3DPRIMITIVE and into its own pipelined state packet.
+ */
+static void
+genX(emit_vf_topology)(struct brw_context *brw)
+{
+ brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) {
+ vftopo.PrimitiveTopologyType = brw->primitive;
+ }
+}
+
+/* brw->primitive is updated per draw, hence the BRW_NEW_PRIMITIVE flag. */
+static const struct brw_tracked_state genX(vf_topology) = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BLORP |
+ BRW_NEW_PRIMITIVE,
+ },
+ .emit = genX(emit_vf_topology),
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+#if GEN_GEN >= 7
+/* Emit MI_REPORT_PERF_COUNT: snapshot the observability (perf counter)
+ * registers into @bo at @offset_in_bytes, tagging the record with
+ * @report_id so the CPU reader can match begin/end snapshots.
+ */
+static void
+genX(emit_mi_report_perf_count)(struct brw_context *brw,
+ struct brw_bo *bo,
+ uint32_t offset_in_bytes,
+ uint32_t report_id)
+{
+ brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
+ mi_rpc.MemoryAddress = instruction_bo(bo, offset_in_bytes);
+ mi_rpc.ReportID = report_id;
+ }
+}
#endif
/* ---------------------------------------------------------------------- */
+/**
+ * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
+ */
+static void
+genX(emit_sampler_state_pointers_xs)(struct brw_context *brw,
+ struct brw_stage_state *stage_state)
+{
+#if GEN_GEN >= 7
+ /* The per-stage packets share the VS packet's layout; only the
+ * command sub-opcode differs, so emit the VS struct and patch the
+ * sub-opcode from this table.
+ */
+ static const uint16_t packet_headers[] = {
+ [MESA_SHADER_VERTEX] = 43,
+ [MESA_SHADER_TESS_CTRL] = 44,
+ [MESA_SHADER_TESS_EVAL] = 45,
+ [MESA_SHADER_GEOMETRY] = 46,
+ [MESA_SHADER_FRAGMENT] = 47,
+ };
+
+ /* Ivybridge requires a workaround flush before VS packets. */
+ if (GEN_GEN == 7 && !GEN_IS_HASWELL &&
+ stage_state->stage == MESA_SHADER_VERTEX) {
+ gen7_emit_vs_workaround_flush(brw);
+ }
+
+ brw_batch_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
+ ptr._3DCommandSubOpcode = packet_headers[stage_state->stage];
+ ptr.PointertoVSSamplerState = stage_state->sampler_offset;
+ }
+#endif
+}
+
+/* Return whether @format has channel @i (0 = R/first, ..., 3 = A) for the
+ * purposes of border-color programming.
+ */
+UNUSED static bool
+has_component(mesa_format format, int i)
+{
+ if (_mesa_is_format_color_format(format))
+ return _mesa_format_has_color_component(format, i);
+
+ /* depth and stencil have only one component */
+ return i == 0;
+}
+
+/**
+ * Upload SAMPLER_BORDER_COLOR_STATE.
+ */
+/* Build and upload a SAMPLER_BORDER_COLOR_STATE for one sampler, writing
+ * its batch offset to *sdc_offset.  The border color is swizzled per the
+ * GL base format first, then encoded in the per-generation hardware
+ * layout.
+ */
+static void
+genX(upload_default_color)(struct brw_context *brw,
+ const struct gl_sampler_object *sampler,
+ mesa_format format, GLenum base_format,
+ bool is_integer_format, bool is_stencil_sampling,
+ uint32_t *sdc_offset)
+{
+ union gl_color_union color;
+
+ switch (base_format) {
+ case GL_DEPTH_COMPONENT:
+ /* GL specs that border color for depth textures is taken from the
+ * R channel, while the hardware uses A. Spam R into all the
+ * channels for safety.
+ */
+ color.ui[0] = sampler->BorderColor.ui[0];
+ color.ui[1] = sampler->BorderColor.ui[0];
+ color.ui[2] = sampler->BorderColor.ui[0];
+ color.ui[3] = sampler->BorderColor.ui[0];
+ break;
+ case GL_ALPHA:
+ color.ui[0] = 0u;
+ color.ui[1] = 0u;
+ color.ui[2] = 0u;
+ color.ui[3] = sampler->BorderColor.ui[3];
+ break;
+ case GL_INTENSITY:
+ color.ui[0] = sampler->BorderColor.ui[0];
+ color.ui[1] = sampler->BorderColor.ui[0];
+ color.ui[2] = sampler->BorderColor.ui[0];
+ color.ui[3] = sampler->BorderColor.ui[0];
+ break;
+ case GL_LUMINANCE:
+ color.ui[0] = sampler->BorderColor.ui[0];
+ color.ui[1] = sampler->BorderColor.ui[0];
+ color.ui[2] = sampler->BorderColor.ui[0];
+ color.ui[3] = float_as_int(1.0);
+ break;
+ case GL_LUMINANCE_ALPHA:
+ color.ui[0] = sampler->BorderColor.ui[0];
+ color.ui[1] = sampler->BorderColor.ui[0];
+ color.ui[2] = sampler->BorderColor.ui[0];
+ color.ui[3] = sampler->BorderColor.ui[3];
+ break;
+ default:
+ color.ui[0] = sampler->BorderColor.ui[0];
+ color.ui[1] = sampler->BorderColor.ui[1];
+ color.ui[2] = sampler->BorderColor.ui[2];
+ color.ui[3] = sampler->BorderColor.ui[3];
+ break;
+ }
+
+ /* In some cases we use an RGBA surface format for GL RGB textures,
+ * where we've initialized the A channel to 1.0. We also have to set
+ * the border color alpha to 1.0 in that case.
+ */
+ if (base_format == GL_RGB)
+ color.ui[3] = float_as_int(1.0);
+
+ /* Use the compile-time gen macros (not brw->gen / brw->is_haswell):
+ * this file is compiled once per generation, so runtime checks would
+ * only leave dead code behind and be inconsistent with the GEN_GEN
+ * checks used throughout the rest of this function.
+ */
+ int alignment = 32;
+ if (GEN_GEN >= 8) {
+ alignment = 64;
+ } else if (GEN_IS_HASWELL && (is_integer_format || is_stencil_sampling)) {
+ alignment = 512;
+ }
+
+ uint32_t *sdc = brw_state_batch(
+ brw, GENX(SAMPLER_BORDER_COLOR_STATE_length) * sizeof(uint32_t),
+ alignment, sdc_offset);
+
+ struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 };
+
+#define ASSIGN(dst, src) \
+ do { \
+ dst = src; \
+ } while (0)
+
+#define ASSIGNu16(dst, src) \
+ do { \
+ dst = (uint16_t)src; \
+ } while (0)
+
+#define ASSIGNu8(dst, src) \
+ do { \
+ dst = (uint8_t)src; \
+ } while (0)
+
+#define BORDER_COLOR_ATTR(macro, _color_type, src) \
+ macro(state.BorderColor ## _color_type ## Red, src[0]); \
+ macro(state.BorderColor ## _color_type ## Green, src[1]); \
+ macro(state.BorderColor ## _color_type ## Blue, src[2]); \
+ macro(state.BorderColor ## _color_type ## Alpha, src[3]);
+
+#if GEN_GEN >= 8
+ /* On Broadwell, the border color is represented as four 32-bit floats,
+ * integers, or unsigned values, interpreted according to the surface
+ * format. This matches the sampler->BorderColor union exactly; just
+ * memcpy the values.
+ */
+ BORDER_COLOR_ATTR(ASSIGN, 32bit, color.ui);
+#elif GEN_IS_HASWELL
+ if (is_integer_format || is_stencil_sampling) {
+ bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling;
+ const int bits_per_channel =
+ _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS);
+
+ /* From the Haswell PRM, "Command Reference: Structures", Page 36:
+ * "If any color channel is missing from the surface format,
+ * corresponding border color should be programmed as zero and if
+ * alpha channel is missing, corresponding Alpha border color should
+ * be programmed as 1."
+ */
+ unsigned c[4] = { 0, 0, 0, 1 };
+ for (int i = 0; i < 4; i++) {
+ if (has_component(format, i))
+ c[i] = color.ui[i];
+ }
+
+ switch (bits_per_channel) {
+ case 8:
+ /* Copy RGBA in order. */
+ BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c);
+ break;
+ case 10:
+ /* R10G10B10A2_UINT is treated like a 16-bit format. */
+ case 16:
+ BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c);
+ break;
+ case 32:
+ if (base_format == GL_RG) {
+ /* Careful inspection of the tables reveals that for RG32 formats,
+ * the green channel needs to go where blue normally belongs.
+ */
+ state.BorderColor32bitRed = c[0];
+ state.BorderColor32bitBlue = c[1];
+ state.BorderColor32bitAlpha = 1;
+ } else {
+ /* Copy RGBA in order. */
+ BORDER_COLOR_ATTR(ASSIGN, 32bit, c);
+ }
+ break;
+ default:
+ assert(!"Invalid number of bits per channel in integer format.");
+ break;
+ }
+ } else {
+ BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
+ }
+#elif GEN_GEN == 5 || GEN_GEN == 6
+ BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color.f);
+ BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color.f);
+ BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color.f);
+
+#define MESA_FLOAT_TO_HALF(dst, src) \
+ dst = _mesa_float_to_half(src);
+
+ BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color.f);
+
+#undef MESA_FLOAT_TO_HALF
+
+ state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8;
+ state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8;
+ state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8;
+ state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8;
+
+ BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
+#elif GEN_GEN == 4
+ BORDER_COLOR_ATTR(ASSIGN, , color.f);
+#else
+ BORDER_COLOR_ATTR(ASSIGN, Float, color.f);
+#endif
+
+#undef ASSIGN
+#undef BORDER_COLOR_ATTR
+
+ GENX(SAMPLER_BORDER_COLOR_STATE_pack)(brw, sdc, &state);
+}
+
+/* Translate a GL texture wrap mode to the hardware TCM_* enum.
+ * @using_nearest only matters for GL_CLAMP on pre-Gen8 (see below).
+ * Note: @brw is currently unused; it is kept so all callers share one
+ * signature across generations.
+ */
+static uint32_t
+translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
+{
+ switch (wrap) {
+ case GL_REPEAT:
+ return TCM_WRAP;
+ case GL_CLAMP:
+#if GEN_GEN >= 8
+ /* GL_CLAMP is the weird mode where coordinates are clamped to
+ * [0.0, 1.0], so linear filtering of coordinates outside of
+ * [0.0, 1.0] give you half edge texel value and half border
+ * color.
+ *
+ * Gen8+ supports this natively.
+ */
+ return TCM_HALF_BORDER;
+#else
+ /* On Gen4-7.5, we clamp the coordinates in the fragment shader
+ * and set clamp_border here, which gets the result desired.
+ * We just use clamp(_to_edge) for nearest, because for nearest
+ * clamping to 1.0 gives border color instead of the desired
+ * edge texels.
+ */
+ if (using_nearest)
+ return TCM_CLAMP;
+ else
+ return TCM_CLAMP_BORDER;
+#endif
+ case GL_CLAMP_TO_EDGE:
+ return TCM_CLAMP;
+ case GL_CLAMP_TO_BORDER:
+ return TCM_CLAMP_BORDER;
+ case GL_MIRRORED_REPEAT:
+ return TCM_MIRROR;
+ case GL_MIRROR_CLAMP_TO_EDGE:
+ return TCM_MIRROR_ONCE;
+ default:
+ return TCM_WRAP;
+ }
+}
+
+/**
+ * Return true if the given wrap mode requires the border color to exist.
+ */
+static bool
+wrap_mode_needs_border_color(unsigned wrap_mode)
+{
+#if GEN_GEN >= 8
+ /* TCM_HALF_BORDER (native GL_CLAMP) exists only on Gen8+ and also
+ * samples the border color.
+ */
+ return wrap_mode == TCM_CLAMP_BORDER ||
+ wrap_mode == TCM_HALF_BORDER;
+#else
+ return wrap_mode == TCM_CLAMP_BORDER;
+#endif
+}
+
+/**
+ * Sets the sampler state for a single unit based off of the sampler key
+ * entry.
+ */
+static void
+genX(update_sampler_state)(struct brw_context *brw,
+ GLenum target, bool tex_cube_map_seamless,
+ GLfloat tex_unit_lod_bias,
+ mesa_format format, GLenum base_format,
+ const struct gl_texture_object *texObj,
+ const struct gl_sampler_object *sampler,
+ uint32_t *sampler_state,
+ uint32_t batch_offset_for_sampler_state)
+{
+ struct GENX(SAMPLER_STATE) samp_st = { 0 };
+
+ /* Select min and mip filters. */
+ switch (sampler->MinFilter) {
+ case GL_NEAREST:
+ samp_st.MinModeFilter = MAPFILTER_NEAREST;
+ samp_st.MipModeFilter = MIPFILTER_NONE;
+ break;
+ case GL_LINEAR:
+ samp_st.MinModeFilter = MAPFILTER_LINEAR;
+ samp_st.MipModeFilter = MIPFILTER_NONE;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ samp_st.MinModeFilter = MAPFILTER_NEAREST;
+ samp_st.MipModeFilter = MIPFILTER_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ samp_st.MinModeFilter = MAPFILTER_LINEAR;
+ samp_st.MipModeFilter = MIPFILTER_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ samp_st.MinModeFilter = MAPFILTER_NEAREST;
+ samp_st.MipModeFilter = MIPFILTER_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ samp_st.MinModeFilter = MAPFILTER_LINEAR;
+ samp_st.MipModeFilter = MIPFILTER_LINEAR;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ /* Select mag filter. */
+ samp_st.MagModeFilter = sampler->MagFilter == GL_LINEAR ?
+ MAPFILTER_LINEAR : MAPFILTER_NEAREST;
+
+ /* Enable anisotropic filtering if desired. */
+ samp_st.MaximumAnisotropy = RATIO21;
+
+ if (sampler->MaxAnisotropy > 1.0f) {
+ if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
+ samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC;
+ if (samp_st.MagModeFilter == MAPFILTER_LINEAR)
+ samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC;
+
+ if (sampler->MaxAnisotropy > 2.0f) {
+ samp_st.MaximumAnisotropy =
+ MIN2((sampler->MaxAnisotropy - 2) / 2, RATIO161);
+ }
+ }
+
+ /* Set address rounding bits if not using nearest filtering. */
+ if (samp_st.MinModeFilter != MAPFILTER_NEAREST) {
+ samp_st.UAddressMinFilterRoundingEnable = true;
+ samp_st.VAddressMinFilterRoundingEnable = true;
+ samp_st.RAddressMinFilterRoundingEnable = true;
+ }
+
+ if (samp_st.MagModeFilter != MAPFILTER_NEAREST) {
+ samp_st.UAddressMagFilterRoundingEnable = true;
+ samp_st.VAddressMagFilterRoundingEnable = true;
+ samp_st.RAddressMagFilterRoundingEnable = true;
+ }
+
+ /* either_nearest feeds translate_wrap_mode()'s GL_CLAMP special case. */
+ bool either_nearest =
+ sampler->MinFilter == GL_NEAREST || sampler->MagFilter == GL_NEAREST;
+ unsigned wrap_s = translate_wrap_mode(brw, sampler->WrapS, either_nearest);
+ unsigned wrap_t = translate_wrap_mode(brw, sampler->WrapT, either_nearest);
+ unsigned wrap_r = translate_wrap_mode(brw, sampler->WrapR, either_nearest);
+
+ if (target == GL_TEXTURE_CUBE_MAP ||
+ target == GL_TEXTURE_CUBE_MAP_ARRAY) {
+ /* Cube maps must use the same wrap mode for all three coordinate
+ * dimensions. Prior to Haswell, only CUBE and CLAMP are valid.
+ *
+ * Ivybridge and Baytrail seem to have problems with CUBE mode and
+ * integer formats. Fall back to CLAMP for now.
+ */
+ if ((tex_cube_map_seamless || sampler->CubeMapSeamless) &&
+ !(GEN_GEN == 7 && !GEN_IS_HASWELL && texObj->_IsIntegerFormat)) {
+ wrap_s = TCM_CUBE;
+ wrap_t = TCM_CUBE;
+ wrap_r = TCM_CUBE;
+ } else {
+ wrap_s = TCM_CLAMP;
+ wrap_t = TCM_CLAMP;
+ wrap_r = TCM_CLAMP;
+ }
+ } else if (target == GL_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ wrap_t = TCM_WRAP;
+ }
+
+ samp_st.TCXAddressControlMode = wrap_s;
+ samp_st.TCYAddressControlMode = wrap_t;
+ samp_st.TCZAddressControlMode = wrap_r;
+
+ /* Set shadow function. */
+ samp_st.ShadowFunction =
+ sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB ?
+ intel_translate_shadow_compare_func(sampler->CompareFunc) : 0;
+
+#if GEN_GEN >= 7
+ /* Choose the anisotropic filtering algorithm (Gen7+ only). */
+ samp_st.AnisotropicAlgorithm =
+ samp_st.MinModeFilter == MAPFILTER_ANISOTROPIC ?
+ EWAApproximation : LEGACY;
+#endif
+
+#if GEN_GEN >= 6
+ samp_st.NonnormalizedCoordinateEnable = target == GL_TEXTURE_RECTANGLE;
+#endif
+
+ const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13;
+ samp_st.MinLOD = CLAMP(sampler->MinLod, 0, hw_max_lod);
+ samp_st.MaxLOD = CLAMP(sampler->MaxLod, 0, hw_max_lod);
+ samp_st.TextureLODBias =
+ CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15);
+
#if GEN_GEN == 6
+ samp_st.BaseMipLevel =
+ CLAMP(texObj->MinLevel + texObj->BaseLevel, 0, hw_max_lod);
+ samp_st.MinandMagStateNotEqual =
+ samp_st.MinModeFilter != samp_st.MagModeFilter;
+#endif
+
+ /* Upload the border color if necessary. If not, just point it at
+ * offset 0 (the start of the batch) - the color should be ignored,
+ * but that address won't fault in case something reads it anyway.
+ */
+ uint32_t border_color_offset = 0;
+ if (wrap_mode_needs_border_color(wrap_s) ||
+ wrap_mode_needs_border_color(wrap_t) ||
+ wrap_mode_needs_border_color(wrap_r)) {
+ genX(upload_default_color)(brw, sampler, format, base_format,
+ texObj->_IsIntegerFormat,
+ texObj->StencilSampling,
+ &border_color_offset);
+ }
+
+ samp_st.BorderColorPointer = border_color_offset;
+
+ /* Pre-Gen6 state is read directly from the batch, so the border color
+ * pointer must be a full graphics address with a relocation.
+ */
+ if (GEN_GEN < 6) {
+ samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
+ brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
+ brw->batch.bo, border_color_offset,
+ I915_GEM_DOMAIN_SAMPLER, 0);
+ }
+
+#if GEN_GEN >= 8
+ samp_st.LODPreClampMode = CLAMP_MODE_OGL;
+#else
+ samp_st.LODPreClampEnable = true;
+#endif
+
+ GENX(SAMPLER_STATE_pack)(brw, sampler_state, &samp_st);
+}
+
static void
-genX(upload_viewport_state_pointers)(struct brw_context *brw)
+update_sampler_state(struct brw_context *brw,
+ int unit,
+ uint32_t *sampler_state,
+ uint32_t batch_offset_for_sampler_state)
{
- brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) {
- vp.CCViewportStateChange = 1;
- vp.SFViewportStateChange = 1;
- vp.CLIPViewportStateChange = 1;
- vp.PointertoCLIP_VIEWPORT = brw->clip.vp_offset;
- vp.PointertoSF_VIEWPORT = brw->sf.vp_offset;
- vp.PointertoCC_VIEWPORT = brw->cc.vp_offset;
+ struct gl_context *ctx = &brw->ctx;
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ const struct gl_texture_object *texObj = texUnit->_Current;
+ const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+
+ /* These don't use samplers at all. */
+ if (texObj->Target == GL_TEXTURE_BUFFER)
+ return;
+
+ struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
+ genX(update_sampler_state)(brw, texObj->Target,
+ ctx->Texture.CubeMapSeamless,
+ texUnit->LodBias,
+ firstImage->TexFormat, firstImage->_BaseFormat,
+ texObj, sampler,
+ sampler_state, batch_offset_for_sampler_state);
+}
+
+static void
+genX(upload_sampler_state_table)(struct brw_context *brw,
+ struct gl_program *prog,
+ struct brw_stage_state *stage_state)
+{
+ struct gl_context *ctx = &brw->ctx;
+ uint32_t sampler_count = stage_state->sampler_count;
+
+ GLbitfield SamplersUsed = prog->SamplersUsed;
+
+ if (sampler_count == 0)
+ return;
+
+ /* SAMPLER_STATE is 4 DWords on all platforms. */
+ const int dwords = GENX(SAMPLER_STATE_length);
+ const int size_in_bytes = dwords * sizeof(uint32_t);
+
+ uint32_t *sampler_state = brw_state_batch(brw,
+ sampler_count * size_in_bytes,
+ 32, &stage_state->sampler_offset);
+ /* memset(sampler_state, 0, sampler_count * size_in_bytes); */
+
+ uint32_t batch_offset_for_sampler_state = stage_state->sampler_offset;
+
+ for (unsigned s = 0; s < sampler_count; s++) {
+ if (SamplersUsed & (1 << s)) {
+ const unsigned unit = prog->SamplerUnits[s];
+ if (ctx->Texture.Unit[unit]._Current) {
+ update_sampler_state(brw, unit, sampler_state,
+ batch_offset_for_sampler_state);
+ }
+ }
+
+ sampler_state += dwords;
+ batch_offset_for_sampler_state += size_in_bytes;
+ }
+
+ if (GEN_GEN >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) {
+ /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */
+ genX(emit_sampler_state_pointers_xs)(brw, stage_state);
+ } else {
+ /* Flag that the sampler state table pointer has changed; later atoms
+ * will handle it.
+ */
+ brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE;
}
}
-static const struct brw_tracked_state genX(viewport_state) = {
+static void
+genX(upload_fs_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+ genX(upload_sampler_state_table)(brw, fs, &brw->wm.base);
+}
+
+static const struct brw_tracked_state genX(fs_samplers) = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_CC_VP |
- BRW_NEW_CLIP_VP |
- BRW_NEW_SF_VP |
- BRW_NEW_STATE_BASE_ADDRESS,
+ BRW_NEW_FRAGMENT_PROGRAM,
+ },
+ .emit = genX(upload_fs_samplers),
+};
+
+static void
+genX(upload_vs_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+ genX(upload_sampler_state_table)(brw, vs, &brw->vs.base);
+}
+
+static const struct brw_tracked_state genX(vs_samplers) = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_VERTEX_PROGRAM,
+ },
+ .emit = genX(upload_vs_samplers),
+};
+
+#if GEN_GEN >= 6
+static void
+genX(upload_gs_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_GEOMETRY_PROGRAM */
+ struct gl_program *gs = (struct gl_program *) brw->geometry_program;
+ if (!gs)
+ return;
+
+ genX(upload_sampler_state_table)(brw, gs, &brw->gs.base);
+}
+
+
+static const struct brw_tracked_state genX(gs_samplers) = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_GEOMETRY_PROGRAM,
+ },
+ .emit = genX(upload_gs_samplers),
+};
+#endif
+
+#if GEN_GEN >= 7
+static void
+genX(upload_tcs_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_TESS_PROGRAMS */
+ struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
+ if (!tcs)
+ return;
+
+ genX(upload_sampler_state_table)(brw, tcs, &brw->tcs.base);
+}
+
+static const struct brw_tracked_state genX(tcs_samplers) = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_TESS_PROGRAMS,
},
- .emit = genX(upload_viewport_state_pointers),
+ .emit = genX(upload_tcs_samplers),
+};
+#endif
+
+#if GEN_GEN >= 7
+static void
+genX(upload_tes_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_TESS_PROGRAMS */
+ struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
+ if (!tes)
+ return;
+
+ genX(upload_sampler_state_table)(brw, tes, &brw->tes.base);
+}
+
+static const struct brw_tracked_state genX(tes_samplers) = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_TESS_PROGRAMS,
+ },
+ .emit = genX(upload_tes_samplers),
+};
+#endif
+
+#if GEN_GEN >= 7
+static void
+genX(upload_cs_samplers)(struct brw_context *brw)
+{
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct gl_program *cs = (struct gl_program *) brw->compute_program;
+ if (!cs)
+ return;
+
+ genX(upload_sampler_state_table)(brw, cs, &brw->cs.base);
+}
+
+const struct brw_tracked_state genX(cs_samplers) = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_COMPUTE_PROGRAM,
+ },
+ .emit = genX(upload_cs_samplers),
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+#if GEN_GEN <= 5
+
+static void genX(upload_blend_constant_color)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
+ blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
+ blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
+ blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
+ blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
+ }
+}
+
+static const struct brw_tracked_state genX(blend_constant_color) = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = BRW_NEW_CONTEXT |
+ BRW_NEW_BLORP,
+ },
+ .emit = genX(upload_blend_constant_color)
};
#endif
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
- &brw_cc_vp,
- &brw_cc_unit,
+ &genX(cc_vp),
+ &genX(color_calc_state),
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.
&brw_vs_binding_table,
&brw_wm_binding_table,
- &brw_fs_samplers,
- &brw_vs_samplers,
+ &genX(fs_samplers),
+ &genX(vs_samplers),
/* These set up state for brw_psp_urb_cbs */
&brw_wm_unit,
- &brw_sf_vp,
- &brw_sf_unit,
- &brw_vs_unit, /* always required, enabled or not */
+ &genX(sf_clip_viewport),
+ &genX(sf_state),
+ &genX(vs_state), /* always required, enabled or not */
&brw_clip_unit,
&brw_gs_unit,
&brw_invariant_state,
&brw_binding_table_pointers,
- &brw_blend_constant_color,
+ &genX(blend_constant_color),
&brw_depthbuffer,
&genX(drawing_rect),
&brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
+ &genX(index_buffer),
&genX(vertices),
&brw_constant_buffer
/* Command packets: */
- &brw_cc_vp,
- &genX(viewport_state), /* must do after *_vp stages */
+ &genX(cc_vp),
&gen6_urb,
&genX(blend_state), /* must do before cc unit */
&gen6_gs_binding_table,
&brw_wm_binding_table,
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_gs_samplers,
+ &genX(fs_samplers),
+ &genX(vs_samplers),
+ &genX(gs_samplers),
&gen6_sampler_state,
&genX(multisample_state),
&genX(drawing_rect),
&brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
+ &genX(index_buffer),
&genX(vertices),
};
#elif GEN_GEN == 7
{
/* Command packets: */
- &brw_cc_vp,
+ &genX(cc_vp),
&genX(sf_clip_viewport),
&gen7_l3_state,
&brw_gs_binding_table,
&brw_wm_binding_table,
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_tcs_samplers,
- &brw_tes_samplers,
- &brw_gs_samplers,
+ &genX(fs_samplers),
+ &genX(vs_samplers),
+ &genX(tcs_samplers),
+ &genX(tes_samplers),
+ &genX(gs_samplers),
&genX(multisample_state),
&genX(vs_state),
&genX(drawing_rect),
&brw_indices, /* must come before brw_vertices */
- &brw_index_buffer,
+ &genX(index_buffer),
&genX(vertices),
#if GEN_IS_HASWELL
#elif GEN_GEN >= 8
static const struct brw_tracked_state *render_atoms[] =
{
- &brw_cc_vp,
+ &genX(cc_vp),
&genX(sf_clip_viewport),
&gen7_l3_state,
&brw_gs_binding_table,
&brw_wm_binding_table,
- &brw_fs_samplers,
- &brw_vs_samplers,
- &brw_tcs_samplers,
- &brw_tes_samplers,
- &brw_gs_samplers,
+ &genX(fs_samplers),
+ &genX(vs_samplers),
+ &genX(tcs_samplers),
+ &genX(tes_samplers),
+ &genX(gs_samplers),
&genX(multisample_state),
&genX(vs_state),
&genX(drawing_rect),
- &gen8_vf_topology,
+ &genX(vf_topology),
&brw_indices,
- &gen8_index_buffer,
+ &genX(index_buffer),
&genX(vertices),
&genX(cut_index),
&brw_cs_abo_surfaces,
&brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
- &brw_cs_samplers,
- &brw_cs_state,
+ &genX(cs_samplers),
+ &genX(cs_state),
};
STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms));
brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
compute_atoms, ARRAY_SIZE(compute_atoms));
+
+ brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count);
#endif
}