#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
+#include "brw_vs.h"
+#include "brw_ff_gs.h"
+#include "brw_gs.h"
+#include "brw_wm.h"
static const struct brw_tracked_state *gen4_atoms[] =
{
- &brw_vs_prog, /* must do before GS prog, state base address. */
- &brw_ff_gs_prog, /* must do before state base address */
-
&brw_interpolation_map,
&brw_clip_prog, /* must do before state base address */
&brw_sf_prog, /* must do before state base address */
- &brw_wm_prog, /* must do before state base address */
/* Once all the programs are done, we know how large urb entry
* sizes need to be and can decide if we need to change the urb
static const struct brw_tracked_state *gen6_atoms[] =
{
- &brw_vs_prog, /* must do before state base address */
- &brw_gs_prog, /* must do before state base address */
- &brw_wm_prog, /* must do before state base address */
-
&gen6_clip_vp,
&gen6_sf_vp,
static const struct brw_tracked_state *gen7_atoms[] =
{
- &brw_vs_prog,
- &brw_gs_prog,
- &brw_wm_prog,
-
/* Command packets: */
/* must do before binding table pointers, cc state ptrs */
&brw_state_base_address,
&brw_cc_vp,
- &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
&gen7_sf_clip_viewport,
&gen7_push_constant_space,
static const struct brw_tracked_state *gen8_atoms[] =
{
- &brw_vs_prog,
- &brw_gs_prog,
- &brw_wm_prog,
-
/* Command packets: */
&gen8_state_base_address,
&brw_cc_vp,
- &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
&gen8_sf_clip_viewport,
&gen7_push_constant_space,
&gen8_vertices,
&haswell_cut_index,
+ &gen8_pma_fix,
};
static void
if (!brw->hw_ctx)
return;
+ if (brw->gen == 6)
+ intel_emit_post_sync_nonzero_flush(brw);
+
brw_upload_invariant_state(brw);
if (brw->gen >= 8) {
const struct brw_tracked_state **atoms;
int num_atoms;
+ STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->atoms));
+ STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->atoms));
+ STATIC_ASSERT(ARRAY_SIZE(gen7_atoms) <= ARRAY_SIZE(brw->atoms));
+ STATIC_ASSERT(ARRAY_SIZE(gen8_atoms) <= ARRAY_SIZE(brw->atoms));
+
brw_init_caches(brw);
if (brw->gen >= 8) {
num_atoms = ARRAY_SIZE(gen4_atoms);
}
- brw->atoms = atoms;
brw->num_atoms = num_atoms;
+ /* This is to work around brw_context::atoms being declared const. We want
+ * it to be const, but it needs to be initialized somehow!
+ */
+ struct brw_tracked_state *context_atoms =
+ (struct brw_tracked_state *) &brw->atoms[0];
+
+ for (int i = 0; i < num_atoms; i++)
+ context_atoms[i] = *atoms[i];
+
while (num_atoms--) {
- assert((*atoms)->dirty.mesa |
- (*atoms)->dirty.brw |
- (*atoms)->dirty.cache);
+ assert((*atoms)->dirty.mesa | (*atoms)->dirty.brw);
assert((*atoms)->emit);
atoms++;
}
brw->state.dirty.mesa = ~0;
brw->state.dirty.brw = ~0ull;
+ /* ~0 is a nonsensical value which won't match anything we program, so
+ * the programming will take effect on the first time around.
+ */
+ brw->pma_stall_bits = ~0;
+
/* Make sure that brw->state.dirty.brw has enough bits to hold all possible
* dirty flags.
*/
ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+ ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
}
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
- return ((a->mesa & b->mesa) |
- (a->brw & b->brw) |
- (a->cache & b->cache)) != 0;
+ return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}
static void accumulate_state( struct brw_state_flags *a,
{
a->mesa |= b->mesa;
a->brw |= b->brw;
- a->cache |= b->cache;
}
{
result->mesa = a->mesa ^ b->mesa;
result->brw = a->brw ^ b->brw;
- result->cache = a->cache ^ b->cache;
}
struct dirty_bit_map {
};
static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
+ DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
DEFINE_BIT(BRW_NEW_URB_FENCE),
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
+ DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
+ DEFINE_BIT(BRW_NEW_CC_VP),
+ DEFINE_BIT(BRW_NEW_SF_VP),
+ DEFINE_BIT(BRW_NEW_CLIP_VP),
+ DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
+ DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
{0, 0, 0}
};
-static struct dirty_bit_map cache_bits[] = {
- DEFINE_BIT(CACHE_NEW_CC_VP),
- DEFINE_BIT(CACHE_NEW_CC_UNIT),
- DEFINE_BIT(CACHE_NEW_WM_PROG),
- DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG),
- DEFINE_BIT(CACHE_NEW_SAMPLER),
- DEFINE_BIT(CACHE_NEW_WM_UNIT),
- DEFINE_BIT(CACHE_NEW_SF_PROG),
- DEFINE_BIT(CACHE_NEW_SF_VP),
- DEFINE_BIT(CACHE_NEW_SF_UNIT),
- DEFINE_BIT(CACHE_NEW_VS_UNIT),
- DEFINE_BIT(CACHE_NEW_VS_PROG),
- DEFINE_BIT(CACHE_NEW_FF_GS_UNIT),
- DEFINE_BIT(CACHE_NEW_FF_GS_PROG),
- DEFINE_BIT(CACHE_NEW_GS_PROG),
- DEFINE_BIT(CACHE_NEW_CLIP_VP),
- DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
- DEFINE_BIT(CACHE_NEW_CLIP_PROG),
- {0, 0, 0}
-};
-
-
static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   /* Walk the table up to its terminating entry (bit == 0) and dump one line
    * per flag to stderr.
    */
   for (int i = 0; bit_map[i].bit != 0; i++) {
- fprintf(stderr, "0x%016lx: %12d (%s)\n",
- bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+      /* Only report flags whose counter exceeds one. */
+      if (bit_map[i].count > 1) {
+         /* The flag value is widened explicitly so the argument always
+          * matches the %llx conversion, including on targets where long is
+          * narrower than 64 bits (the field is presumably 64-bit, as the
+          * sibling brw_update_dirty_count() takes uint64_t bits -- confirm
+          * against the struct declaration).
+          */
+         fprintf(stderr, "0x%016llx: %12d (%s)\n",
+                 (unsigned long long) bit_map[i].bit,
+                 bit_map[i].count, bit_map[i].name);
+      }
   }
}
+/**
+ * Run the program upload entry points in a fixed order:
+ * brw_upload_vs_prog(), then one of the two GS variants, then
+ * brw_upload_wm_prog().
+ *
+ * The GS variant is chosen by hardware generation: brw_upload_gs_prog() for
+ * gen6 and later, brw_upload_ff_gs_prog() for anything older.
+ */
+static void
+brw_upload_programs(struct brw_context *brw)
+{
+   brw_upload_vs_prog(brw);
+
+   if (brw->gen >= 6) {
+      brw_upload_gs_prog(brw);
+   } else {
+      brw_upload_ff_gs_prog(brw);
+   }
+
+   brw_upload_wm_prog(brw);
+}
+
/***********************************************************************
* Emit all state:
*/
/* Always re-emit all state. */
state->mesa |= ~0;
state->brw |= ~0ull;
- state->cache |= ~0;
}
if (brw->fragment_program != ctx->FragmentProgram._Current) {
brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES;
}
- if ((state->mesa | state->cache | state->brw) == 0)
+ if ((state->mesa | state->brw) == 0)
return;
+ /* Emit Sandybridge workaround flushes on every primitive, for safety. */
+ if (brw->gen == 6)
+ intel_emit_post_sync_nonzero_flush(brw);
+
+ brw_upload_programs(brw);
+
if (unlikely(INTEL_DEBUG)) {
/* Debug version which enforces various sanity checks on the
* state flags which are generated and checked to help ensure
prev = *state;
for (i = 0; i < brw->num_atoms; i++) {
- const struct brw_tracked_state *atom = brw->atoms[i];
+ const struct brw_tracked_state *atom = &brw->atoms[i];
struct brw_state_flags generated;
if (check_state(state, &atom->dirty)) {
}
else {
for (i = 0; i < brw->num_atoms; i++) {
- const struct brw_tracked_state *atom = brw->atoms[i];
+ const struct brw_tracked_state *atom = &brw->atoms[i];
if (check_state(state, &atom->dirty)) {
atom->emit(brw);
if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
- STATIC_ASSERT(ARRAY_SIZE(cache_bits) == BRW_MAX_CACHE + 1);
brw_update_dirty_count(mesa_bits, state->mesa);
brw_update_dirty_count(brw_bits, state->brw);
- brw_update_dirty_count(cache_bits, state->cache);
if (dirty_count++ % 1000 == 0) {
brw_print_dirty_count(mesa_bits);
brw_print_dirty_count(brw_bits);
- brw_print_dirty_count(cache_bits);
fprintf(stderr, "\n");
}
}