gen7_allocate_push_constants(struct brw_context *brw)
{
unsigned avail_size = 16;
- unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1;
+ unsigned multiplier =
+ (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
/* BRW_NEW_GEOMETRY_PROGRAM */
bool gs_present = brw->geometry_program;
offset += gs_size;
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
- OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+ OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
ADVANCE_BATCH();
/* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
*
* No such restriction exists for Haswell.
*/
- if (!brw->is_haswell) {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
- /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
- * CS Stall):
- *
- * One of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- *
- * We choose to do a Post-Sync Operation (Write Immediate Data), since
- * it seems like it will incur the least additional performance penalty.
- */
- OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(brw->batch.workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
+ if (brw->gen < 8 && !brw->is_haswell)
+ gen7_emit_cs_stall_flush(brw);
}
const struct brw_tracked_state gen7_push_constant_space = {
static void
gen7_upload_urb(struct brw_context *brw)
{
- const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
+ const int push_size_kB =
+ (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
/* CACHE_NEW_VS_PROG */
unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
* additional space it could actually make use of).
*/
- /* VS always requires at least 32 URB entries */
+ /* VS has a lower limit on the number of URB entries */
unsigned vs_chunks =
- ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes;
+ ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) /
+ chunk_size_bytes;
unsigned vs_wants =
ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
chunk_size_bytes) / chunk_size_bytes - vs_chunks;
/* Finally, sanity check to make sure we have at least the minimum number
* of entries needed for each stage.
*/
- assert(nr_vs_entries >= 32);
+ assert(nr_vs_entries >= brw->urb.min_vs_entries);
if (gs_present)
assert(nr_gs_entries >= 2);
brw->urb.vs_start = push_constant_chunks;
brw->urb.gs_start = push_constant_chunks + vs_chunks;
- gen7_emit_vs_workaround_flush(brw);
+ if (brw->gen == 7 && !brw->is_haswell)
+ gen7_emit_vs_workaround_flush(brw);
gen7_emit_urb_state(brw,
brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start);