#include "brw_state.h"
#include "intel_batchbuffer.h"
+static const GLuint stage_to_bt_edit[] = { /* Indexed by gl_shader_stage; each
+ * entry is the 3DSTATE_BINDING_TABLE_EDIT_* opcode for that stage. Unlisted
+ * stages below the last index are implicitly zero, which the
+ * assert(stage_to_bt_edit[stage]) checks in
+ * gen7_edit_hw_binding_table_entry() and
+ * gen7_update_binding_table_from_array() reject. */
+ [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
+ [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
+ [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
+};
+
+/**
+ * Reserve room for one binding table in the hardware binding table pool.
+ *
+ * Returns the pool offset at which the table may be placed.  When the
+ * request would reach into the padding kept free at the end of the pool,
+ * the pool offsets are reset first and the reservation is made from the
+ * reset position.
+ */
+static uint32_t
+reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
+{
+   /* WaStateBindingTableOverfetch -- Broadwell PRM, Volume 16, "Workarounds":
+    *
+    *    "HW over-fetches two cache lines of binding table indices. When
+    *    using the resource streamer, SW needs to pad binding table pointer
+    *    updates with an additional two cache lines."
+    *
+    * A cache line is 64 bytes, so the final 128 bytes of the pool buffer
+    * are never handed out.
+    */
+   const int overfetch_pad_bytes = 2 * 64;
+
+   if (brw->hw_bt_pool.next_offset + bytes >=
+       brw->hw_bt_pool.bo->size - overfetch_pad_bytes)
+      gen7_reset_hw_bt_pool_offsets(brw);
+
+   /* Must be sampled after the possible reset above. */
+   const uint32_t reserved_at = brw->hw_bt_pool.next_offset;
+
+   /* Haswell PRM, Volume 2b: Command Reference: Instructions,
+    * 3DSTATE_BINDING_TABLE_POINTERS_xS:
+    *
+    *    "If HW Binding Table is enabled, the offset is relative to the
+    *    Binding Table Pool Base Address and the alignment is 64 bytes."
+    *
+    * Hence the pool cursor advances in 64-byte steps.
+    */
+   brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
+
+   return reserved_at;
+}
+
/**
* Upload a shader stage's binding table as indirect state.
*
* packet_name is the opcode written into bits 31:16 of the two-dword
* binding-table-pointer packet emitted on gen >= 7; prog_data supplies
* binding_table.size_bytes (and binding_table.shader_time_start when
* DEBUG_SHADER_TIME is set); stage_state supplies surf_offset[] and has its
* bind_bo_offset updated.
*
* With an empty table (size_bytes == 0) nothing is uploaded and
* bind_bo_offset is zeroed; the function returns early if it was already
* zero and gen < 9. Otherwise the table is written either through
* gen7_update_binding_table_from_array() (resource streamer in use) or by
* copying surf_offset[] into space obtained from brw_state_batch(). On every
* path that does not return early, BRW_NEW_BINDING_TABLE_POINTERS is raised
* in brw->ctx.NewDriverState.
*/
void
brw_upload_binding_table(struct brw_context *brw,
- GLbitfield brw_new_binding_table,
- struct brw_stage_state *stage_state,
- unsigned binding_table_entries,
- int shader_time_surf_index)
+ uint32_t packet_name,
+ const struct brw_stage_prog_data *prog_data,
+ struct brw_stage_state *stage_state)
{
- if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
- gen7_create_shader_time_surface(brw, &stage_state->surf_offset[shader_time_surf_index]);
- }
+ if (prog_data->binding_table.size_bytes == 0) {
+ /* There are no surfaces; skip making the binding table altogether. */
+ if (stage_state->bind_bo_offset == 0 && brw->gen < 9)
+ return;
- /* If there are no surfaces, skip making the binding table altogether. */
- if (binding_table_entries == 0) {
- if (stage_state->bind_bo_offset != 0) {
- brw->state.dirty.brw |= brw_new_binding_table;
- stage_state->bind_bo_offset = 0;
+ stage_state->bind_bo_offset = 0;
+ } else {
+ /* Upload a new binding table. */
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ brw_emit_buffer_surface_state(
+ brw, &stage_state->surf_offset[
+ prog_data->binding_table.shader_time_start],
+ brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
+ brw->shader_time.bo->size, 1, true);
}
- return;
- }
-
- size_t table_size_in_bytes = binding_table_entries * sizeof(uint32_t);
+ /* When RS is enabled use hw-binding table uploads, otherwise fall back to
+ * software-uploads. size_bytes / 4 is the entry count handed to the
+ * hardware path (one 32-bit surface offset per entry).
+ */
+ if (brw->use_resource_streamer) {
+ gen7_update_binding_table_from_array(brw, stage_state->stage,
+ stage_state->surf_offset,
+ prog_data->binding_table
+ .size_bytes / 4);
+ } else {
+ uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ prog_data->binding_table.size_bytes,
+ 32,
+ &stage_state->bind_bo_offset);
- uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
- table_size_in_bytes, 32,
- &stage_state->bind_bo_offset);
+ /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+ memcpy(bind, stage_state->surf_offset,
+ prog_data->binding_table.size_bytes);
+ }
+ }
- /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
- memcpy(bind, stage_state->surf_offset, table_size_in_bytes);
+ brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
- brw->state.dirty.brw |= brw_new_binding_table;
+ if (brw->gen >= 7) {
+ if (brw->use_resource_streamer) {
+ stage_state->bind_bo_offset =
+ reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
+ }
+ BEGIN_BATCH(2);
+ OUT_BATCH(packet_name << 16 | (2 - 2));
+ /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
+ * when hw-generated binding table is enabled.
+ */
+ OUT_BATCH(brw->use_resource_streamer ?
+ (stage_state->bind_bo_offset >> 1) :
+ stage_state->bind_bo_offset);
+ ADVANCE_BATCH();
+ }
}
/**
static void
brw_vs_upload_binding_table(struct brw_context *brw)
{
- struct brw_stage_state *stage_state = &brw->vs.base;
- /* CACHE_NEW_VS_PROG */
- const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
-
- /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
- brw_upload_binding_table(brw, BRW_NEW_VS_BINDING_TABLE, stage_state,
- prog_data->binding_table_size,
- SURF_INDEX_VEC4_SHADER_TIME);
+ /* BRW_NEW_VS_PROG_DATA: the binding table size comes from the
+ * stage-generic prog_data stored in brw->vs.base; the shared
+ * brw_upload_binding_table() helper is called with the VS pointer packet
+ * opcode and the VS stage state.
+ */
+ const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
+ brw_upload_binding_table(brw,
+ _3DSTATE_BINDING_TABLE_POINTERS_VS,
+ prog_data,
+ &brw->vs.base);
}
/* Tracked-state entry for the VS binding table: .emit names
* brw_vs_upload_binding_table() and .dirty.brw lists the BRW_NEW_* bits it is
* registered under.
* NOTE(review): how the state-upload machinery consumes .dirty is not visible
* in this section -- confirm before relying on it.
*/
const struct brw_tracked_state brw_vs_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
BRW_NEW_VS_CONSTBUF |
+ BRW_NEW_VS_PROG_DATA |
BRW_NEW_SURFACES,
- .cache = CACHE_NEW_VS_PROG
},
.emit = brw_vs_upload_binding_table,
};
/** Upload the fragment shader (WM/PS) binding table. */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
- struct brw_stage_state *stage_state = &brw->wm.base;
-
- /* BRW_NEW_SURFACES and CACHE_NEW_WM_PROG */
- brw_upload_binding_table(brw, BRW_NEW_PS_BINDING_TABLE, stage_state,
- brw->wm.prog_data->binding_table_size,
- SURF_INDEX_WM_SHADER_TIME);
+ /* BRW_NEW_FS_PROG_DATA: the binding table size comes from the
+ * stage-generic prog_data stored in brw->wm.base.
+ */
+ const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
+ brw_upload_binding_table(brw,
+ _3DSTATE_BINDING_TABLE_POINTERS_PS,
+ prog_data,
+ &brw->wm.base);
}
/* Tracked-state entry for the WM/PS binding table: .emit names
* brw_upload_wm_binding_table() and .dirty.brw lists the BRW_NEW_* bits it is
* registered under.
* NOTE(review): how the state-upload machinery consumes .dirty is not visible
* in this section -- confirm before relying on it.
*/
const struct brw_tracked_state brw_wm_binding_table = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH | BRW_NEW_SURFACES,
- .cache = CACHE_NEW_WM_PROG
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_SURFACES,
},
.emit = brw_upload_wm_binding_table,
};
+/**
+ * Upload the TCS binding table.
+ *
+ * The tessellation stages are treated as disabled when no tessellation
+ * evaluation program is bound; in that case nothing is uploaded.
+ */
+static void
+brw_tcs_upload_binding_table(struct brw_context *brw)
+{
+   if (brw->tess_eval_program != NULL) {
+      /* BRW_NEW_TCS_PROG_DATA */
+      brw_upload_binding_table(brw, _3DSTATE_BINDING_TABLE_POINTERS_HS,
+                               brw->tcs.base.prog_data, &brw->tcs.base);
+   }
+}
+/* Tracked-state entry for the TCS binding table: .emit names
+ * brw_tcs_upload_binding_table() and .dirty.brw lists the BRW_NEW_* bits it
+ * is registered under.
+ * NOTE(review): how the state-upload machinery consumes .dirty is not
+ * visible in this section -- confirm before relying on it.
+ */
+const struct brw_tracked_state brw_tcs_binding_table = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_DEFAULT_TESS_LEVELS |
+ BRW_NEW_SURFACES |
+ BRW_NEW_TCS_CONSTBUF |
+ BRW_NEW_TCS_PROG_DATA,
+ },
+ .emit = brw_tcs_upload_binding_table,
+};
+
+/**
+ * Upload the TES binding table.
+ *
+ * Leaves everything untouched when no tessellation evaluation program is
+ * bound.
+ */
+static void
+brw_tes_upload_binding_table(struct brw_context *brw)
+{
+   if (brw->tess_eval_program != NULL) {
+      /* BRW_NEW_TES_PROG_DATA */
+      brw_upload_binding_table(brw, _3DSTATE_BINDING_TABLE_POINTERS_DS,
+                               brw->tes.base.prog_data, &brw->tes.base);
+   }
+}
+/* Tracked-state entry for the TES binding table: .emit names
+ * brw_tes_upload_binding_table() and .dirty.brw lists the BRW_NEW_* bits it
+ * is registered under.
+ * NOTE(review): how the state-upload machinery consumes .dirty is not
+ * visible in this section -- confirm before relying on it.
+ */
+const struct brw_tracked_state brw_tes_binding_table = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_SURFACES |
+ BRW_NEW_TES_CONSTBUF |
+ BRW_NEW_TES_PROG_DATA,
+ },
+ .emit = brw_tes_upload_binding_table,
+};
+
/**
* Upload the GS binding table (if GS is active).
*
* "Active" is decided by brw->geometry_program being non-NULL; when it is
* NULL the function returns without calling brw_upload_binding_table().
*/
static void
brw_gs_upload_binding_table(struct brw_context *brw)
{
- struct brw_stage_state *stage_state = &brw->gs.base;
-
/* If there's no GS, skip changing anything. */
- if (!brw->gs.prog_data)
+ if (brw->geometry_program == NULL)
return;
- /* CACHE_NEW_GS_PROG */
- const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
-
- /* BRW_NEW_SURFACES and BRW_NEW_GS_CONSTBUF */
- brw_upload_binding_table(brw, BRW_NEW_GS_BINDING_TABLE, stage_state,
- prog_data->binding_table_size,
- SURF_INDEX_VEC4_SHADER_TIME);
+ /* BRW_NEW_GS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
+ brw_upload_binding_table(brw,
+ _3DSTATE_BINDING_TABLE_POINTERS_GS,
+ prog_data,
+ &brw->gs.base);
}
/* Tracked-state entry for the GS binding table: .emit names
* brw_gs_upload_binding_table() and .dirty.brw lists the BRW_NEW_* bits it is
* registered under.
* NOTE(review): how the state-upload machinery consumes .dirty is not visible
* in this section -- confirm before relying on it.
*/
const struct brw_tracked_state brw_gs_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
BRW_NEW_GS_CONSTBUF |
+ BRW_NEW_GS_PROG_DATA |
BRW_NEW_SURFACES,
- .cache = CACHE_NEW_GS_PROG
},
.emit = brw_gs_upload_binding_table,
};
+/**
+ * Edit a single entry in a hardware-generated binding table.
+ *
+ * Emits one three-dword 3DSTATE_BINDING_TABLE_EDIT_* packet for the given
+ * stage.  The payload dword carries index in the BRW_BINDING_TABLE_INDEX
+ * field and surf_offset encoded with the GEN8 or HSW SURFACE_STATE_EDIT
+ * macro, chosen by hardware generation.
+ *
+ * The stage must have an entry in stage_to_bt_edit[] (asserted).
+ */
+void
+gen7_edit_hw_binding_table_entry(struct brw_context *brw,
+                                 gl_shader_stage stage,
+                                 uint32_t index,
+                                 uint32_t surf_offset)
+{
+   assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+   const GLuint edit_opcode = stage_to_bt_edit[stage];
+   assert(edit_opcode);
+
+   uint32_t entry_dw = SET_FIELD(index, BRW_BINDING_TABLE_INDEX);
+   if (brw->gen >= 8)
+      entry_dw |= GEN8_SURFACE_STATE_EDIT(surf_offset);
+   else
+      entry_dw |= HSW_SURFACE_STATE_EDIT(surf_offset);
+
+   /* Header, edit target, then the single entry. */
+   const int packet_dwords = 3;
+
+   BEGIN_BATCH(packet_dwords);
+   OUT_BATCH(edit_opcode << 16 | (packet_dwords - 2));
+   OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+   OUT_BATCH(entry_dw);
+   ADVANCE_BATCH();
+}
+
+/**
+ * Upload a whole hardware binding table for the given stage.
+ *
+ * binding_table holds num_surfaces surface offsets.  A single
+ * 3DSTATE_BINDING_TABLE_EDIT_* packet is emitted: a header dword, the edit
+ * target dword, and then one dword per entry, with entry i carrying index i
+ * in the BRW_BINDING_TABLE_INDEX field alongside binding_table[i] encoded
+ * with the GEN8 or HSW SURFACE_STATE_EDIT macro.
+ *
+ * The stage must have an entry in stage_to_bt_edit[] (asserted).
+ */
+void
+gen7_update_binding_table_from_array(struct brw_context *brw,
+                                     gl_shader_stage stage,
+                                     const uint32_t* binding_table,
+                                     int num_surfaces)
+{
+   assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+   const GLuint edit_opcode = stage_to_bt_edit[stage];
+   assert(edit_opcode);
+
+   /* Two fixed dwords (header + edit target) precede the entries; the
+    * length field of the header is the total dword count minus two.
+    */
+   const int packet_dwords = num_surfaces + 2;
+
+   BEGIN_BATCH(packet_dwords);
+   OUT_BATCH(edit_opcode << 16 | (packet_dwords - 2));
+   OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+   for (int entry = 0; entry < num_surfaces; entry++) {
+      uint32_t entry_dw = SET_FIELD(entry, BRW_BINDING_TABLE_INDEX);
+
+      if (brw->gen >= 8)
+         entry_dw |= GEN8_SURFACE_STATE_EDIT(binding_table[entry]);
+      else
+         entry_dw |= HSW_SURFACE_STATE_EDIT(binding_table[entry]);
+
+      OUT_BATCH(entry_dw);
+   }
+   ADVANCE_BATCH();
+}
+
+/**
+ * Disable hardware binding table support, falling back to the
+ * older software-generated binding table mechanism.
+ *
+ * Does nothing when the resource streamer is not in use.  Otherwise a state
+ * cache invalidate is emitted, followed by a
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC packet whose payload is zero apart from
+ * the must-be-one bit on pre-gen8 hardware.
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+
+   /* Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    *
+    *    "When switching between HW and SW binding table generation, SW must
+    *    issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   if (brw->gen >= 8) {
+      /* Gen8+ packet: header plus three payload dwords. */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Pre-gen8 packet: header plus two payload dwords. */
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+      OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * Enable hardware binding tables and set up the binding table pool.
+ *
+ * Does nothing when the resource streamer is not in use.  The pool buffer
+ * object is allocated the first time through (when brw->hw_bt_pool.bo is
+ * still NULL) and reused afterwards.  A state cache invalidate is emitted
+ * before the 3DSTATE_BINDING_TABLE_POOL_ALLOC packet that points the
+ * hardware at the pool.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+
+   if (brw->hw_bt_pool.bo == NULL) {
+      /* A single re-usable buffer object serves for the lifetime of the
+       * context.  It is sized for the maximum number of binding tables
+       * that can be programmed per batch:
+       *
+       * From the Haswell PRM, Volume 7: 3D Media GPGPU,
+       * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+       *    "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+       */
+      const int pool_bytes = 16383 * 4;
+
+      brw->hw_bt_pool.next_offset = 0;
+      brw->hw_bt_pool.bo =
+         drm_intel_bo_alloc(brw->bufmgr, "hw_bt", pool_bytes, 64);
+   }
+
+   /* Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    *
+    *    "When switching between HW and SW binding table generation, SW must
+    *    issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   /* The enable bit is always set; the cacheability bits depend on the
+    * hardware.
+    */
+   uint32_t pool_dw1 = BRW_HW_BINDING_TABLE_ENABLE;
+   if (brw->is_haswell)
+      pool_dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
+                  HSW_BT_POOL_ALLOC_MUST_BE_ONE;
+   else if (brw->gen >= 8)
+      pool_dw1 |= BDW_MOCS_WB;
+
+   if (brw->gen >= 8) {
+      /* Header, 64-bit pool address relocation, then the pool size. */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (4 - 2));
+      OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, pool_dw1);
+      OUT_BATCH(brw->hw_bt_pool.bo->size);
+      ADVANCE_BATCH();
+   } else {
+      /* Header, then two relocations against the pool: one at offset
+       * pool_dw1 and one at the pool's size.
+       */
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, pool_dw1);
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                brw->hw_bt_pool.bo->size);
+      ADVANCE_BATCH();
+   }
+}
+/**
+ * Rewind the hardware binding table pool: the next reservation made by
+ * reserve_hw_bt_space() starts again at offset 0.
+ */
+void
+gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
+{
+ brw->hw_bt_pool.next_offset = 0;
+}
+/* Tracked-state entry that names gen7_enable_hw_binding_tables() as its
+ * .emit function, registered under BRW_NEW_BATCH and BRW_NEW_BLORP.
+ * NOTE(review): how the state-upload machinery consumes .dirty is not
+ * visible in this section -- confirm before relying on it.
+ */
+const struct brw_tracked_state gen7_hw_binding_tables = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP,
+ },
+ .emit = gen7_enable_hw_binding_tables
+};
+
/** @} */
/**
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
/* The per-stage BRW_NEW_{VS,GS,PS}_BINDING_TABLE bits give way to the single
* BRW_NEW_BINDING_TABLE_POINTERS bit, which brw_upload_binding_table() raises
* in brw->ctx.NewDriverState.
*/
- .brw = (BRW_NEW_BATCH |
- BRW_NEW_STATE_BASE_ADDRESS |
- BRW_NEW_VS_BINDING_TABLE |
- BRW_NEW_GS_BINDING_TABLE |
- BRW_NEW_PS_BINDING_TABLE),
- .cache = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_BINDING_TABLE_POINTERS |
+ BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gen4_upload_binding_table_pointers,
};
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
- OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
+ if (brw->ff_gs.prog_active)
+ OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
+ else
+ OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
/* Tracked-state entry that names gen6_upload_binding_table_pointers() as its
* .emit function. The per-stage BRW_NEW_{VS,GS,PS}_BINDING_TABLE bits give
* way to the single BRW_NEW_BINDING_TABLE_POINTERS bit, which
* brw_upload_binding_table() raises in brw->ctx.NewDriverState.
*/
const struct brw_tracked_state gen6_binding_table_pointers = {
.dirty = {
.mesa = 0,
- .brw = (BRW_NEW_BATCH |
- BRW_NEW_STATE_BASE_ADDRESS |
- BRW_NEW_VS_BINDING_TABLE |
- BRW_NEW_GS_BINDING_TABLE |
- BRW_NEW_PS_BINDING_TABLE),
- .cache = 0,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_BINDING_TABLE_POINTERS |
+ BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gen6_upload_binding_table_pointers,
};
-/* Gen7+ code lives in gen7_{vs,gs,wm}_state.c. */
-
/** @} */