#include "brw_state.h"
#include "intel_batchbuffer.h"
-static const GLuint stage_to_bt_edit[] = {
- [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
- [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
- [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
-};
-
-static uint32_t
-reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
-{
- /* From the Broadwell PRM, Volume 16, "Workarounds",
- * WaStateBindingTableOverfetch:
- * "HW over-fetches two cache lines of binding table indices. When
- * using the resource streamer, SW needs to pad binding table pointer
- * updates with an additional two cache lines."
- *
- * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
- * the binding table pool buffer.
- */
- if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
- gen7_reset_hw_bt_pool_offsets(brw);
- }
-
- uint32_t offset = brw->hw_bt_pool.next_offset;
-
- /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
- * 3DSTATE_BINDING_TABLE_POINTERS_xS:
- *
- * "If HW Binding Table is enabled, the offset is relative to the
- * Binding Table Pool Base Address and the alignment is 64 bytes."
- */
- brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
-
- return offset;
-}
-
/**
* Upload a shader stage's binding table as indirect state.
*
brw->shader_time.bo, 0, ISL_FORMAT_RAW,
brw->shader_time.bo->size, 1, true);
}
- /* When RS is enabled use hw-binding table uploads, otherwise fallback to
- * software-uploads.
- */
- if (brw->use_resource_streamer) {
- gen7_update_binding_table_from_array(brw, stage_state->stage,
- stage_state->surf_offset,
- prog_data->binding_table
- .size_bytes / 4);
- } else {
- uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
- prog_data->binding_table.size_bytes,
- 32,
- &stage_state->bind_bo_offset);
-
- /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
- memcpy(bind, stage_state->surf_offset,
- prog_data->binding_table.size_bytes);
- }
+ uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ prog_data->binding_table.size_bytes,
+ 32,
+ &stage_state->bind_bo_offset);
+
+ /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+ memcpy(bind, stage_state->surf_offset,
+ prog_data->binding_table.size_bytes);
}
brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
if (brw->gen >= 7) {
- if (brw->use_resource_streamer) {
- stage_state->bind_bo_offset =
- reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
- }
BEGIN_BATCH(2);
OUT_BATCH(packet_name << 16 | (2 - 2));
/* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
* when hw-generated binding table is enabled.
*/
- OUT_BATCH(brw->use_resource_streamer ?
- (stage_state->bind_bo_offset >> 1) :
- stage_state->bind_bo_offset);
+ OUT_BATCH(stage_state->bind_bo_offset);
ADVANCE_BATCH();
}
}
},
.emit = brw_gs_upload_binding_table,
};
-
-/**
- * Edit a single entry in a hardware-generated binding table
- */
-void
-gen7_edit_hw_binding_table_entry(struct brw_context *brw,
- gl_shader_stage stage,
- uint32_t index,
- uint32_t surf_offset)
-{
- assert(stage < ARRAY_SIZE(stage_to_bt_edit));
- assert(stage_to_bt_edit[stage]);
-
- uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) |
- (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) :
- HSW_SURFACE_STATE_EDIT(surf_offset));
-
- BEGIN_BATCH(3);
- OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
- OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
- OUT_BATCH(dw2);
- ADVANCE_BATCH();
-}
-
-/**
- * Upload a whole hardware binding table for the given stage.
- *
- * Takes an array of surface offsets and the number of binding table
- * entries.
- */
-void
-gen7_update_binding_table_from_array(struct brw_context *brw,
- gl_shader_stage stage,
- const uint32_t* binding_table,
- int num_surfaces)
-{
- uint32_t dw2 = 0;
-
- assert(stage < ARRAY_SIZE(stage_to_bt_edit));
- assert(stage_to_bt_edit[stage]);
-
- BEGIN_BATCH(num_surfaces + 2);
- OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
- OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
- for (int i = 0; i < num_surfaces; i++) {
- dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) |
- (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) :
- HSW_SURFACE_STATE_EDIT(binding_table[i]));
- OUT_BATCH(dw2);
- }
- ADVANCE_BATCH();
-}
-
-/**
- * Disable hardware binding table support, falling back to the
- * older software-generated binding table mechanism.
- */
-void
-gen7_disable_hw_binding_tables(struct brw_context *brw)
-{
- if (!brw->use_resource_streamer)
- return;
- /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
- * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
- *
- * "When switching between HW and SW binding table generation, SW must
- * issue a state cache invalidate."
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
-
- int pkt_len = brw->gen >= 8 ? 4 : 3;
-
- BEGIN_BATCH(pkt_len);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
- if (brw->gen >= 8) {
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- } else {
- OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
- OUT_BATCH(0);
- }
- ADVANCE_BATCH();
-}
-
-/**
- * Enable hardware binding tables and set up the binding table pool.
- */
-void
-gen7_enable_hw_binding_tables(struct brw_context *brw)
-{
- if (!brw->use_resource_streamer)
- return;
-
- if (!brw->hw_bt_pool.bo) {
- /* We use a single re-usable buffer object for the lifetime of the
- * context and size it to maximum allowed binding tables that can be
- * programmed per batch:
- *
- * From the Haswell PRM, Volume 7: 3D Media GPGPU,
- * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
- * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
- */
- static const int max_size = 16383 * 4;
- brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
- max_size, 64);
- brw->hw_bt_pool.next_offset = 0;
- }
-
- /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
- * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
- *
- * "When switching between HW and SW binding table generation, SW must
- * issue a state cache invalidate."
- */
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
-
- int pkt_len = brw->gen >= 8 ? 4 : 3;
- uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
- if (brw->is_haswell) {
- dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
- HSW_BT_POOL_ALLOC_MUST_BE_ONE;
- } else if (brw->gen >= 8) {
- dw1 |= BDW_MOCS_WB;
- }
-
- BEGIN_BATCH(pkt_len);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
- if (brw->gen >= 8) {
- OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
- OUT_BATCH(brw->hw_bt_pool.bo->size);
- } else {
- OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
- OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
- brw->hw_bt_pool.bo->size);
- }
- ADVANCE_BATCH();
-}
-
-void
-gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
-{
- brw->hw_bt_pool.next_offset = 0;
-}
-
-const struct brw_tracked_state gen7_hw_binding_tables = {
- .dirty = {
- .mesa = 0,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP,
- },
- .emit = gen7_enable_hw_binding_tables
-};
-
/** @} */
/**
brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
- brw->use_resource_streamer = screen->has_resource_streamer &&
- (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
- env_var_as_boolean("INTEL_USE_GATHER", false));
-
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
if (brw->wm.base.scratch_bo)
drm_intel_bo_unreference(brw->wm.base.scratch_bo);
- gen7_reset_hw_bt_pool_offsets(brw);
- drm_intel_bo_unreference(brw->hw_bt_pool.bo);
- brw->hw_bt_pool.bo = NULL;
-
drm_intel_gem_context_destroy(brw->hw_ctx);
if (ctx->swrast_context) {
struct brw_stage_state base;
} cs;
- /* RS hardware binding table */
- struct {
- drm_intel_bo *bo;
- uint32_t next_offset;
- } hw_bt_pool;
-
struct {
uint32_t state_offset;
uint32_t blend_state_offset;
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
-#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */
-#define BRW_HW_BINDING_TABLE_ENABLE (1 << 11)
-#define GEN7_HW_BT_POOL_MOCS_SHIFT 7
-#define GEN7_HW_BT_POOL_MOCS_MASK INTEL_MASK(10, 7)
-#define GEN8_HW_BT_POOL_MOCS_SHIFT 0
-#define GEN8_HW_BT_POOL_MOCS_MASK INTEL_MASK(6, 0)
-/* Only required in HSW */
-#define HSW_BT_POOL_ALLOC_MUST_BE_ONE (3 << 5)
-
-#define _3DSTATE_BINDING_TABLE_EDIT_VS 0x7843 /* GEN7.5 */
-#define _3DSTATE_BINDING_TABLE_EDIT_GS 0x7844 /* GEN7.5 */
-#define _3DSTATE_BINDING_TABLE_EDIT_HS 0x7845 /* GEN7.5 */
-#define _3DSTATE_BINDING_TABLE_EDIT_DS 0x7846 /* GEN7.5 */
-#define _3DSTATE_BINDING_TABLE_EDIT_PS 0x7847 /* GEN7.5 */
-#define BRW_BINDING_TABLE_INDEX_SHIFT 16
-#define BRW_BINDING_TABLE_INDEX_MASK INTEL_MASK(23, 16)
-
-#define BRW_BINDING_TABLE_EDIT_TARGET_ALL 3
-#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1 2
-#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0 1
-/* In HSW, when editing binding table entries to surface state offsets,
- * the surface state offset is a 16-bit value aligned to 32 bytes. But
- * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset
- * by 5 bits so it won't disturb bit 16 (which is used as the binding
- * table index entry), otherwise it would hang the GPU.
- */
-#define HSW_SURFACE_STATE_EDIT(value) (value >> 5)
-/* Same as Haswell, but surface state offsets now aligned to 64 bytes.*/
-#define GEN8_SURFACE_STATE_EDIT(value) (value >> 6)
-
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
-/* Resource streamer gather constants */
-#define _3DSTATE_GATHER_POOL_ALLOC 0x791A /* GEN7.5+ */
-#define HSW_GATHER_POOL_ALLOC_MUST_BE_ONE (3 << 4) /* GEN7.5 only */
-
-#define _3DSTATE_GATHER_CONSTANT_VS 0x7834 /* GEN7.5+ */
-#define _3DSTATE_GATHER_CONSTANT_GS 0x7835
-#define _3DSTATE_GATHER_CONSTANT_HS 0x7836
-#define _3DSTATE_GATHER_CONSTANT_DS 0x7837
-#define _3DSTATE_GATHER_CONSTANT_PS 0x7838
-#define HSW_GATHER_CONSTANT_ENABLE (1 << 11)
-#define HSW_GATHER_CONSTANT_BUFFER_VALID_SHIFT 16
-#define HSW_GATHER_CONSTANT_BUFFER_VALID_MASK INTEL_MASK(31, 16)
-#define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_SHIFT 12
-#define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_MASK INTEL_MASK(15, 12)
-#define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_SHIFT 8
-#define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_MASK INTEL_MASK(15, 8)
-#define HSW_GATHER_CONSTANT_CHANNEL_MASK_SHIFT 4
-#define HSW_GATHER_CONSTANT_CHANNEL_MASK_MASK INTEL_MASK(7, 4)
-
#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
/* DW1 */
# define SO_FUNCTION_ENABLE (1 << 31)
/* Load a value from memory into a register. Only available on Gen7+. */
#define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
-/* Haswell RS control */
-#define MI_RS_CONTROL (CMD_MI | (0x6 << 23))
-#define MI_RS_STORE_DATA_IMM (CMD_MI | (0x2b << 23))
/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23))
const uint32_t _3DSTATE_PIPELINE_SELECT =
is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
- if (brw->use_resource_streamer && pipeline != BRW_RENDER_PIPELINE) {
- /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
- * PIPELINE_SELECT [DevBWR+]":
- *
- * Project: HSW, BDW, CHV, SKL, BXT
- *
- * Hardware Binding Tables are only supported for 3D
- * workloads. Resource streamer must be enabled only for 3D
- * workloads. Resource streamer must be disabled for Media and GPGPU
- * workloads.
- */
- BEGIN_BATCH(1);
- OUT_BATCH(MI_RS_CONTROL | 0);
- ADVANCE_BATCH();
-
- gen7_disable_hw_binding_tables(brw);
-
- /* XXX - Disable gather constant pool too when we start using it. */
- }
-
if (brw->gen >= 8 && brw->gen < 10) {
/* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
*
OUT_BATCH(0);
ADVANCE_BATCH();
}
-
- if (brw->use_resource_streamer && pipeline == BRW_RENDER_PIPELINE) {
- /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
- * PIPELINE_SELECT [DevBWR+]":
- *
- * Project: HSW, BDW, CHV, SKL, BXT
- *
- * Hardware Binding Tables are only supported for 3D
- * workloads. Resource streamer must be enabled only for 3D
- * workloads. Resource streamer must be disabled for Media and GPGPU
- * workloads.
- */
- BEGIN_BATCH(1);
- OUT_BATCH(MI_RS_CONTROL | 1);
- ADVANCE_BATCH();
-
- gen7_enable_hw_binding_tables(brw);
-
- /* XXX - Re-enable gather constant pool here. */
- }
}
/**
extern const struct brw_tracked_state gen7_urb;
extern const struct brw_tracked_state gen7_vs_state;
extern const struct brw_tracked_state gen7_wm_state;
-extern const struct brw_tracked_state gen7_hw_binding_tables;
extern const struct brw_tracked_state haswell_cut_index;
extern const struct brw_tracked_state gen8_blend_state;
extern const struct brw_tracked_state gen8_ds_state;
const struct brw_stage_state *stage_state,
bool active, unsigned opcode);
-void gen7_rs_control(struct brw_context *brw, int enable);
-
-void gen7_edit_hw_binding_table_entry(struct brw_context *brw,
- gl_shader_stage stage,
- uint32_t index,
- uint32_t surf_offset);
-void gen7_update_binding_table_from_array(struct brw_context *brw,
- gl_shader_stage stage,
- const uint32_t* binding_table,
- int num_surfaces);
-void gen7_enable_hw_binding_tables(struct brw_context *brw);
-void gen7_disable_hw_binding_tables(struct brw_context *brw);
-void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
-
/* brw_clip.c */
void brw_upload_clip_prog(struct brw_context *brw);
&gen6_color_calc_state, /* must do before cc unit */
&gen6_depth_stencil_state, /* must do before cc unit */
- &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
-
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
&brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
&brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
&gen8_blend_state,
&gen6_color_calc_state,
- &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
-
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
&brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
&brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
gen7_l3_state.emit(brw);
#endif
- if (brw->use_resource_streamer)
- gen7_disable_hw_binding_tables(brw);
-
brw_emit_depth_stall_flushes(brw);
#if GEN_GEN == 8
if (brw->gen >= 6 && batch->ring == BLT_RING) {
flags = I915_EXEC_BLT;
} else {
- flags = I915_EXEC_RENDER |
- (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
+ flags = I915_EXEC_RENDER;
}
if (batch->needs_sol_reset)
flags |= I915_EXEC_GEN7_SOL_RESET;
drm_intel_bo_wait_rendering(brw->batch.bo);
}
- if (brw->use_resource_streamer)
- gen7_reset_hw_bt_pool_offsets(brw);
-
/* Start a new batch buffer. */
brw_new_batch(brw);
screen->compiler->shader_perf_log = shader_perf_log_mesa;
screen->program_id = 1;
- if (screen->devinfo.has_resource_streamer) {
- screen->has_resource_streamer =
- intel_get_boolean(screen, I915_PARAM_HAS_RESOURCE_STREAMER);
- }
-
screen->has_exec_fence =
intel_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE);
int hw_has_timestamp;
- /**
- * Does the kernel support resource streamer?
- */
- bool has_resource_streamer;
-
/**
* Does the kernel support context reset notifications?
*/