dst[8 * t] = t;
}
+/**
+ * Allocate scratch BOs as needed for the given per-thread size and stage.
+ *
+ * Scratch BOs are cached in ice->shaders.scratch_bos, indexed by the
+ * encoded per-thread size and the shader stage.  A BO is only allocated
+ * the first time a given (size, stage) slot is requested; later requests
+ * for the same slot return the existing BO's address.
+ *
+ * per_thread_scratch is the number of scratch bytes each thread needs.
+ * It is encoded as ffs(size) - 11, so it must be a power of two between
+ * 1k and 2M (0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M).
+ *
+ * Returns the 32-bit "Scratch Space Base Pointer" value.
+ */
+uint32_t
+iris_get_scratch_space(struct iris_context *ice,
+                       unsigned per_thread_scratch,
+                       gl_shader_stage stage)
+{
+   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+
+   /* Per Thread Scratch Space is in the range [0, 11] where
+    * 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.  A size of zero, or one whose
+    * lowest set bit is below 1k, makes the unsigned subtraction wrap to a
+    * huge value, which this range check rejects as well.
+    */
+   unsigned encoded_size = ffs(per_thread_scratch) - 11;
+   assert(encoded_size <= 11);
+
+   /* ffs() only looks at the lowest set bit, so a size that is not a power
+    * of two would be filed under a smaller size class and could end up
+    * sharing a BO that is too small for it.  Insist on an exact match.
+    */
+   assert(per_thread_scratch == 1u << (encoded_size + 10));
+
+   struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
+
+   /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
+    *
+    *    "Scratch Space per slice is computed based on 4 sub-slices.  SW must
+    *     allocate scratch space enough so that each slice has 4 slices
+    *     allowed."
+    *
+    * According to the other driver team, this applies to compute shaders
+    * as well.  This is not currently documented at all.
+    */
+   unsigned subslice_total = 4 * devinfo->num_slices;
+   assert(subslice_total >= screen->subslice_total);
+
+   if (!*bop) {
+      unsigned scratch_ids_per_subslice = devinfo->max_cs_threads;
+
+      /* Number of threads that may need scratch at once, per stage. */
+      uint32_t max_threads[] = {
+         [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
+         [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
+         [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
+         [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
+         [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
+         [MESA_SHADER_COMPUTE]   = scratch_ids_per_subslice * subslice_total,
+      };
+
+      uint32_t size = per_thread_scratch * max_threads[stage];
+
+      /* NOTE(review): the result of iris_bo_alloc() is dereferenced below
+       * without a NULL check -- confirm whether allocation can fail here
+       * and what the caller should program in that case.
+       */
+      *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER);
+   }
+
+   return (*bop)->gtt_offset;
+}
+
void
iris_init_program_functions(struct pipe_context *ctx)
{
// XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
pkt.SamplerCount = \
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
- pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \
- ffs(stage_state->per_thread_scratch) - 11; \
#endif
// prefetching of binding tables in A0 and B0 steppings. XXX: Revisit
// this WA on C0 stepping.
-#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
+#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix, stage) \
pkt.KernelStartPointer = KSP(shader); \
pkt.BindingTableEntryCount = GEN_GEN == 11 ? 0 : \
prog_data->binding_table.size_bytes / 4; \
pkt.prefix##URBEntryReadOffset = 0; \
\
pkt.StatisticsEnable = true; \
- pkt.Enable = true;
+ pkt.Enable = true; \
+ \
+ if (prog_data->total_scratch) { \
+ uint32_t scratch_addr = \
+ iris_get_scratch_space(ice, prog_data->total_scratch, stage); \
+ pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
+ pkt.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr); \
+ }
/**
* Encode most of 3DSTATE_VS based on the compiled shader.
*/
static void
-iris_store_vs_state(const struct gen_device_info *devinfo,
+iris_store_vs_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
- INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
+ INIT_THREAD_DISPATCH_FIELDS(vs, Vertex, MESA_SHADER_VERTEX);
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
vs.SIMD8DispatchEnable = true;
vs.UserClipDistanceCullTestEnableBitmask =
* Encode most of 3DSTATE_HS based on the compiled shader.
*/
static void
-iris_store_tcs_state(const struct gen_device_info *devinfo,
+iris_store_tcs_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
- INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
+ INIT_THREAD_DISPATCH_FIELDS(hs, Vertex, MESA_SHADER_TESS_CTRL);
hs.InstanceCount = tcs_prog_data->instances - 1;
hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
* Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader.
*/
static void
-iris_store_tes_state(const struct gen_device_info *devinfo,
+iris_store_tes_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
}
iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
- INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
+ INIT_THREAD_DISPATCH_FIELDS(ds, Patch, MESA_SHADER_TESS_EVAL);
ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
* Encode most of 3DSTATE_GS based on the compiled shader.
*/
static void
-iris_store_gs_state(const struct gen_device_info *devinfo,
+iris_store_gs_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
- INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
+ INIT_THREAD_DISPATCH_FIELDS(gs, Vertex, MESA_SHADER_GEOMETRY);
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
gs.OutputTopology = gs_prog_data->output_topology;
* Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader.
*/
static void
-iris_store_fs_state(const struct gen_device_info *devinfo,
+iris_store_fs_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
ps.KernelStartPointer2 =
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
+
+ if (prog_data->total_scratch) {
+ uint32_t scratch_addr =
+ iris_get_scratch_space(ice, prog_data->total_scratch,
+ MESA_SHADER_FRAGMENT);
+ ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
+ ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr);
+ }
}
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
* This must match the data written by the iris_store_xs_state() functions.
*/
static void
-iris_store_cs_state(const struct gen_device_info *devinfo,
+iris_store_cs_state(struct iris_context *ice,
+ const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader)
{
struct brw_stage_prog_data *prog_data = shader->prog_data;
* get most of the state packet without having to reconstruct it.
*/
static void
-iris_store_derived_program_state(const struct gen_device_info *devinfo,
+iris_store_derived_program_state(struct iris_context *ice,
enum iris_program_cache_id cache_id,
struct iris_compiled_shader *shader)
{
+ struct iris_screen *screen = (void *) ice->ctx.screen;
+ const struct gen_device_info *devinfo = &screen->devinfo;
+
switch (cache_id) {
case IRIS_CACHE_VS:
- iris_store_vs_state(devinfo, shader);
+ iris_store_vs_state(ice, devinfo, shader);
break;
case IRIS_CACHE_TCS:
- iris_store_tcs_state(devinfo, shader);
+ iris_store_tcs_state(ice, devinfo, shader);
break;
case IRIS_CACHE_TES:
- iris_store_tes_state(devinfo, shader);
+ iris_store_tes_state(ice, devinfo, shader);
break;
case IRIS_CACHE_GS:
- iris_store_gs_state(devinfo, shader);
+ iris_store_gs_state(ice, devinfo, shader);
break;
case IRIS_CACHE_FS:
- iris_store_fs_state(devinfo, shader);
+ iris_store_fs_state(ice, devinfo, shader);
break;
case IRIS_CACHE_CS:
- iris_store_cs_state(devinfo, shader);
+ iris_store_cs_state(ice, devinfo, shader);
case IRIS_CACHE_BLORP:
break;
default:
iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
- /* Per Thread Scratch Space is in the range [0, 11] where
- * 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
- */
- // XXX: vfe.ScratchSpaceBasePointer
- //vfe.PerThreadScratchSpace =
- //ffs(stage_state->per_thread_scratch) - 11;
+ uint32_t scratch_addr =
+ iris_get_scratch_space(ice, prog_data->total_scratch,
+ MESA_SHADER_COMPUTE);
+ vfe.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
+ vfe.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr);
}
vfe.MaximumNumberofThreads =