#define IRIS_DIRTY_BINDINGS_GS (1ull << 45)
#define IRIS_DIRTY_BINDINGS_FS (1ull << 46)
#define IRIS_DIRTY_BINDINGS_CS (1ull << 47)
+#define IRIS_DIRTY_SO_BUFFERS (1ull << 48)
+#define IRIS_DIRTY_SO_DECL_LIST (1ull << 49)
+#define IRIS_DIRTY_STREAMOUT (1ull << 50)
struct iris_depth_stencil_alpha_state;
/** The program data (owned by the program cache hash table) */
struct brw_stage_prog_data *prog_data;
+ /**
+ * Derived 3DSTATE_SO_DECL_LIST packet (for transform feedback).
+ */
+ uint32_t *so_decl_list;
+
/**
* Shader packets and other data derived from prog_data. These must be
* completely determined from prog_data.
void (*store_derived_program_state)(const struct gen_device_info *devinfo,
enum iris_program_cache_id cache_id,
struct iris_compiled_shader *shader);
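+ /**
+ * Build a pre-packed 3DSTATE_SO_DECL_LIST from stream output info, or
+ * return NULL when there are no outputs. The list is ralloc'd with no
+ * parent; iris_upload_shader steals it onto the shader variant.
+ */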
+ uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
+ const struct brw_vue_map *vue_map);
void (*populate_vs_key)(const struct iris_context *ice,
struct brw_vs_prog_key *key);
void (*populate_tcs_key)(const struct iris_context *ice,
unsigned num_samplers[MESA_SHADER_STAGES];
unsigned num_textures[MESA_SHADER_STAGES];
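+ /** Pre-packed 3DSTATE_SO_DECL_LIST from the last enabled VUE stage. */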
+ uint32_t *so_decl_list;
+
struct iris_state_ref unbound_tex;
struct u_upload_mgr *surface_uploader;
enum iris_program_cache_id cache_id,
const void *key,
const void *assembly,
- struct brw_stage_prog_data *prog_data);
+ struct brw_stage_prog_data *prog_data,
+ uint32_t *so_decl_list);
const void *iris_find_previous_compile(const struct iris_context *ice,
enum iris_program_cache_id cache_id,
unsigned program_string_id);
ish->program_id = get_new_program_id(screen);
ish->base.type = PIPE_SHADER_IR_NIR;
ish->base.ir.nir = nir;
+ memcpy(&ish->base.stream_output, &state->stream_output,
+ sizeof(struct pipe_stream_output_info));
return ish;
}
iris_setup_push_uniform_range(compiler, prog_data);
- iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data);
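+ /* Build the decl list for this variant eagerly; at bind time, whichever
+ * VUE stage ends up last in the pipeline supplies the list that is
+ * actually emitted (see last_vue_shader).
+ */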
+ uint32_t *so_decls =
+ ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+ &vue_prog_data->vue_map);
+
+ iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data,
+ so_decls);
ralloc_free(mem_ctx);
return true;
iris_setup_push_uniform_range(compiler, prog_data);
- iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data);
+ uint32_t *so_decls =
+ ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+ &vue_prog_data->vue_map);
+
+ iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data,
+ so_decls);
ralloc_free(mem_ctx);
return true;
iris_setup_push_uniform_range(compiler, prog_data);
- iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data);
+ uint32_t *so_decls =
+ ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+ &vue_prog_data->vue_map);
+
+ iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data,
+ so_decls);
ralloc_free(mem_ctx);
return true;
iris_setup_push_uniform_range(compiler, prog_data);
- iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data);
+ iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data,
+ NULL);
ralloc_free(mem_ctx);
return true;
ice->shaders.last_vue_map);
}
-static void
-update_last_vue_map(struct iris_context *ice)
+static struct iris_compiled_shader *
+last_vue_shader(struct iris_context *ice)
{
- struct brw_stage_prog_data *prog_data;
-
if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
- prog_data = ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
- else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
- prog_data = ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
- else
- prog_data = ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
+ return ice->shaders.prog[MESA_SHADER_GEOMETRY];
+
+ if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
+ return ice->shaders.prog[MESA_SHADER_TESS_EVAL];
+ return ice->shaders.prog[MESA_SHADER_VERTEX];
+}
+
+static void
+update_last_vue_map(struct iris_context *ice,
+ struct brw_stage_prog_data *prog_data)
+{
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
struct brw_vue_map *old_map = ice->shaders.last_vue_map;
if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
iris_update_compiled_gs(ice);
- update_last_vue_map(ice);
+ struct iris_compiled_shader *shader = last_vue_shader(ice);
+ update_last_vue_map(ice, shader->prog_data);
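+
+ /* The decl list is ralloc'd onto the shader variant, so a pointer
+ * change is enough to know a new packet may need to be emitted.
+ */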
+ if (ice->state.so_decl_list != shader->so_decl_list) {
+ ice->state.so_decl_list = shader->so_decl_list;
+ ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST;
+ }
if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
iris_update_compiled_fs(ice);
uint32_t key_size,
const void *key,
const void *assembly,
- struct brw_stage_prog_data *prog_data)
+ struct brw_stage_prog_data *prog_data,
+ uint32_t *so_decl_list)
{
struct iris_screen *screen = (void *) ice->ctx.screen;
struct gen_device_info *devinfo = &screen->devinfo;
}
shader->prog_data = prog_data;
+ shader->so_decl_list = so_decl_list;
ralloc_steal(shader, shader->prog_data);
ralloc_steal(shader->prog_data, prog_data->param);
ralloc_steal(shader->prog_data, prog_data->pull_param);
+ ralloc_steal(shader, shader->so_decl_list);
/* Store the 3DSTATE shader packets and other derived state. */
ice->vtbl.store_derived_program_state(devinfo, cache_id, shader);
enum iris_program_cache_id cache_id,
const void *key,
const void *assembly,
- struct brw_stage_prog_data *prog_data)
+ struct brw_stage_prog_data *prog_data,
+ uint32_t *so_decl_list)
{
assert(cache_id != IRIS_CACHE_BLORP);
struct iris_compiled_shader *shader =
iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key,
- assembly, prog_data);
+ assembly, prog_data, so_decl_list);
ice->shaders.prog[cache_id] = shader;
ice->state.dirty |= dirty_flag_for_cache(cache_id);
struct iris_compiled_shader *shader =
iris_upload_shader(ice, IRIS_CACHE_BLORP, key_size, key, kernel,
- prog_data);
+ prog_data, NULL);
struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
*kernel_out =
{
}
+static uint32_t *
+iris_create_so_decl_list(const struct pipe_stream_output_info *info,
+ const struct brw_vue_map *vue_map)
+{
+ struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
+ int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int max_decls = 0;
+ STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
+
+ if (info->num_outputs == 0)
+ return NULL;
+
+ memset(so_decl, 0, sizeof(so_decl));
+
+ /* Construct the list of SO_DECLs to be emitted. The formatting of the
+ * command feels strange -- each dword pair contains a SO_DECL per stream.
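+ * Streams with fewer than max_decls entries are padded with the zeroed
+ * SO_DECLs from the memset above; the per-stream NumEntries fields tell
+ * the hardware how many are actually valid.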
+ */
+ for (unsigned i = 0; i < info->num_outputs; i++) {
+ const struct pipe_stream_output *output = &info->output[i];
+ const int buffer = output->output_buffer;
+ const int varying = output->register_index;
+ const unsigned stream_id = output->stream;
+ assert(stream_id < MAX_VERTEX_STREAMS);
+
+ buffer_mask[stream_id] |= 1 << buffer;
+
+ assert(vue_map->varying_to_slot[varying] >= 0);
+
+ /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
+ * array. Instead, it simply increments DstOffset for the following
+ * input by the number of components that should be skipped.
+ *
+ * Our hardware is unusual in that it requires us to program SO_DECLs
+ * for fake "hole" components, rather than simply taking the offset
+ * for each real varying. Each hole can have size 1, 2, 3, or 4; we
+ * program as many size = 4 holes as we can, then a final hole to
+ * accommodate the final 1, 2, or 3 remaining.
+ */
+ int skip_components = output->dst_offset - next_offset[buffer];
+
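+ /* For example, a 7-component gap (say, gl_SkipComponents4 followed by
+ * gl_SkipComponents3) is programmed as a mask 0xf hole and then a mask
+ * 0x7 hole: MIN2(7, 4) == 4, leaving 3.
+ */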
+ while (skip_components > 0) {
+ so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+ .HoleFlag = 1,
+ .OutputBufferSlot = output->output_buffer,
+ .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
+ };
+ skip_components -= 4;
+ }
+
+ next_offset[buffer] = output->dst_offset + output->num_components;
+
+ so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+ .OutputBufferSlot = output->output_buffer,
+ .RegisterIndex = vue_map->varying_to_slot[varying],
+ .ComponentMask =
+ ((1 << output->num_components) - 1) << output->start_component,
+ };
+
+ if (decls[stream_id] > max_decls)
+ max_decls = decls[stream_id];
+ }
+
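+ /* The command is 3 header dwords (header, stream-to-buffer selects,
+ * per-stream entry counts) followed by one two-dword SO_DECL_ENTRY per
+ * decl slot, so the entries start at dword 3.
+ */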
+ uint32_t *dw = ralloc_size(NULL, sizeof(uint32_t) * (3 + 2 * max_decls));
+
+ iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), dw, list) {
+ list.DWordLength = 3 + 2 * max_decls - 2;
+ list.StreamtoBufferSelects0 = buffer_mask[0];
+ list.StreamtoBufferSelects1 = buffer_mask[1];
+ list.StreamtoBufferSelects2 = buffer_mask[2];
+ list.StreamtoBufferSelects3 = buffer_mask[3];
+ list.NumEntries0 = decls[0];
+ list.NumEntries1 = decls[1];
+ list.NumEntries2 = decls[2];
+ list.NumEntries3 = decls[3];
+ }
+
+ for (int i = 0; i < max_decls; i++) {
+ iris_pack_state(GENX(SO_DECL_ENTRY), dw + 3 + i * 2, entry) {
+ entry.Stream0Decl = so_decl[0][i];
+ entry.Stream1Decl = so_decl[1][i];
+ entry.Stream2Decl = so_decl[2][i];
+ entry.Stream3Decl = so_decl[3][i];
+ }
+ }
+
+ return dw;
+}
+
static void
iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
const struct brw_vue_map *last_vue_map,
}
}
- // XXX: 3DSTATE_SO_BUFFER
-
if (clean & IRIS_DIRTY_DEPTH_BUFFER) {
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
}
}
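+ /* so_decl_list is a pre-packed 3DSTATE_SO_DECL_LIST; bits 7:0 of the
+ * header dword hold DWordLength, so the total size is that value plus
+ * 2 dwords.
+ */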
+ if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.so_decl_list) {
+ iris_batch_emit(batch, ice->state.so_decl_list,
+ 4 * ((ice->state.so_decl_list[0] & 0xff) + 2));
+ }
+
// XXX: SOL:
// 3DSTATE_STREAMOUT
- // 3DSTATE_SO_BUFFER
- // 3DSTATE_SO_DECL_LIST
if (dirty & IRIS_DIRTY_CLIP) {
struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
+ ice->vtbl.create_so_decl_list = iris_create_so_decl_list;
ice->vtbl.populate_vs_key = iris_populate_vs_key;
ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
ice->vtbl.populate_tes_key = iris_populate_tes_key;