From 5c00f5fdca26647006c3653e73f7006f62c439b3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 29 Jun 2018 12:58:31 -0700
Subject: [PATCH] iris: Implement 3DSTATE_SO_DECL_LIST

---
 src/gallium/drivers/iris/iris_context.h       |  15 ++-
 src/gallium/drivers/iris/iris_program.c       |  55 +++++++---
 src/gallium/drivers/iris/iris_program_cache.c |  12 ++-
 src/gallium/drivers/iris/iris_state.c         | 110 +++++++++++++++++-
 4 files changed, 169 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 4d1b616750a..ca8596623e1 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -93,6 +93,9 @@ struct blorp_params;
 #define IRIS_DIRTY_BINDINGS_GS              (1ull << 45)
 #define IRIS_DIRTY_BINDINGS_FS              (1ull << 46)
 #define IRIS_DIRTY_BINDINGS_CS              (1ull << 47)
+#define IRIS_DIRTY_SO_BUFFERS               (1ull << 48)
+#define IRIS_DIRTY_SO_DECL_LIST             (1ull << 49)
+#define IRIS_DIRTY_STREAMOUT                (1ull << 50)
 
 struct iris_depth_stencil_alpha_state;
 
@@ -167,6 +170,11 @@ struct iris_compiled_shader {
    /** The program data (owned by the program cache hash table) */
    struct brw_stage_prog_data *prog_data;
 
+   /**
+    * Derived 3DSTATE_SO_DECL_LIST packet (for transform feedback).
+    */
+   uint32_t *so_decl_list;
+
    /**
     * Shader packets and other data derived from prog_data.  These must be
     * completely determined from prog_data.
@@ -203,6 +211,8 @@ struct iris_vtable {
    void (*store_derived_program_state)(const struct gen_device_info *devinfo,
                                        enum iris_program_cache_id cache_id,
                                        struct iris_compiled_shader *shader);
+   uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
+                                    const struct brw_vue_map *vue_map);
    void (*populate_vs_key)(const struct iris_context *ice,
                            struct brw_vs_prog_key *key);
    void (*populate_tcs_key)(const struct iris_context *ice,
@@ -274,6 +284,8 @@ struct iris_context {
       unsigned num_samplers[MESA_SHADER_STAGES];
       unsigned num_textures[MESA_SHADER_STAGES];
 
+      uint32_t *so_decl_list;
+
       struct iris_state_ref unbound_tex;
 
       struct u_upload_mgr *surface_uploader;
@@ -379,7 +391,8 @@ void iris_upload_and_bind_shader(struct iris_context *ice,
                                  enum iris_program_cache_id cache_id,
                                  const void *key,
                                  const void *assembly,
-                                 struct brw_stage_prog_data *prog_data);
+                                 struct brw_stage_prog_data *prog_data,
+                                 uint32_t *so_decl_list);
 const void *iris_find_previous_compile(const struct iris_context *ice,
                                        enum iris_program_cache_id cache_id,
                                        unsigned program_string_id);
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index d966dcd6d0a..35fbda92c9e 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -67,6 +67,8 @@ iris_create_shader_state(struct pipe_context *ctx,
    ish->program_id = get_new_program_id(screen);
    ish->base.type = PIPE_SHADER_IR_NIR;
    ish->base.ir.nir = nir;
+   memcpy(&ish->base.stream_output, &state->stream_output,
+          sizeof(struct pipe_stream_output_info));
 
    return ish;
 }
@@ -280,7 +282,12 @@ iris_compile_vs(struct iris_context *ice,
 
    iris_setup_push_uniform_range(compiler, prog_data);
 
-   iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data);
+   uint32_t *so_decls =
+      ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+                                    &vue_prog_data->vue_map);
+
+   iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data,
+                               so_decls);
 
    ralloc_free(mem_ctx);
    return true;
@@ -343,7 +350,12 @@ iris_compile_tes(struct iris_context *ice,
 
    iris_setup_push_uniform_range(compiler, prog_data);
 
-   iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data);
+   uint32_t *so_decls =
+      ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+                                    &vue_prog_data->vue_map);
+
+   iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data,
+                               so_decls);
 
    ralloc_free(mem_ctx);
    return true;
@@ -405,7 +417,12 @@ iris_compile_gs(struct iris_context *ice,
 
    iris_setup_push_uniform_range(compiler, prog_data);
 
-   iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data);
+   uint32_t *so_decls =
+      ice->vtbl.create_so_decl_list(&ish->base.stream_output,
+                                    &vue_prog_data->vue_map);
+
+   iris_upload_and_bind_shader(ice, IRIS_CACHE_GS, key, program, prog_data,
+                               so_decls);
 
    ralloc_free(mem_ctx);
    return true;
@@ -468,7 +485,8 @@ iris_compile_fs(struct iris_context *ice,
 
    iris_setup_push_uniform_range(compiler, prog_data);
 
-   iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data);
+   iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data,
+                               NULL);
 
    ralloc_free(mem_ctx);
    return true;
@@ -488,18 +506,22 @@ iris_update_compiled_fs(struct iris_context *ice)
                       ice->shaders.last_vue_map);
 }
 
-static void
-update_last_vue_map(struct iris_context *ice)
+static struct iris_compiled_shader *
+last_vue_shader(struct iris_context *ice)
 {
-   struct brw_stage_prog_data *prog_data;
-
    if (ice->shaders.prog[MESA_SHADER_GEOMETRY])
-      prog_data = ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
-   else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
-      prog_data = ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
-   else
-      prog_data = ice->shaders.prog[MESA_SHADER_VERTEX]->prog_data;
+      return ice->shaders.prog[MESA_SHADER_GEOMETRY];
+
+   if (ice->shaders.prog[MESA_SHADER_TESS_EVAL])
+      return ice->shaders.prog[MESA_SHADER_TESS_EVAL];
 
+   return ice->shaders.prog[MESA_SHADER_VERTEX];
+}
+
+static void
+update_last_vue_map(struct iris_context *ice,
+                    struct brw_stage_prog_data *prog_data)
+{
    struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
    struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
    struct brw_vue_map *old_map = ice->shaders.last_vue_map;
@@ -553,7 +575,12 @@ iris_update_compiled_shaders(struct iris_context *ice)
    if (dirty & IRIS_DIRTY_UNCOMPILED_GS)
       iris_update_compiled_gs(ice);
 
-   update_last_vue_map(ice);
+   struct iris_compiled_shader *shader = last_vue_shader(ice);
+   update_last_vue_map(ice, shader->prog_data);
+   if (ice->state.so_decl_list != shader->so_decl_list) {
+      ice->state.so_decl_list = shader->so_decl_list;
+      ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST;
+   }
 
    if (dirty & IRIS_DIRTY_UNCOMPILED_FS)
       iris_update_compiled_fs(ice);
diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c
index 17e92566392..c2323bd0995 100644
--- a/src/gallium/drivers/iris/iris_program_cache.c
+++ b/src/gallium/drivers/iris/iris_program_cache.c
@@ -211,7 +211,8 @@ iris_upload_shader(struct iris_context *ice,
                    uint32_t key_size,
                    const void *key,
                    const void *assembly,
-                   struct brw_stage_prog_data *prog_data)
+                   struct brw_stage_prog_data *prog_data,
+                   uint32_t *so_decl_list)
 {
    struct iris_screen *screen = (void *) ice->ctx.screen;
    struct gen_device_info *devinfo = &screen->devinfo;
@@ -241,10 +242,12 @@ iris_upload_shader(struct iris_context *ice,
    }
 
    shader->prog_data = prog_data;
+   shader->so_decl_list = so_decl_list;
 
    ralloc_steal(shader, shader->prog_data);
    ralloc_steal(shader->prog_data, prog_data->param);
    ralloc_steal(shader->prog_data, prog_data->pull_param);
+   ralloc_steal(shader, shader->so_decl_list);
 
    /* Store the 3DSTATE shader packets and other derived state. */
    ice->vtbl.store_derived_program_state(devinfo, cache_id, shader);
@@ -265,13 +268,14 @@ iris_upload_and_bind_shader(struct iris_context *ice,
                             enum iris_program_cache_id cache_id,
                             const void *key,
                             const void *assembly,
-                            struct brw_stage_prog_data *prog_data)
+                            struct brw_stage_prog_data *prog_data,
+                            uint32_t *so_decl_list)
 {
    assert(cache_id != IRIS_CACHE_BLORP);
 
    struct iris_compiled_shader *shader =
       iris_upload_shader(ice, cache_id, key_size_for_cache(cache_id), key,
-                         assembly, prog_data);
+                         assembly, prog_data, so_decl_list);
 
    ice->shaders.prog[cache_id] = shader;
    ice->state.dirty |= dirty_flag_for_cache(cache_id);
@@ -318,7 +322,7 @@ iris_blorp_upload_shader(struct blorp_batch *blorp_batch,
 
    struct iris_compiled_shader *shader =
       iris_upload_shader(ice, IRIS_CACHE_BLORP, key_size, key, kernel,
-                         prog_data);
+                         prog_data, NULL);
 
    struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
    *kernel_out =
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 7ba9c500cab..5db1319984f 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -1655,6 +1655,106 @@ iris_set_stream_output_targets(struct pipe_context *ctx,
 {
 }
 
+/**
+ * Build the 3DSTATE_SO_DECL_LIST packet for a shader's stream outputs.
+ *
+ * The buffer is sized as three header dwords plus two dwords for each
+ * SO_DECL_ENTRY, i.e. 3 + 2 * max_decls dwords in total.
+ *
+ * Returns a ralloc'd dword array with no parent context, or NULL when
+ * info declares no outputs.
+ */
+static uint32_t *
+iris_create_so_decl_list(const struct pipe_stream_output_info *info,
+                         const struct brw_vue_map *vue_map)
+{
+   struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
+   int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+   int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+   int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+   int max_decls = 0;
+   STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
+
+   if (info->num_outputs == 0)
+      return NULL;
+
+   memset(so_decl, 0, sizeof(so_decl));
+
+   /* Construct the list of SO_DECLs to be emitted.  The formatting of the
+    * command feels strange -- each dword pair contains a SO_DECL per stream.
+    */
+   for (unsigned i = 0; i < info->num_outputs; i++) {
+      const struct pipe_stream_output *output = &info->output[i];
+      const int buffer = output->output_buffer;
+      const int varying = output->register_index;
+      const unsigned stream_id = output->stream;
+      assert(stream_id < MAX_VERTEX_STREAMS);
+
+      buffer_mask[stream_id] |= 1 << buffer;
+
+      assert(vue_map->varying_to_slot[varying] >= 0);
+
+      /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
+       * array.  Instead, it simply increments DstOffset for the following
+       * input by the number of components that should be skipped.
+       *
+       * Our hardware is unusual in that it requires us to program SO_DECLs
+       * for fake "hole" components, rather than simply taking the offset
+       * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
+       * program as many size = 4 holes as we can, then a final hole to
+       * accommodate the final 1, 2, or 3 remaining.
+       */
+      int skip_components = output->dst_offset - next_offset[buffer];
+
+      while (skip_components > 0) {
+         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+            .HoleFlag = 1,
+            .OutputBufferSlot = output->output_buffer,
+            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
+         };
+         skip_components -= 4;
+      }
+
+      next_offset[buffer] = output->dst_offset + output->num_components;
+
+      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+         .OutputBufferSlot = output->output_buffer,
+         .RegisterIndex = vue_map->varying_to_slot[varying],
+         .ComponentMask =
+            ((1 << output->num_components) - 1) << output->start_component,
+      };
+
+      if (decls[stream_id] > max_decls)
+         max_decls = decls[stream_id];
+   }
+
+   uint32_t *dw = ralloc_size(NULL, sizeof(uint32_t) * (3 + 2 * max_decls));
+
+   iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), dw, list) {
+      list.DWordLength = 3 + 2 * max_decls - 2;
+      list.StreamtoBufferSelects0 = buffer_mask[0];
+      list.StreamtoBufferSelects1 = buffer_mask[1];
+      list.StreamtoBufferSelects2 = buffer_mask[2];
+      list.StreamtoBufferSelects3 = buffer_mask[3];
+      list.NumEntries0 = decls[0];
+      list.NumEntries1 = decls[1];
+      list.NumEntries2 = decls[2];
+      list.NumEntries3 = decls[3];
+   }
+
+   /* Entries start right after the three header dwords, at dw + 3. */
+   for (int i = 0; i < max_decls; i++) {
+      iris_pack_state(GENX(SO_DECL_ENTRY), dw + 3 + i * 2, entry) {
+         entry.Stream0Decl = so_decl[0][i];
+         entry.Stream1Decl = so_decl[1][i];
+         entry.Stream2Decl = so_decl[2][i];
+         entry.Stream3Decl = so_decl[3][i];
+      }
+   }
+
+   return dw;
+}
+
 static void
 iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
                                    const struct brw_vue_map *last_vue_map,
@@ -2459,8 +2559,6 @@ iris_restore_context_saved_bos(struct iris_context *ice,
       }
    }
 
-   // XXX: 3DSTATE_SO_BUFFER
-
    if (clean & IRIS_DIRTY_DEPTH_BUFFER) {
       struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
 
@@ -2695,10 +2793,13 @@ iris_upload_render_state(struct iris_context *ice,
       }
    }
 
+   if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.so_decl_list) {
+      iris_batch_emit(batch, ice->state.so_decl_list,
+                      4 * ((ice->state.so_decl_list[0] & 0xff) + 2));
+   }
+
    // XXX: SOL:
    // 3DSTATE_STREAMOUT
-   // 3DSTATE_SO_BUFFER
-   // 3DSTATE_SO_DECL_LIST
 
    if (dirty & IRIS_DIRTY_CLIP) {
       struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
@@ -3391,6 +3492,7 @@ genX(init_state)(struct iris_context *ice)
    ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
    ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
    ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
+   ice->vtbl.create_so_decl_list = iris_create_so_decl_list;
    ice->vtbl.populate_vs_key = iris_populate_vs_key;
    ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
    ice->vtbl.populate_tes_key = iris_populate_tes_key;
-- 
2.30.2