From e459d6d6dff1317a43b3f98d41e4b8bde39d1858 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Tue, 28 May 2019 17:33:58 -0500
Subject: [PATCH] iris: Enable nir_opt_large_constants

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15306230 -> 15304726 (<.01%)
    instructions in affected programs: 4570 -> 3066 (-32.91%)
    helped: 16
    HURT: 0

    total cycles in shared programs: 361703436 -> 361680041 (<.01%)
    cycles in affected programs: 129388 -> 105993 (-18.08%)
    helped: 16
    HURT: 0

    LOST: 0
    GAINED: 2

The helped programs were in XCom 2, Deus Ex: Mankind Divided, and Kerbal
Space Program

Reviewed-by: Kenneth Graunke
---
 src/gallium/drivers/iris/iris_context.h |  6 +++
 src/gallium/drivers/iris/iris_program.c | 66 +++++++++++++++++++++++++
 src/gallium/drivers/iris/iris_screen.c  |  1 +
 src/gallium/drivers/iris/iris_state.c   |  9 ++++
 4 files changed, 82 insertions(+)

diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 3009c8d5627..d54f348debe 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -273,6 +273,12 @@ struct iris_uncompiled_shader {
 
    /** Should we use ALT mode for math? Useful for ARB programs. */
    bool use_alt_mode;
+
+   /** Constant data scraped from the shader by nir_opt_large_constants */
+   struct pipe_resource *const_data;
+
+   /** Surface state for const_data */
+   struct iris_state_ref const_data_state;
 };
 
 /**
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index e0a6a5c1edd..87b887aa3b6 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -273,6 +273,14 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
       prog_data->binding_table.image_start = 0xd0d0d0d0;
    }
 
+   /* Allocate a slot in the UBO section for NIR constants if present.
+    * We don't include them in iris_compiled_shader::num_cbufs because
+    * they are uploaded separately from shs->constbuf[], but from a shader
+    * point of view, they're another UBO (at the end of the section).
+    */
+   if (nir->constant_data_size > 0)
+      num_cbufs++;
+
    if (num_cbufs) {
       //assert(info->num_ubos <= BRW_MAX_UBO);
       prog_data->binding_table.ubo_start = next_binding_table_offset;
@@ -361,6 +369,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
    b.cursor = nir_before_block(nir_start_block(impl));
 
    nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
+   nir_ssa_def *temp_const_ubo_name = NULL;
 
    /* Turn system value intrinsics into uniforms */
    nir_foreach_block(block, impl) {
@@ -372,6 +381,34 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
          nir_ssa_def *offset;
 
          switch (intrin->intrinsic) {
+         case nir_intrinsic_load_constant: {
+            /* This one is special because it reads from the shader constant
+             * data and not cbuf0 which gallium uploads for us.
+             */
+            b.cursor = nir_before_instr(instr);
+            nir_ssa_def *offset =
+               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
+                                nir_intrinsic_base(intrin));
+
+            if (temp_const_ubo_name == NULL)
+               temp_const_ubo_name = nir_imm_int(&b, 0);
+
+            nir_intrinsic_instr *load_ubo =
+               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
+            load_ubo->num_components = intrin->num_components;
+            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
+            load_ubo->src[1] = nir_src_for_ssa(offset);
+            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
+                              intrin->dest.ssa.num_components,
+                              intrin->dest.ssa.bit_size,
+                              intrin->dest.ssa.name);
+            nir_builder_instr_insert(&b, &load_ubo->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load_ubo->dest.ssa));
+            nir_instr_remove(&intrin->instr);
+            continue;
+         }
          case nir_intrinsic_load_user_clip_plane: {
             unsigned ucp = nir_intrinsic_ucp_id(intrin);
 
@@ -529,6 +566,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
    if (num_cbufs || num_system_values || nir->num_uniforms)
       num_cbufs++;
 
+   /* Constant loads (if any) need to go at the end of the constant buffers so
+    * we need to know num_cbufs before we can lower to them.
+    */
+   if (temp_const_ubo_name != NULL) {
+      nir_load_const_instr *const_ubo_index =
+         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
+      assert(const_ubo_index->def.bit_size == 32);
+      const_ubo_index->value[0].u32 = num_cbufs;
+   }
+
    *out_system_values = system_values;
    *out_num_system_values = num_system_values;
    *out_num_cbufs = num_cbufs;
@@ -1514,6 +1561,7 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
                               nir_shader *nir,
                               const struct pipe_stream_output_info *so_info)
 {
+   struct iris_context *ice = (void *)ctx;
    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
    const struct gen_device_info *devinfo = &screen->devinfo;
 
@@ -1527,6 +1575,19 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
    NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
    NIR_PASS_V(nir, iris_lower_storage_image_derefs);
 
+   if (nir->constant_data_size > 0) {
+      unsigned data_offset;
+      u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
+                    32, nir->constant_data, &data_offset, &ish->const_data);
+
+      struct pipe_shader_buffer psb = {
+         .buffer = ish->const_data,
+         .buffer_offset = data_offset,
+         .buffer_size = nir->constant_data_size,
+      };
+      iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
+   }
+
    ish->program_id = get_new_program_id(screen);
    ish->nir = nir;
    if (so_info) {
@@ -1771,6 +1832,11 @@ iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage
       ice->state.dirty |= IRIS_DIRTY_UNCOMPILED_VS << stage;
    }
 
+   if (ish->const_data) {
+      pipe_resource_reference(&ish->const_data, NULL);
+      pipe_resource_reference(&ish->const_data_state.res, NULL);
+   }
+
    ralloc_free(ish->nir);
    free(ish);
 }
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 611718e90df..b3c4466805e 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -647,6 +647,7 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
    screen->compiler->shader_debug_log = iris_shader_debug_log;
    screen->compiler->shader_perf_log = iris_shader_perf_log;
    screen->compiler->supports_pull_constants = false;
+   screen->compiler->supports_shader_constants = true;
 
    iris_disk_cache_init(screen);
 
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 0f6290ae8dc..fb72c4b5b7d 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4120,6 +4120,7 @@ iris_populate_binding_table(struct iris_context *ice,
                             bool pin_only)
 {
    const struct iris_binder *binder = &ice->state.binder;
+   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
    if (!shader)
       return;
@@ -4194,6 +4195,14 @@ iris_populate_binding_table(struct iris_context *ice,
       push_bt_entry(addr);
    }
 
+   if (ish->const_data) {
+      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false);
+      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
+                         false);
+      uint32_t addr = ish->const_data_state.offset;
+      push_bt_entry(addr);
+   }
+
    bt_assert(ssbo_start, info->num_abos + info->num_ssbos > 0);
 
    /* XXX: st is wasting 16 binding table slots for ABOs. Should add a cap
-- 
2.30.2