From: Kenneth Graunke Date: Tue, 10 Sep 2019 05:21:17 +0000 (-0700) Subject: intel/compiler: Record whether any pull constant loads occur X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0e4a75f9171398261ab8bbdc974dafbcaac0161c;p=mesa.git intel/compiler: Record whether any pull constant loads occur I would like for iris to be able to avoid setting up SURFACE_STATE for UBOs in the common case where all constants are pushed. Unfortunately, we don't know up front whether everything will be pushed: the backend is allowed to demote pushed UBOs to pull loads fairly late in the process. This is probably desirable though, as we'd like the backend to be able to re-pull pushed data to break up long live ranges in response to register pressure. Here we simply add a "are there any pull loads at all" boolean to prog_data, which is a bit crude but at least allows us to skip work in the common "everything pushed" case. We could skip more work by tracking exactly which UBO surfaces are pulled in a bitmask, but I wanted to avoid bringing back the old mark_surface_used() mechanism. Finer-grained tracking could allow us to skip a bit more work when multiple UBOs are in use and /some/ are 100% pushed, but others are accessed via pulls. However, I'm not sure how common this is and it would save at most 4 pull descriptors, so we defer that for now. Reviewed-by: Caio Marcelo de Oliveira Filho --- diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 340c8db566c..eb084448083 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -654,6 +654,9 @@ struct brw_stage_prog_data { unsigned program_size; + /** Does this program pull from any UBO or other constant buffers? */ + bool has_ubo_pull; + /** * Register where the thread expects to find input data from the URB * (typically uniforms, followed by vertex or fragment attributes). diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a677c8fd968..f1fe468d0c9 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2426,6 +2426,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, *out_surf_index = prog_data->binding_table.ubo_start + range->block; *out_pull_index = (32 * range->start + src.offset) / 4; + + prog_data->has_ubo_pull = true; return true; } @@ -2435,6 +2437,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, /* A regular uniform push constant */ *out_surf_index = stage_prog_data->binding_table.pull_constants_start; *out_pull_index = pull_constant_loc[location]; + + prog_data->has_ubo_pull = true; return true; } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 210b710daa2..043b4f196f3 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4323,6 +4323,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, base_offset, i * type_sz(dest.type)); + + prog_data->has_ubo_pull = true; } else { /* Even if we are loading doubles, a pull constant load will load * a 32-bit vec4, so should only reserve vgrf space for that. If we @@ -4362,6 +4364,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } } + prog_data->has_ubo_pull = true; + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0); const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);