From cfc7ed75eb8baab69308ed520f97a3efd75fb548 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 28 Sep 2017 19:04:31 -0700 Subject: [PATCH] i965: Store image_param in brw_context instead of prog_data This burns an extra 10k of memory or so in the case where you don't have any images. However, if you have several shaders which use images, this should be much less memory. It also gets rid of a part of prog_data that really has nothing to do with the compiler. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_compiler.h | 4 ---- src/intel/vulkan/anv_pipeline_cache.c | 6 ++---- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_cs.c | 4 ---- src/mesa/drivers/dri/i965/brw_curbe.c | 4 ++-- src/mesa/drivers/dri/i965/brw_gs.c | 4 ---- src/mesa/drivers/dri/i965/brw_program.c | 1 - src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/brw_tcs.c | 5 ----- src/mesa/drivers/dri/i965/brw_tes.c | 4 ---- src/mesa/drivers/dri/i965/brw_vs.c | 5 ----- src/mesa/drivers/dri/i965/brw_wm.c | 4 ---- .../drivers/dri/i965/brw_wm_surface_state.c | 2 +- .../drivers/dri/i965/gen6_constant_state.c | 19 +++++++++---------- 14 files changed, 17 insertions(+), 49 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index f2f9be750a0..94fe63e46f1 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -574,7 +574,6 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; - unsigned nr_image_params; unsigned curb_read_length; unsigned total_scratch; @@ -596,9 +595,6 @@ struct brw_stage_prog_data { */ uint32_t *param; uint32_t *pull_param; - - /** Image metadata passed to the shader as uniforms. */ - struct brw_image_param *image_param; }; static inline void diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index f782d04fbe6..ad9c810f1f4 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -37,7 +37,7 @@ anv_shader_bin_create(struct anv_device *device, struct anv_shader_bin *shader; struct anv_shader_bin_key *key; struct brw_stage_prog_data *prog_data; - const union gl_constant_value **prog_data_param; + uint32_t *prog_data_param; struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor; ANV_MULTIALLOC(ma); @@ -169,9 +169,7 @@ anv_shader_bin_create_from_blob(struct anv_device *device, * * - Review prog_data struct for size and cacheability: struct * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 - * bit quantities etc; param, pull_param, and image_params are pointers, we - * just need the compation map. use bit fields for all bools, eg - * dual_src_blend. + * bit quantities etc; use bit fields for all bools, eg dual_src_blend. */ static uint32_t diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index dea35ee04f6..272197f1e2f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -580,6 +580,8 @@ struct brw_stage_state uint32_t sampler_count; uint32_t sampler_offset; + struct brw_image_param image_param[BRW_MAX_IMAGES]; + /** Need to re-emit 3DSTATE_CONSTANT_XS? */ bool push_constants_dirty; }; diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 68fca098f76..0c505b3e505 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -91,11 +91,7 @@ brw_codegen_cs_prog(struct brw_context *brw, param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.image_param = - rzalloc_array(NULL, struct brw_image_param, - cp->program.info.num_images); prog_data.base.nr_params = param_count; - prog_data.base.nr_image_params = cp->program.info.num_images; brw_nir_setup_glsl_uniforms(cp->program.nir, &cp->program,&prog_data.base, true); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9a9c6d08a0f..c747110e310 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -227,7 +227,7 @@ brw_upload_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.wm_start * 16; /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ - brw_populate_constant_data(brw, fp, brw->wm.base.prog_data, &buf[offset], + brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset], brw->wm.base.prog_data->param, brw->wm.base.prog_data->nr_params); } @@ -268,7 +268,7 @@ brw_upload_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ - brw_populate_constant_data(brw, vp, brw->vs.base.prog_data, &buf[offset], + brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset], brw->vs.base.prog_data->param, brw->vs.base.prog_data->nr_params); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index c040665a3b6..917742a5922 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -101,11 +101,7 @@ brw_codegen_gs_prog(struct brw_context *brw, prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.image_param = - rzalloc_array(NULL, struct brw_image_param, - gp->program.info.num_images); prog_data.base.base.nr_params = param_count; - prog_data.base.base.nr_image_params = gp->program.info.num_images; brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program, &prog_data.base.base, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index c11ac871e58..f88977fb723 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -618,7 +618,6 @@ brw_stage_prog_data_free(const void *p) ralloc_free(prog_data->param); ralloc_free(prog_data->pull_param); - ralloc_free(prog_data->image_param); } void diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 64acc1d9bf7..8db354cf232 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -237,7 +237,7 @@ void brw_emit_sampler_state(struct brw_context *brw, void brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, + const struct brw_stage_state *stage_state, void *dst, const uint32_t *param, unsigned nr_params); diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 272545453a4..8fd7364a1b1 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -196,11 +196,6 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, brw_assign_common_binding_table_offsets(devinfo, &tcp->program, &prog_data.base.base, 0); - prog_data.base.base.image_param = - rzalloc_array(NULL, struct brw_image_param, - tcp->program.info.num_images); - prog_data.base.base.nr_image_params = tcp->program.info.num_images; - brw_nir_setup_glsl_uniforms(nir, &tcp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c index 7ee925b0891..763207f7172 100644 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ b/src/mesa/drivers/dri/i965/brw_tes.c @@ -92,11 +92,7 @@ brw_codegen_tes_prog(struct brw_context *brw, prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.image_param = - rzalloc_array(NULL, struct brw_image_param, - tep->program.info.num_images); prog_data.base.base.nr_params = param_count; - prog_data.base.base.nr_image_params = tep->program.info.num_images; brw_nir_setup_glsl_uniforms(nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 9dd812e1981..8672d1ec180 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -184,8 +184,6 @@ brw_codegen_vs_prog(struct brw_context *brw, */ int param_count = vp->program.nir->num_uniforms / 4; - prog_data.base.base.nr_image_params = vp->program.info.num_images; - /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. */ @@ -193,9 +191,6 @@ brw_codegen_vs_prog(struct brw_context *brw, stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count); - stage_prog_data->image_param = - rzalloc_array(NULL, struct brw_image_param, - stage_prog_data->nr_image_params); stage_prog_data->nr_params = param_count; if (!vp->program.is_arb_asm) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ddafa526198..6f2f6f224ce 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -150,14 +150,10 @@ brw_codegen_wm_prog(struct brw_context *brw, * by the state cache. */ int param_count = fp->program.nir->num_uniforms / 4; - prog_data.base.nr_image_params = fp->program.info.num_images; /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.image_param = - rzalloc_array(NULL, struct brw_image_param, - prog_data.base.nr_image_params); prog_data.base.nr_params = param_count; if (!fp->program.is_arb_asm) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index b62d1e22a12..f4e9cf48c6a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1630,7 +1630,7 @@ brw_upload_image_surfaces(struct brw_context *brw, update_image_surface(brw, u, prog->sh.ImageAccess[i], surf_idx, &stage_state->surf_offset[surf_idx], - &prog_data->image_param[i]); + &stage_state->image_param[i]); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index 93a12c706ff..eb9e29187c4 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -38,7 +38,7 @@ f_as_u32(float f) static uint32_t brw_param_value(struct brw_context *brw, const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, + const struct brw_stage_state *stage_state, uint32_t param) { struct gl_context *ctx = &brw->ctx; @@ -80,9 +80,8 @@ brw_param_value(struct brw_context *brw, case BRW_PARAM_DOMAIN_IMAGE: { unsigned idx = BRW_PARAM_IMAGE_IDX(param); unsigned offset = BRW_PARAM_IMAGE_OFFSET(param); - assert(idx < prog_data->nr_image_params); - assert(offset < sizeof(struct brw_image_param)); - return ((uint32_t *)&prog_data->image_param[idx])[offset]; + assert(offset < ARRAY_SIZE(stage_state->image_param)); + return ((uint32_t *)&stage_state->image_param[idx])[offset]; } default: @@ -94,14 +93,14 @@ brw_param_value(struct brw_context *brw, void brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, + const struct brw_stage_state *stage_state, void *void_dst, const uint32_t *param, unsigned nr_params) { uint32_t *dst = void_dst; for (unsigned i = 0; i < nr_params; i++) - dst[i] = brw_param_value(brw, prog, prog_data, param[i]); + dst[i] = brw_param_value(brw, prog, stage_state, param[i]); } @@ -159,7 +158,7 @@ gen6_upload_push_constants(struct brw_context *brw, * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS * wouldn't be set for them. */ - brw_populate_constant_data(brw, prog, prog_data, param, + brw_populate_constant_data(brw, prog, stage_state, param, prog_data->param, prog_data->nr_params); @@ -246,7 +245,7 @@ brw_upload_pull_constants(struct brw_context *brw, STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - brw_populate_constant_data(brw, prog, prog_data, constants, + brw_populate_constant_data(brw, prog, stage_state, constants, prog_data->pull_param, prog_data->nr_pull_params); @@ -312,7 +311,7 @@ brw_upload_cs_push_constants(struct brw_context *brw, for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { - param_copy[i] = brw_param_value(brw, prog, prog_data, + param_copy[i] = brw_param_value(brw, prog, stage_state, prog_data->param[i]); } } @@ -325,7 +324,7 @@ brw_upload_cs_push_constants(struct brw_context *brw, unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { if (src != cs_prog_data->thread_local_id_index) { - param[dst] = brw_param_value(brw, prog, prog_data, + param[dst] = brw_param_value(brw, prog, stage_state, prog_data->param[src]); } else { param[dst] = t * cs_prog_data->simd_size; -- 2.30.2