From f144b78dfbb97a70121be6f20d10bad8111267e3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 26 Aug 2012 21:19:05 -0700 Subject: [PATCH] i965: Make the param pointer arrays for the WM dynamically sized. Saves 26.5MB of wasted memory allocation in the l4d2 demo. v2: Rebase on compare func change, fix comments. Reviewed-by: Ian Romanick (v1) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 7 +++-- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 -- src/mesa/drivers/dri/i965/brw_state_cache.c | 5 +++ src/mesa/drivers/dri/i965/brw_wm.c | 35 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_wm.h | 1 + 5 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 12b28bd3352..0fe640c0934 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -322,8 +322,8 @@ struct brw_wm_prog_data { * These must be the last fields of the struct (see * brw_wm_prog_data_compare()). */ - const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ - const float *pull_param[MAX_UNIFORMS * 4]; + const float **param; + const float **pull_param; }; /** @@ -631,6 +631,7 @@ struct brw_cache_item { typedef bool (*cache_aux_compare_func)(const void *a, const void *b, int aux_size, const void *key); +typedef void (*cache_aux_free_func)(const void *aux); struct brw_cache { struct brw_context *brw; @@ -648,6 +649,8 @@ struct brw_cache { * outside of the prog_data). If NULL, a plain memcmp is done. */ cache_aux_compare_func aux_compare[BRW_MAX_CACHE]; + /** Optional functions for freeing other pointers attached to a prog_data. */ + cache_aux_free_func aux_free[BRW_MAX_CACHE]; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 167ea08a079..cbeefe5bd55 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -528,8 +528,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) for (unsigned int i = 0; i < type->vector_elements; i++) { unsigned int param = c->prog_data.nr_params++; - assert(param < ARRAY_SIZE(c->prog_data.param)); - this->param_index[param] = loc; this->param_offset[param] = i; } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 92b81c7ad17..b8a2fc3a539 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -343,6 +343,7 @@ brw_init_caches(struct brw_context *brw) cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare; cache->aux_compare[BRW_WM_PROG] = brw_wm_prog_data_compare; + cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free; } static void @@ -357,6 +358,10 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; + if (cache->aux_free[c->cache_id]) { + const void *item_aux = c->key + c->key_size; + cache->aux_free[c->cache_id](item_aux); + } free((void *)c->key); free(c); } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6ef8cc28372..995e8f3fa21 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -273,6 +273,15 @@ brw_wm_prog_data_compare(const void *in_a, const void *in_b, return true; } +void +brw_wm_prog_data_free(const void *in_prog_data) +{ + const struct brw_wm_prog_data *prog_data = in_prog_data; + + ralloc_free((void *)prog_data->param); + ralloc_free((void *)prog_data->pull_param); +} + /** * All Mesa program -> GPU code generation goes through this function. * Depending on the instructions used (i.e. flow control instructions) @@ -286,8 +295,12 @@ bool do_wm_prog(struct brw_context *brw, struct intel_context *intel = &brw->intel; struct brw_wm_compile *c; const GLuint *program; + struct gl_shader *fs = NULL; GLuint program_size; + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + c = brw->wm.compile_data; if (c == NULL) { brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile); @@ -310,6 +323,28 @@ bool do_wm_prog(struct brw_context *brw, c->vreg = vreg; c->refs = refs; } + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + if (fs) { + int param_count = fs->num_uniform_components; + /* The backend also sometimes adds params for texture size. */ + param_count += 2 * BRW_MAX_TEX_UNIT; + + c->prog_data.param = rzalloc_array(c, const float *, param_count); + c->prog_data.pull_param = rzalloc_array(c, const float *, param_count); + } else { + /* brw_wm_pass0.c will also add references to 0.0 and 1.0 which are + * uploaded as push parameters. + */ + int param_count = (fp->program.Base.Parameters->NumParameters + 2) * 4; + c->prog_data.param = rzalloc_array(c, const float *, param_count); + /* The old backend never does pull constants. */ + c->prog_data.pull_param = NULL; + } + memcpy(&c->key, key, sizeof(*key)); c->fp = fp; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d2936c21449..a4b20467c18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -486,5 +486,6 @@ void brw_wm_debug_recompile(struct brw_context *brw, const struct brw_wm_prog_key *key); bool brw_wm_prog_data_compare(const void *a, const void *b, int aux_size, const void *key); +void brw_wm_prog_data_free(const void *in_prog_data); #endif -- 2.30.2