From: Jason Ekstrand Date: Fri, 29 Sep 2017 04:45:41 +0000 (-0700) Subject: intel: Allocate prog_data::[pull_]param deeper inside the compiler X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=29737eac985cf028b19d977cb8fa0d7320cf91cf;p=mesa.git intel: Allocate prog_data::[pull_]param deeper inside the compiler Now that we're always growing the param array as-needed, we can allocate the param array in common code and stop repeating the allocation everywhere. In order to keep things sane, we ralloc the [pull_]param array off of the compile context and then steal it back to a NULL context later. This doesn't get us all the way to where prog_data::[pull_]param is purely an out parameter of the back-end compiler but it gets us a lot closer. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 6f64a731cac..1493b742e42 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -134,12 +134,13 @@ enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo, enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); -void brw_nir_setup_glsl_uniforms(nir_shader *shader, +void brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader, const struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, bool is_scalar); -void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, +void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, + struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data); void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 8614886967c..04304fcb457 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1762,8 +1762,7 @@ vec4_visitor::setup_uniforms(int reg) * matter what, or the GPU would hang. 
*/ if (devinfo->gen < 6 && this->uniforms == 0) { - stage_prog_data->param = - reralloc(NULL, stage_prog_data->param, uint32_t, 4); + brw_stage_prog_data_add_params(stage_prog_data, 4); for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO; diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index dacb25e5eea..be7680def7c 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -77,18 +77,8 @@ brw_codegen_cs_prog(struct brw_context *brw, assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - */ - int param_count = cp->program.nir->num_uniforms / 4; - - prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.nr_params = param_count; - - brw_nir_setup_glsl_uniforms(cp->program.nir, &cp->program,&prog_data.base, - true); + brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir, + &cp->program, &prog_data.base, true); if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -149,6 +139,9 @@ brw_codegen_cs_prog(struct brw_context *brw, prog_data.base.total_scratch, scratch_ids_per_subslice * subslices); + /* The param and pull_param arrays will be freed by the shader cache. 
*/ + ralloc_steal(NULL, prog_data.base.param); + ralloc_steal(NULL, prog_data.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG, key, sizeof(*key), program, program_size, diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 917742a5922..007629cbbbe 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -87,23 +87,11 @@ brw_codegen_gs_prog(struct brw_context *brw, memset(&prog_data, 0, sizeof(prog_data)); - assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); - - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - * - * Note: param_count needs to be num_uniform_components * 4, since we add - * padding around uniform values below vec4 size, so the worst case is that - * every uniform is a float which gets padded to the size of a vec4. - */ - int param_count = gp->program.nir->num_uniforms / 4; + void *mem_ctx = ralloc_context(NULL); - prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.nr_params = param_count; + assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); - brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, @@ -124,7 +112,6 @@ brw_codegen_gs_prog(struct brw_context *brw, start_time = get_time(); } - void *mem_ctx = ralloc_context(NULL); unsigned program_size; char *error_str; const unsigned *program = @@ -155,6 +142,9 @@ brw_codegen_gs_prog(struct brw_context *brw, prog_data.base.base.total_scratch, devinfo->max_gs_threads); + /* The param and pull_param arrays will be freed by the shader cache. 
*/ + ralloc_steal(NULL, prog_data.base.base.param); + ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, key, sizeof(*key), program, program_size, diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index de6df4835d4..a3e7b12acd3 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -187,10 +187,16 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, } void -brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog, +brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader, + const struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data, bool is_scalar) { + unsigned nr_params = shader->num_uniforms / 4; + stage_prog_data->nr_params = nr_params; + stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); + stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params); + nir_foreach_variable(var, &shader->uniforms) { /* UBO's, atomics and samplers don't take up space in the uniform file */ @@ -208,11 +214,17 @@ brw_nir_setup_glsl_uniforms(nir_shader *shader, const struct gl_program *prog, } void -brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, +brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, + struct gl_program *prog, struct brw_stage_prog_data *stage_prog_data) { struct gl_program_parameter_list *plist = prog->Parameters; + unsigned nr_params = plist->NumParameters * 4; + stage_prog_data->nr_params = nr_params; + stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); + stage_prog_data->pull_param = rzalloc_array(mem_ctx, uint32_t, nr_params); + /* For ARB programs, prog_to_nir generates a single "parameters" variable * for all uniform data. nir_lower_wpos_ytransform may also create an * additional variable. 
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 8fd7364a1b1..6c9cb153d31 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -178,25 +178,12 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, memset(&prog_data, 0, sizeof(prog_data)); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - * - * Note: param_count needs to be num_uniform_components * 4, since we add - * padding around uniform values below vec4 size, so the worst case is that - * every uniform is a float which gets padded to the size of a vec4. - */ - int param_count = nir->num_uniforms / 4; - - prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.nr_params = param_count; - if (tcp) { brw_assign_common_binding_table_offsets(devinfo, &tcp->program, &prog_data.base.base, 0); - brw_nir_setup_glsl_uniforms(nir, &tcp->program, &prog_data.base.base, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, + &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, prog_data.base.base.ubo_ranges); @@ -204,6 +191,10 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, /* Upload the Patch URB Header as the first two uniforms. * Do the annoying scrambling so the shader doesn't have to. 
*/ + assert(nir->num_uniforms == 32); + prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8); + prog_data.base.base.nr_params = 8; + uint32_t *param = prog_data.base.base.param; for (int i = 0; i < 8; i++) param[i] = BRW_PARAM_BUILTIN_ZERO; @@ -272,6 +263,9 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, prog_data.base.base.total_scratch, devinfo->max_tcs_threads); + /* The param and pull_param arrays will be freed by the shader cache. */ + ralloc_steal(NULL, prog_data.base.base.param); + ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG, key, sizeof(*key), program, program_size, diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c index 763207f7172..47f3b510a54 100644 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ b/src/mesa/drivers/dri/i965/brw_tes.c @@ -77,24 +77,13 @@ brw_codegen_tes_prog(struct brw_context *brw, memset(&prog_data, 0, sizeof(prog_data)); + void *mem_ctx = ralloc_context(NULL); + brw_assign_common_binding_table_offsets(devinfo, &tep->program, &prog_data.base.base, 0); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. - * - * Note: param_count needs to be num_uniform_components * 4, since we add - * padding around uniform values below vec4 size, so the worst case is that - * every uniform is a float which gets padded to the size of a vec4. 
- */ - int param_count = nir->num_uniforms / 4; - - prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.base.nr_params = param_count; - - brw_nir_setup_glsl_uniforms(nir, &tep->program, &prog_data.base.base, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, + &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, prog_data.base.base.ubo_ranges); @@ -112,7 +101,6 @@ brw_codegen_tes_prog(struct brw_context *brw, brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, key->patch_inputs_read); - void *mem_ctx = ralloc_context(NULL); unsigned program_size; char *error_str; const unsigned *program = @@ -145,6 +133,9 @@ brw_codegen_tes_prog(struct brw_context *brw, prog_data.base.base.total_scratch, devinfo->max_tes_threads); + /* The param and pull_param arrays will be freed by the shader cache. */ + ralloc_steal(NULL, prog_data.base.base.param); + ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, key, sizeof(*key), program, program_size, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index c3440fde58d..fb5ea4e7ed1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -178,24 +178,14 @@ brw_codegen_vs_prog(struct brw_context *brw, brw_assign_common_binding_table_offsets(devinfo, &vp->program, &prog_data.base.base, 0); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. 
- */ - int param_count = vp->program.nir->num_uniforms / 4; - - stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count); - stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count); - stage_prog_data->nr_params = param_count; - if (!vp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(vp->program.nir, &vp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_VERTEX]); brw_nir_analyze_ubo_ranges(compiler, vp->program.nir, prog_data.base.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(vp->program.nir, &vp->program, + brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program, &prog_data.base.base); } @@ -262,6 +252,9 @@ brw_codegen_vs_prog(struct brw_context *brw, prog_data.base.base.total_scratch, devinfo->max_vs_threads); + /* The param and pull_param arrays will be freed by the shader cache. */ + ralloc_steal(NULL, prog_data.base.base.param); + ralloc_steal(NULL, prog_data.base.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, key, sizeof(struct brw_vs_prog_key), program, program_size, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1b5774e4e8a..69d8e61e402 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -144,22 +144,13 @@ brw_codegen_wm_prog(struct brw_context *brw, assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); - /* Allocate the references to the uniforms that will end up in the - * prog_data associated with the compiled program, and which will be freed - * by the state cache. 
- */ - int param_count = fp->program.nir->num_uniforms / 4; - prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); - prog_data.base.nr_params = param_count; - if (!fp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(fp->program.nir, &fp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program, &prog_data.base, true); brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir, prog_data.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(fp->program.nir, &fp->program, + brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program, &prog_data.base); if (unlikely(INTEL_DEBUG & DEBUG_WM)) @@ -217,6 +208,9 @@ brw_codegen_wm_prog(struct brw_context *brw, if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm)) fprintf(stderr, "\n"); + /* The param and pull_param arrays will be freed by the shader cache. */ + ralloc_steal(NULL, prog_data.base.param); + ralloc_steal(NULL, prog_data.base.pull_param); brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, key, sizeof(struct brw_wm_prog_key), program, program_size,