From: Jason Ekstrand Date: Thu, 28 Sep 2017 23:25:31 +0000 (-0700) Subject: intel: Rewrite the world of push/pull params X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2975e4c56a7aeade5a324aa4d446f18cc176fa06;p=mesa.git intel: Rewrite the world of push/pull params This moves us away from the array of pointers model and onto a model where each param is represented by a generic uint32_t handle. We reserve 2^16 of these handles for builtins that are generated somewhere inside the compiler and have well-defined meanings. Generic params have handles whose meanings are defined by the driver. The primary downside to this new approach is that it moves a little bit of the work that we would normally do at compile time to draw time. On my laptop this hurts OglBatch6 by no more than 1% and doesn't seem to have any measurable effect on OglBatch7. So, while this may come back to bite us, it doesn't look too bad. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index a426a030d29..7cc6335f2f6 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -225,7 +225,7 @@ blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx, const unsigned *program = brw_compile_vs(compiler, blorp->driver_ctx, mem_ctx, &vs_key, vs_prog_data, nir, - NULL, false, -1, program_size, NULL); + false, -1, program_size, NULL); return program; } diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 038f3f95512..f2f9be750a0 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -36,7 +36,6 @@ extern "C" { struct ra_regs; struct nir_shader; struct brw_program; -union gl_constant_value; struct brw_compiler { const struct gen_device_info *devinfo; @@ -491,6 +490,66 @@ struct brw_ubo_range uint8_t length; }; +/* We reserve the first 2^16 values for builtins */ +#define BRW_PARAM_IS_BUILTIN(param) (((param) & 0xffff0000) == 0) + +enum 
brw_param_builtin { + BRW_PARAM_BUILTIN_ZERO, + + BRW_PARAM_BUILTIN_CLIP_PLANE_0_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_W, + + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Z, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W, + BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X, + BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y, +}; + +#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \ + (BRW_PARAM_BUILTIN_CLIP_PLANE_0_X + ((idx) << 2) + (comp)) + +#define BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param) \ + ((param) >= BRW_PARAM_BUILTIN_CLIP_PLANE_0_X && \ + (param) <= BRW_PARAM_BUILTIN_CLIP_PLANE_7_W) + +#define BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param) \ + (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) >> 2) + +#define BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param) \ + (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) & 0x3) + struct brw_stage_prog_data { struct { /** size of our binding table. 
*/ @@ -529,11 +588,14 @@ struct brw_stage_prog_data { bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ - /* Pointers to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). + /* 32-bit identifiers for all push/pull parameters. These can be anything + * the driver wishes them to be; the core of the back-end compiler simply + * re-arranges them. The one restriction is that the bottom 2^16 values + * are reserved for builtins defined in the brw_param_builtin enum defined + * above. */ - const union gl_constant_value **param; - const union gl_constant_value **pull_param; + uint32_t *param; + uint32_t *pull_param; /** Image metadata passed to the shader as uniforms. */ struct brw_image_param *image_param; @@ -1020,7 +1082,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, const struct nir_shader *shader, - gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a40b910c1a0..c1d67750a3a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2084,10 +2084,9 @@ fs_visitor::assign_constant_locations() /* As the uniforms are going to be reordered, take the data from a temporary * copy of the original param[]. 
*/ - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - stage_prog_data->nr_params); + uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params); memcpy(param, stage_prog_data->param, - sizeof(gl_constant_value*) * stage_prog_data->nr_params); + sizeof(uint32_t) * stage_prog_data->nr_params); stage_prog_data->nr_params = num_push_constants; stage_prog_data->nr_pull_params = num_pull_constants; @@ -2115,8 +2114,7 @@ fs_visitor::assign_constant_locations() */ int new_thread_local_id_index = -1; for (unsigned int i = 0; i < uniforms; i++) { - const gl_constant_value *value = param[i]; - + uint32_t value = param[i]; if (pull_constant_loc[i] != -1) { stage_prog_data->pull_param[pull_constant_loc[i]] = value; } else if (push_constant_loc[i] != -1) { @@ -5967,7 +5965,7 @@ fs_visitor::allocate_registers(bool allow_spilling) } bool -fs_visitor::run_vs(gl_clip_plane *clip_planes) +fs_visitor::run_vs() { assert(stage == MESA_SHADER_VERTEX); @@ -5981,7 +5979,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) if (failed) return false; - compute_clip_distance(clip_planes); + compute_clip_distance(); emit_urb_writes(); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f1ba193de7e..20405750b78 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -80,8 +80,8 @@ public: fs_reg vgrf(const glsl_type *const type); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); - void compute_clip_distance(gl_clip_plane *clip_planes); + void setup_uniform_clipplane_values(); + void compute_clip_distance(); fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, @@ -95,7 +95,7 @@ public: void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(bool allow_spilling, bool do_rep_send); - bool run_vs(gl_clip_plane *clip_planes); + bool run_vs(); bool run_tcs_single_patch(); bool run_tes(); bool run_gs(); diff --git 
a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 758c8bf44a9..4ee1d4e0022 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -465,7 +465,7 @@ fs_visitor::emit_fb_writes() } void -fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) +fs_visitor::setup_uniform_clipplane_values() { const struct brw_vs_prog_key *key = (const struct brw_vs_prog_key *) this->key; @@ -474,7 +474,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) this->userplane[i] = fs_reg(UNIFORM, uniforms); for (int j = 0; j < 4; ++j) { stage_prog_data->param[uniforms + j] = - (gl_constant_value *) &clip_planes[i][j]; + BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); } uniforms += 4; } @@ -486,7 +486,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) * This does nothing if the shader uses gl_ClipDistance or user clipping is * disabled altogether. */ -void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) +void fs_visitor::compute_clip_distance() { struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); const struct brw_vs_prog_key *key = @@ -518,7 +518,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) if (outputs[clip_vertex].file == BAD_FILE) return; - setup_uniform_clipplane_values(clip_planes); + setup_uniform_clipplane_values(); const fs_builder abld = bld.annotate("user clip distances"); diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 9b9f5863721..8614886967c 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -698,10 +698,9 @@ vec4_visitor::pack_uniform_registers() /* As the uniforms are going to be reordered, take the data from a temporary * copy of the original param[]. 
*/ - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - stage_prog_data->nr_params); + uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params); memcpy(param, stage_prog_data->param, - sizeof(gl_constant_value*) * stage_prog_data->nr_params); + sizeof(uint32_t) * stage_prog_data->nr_params); /* Now, figure out a packing of the live uniform vectors into our * push constants. Start with dvec{3,4} because they are aligned to @@ -907,7 +906,7 @@ vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { - const gl_constant_value **values = &stage_prog_data->param[i]; + uint32_t *values = &stage_prog_data->param[i]; /* Try to find an existing copy of this uniform in the pull * constants if it was part of an array access already. @@ -1764,11 +1763,10 @@ vec4_visitor::setup_uniforms(int reg) */ if (devinfo->gen < 6 && this->uniforms == 0) { stage_prog_data->param = - reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); + reralloc(NULL, stage_prog_data->param, uint32_t, 4); for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[slot] = &zero; + stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO; } this->uniforms++; @@ -2742,7 +2740,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, const nir_shader *src_shader, - gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, @@ -2866,7 +2863,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ shader, 8, shader_time_index); - if (!v.run_vs(clip_planes)) { + if (!v.run_vs()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, 
v.fail_msg); @@ -2895,7 +2892,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_vs_visitor v(compiler, log_data, key, prog_data, - shader, clip_planes, mem_ctx, + shader, mem_ctx, shader_time_index, use_legacy_snorm_formula); if (!v.run()) { if (error_str) diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index a8e445c473c..5df6d562ce6 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -890,10 +890,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * values. */ const unsigned param_count = prog_data->base.base.nr_params; - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - param_count); + uint32_t *param = ralloc_array(NULL, uint32_t, param_count); memcpy(param, prog_data->base.base.param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); if (v.run()) { /* Success! Backup is not needed */ @@ -910,7 +909,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * FIXME: Could more variables be modified by this execution? 
*/ memcpy(prog_data->base.base.param, param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); prog_data->base.base.nr_params = param_count; prog_data->base.base.nr_pull_params = 0; ralloc_free(param); diff --git a/src/intel/compiler/brw_vec4_vs.h b/src/intel/compiler/brw_vec4_vs.h index cd07e0e99de..b2a862fdbde 100644 --- a/src/intel/compiler/brw_vec4_vs.h +++ b/src/intel/compiler/brw_vec4_vs.h @@ -36,7 +36,6 @@ public: const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, const nir_shader *shader, - gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula); @@ -57,8 +56,6 @@ private: const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; - gl_clip_plane *clip_planes; - bool use_legacy_snorm_formula; }; diff --git a/src/intel/compiler/brw_vec4_vs_visitor.cpp b/src/intel/compiler/brw_vec4_vs_visitor.cpp index ad7f067ad60..86f365e2f0f 100644 --- a/src/intel/compiler/brw_vec4_vs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_vs_visitor.cpp @@ -124,7 +124,7 @@ vec4_vs_visitor::setup_uniform_clipplane_values() this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { stage_prog_data->param[this->uniforms * 4 + j] = - (gl_constant_value *) &clip_planes[i][j]; + BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); } ++this->uniforms; } @@ -164,7 +164,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, const nir_shader *shader, - gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula) @@ -172,7 +171,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, mem_ctx, false /* no_spills */, shader_time_index), key(key), vs_prog_data(vs_prog_data), - clip_planes(clip_planes), use_legacy_snorm_formula(use_legacy_snorm_formula) { } diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c index 5eec67cb607..64d1417f5b1 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -629,6 +629,26 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } +static uint32_t +anv_push_constant_value(struct anv_push_constants *data, uint32_t param) +{ + if (BRW_PARAM_IS_BUILTIN(param)) { + switch (param) { + case BRW_PARAM_BUILTIN_ZERO: + return 0; + default: + unreachable("Invalid param builtin"); + } + } else { + uint32_t offset = ANV_PARAM_PUSH_OFFSET(param); + assert(offset % sizeof(uint32_t) == 0); + if (offset < data->size) + return *(uint32_t *)((uint8_t *)data + offset); + else + return 0; + } +} + struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) @@ -653,10 +673,8 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, /* Walk through the param array and fill the buffer with data */ uint32_t *u32_map = state.map; - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); - } + for (unsigned i = 0; i < prog_data->nr_params; i++) + u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); anv_state_flush(cmd_buffer->device, state); @@ -695,8 +713,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); } } @@ -708,8 +725,8 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { if (src != cs_prog_data->thread_local_id_index) { - uint32_t offset = (uintptr_t)prog_data->param[src]; - u32_map[dst] = *(uint32_t 
*)((uint8_t *)data + offset); + u32_map[dst] = + anv_push_constant_value(data, prog_data->param[src]); } else { u32_map[dst] = t * cs_prog_data->simd_size; } diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index a35687379c8..f5a274d1a53 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -304,17 +304,13 @@ apply_pipeline_layout_block(nir_block *block, } static void -setup_vec4_uniform_value(const union gl_constant_value **params, - const union gl_constant_value *values, - unsigned n) +setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n) { - static const gl_constant_value zero = { 0 }; - for (unsigned i = 0; i < n; ++i) - params[i] = &values[i]; + params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t)); for (unsigned i = n; i < 4; ++i) - params[i] = &zero; + params[i] = BRW_PARAM_BUILTIN_ZERO; } void @@ -478,22 +474,21 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, } struct anv_push_constants *null_data = NULL; - const gl_constant_value **param = - prog_data->param + (shader->num_uniforms / 4); + uint32_t *param = prog_data->param + (shader->num_uniforms / 4); const struct brw_image_param *image_param = null_data->images; for (uint32_t i = 0; i < map->image_count; i++) { setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const union gl_constant_value *)&image_param->surface_idx, 1); + (uintptr_t)&image_param->surface_idx, 1); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const union gl_constant_value *)image_param->offset, 2); + (uintptr_t)image_param->offset, 2); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const union gl_constant_value *)image_param->size, 3); + (uintptr_t)image_param->size, 3); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const union gl_constant_value *)image_param->stride, 4); + 
(uintptr_t)image_param->stride, 4); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const union gl_constant_value *)image_param->tiling, 3); + (uintptr_t)image_param->tiling, 3); setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const union gl_constant_value *)image_param->swizzling, 2); + (uintptr_t)image_param->swizzling, 2); param += BRW_IMAGE_PARAM_SIZE; image_param ++; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 3db9b9a6246..e6a7393a3d3 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -415,8 +415,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (prog_data->nr_params > 0) { /* XXX: I think we're leaking this */ - prog_data->param = (const union gl_constant_value **) - malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + prog_data->param = malloc(prog_data->nr_params * sizeof(uint32_t)); /* We now set the param values to be offsets into a * anv_push_constant_data structure. 
Since the compiler doesn't @@ -427,8 +426,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, if (nir->num_uniforms > 0) { /* Fill out the push constants section of the param array */ for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) - prog_data->param[i] = (const union gl_constant_value *) - &null_data->client_data[i * sizeof(float)]; + prog_data->param[i] = ANV_PARAM_PUSH( + (uintptr_t)&null_data->client_data[i * sizeof(float)]); } } @@ -540,7 +539,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir, - NULL, false, -1, &code_size, NULL); + false, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index b33370c3da1..abc278b40b7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1567,6 +1567,9 @@ struct anv_vertex_binding { VkDeviceSize offset; }; +#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset)) +#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff) + struct anv_push_constants { /* Current allocated size of this push constants data structure. * Because a decent chunk of it may not be used (images on SKL, for diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index bc09abd912c..68fca098f76 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -89,10 +89,8 @@ brw_codegen_cs_prog(struct brw_context *brw, /* The backend also sometimes adds params for texture size. 
*/ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.image_param = rzalloc_array(NULL, struct brw_image_param, cp->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index e7062ee7899..c040665a3b6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -99,10 +99,8 @@ brw_codegen_gs_prog(struct brw_context *brw, */ int param_count = gp->program.nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.image_param = rzalloc_array(NULL, struct brw_image_param, gp->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index f0bccac14ef..de6df4835d4 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,6 +23,7 @@ #include "compiler/brw_nir.h" #include "compiler/glsl/ir_uniform.h" +#include "brw_program.h" static void brw_nir_setup_glsl_builtin_uniform(nir_variable *var, @@ -60,23 +61,21 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var, last_swiz = swiz; stage_prog_data->param[uniform_index++] = - &prog->Parameters->ParameterValues[index][swiz]; + BRW_PARAM_PARAMETER(index, swiz); } } } static void -setup_vec4_uniform_value(const 
gl_constant_value **params, - const gl_constant_value *values, - unsigned n) +setup_vec4_image_param(uint32_t *params, uint32_t idx, + unsigned offset, unsigned n) { - static const gl_constant_value zero = { 0 }; - + assert(offset % sizeof(uint32_t) == 0); for (unsigned i = 0; i < n; ++i) - params[i] = &values[i]; + params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); for (unsigned i = n; i < 4; ++i) - params[i] = &zero; + params[i] = BRW_PARAM_BUILTIN_ZERO; } static void @@ -85,29 +84,32 @@ brw_setup_image_uniform_values(gl_shader_stage stage, unsigned param_start_index, const gl_uniform_storage *storage) { - const gl_constant_value **param = - &stage_prog_data->param[param_start_index]; + uint32_t *param = &stage_prog_data->param[param_start_index]; for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { const unsigned image_idx = storage->opaque[stage].index + i; - const brw_image_param *image_param = - &stage_prog_data->image_param[image_idx]; /* Upload the brw_image_param structure. The order is expected to match * the BRW_IMAGE_PARAM_*_OFFSET defines. 
*/ - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const gl_constant_value *)&image_param->surface_idx, 1); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const gl_constant_value *)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const gl_constant_value *)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const gl_constant_value *)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const gl_constant_value *)image_param->tiling, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const gl_constant_value *)image_param->swizzling, 2); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + image_idx, + offsetof(brw_image_param, surface_idx), 1); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + image_idx, + offsetof(brw_image_param, offset), 2); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + image_idx, + offsetof(brw_image_param, size), 3); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + image_idx, + offsetof(brw_image_param, stride), 4); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET, + image_idx, + offsetof(brw_image_param, tiling), 3); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + image_idx, + offsetof(brw_image_param, swizzling), 2); param += BRW_IMAGE_PARAM_SIZE; brw_mark_surface_used( @@ -167,14 +169,16 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, for (unsigned s = 0; s < vector_count; s++) { unsigned i; for (i = 0; i < vector_size; i++) { - stage_prog_data->param[uniform_index++] = components++; + uint32_t idx = components - prog->sh.data->UniformDataSlots; + stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); + components++; } if (!is_scalar) { /* Pad out with zeros if needed (only needed for vec4) */ for (; i < 
max_vector_size; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniform_index++] = &zero; + stage_prog_data->param[uniform_index++] = + BRW_PARAM_BUILTIN_ZERO; } } } @@ -223,12 +227,9 @@ brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, assert(plist->Parameters[p].Size <= 4); unsigned i; - for (i = 0; i < plist->Parameters[p].Size; i++) { - stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i]; - } - for (; i < 4; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[4 * p + i] = &zero; - } + for (i = 0; i < plist->Parameters[p].Size; i++) + stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i); + for (; i < 4; i++) + stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; } } diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index c52193c691c..701b8da482e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -33,6 +33,29 @@ extern "C" { struct brw_context; +enum brw_param_domain { + BRW_PARAM_DOMAIN_BUILTIN = 0, + BRW_PARAM_DOMAIN_PARAMETER, + BRW_PARAM_DOMAIN_UNIFORM, + BRW_PARAM_DOMAIN_IMAGE, +}; + +#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) +#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) +#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) + +#define BRW_PARAM_PARAMETER(idx, comp) \ + BRW_PARAM(PARAMETER, ((idx) << 2) | (comp)) +#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2) +#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3) + +#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx)) +#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param) + +#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) +#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) +#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf) + struct 
nir_shader *brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, struct gl_program *prog, diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 42769b1deda..64acc1d9bf7 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -239,7 +239,7 @@ brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, const struct brw_stage_prog_data *prog_data, void *dst, - const union gl_constant_value **param, + const uint32_t *param, unsigned nr_params); void brw_upload_pull_constants(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 474787b0ea6..272545453a4 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -188,10 +188,8 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, */ int param_count = nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.nr_params = param_count; if (tcp) { @@ -211,26 +209,25 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, /* Upload the Patch URB Header as the first two uniforms. * Do the annoying scrambling so the shader doesn't have to. 
*/ - const float **param = (const float **) prog_data.base.base.param; - static float zero = 0.0f; + uint32_t *param = prog_data.base.base.param; for (int i = 0; i < 8; i++) - param[i] = &zero; + param[i] = BRW_PARAM_BUILTIN_ZERO; if (key->tes_primitive_mode == GL_QUADS) { for (int i = 0; i < 4; i++) - param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i]; + param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - param[3] = &ctx->TessCtrlProgram.patch_default_inner_level[0]; - param[2] = &ctx->TessCtrlProgram.patch_default_inner_level[1]; + param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; + param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; } else if (key->tes_primitive_mode == GL_TRIANGLES) { for (int i = 0; i < 3; i++) - param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i]; + param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - param[4] = &ctx->TessCtrlProgram.patch_default_inner_level[0]; + param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; } else { assert(key->tes_primitive_mode == GL_ISOLINES); - param[7] = &ctx->TessCtrlProgram.patch_default_outer_level[1]; - param[6] = &ctx->TessCtrlProgram.patch_default_outer_level[0]; + param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; + param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; } } diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c index 6564fefc06e..7ee925b0891 100644 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ b/src/mesa/drivers/dri/i965/brw_tes.c @@ -90,10 +90,8 @@ brw_codegen_tes_prog(struct brw_context *brw, */ int param_count = nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.image_param = rzalloc_array(NULL, 
struct brw_image_param, tep->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 756e13db31f..9dd812e1981 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -191,10 +191,8 @@ brw_codegen_vs_prog(struct brw_context *brw, */ param_count += key->nr_userclip_plane_consts * 4; - stage_prog_data->param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - stage_prog_data->pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count); + stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count); stage_prog_data->image_param = rzalloc_array(NULL, struct brw_image_param, stage_prog_data->nr_image_params); @@ -244,7 +242,6 @@ brw_codegen_vs_prog(struct brw_context *brw, char *error_str; program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, vp->program.nir, - brw_select_clip_planes(&brw->ctx), !_mesa_is_gles3(&brw->ctx), st_index, &program_size, &error_str); if (program == NULL) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 3ab8e3930ae..ddafa526198 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -153,10 +153,8 @@ brw_codegen_wm_prog(struct brw_context *brw, prog_data.base.nr_image_params = fp->program.info.num_images; /* The backend also sometimes adds params for texture size. 
*/ param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - prog_data.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.image_param = rzalloc_array(NULL, struct brw_image_param, prog_data.base.nr_image_params); diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index b2e357fd9d9..93a12c706ff 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -24,21 +24,84 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_program.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" #include "program/prog_parameter.h" +static uint32_t +f_as_u32(float f) +{ + return *(uint32_t *)&f; +} + +static uint32_t +brw_param_value(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + uint32_t param) +{ + struct gl_context *ctx = &brw->ctx; + + switch (BRW_PARAM_DOMAIN(param)) { + case BRW_PARAM_DOMAIN_BUILTIN: + if (param == BRW_PARAM_BUILTIN_ZERO) { + return 0; + } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) { + gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); + unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param); + unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param); + return ((uint32_t *)clip_planes[idx])[comp]; + } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && + param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { + unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; + return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]); + } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { + return 
f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]); + } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { + return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]); + } else { + unreachable("Invalid param builtin"); + } + + case BRW_PARAM_DOMAIN_PARAMETER: { + unsigned idx = BRW_PARAM_PARAMETER_IDX(param); + unsigned comp = BRW_PARAM_PARAMETER_COMP(param); + assert(idx < prog->Parameters->NumParameters); + return prog->Parameters->ParameterValues[idx][comp].u; + } + + case BRW_PARAM_DOMAIN_UNIFORM: { + unsigned idx = BRW_PARAM_UNIFORM_IDX(param); + assert(idx < prog->sh.data->NumUniformDataSlots); + return prog->sh.data->UniformDataSlots[idx].u; + } + + case BRW_PARAM_DOMAIN_IMAGE: { + unsigned idx = BRW_PARAM_IMAGE_IDX(param); + unsigned offset = BRW_PARAM_IMAGE_OFFSET(param); + assert(idx < prog_data->nr_image_params); + assert(offset < sizeof(struct brw_image_param)); + return ((uint32_t *)&prog_data->image_param[idx])[offset]; + } + + default: + unreachable("Invalid param domain"); + } +} + + void brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, const struct brw_stage_prog_data *prog_data, void *void_dst, - const union gl_constant_value **param, + const uint32_t *param, unsigned nr_params) { - gl_constant_value *dst = void_dst; + uint32_t *dst = void_dst; for (unsigned i = 0; i < nr_params; i++) - dst[i] = *param[i]; + dst[i] = brw_param_value(brw, prog, prog_data, param[i]); } @@ -234,7 +297,7 @@ brw_upload_cs_push_constants(struct brw_context *brw, } - gl_constant_value *param = (gl_constant_value*) + uint32_t *param = brw_state_batch(brw, ALIGN(cs_prog_data->push.total.size, 64), 64, &stage_state->push_const_offset); assert(param); @@ -242,18 +305,18 @@ brw_upload_cs_push_constants(struct brw_context *brw, STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); if (cs_prog_data->push.cross_thread.size > 0) { - gl_constant_value *param_copy = param; + uint32_t *param_copy = param; 
assert(cs_prog_data->thread_local_id_index < 0 || cs_prog_data->thread_local_id_index >= cs_prog_data->push.cross_thread.dwords); for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { - param_copy[i] = *prog_data->param[i]; + param_copy[i] = brw_param_value(brw, prog, prog_data, + prog_data->param[i]); } } - gl_constant_value thread_id; if (cs_prog_data->push.per_thread.size > 0) { for (unsigned t = 0; t < cs_prog_data->threads; t++) { unsigned dst = @@ -261,11 +324,11 @@ brw_upload_cs_push_constants(struct brw_context *brw, cs_prog_data->push.cross_thread.regs); unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { - if (src != cs_prog_data->thread_local_id_index) - param[dst] = *prog_data->param[src]; - else { - thread_id.u = t * cs_prog_data->simd_size; - param[dst] = thread_id; + if (src != cs_prog_data->thread_local_id_index) { + param[dst] = brw_param_value(brw, prog, prog_data, + prog_data->param[src]); + } else { + param[dst] = t * cs_prog_data->simd_size; } } }