From a070d5f363e99b0f846d555e9ca3a74ec807fdc0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 12:50:16 -0700 Subject: [PATCH] i965/vs: Start adding support for uniforms There's no clever packing here, no pull constants, and no array support. --- src/mesa/drivers/dri/i965/brw_context.h | 22 ++- src/mesa/drivers/dri/i965/brw_curbe.c | 27 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 73 +++++++-- .../drivers/dri/i965/brw_vec4_visitor.cpp | 150 +++++++++++++++++- .../drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 21 ++- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- 9 files changed, 256 insertions(+), 48 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7b6b64c1a5c..4a1abd6252e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -248,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. 
Note that @@ -317,6 +318,13 @@ struct brw_vs_prog_data { /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. 
- */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 10168fc1cb0..01058243f04 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -356,6 +356,8 @@ public: * for the ir->location's used. 
*/ dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; struct hash_table *variable_ht; @@ -363,7 +365,10 @@ public: void fail(const char *msg, ...); int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bdc7a79d83d..1f2853e1118 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg) prog_data->urb_read_length = (nr_attributes + 1) / 2; - return nr_attributes; + return payload_reg + nr_attributes; } -void -vec4_visitor::setup_payload(void) +int +vec4_visitor::setup_uniforms(int reg) { - int reg = 0; - - /* r0 is always reserved, as it contains the payload with the URB - * handles that are passed on to the URB write at the end of the - * thread. - */ - reg++; - /* User clip planes from curbe: */ if (c->key.nr_userclip) { @@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void) } } - /* FINISHME: push constants */ + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. 
+ */ + c->prog_data.nr_params = this->uniforms * 4; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = 0; - /* XXX 0 causes a bug elsewhere... */ - if (intel->gen < 6 && c->prog_data.nr_params == 0) - c->prog_data.nr_params = 4; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; - reg += setup_attributes(reg); + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); this->first_non_payload_grf = reg; } @@ -174,6 +201,18 @@ vec4_instruction::get_src(int i) } break; + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + case HW_REG: brw_reg = src[i].fixed_hw_reg; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index bba1d810f19..91abd40faad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -22,7 +22,10 @@ */ #include "brw_vec4.h" +extern "C" { #include "main/macros.h" +#include "program/prog_parameter.h" +} namespace brw { @@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +/* Our support for uniforms is piggy-backed on the struct + * gl_vertex_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. 
+ */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. 
*/ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. 
+ */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + if (swiz == last_swiz) + break; + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + dst_reg * vec4_visitor::variable_storage(ir_variable *var) { @@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir) switch (ir->mode) { case ir_var_in: reg = new(mem_ctx) dst_reg(ATTR, ir->location); - reg->type = brw_type_for_base_type(ir->type); - hash_table_insert(this->variable_ht, reg, ir); break; case ir_var_out: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; @@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir) case ir_var_temporary: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); - break; case ir_var_uniform: - /* FINISHME: uniforms */ + reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } break; } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); } void @@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->current_annotation = NULL; this->c = c; + this->vp = brw->vertex_program; /* FINISHME: change for precompile */ this->prog_data = &c->prog_data; this->variable_ht = hash_table_ctor(0, @@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->virtual_grf_sizes = NULL; this->virtual_grf_count = 0; this->virtual_grf_array_size = 0; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); } 
vec4_visitor::~vec4_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e70454416bf..affa72c7324 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3d525248f25..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c 
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..1d80e96778e 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { -- 2.30.2