X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_nir_uniforms.cpp;h=639bdfc0f2cbe1750dc78200367e2d51d87e3b77;hb=71c0dcf26621f0403586a1b09df7e7cecb063979;hp=69da83ad364bbddfb2750fbb7b43f5d7cff63b47;hpb=5c9d47d9c6508b0b01bfadfd129e55886a016ed1;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 69da83ad364..639bdfc0f2c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,6 +23,7 @@ #include "compiler/brw_nir.h" #include "compiler/glsl/ir_uniform.h" +#include "compiler/nir/nir_builder.h" #include "brw_program.h" static void @@ -79,22 +80,19 @@ setup_vec4_image_param(uint32_t *params, uint32_t idx, } static void -brw_setup_image_uniform_values(gl_shader_stage stage, - struct brw_stage_prog_data *stage_prog_data, - unsigned param_start_index, - const gl_uniform_storage *storage) +brw_setup_image_uniform_values(nir_variable *var, + struct brw_stage_prog_data *prog_data) { - uint32_t *param = &stage_prog_data->param[param_start_index]; + unsigned param_start_index = var->data.driver_location / 4; + uint32_t *param = &prog_data->param[param_start_index]; + unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); - for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { - const unsigned image_idx = storage->opaque[stage].index + i; + for (unsigned i = 0; i < num_images; i++) { + const unsigned image_idx = var->data.binding + i; /* Upload the brw_image_param structure. The order is expected to match * the BRW_IMAGE_PARAM_*_OFFSET defines. */ - setup_vec4_image_param(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - image_idx, - offsetof(brw_image_param, surface_idx), 1); setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, image_idx, offsetof(brw_image_param, offset), 2); @@ -111,11 +109,39 @@ brw_setup_image_uniform_values(gl_shader_stage stage, image_idx, offsetof(brw_image_param, swizzling), 2); param += BRW_IMAGE_PARAM_SIZE; + } +} + +static unsigned +count_uniform_storage_slots(const struct glsl_type *type) +{ + /* gl_uniform_storage can cope with one level of array, so if the + * type is a composite type or an array where each element occupies + * more than one slot than we need to recursively process it. + */ + if (glsl_type_is_struct_or_ifc(type)) { + unsigned location_count = 0; + + for (unsigned i = 0; i < glsl_get_length(type); i++) { + const struct glsl_type *field_type = glsl_get_struct_field(type, i); + + location_count += count_uniform_storage_slots(field_type); + } - brw_mark_surface_used( - stage_prog_data, - stage_prog_data->binding_table.image_start + image_idx); + return location_count; } + + if (glsl_type_is_array(type)) { + const struct glsl_type *element_type = glsl_get_array_element(type); + + if (glsl_type_is_array(element_type) || + glsl_type_is_struct_or_ifc(element_type)) { + unsigned element_count = count_uniform_storage_slots(element_type); + return element_count * glsl_get_length(type); + } + } + + return 1; } static void @@ -124,62 +150,62 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, struct brw_stage_prog_data *stage_prog_data, bool is_scalar) { - int namelen = strlen(var->name); + if (var->type->without_array()->is_sampler()) + return; + + if (var->type->without_array()->is_image()) { + brw_setup_image_uniform_values(var, stage_prog_data); + return; + } /* The data for our (non-builtin) uniforms is stored in a series of * gl_uniform_storage structs for each subcomponent that * glGetUniformLocation() could name. We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. + * order we'd walk the type, so walk the list of storage that matches the + * range of slots covered by this variable. */ unsigned uniform_index = var->data.driver_location / 4; - for (unsigned u = 0; u < prog->sh.data->NumUniformStorage; u++) { + unsigned num_slots = count_uniform_storage_slots(var->type); + for (unsigned u = 0; u < num_slots; u++) { struct gl_uniform_storage *storage = - &prog->sh.data->UniformStorage[u]; + &prog->sh.data->UniformStorage[var->data.location + u]; - if (storage->builtin || storage->type->is_sampler()) + /* We already handled samplers and images via the separate top-level + * variables created by gl_nir_lower_samplers_as_deref(), but they're + * still part of the structure's storage, and so we'll see them while + * walking it to set up the other regular fields. Just skip over them. + */ + if (storage->builtin || + storage->type->is_sampler() || + storage->type->is_image()) continue; - if (strncmp(var->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; + gl_constant_value *components = storage->storage; + unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + unsigned vector_size = storage->type->vector_elements; + unsigned max_vector_size = 4; + if (storage->type->base_type == GLSL_TYPE_DOUBLE || + storage->type->base_type == GLSL_TYPE_UINT64 || + storage->type->base_type == GLSL_TYPE_INT64) { + vector_size *= 2; + if (vector_size > 4) + max_vector_size = 8; } - if (storage->type->is_image()) { - brw_setup_image_uniform_values(stage, stage_prog_data, - uniform_index, storage); - uniform_index += - BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1); - } else { - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - unsigned vector_size = storage->type->vector_elements; - unsigned max_vector_size = 4; - if (storage->type->base_type == GLSL_TYPE_DOUBLE || - storage->type->base_type == GLSL_TYPE_UINT64 || - storage->type->base_type == GLSL_TYPE_INT64) { - vector_size *= 2; - if (vector_size > 4) - max_vector_size = 8; + for (unsigned s = 0; s < vector_count; s++) { + unsigned i; + for (i = 0; i < vector_size; i++) { + uint32_t idx = components - prog->sh.data->UniformDataSlots; + stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); + components++; } - for (unsigned s = 0; s < vector_count; s++) { - unsigned i; - for (i = 0; i < vector_size; i++) { - uint32_t idx = components - prog->sh.data->UniformDataSlots; - stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); - components++; - } - - if (!is_scalar) { - /* Pad out with zeros if needed (only needed for vec4) */ - for (; i < max_vector_size; i++) { - stage_prog_data->param[uniform_index++] = - BRW_PARAM_BUILTIN_ZERO; - } + if (!is_scalar) { + /* Pad out with zeros if needed (only needed for vec4) */ + for (; i < max_vector_size; i++) { + stage_prog_data->param[uniform_index++] = + BRW_PARAM_BUILTIN_ZERO; } } } @@ -202,7 +228,7 @@ brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader, if (var->interface_type != NULL || var->type->contains_atomic()) continue; - if (strncmp(var->name, "gl_", 3) == 0) { + if (var->num_state_slots > 0) { brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data, is_scalar); } else { @@ -224,10 +250,9 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); /* For ARB programs, prog_to_nir generates a single "parameters" variable - * for all uniform data. nir_lower_wpos_ytransform may also create an - * additional variable. + * for all uniform data. There may be additional sampler variables, and + * an extra uniform from nir_lower_wpos_ytransform. */ - assert(shader->uniforms.length() <= 2); for (unsigned p = 0; p < plist->NumParameters; p++) { /* Parameters should be either vec4 uniforms or single component @@ -244,28 +269,188 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, } } +static nir_ssa_def * +get_aoa_deref_offset(nir_builder *b, + nir_deref_instr *deref, + unsigned elem_size) +{ + unsigned array_size = elem_size; + nir_ssa_def *offset = nir_imm_int(b, 0); + + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + /* This level's element size is the previous level's array size */ + nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); + assert(deref->arr.index.ssa); + offset = nir_iadd(b, offset, + nir_imul(b, index, nir_imm_int(b, array_size))); + + deref = nir_deref_instr_parent(deref); + assert(glsl_type_is_array(deref->type)); + array_size *= glsl_get_length(deref->type); + } + + /* Accessing an invalid surface index with the dataport can result in a + * hang. According to the spec "if the index used to select an individual + * element is negative or greater than or equal to the size of the array, + * the results of the operation are undefined but may not lead to + * termination" -- which is one of the possible outcomes of the hang. + * Clamp the index to prevent access outside of the array bounds. + */ + return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); +} + void -brw_nir_lower_patch_vertices_in_to_uniform(nir_shader *nir) +brw_nir_lower_gl_images(nir_shader *shader, + const struct gl_program *prog) { - nir_foreach_variable_safe(var, &nir->system_values) { - if (var->data.location != SYSTEM_VALUE_VERTICES_IN) + /* We put image uniforms at the end */ + nir_foreach_variable(var, &shader->uniforms) { + if (!var->type->contains_image()) continue; - gl_state_index16 tokens[STATE_LENGTH] = { - STATE_INTERNAL, - nir->info.stage == MESA_SHADER_TESS_CTRL ? - (gl_state_index16)STATE_TCS_PATCH_VERTICES_IN : - (gl_state_index16)STATE_TES_PATCH_VERTICES_IN, - }; - var->num_state_slots = 1; - var->state_slots = - ralloc_array(var, nir_state_slot, var->num_state_slots); - memcpy(var->state_slots[0].tokens, tokens, sizeof(tokens)); - var->state_slots[0].swizzle = SWIZZLE_XXXX; - - var->data.mode = nir_var_uniform; - var->data.location = -1; - exec_node_remove(&var->node); - exec_list_push_tail(&nir->uniforms, &var->node); + /* GL Only allows arrays of arrays of images */ + assert(var->type->without_array()->is_image()); + const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); + + var->data.driver_location = shader->num_uniforms; + shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4; + } + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_load_raw_intel: + case nir_intrinsic_image_deref_store_raw_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + struct gl_uniform_storage *storage = + &prog->sh.data->UniformStorage[var->data.location]; + const unsigned image_var_idx = + storage->opaque[shader->info.stage].index; + + b.cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx), + get_aoa_deref_offset(&b, deref, 1)); + nir_rewrite_image_intrinsic(intrin, index, false); + break; + } + + case nir_intrinsic_image_deref_load_param_intel: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + const unsigned num_images = + MAX2(1, var->type->arrays_of_arrays_size()); + + b.cursor = nir_instr_remove(&intrin->instr); + + const unsigned param = nir_intrinsic_base(intrin); + nir_ssa_def *offset = + get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4); + offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16)); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_load_uniform); + nir_intrinsic_set_base(load, var->data.driver_location); + nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4); + load->src[0] = nir_src_for_ssa(offset); + load->num_components = intrin->dest.ssa.num_components; + nir_ssa_dest_init(&load->instr, &load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + break; + } + + default: + break; + } + } + } +} + +void +brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts, + struct brw_stage_prog_data *prog_data) +{ + if (nr_userclip_plane_consts == 0) + return; + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + + nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false, + NULL); + nir_lower_io_to_temporaries(nir, impl, true, false); + nir_lower_global_vars_to_local(nir); + nir_lower_vars_to_ssa(nir); + + const unsigned clip_plane_base = nir->num_uniforms; + + assert(nir->num_uniforms == prog_data->nr_params * 4); + const unsigned num_clip_floats = 4 * nr_userclip_plane_consts; + uint32_t *clip_param = + brw_stage_prog_data_add_params(prog_data, num_clip_floats); + nir->num_uniforms += num_clip_floats * sizeof(float); + assert(nir->num_uniforms == prog_data->nr_params * 4); + + for (unsigned i = 0; i < num_clip_floats; i++) + clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4); + + nir_builder b; + nir_builder_init(&b, impl); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane) + continue; + + b.cursor = nir_before_instr(instr); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); + load->num_components = 4; + load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); + nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) * + nir_intrinsic_ucp_id(intrin)); + nir_intrinsic_set_range(load, 4 * sizeof(float)); + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + nir_instr_remove(instr); + } } }