X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.c;h=e17b11cdd4a31b8c1025f9c0151774400dee3a6e;hb=5c5555a862754a5b43fee2abf4fc34e888d22a06;hp=37fd9cb5c56bf2b560ad092fa1411da351ab63f4;hpb=6eb051e36f04b9b6d1cd01a3d86c023ab87d2f9f;p=mesa.git

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 37fd9cb5c56..e17b11cdd4a 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -26,8 +26,15 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 #include "nir_control_flow_private.h"
+#include "util/half_float.h"
+#include <limits.h>
 #include <assert.h>
+#include <math.h>
+#include "util/u_math.h"
+
+#include "main/menums.h" /* BITFIELD64_MASK */
 
 nir_shader *
 nir_shader_create(void *mem_ctx,
@@ -37,28 +44,24 @@ nir_shader_create(void *mem_ctx,
 {
    nir_shader *shader = rzalloc(mem_ctx, nir_shader);
 
-   exec_list_make_empty(&shader->uniforms);
-   exec_list_make_empty(&shader->inputs);
-   exec_list_make_empty(&shader->outputs);
-   exec_list_make_empty(&shader->shared);
+   exec_list_make_empty(&shader->variables);
 
    shader->options = options;
 
-   shader->info = si ? si : rzalloc(shader, shader_info);
+   if (si) {
+      assert(si->stage == stage);
+      shader->info = *si;
+   } else {
+      shader->info.stage = stage;
+   }
 
    exec_list_make_empty(&shader->functions);
-   exec_list_make_empty(&shader->registers);
-   exec_list_make_empty(&shader->globals);
-   exec_list_make_empty(&shader->system_values);
-   shader->reg_alloc = 0;
 
    shader->num_inputs = 0;
    shader->num_outputs = 0;
    shader->num_uniforms = 0;
    shader->num_shared = 0;
 
-   shader->stage = stage;
-
    return shader;
 }
 
@@ -74,7 +77,6 @@ reg_create(void *mem_ctx, struct exec_list *list)
    reg->num_components = 0;
    reg->bit_size = 32;
    reg->num_array_elems = 0;
-   reg->is_packed = false;
    reg->name = NULL;
 
    exec_list_push_tail(list, &reg->node);
@@ -82,22 +84,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
    return reg;
 }
 
-nir_register *
-nir_global_reg_create(nir_shader *shader)
-{
-   nir_register *reg = reg_create(shader, &shader->registers);
-   reg->index = shader->reg_alloc++;
-   reg->is_global = true;
-
-   return reg;
-}
-
 nir_register *
 nir_local_reg_create(nir_function_impl *impl)
 {
    nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
    reg->index = impl->reg_alloc++;
-   reg->is_global = false;
 
    return reg;
 }
@@ -112,44 +103,34 @@ void
 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
 {
    switch (var->data.mode) {
-   case nir_var_all:
-      assert(!"invalid mode");
-      break;
-
-   case nir_var_local:
+   case nir_var_function_temp:
       assert(!"nir_shader_add_variable cannot be used for local variables");
-      break;
-
-   case nir_var_param:
-      assert(!"nir_shader_add_variable cannot be used for function parameters");
-      break;
-
-   case nir_var_global:
-      exec_list_push_tail(&shader->globals, &var->node);
-      break;
+      return;
 
+   case nir_var_shader_temp:
    case nir_var_shader_in:
-      exec_list_push_tail(&shader->inputs, &var->node);
-      break;
-
    case nir_var_shader_out:
-      exec_list_push_tail(&shader->outputs, &var->node);
-      break;
-
    case nir_var_uniform:
-   case nir_var_shader_storage:
-      exec_list_push_tail(&shader->uniforms, &var->node);
+   case nir_var_mem_ubo:
+   case nir_var_mem_ssbo:
+   case nir_var_mem_shared:
+   case nir_var_system_value:
       break;
 
-   case nir_var_shared:
-      assert(shader->stage == MESA_SHADER_COMPUTE);
-      exec_list_push_tail(&shader->shared, &var->node);
-      break;
+   case nir_var_mem_global:
+      assert(!"nir_shader_add_variable cannot be used for global memory");
+      return;
 
-   case nir_var_system_value:
-      exec_list_push_tail(&shader->system_values, &var->node);
-      
break; + case nir_var_mem_push_const: + assert(!"nir_var_push_constant is not supposed to be used for variables"); + return; + + default: + assert(!"invalid mode"); + return; } + + exec_list_push_tail(&shader->variables, &var->node); } nir_variable * @@ -160,9 +141,12 @@ nir_variable_create(nir_shader *shader, nir_variable_mode mode, var->name = ralloc_strdup(var, name); var->type = type; var->data.mode = mode; + var->data.how_declared = nir_var_declared_normally; - if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || - (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) + if ((mode == nir_var_shader_in && + shader->info.stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && + shader->info.stage != MESA_SHADER_FRAGMENT)) var->data.interpolation = INTERP_MODE_SMOOTH; if (mode == nir_var_shader_in || mode == nir_var_uniform) @@ -180,13 +164,39 @@ nir_local_variable_create(nir_function_impl *impl, nir_variable *var = rzalloc(impl->function->shader, nir_variable); var->name = ralloc_strdup(var, name); var->type = type; - var->data.mode = nir_var_local; + var->data.mode = nir_var_function_temp; nir_function_impl_add_variable(impl, var); return var; } +nir_variable * +nir_find_variable_with_location(nir_shader *shader, + nir_variable_mode mode, + unsigned location) +{ + assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); + nir_foreach_variable_with_modes(var, shader, mode) { + if (var->data.location == location) + return var; + } + return NULL; +} + +nir_variable * +nir_find_variable_with_driver_location(nir_shader *shader, + nir_variable_mode mode, + unsigned location) +{ + assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); + nir_foreach_variable_with_modes(var, shader, mode) { + if (var->data.driver_location == location) + return var; + } + return NULL; +} + nir_function * nir_function_create(nir_shader *shader, const char *name) { @@ -198,12 +208,15 @@ nir_function_create(nir_shader *shader, const char *name) func->shader = shader; func->num_params = 0; func->params = NULL; - func->return_type = glsl_void_type(); func->impl = NULL; + func->is_entrypoint = false; return func; } +/* NOTE: if the instruction you are copying a src to is already added + * to the IR, use nir_instr_rewrite_src() instead. 
+ */ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) { dest->is_ssa = src->is_ssa; @@ -245,7 +258,7 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, nir_src_copy(&dest->src, &src->src, &instr->instr); dest->abs = src->abs; dest->negate = src->negate; - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) dest->swizzle[i] = src->swizzle[i]; } @@ -279,9 +292,6 @@ nir_function_impl_create_bare(nir_shader *shader) exec_list_make_empty(&impl->body); exec_list_make_empty(&impl->registers); exec_list_make_empty(&impl->locals); - impl->num_params = 0; - impl->params = NULL; - impl->return_var = NULL; impl->reg_alloc = 0; impl->ssa_alloc = 0; impl->valid_metadata = nir_metadata_none; @@ -310,26 +320,6 @@ nir_function_impl_create(nir_function *function) function->impl = impl; impl->function = function; - impl->num_params = function->num_params; - impl->params = ralloc_array(function->shader, - nir_variable *, impl->num_params); - - for (unsigned i = 0; i < impl->num_params; i++) { - impl->params[i] = rzalloc(function->shader, nir_variable); - impl->params[i]->type = function->params[i].type; - impl->params[i]->data.mode = nir_var_param; - impl->params[i]->data.location = i; - } - - if (!glsl_type_is_void(function->return_type)) { - impl->return_var = rzalloc(function->shader, nir_variable); - impl->return_var->type = function->return_type; - impl->return_var->data.mode = nir_var_param; - impl->return_var->data.location = -1; - } else { - impl->return_var = NULL; - } - return impl; } @@ -341,19 +331,17 @@ nir_block_create(nir_shader *shader) cf_init(&block->cf_node, nir_cf_node_block); block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); + block->predecessors = _mesa_pointer_set_create(block); block->imm_dom = NULL; /* XXX maybe it would be worth it to defer allocation? This - * way it doesn't get allocated for shader ref's that never run + * way it doesn't get allocated for shader refs that never run * nir_calc_dominance? For example, state-tracker creates an * initial IR, clones that, runs appropriate lowering pass, passes * to driver which does common lowering/opt, and then stores ref * which is later used to do state specific lowering and futher * opt. Do any of the references not need dominance metadata? 
*/ - block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); + block->dom_frontier = _mesa_pointer_set_create(block); exec_list_make_empty(&block->instr_list); @@ -374,6 +362,8 @@ nir_if_create(nir_shader *shader) { nir_if *if_stmt = ralloc(shader, nir_if); + if_stmt->control = nir_selection_control_none; + cf_init(&if_stmt->cf_node, nir_cf_node_if); src_init(&if_stmt->condition); @@ -438,10 +428,8 @@ alu_src_init(nir_alu_src *src) { src_init(&src->src); src->abs = src->negate = false; - src->swizzle[0] = 0; - src->swizzle[1] = 1; - src->swizzle[2] = 2; - src->swizzle[3] = 3; + for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) + src->swizzle[i] = i; } nir_alu_instr * @@ -462,6 +450,27 @@ nir_alu_instr_create(nir_shader *shader, nir_op op) return instr; } +nir_deref_instr * +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) +{ + nir_deref_instr *instr = + rzalloc_size(shader, sizeof(nir_deref_instr)); + + instr_init(&instr->instr, nir_instr_type_deref); + + instr->deref_type = deref_type; + if (deref_type != nir_deref_type_var) + src_init(&instr->parent); + + if (deref_type == nir_deref_type_array || + deref_type == nir_deref_type_ptr_as_array) + src_init(&instr->arr.index); + + dest_init(&instr->dest); + + return instr; +} + nir_jump_instr * nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { @@ -475,7 +484,8 @@ nir_load_const_instr * nir_load_const_instr_create(nir_shader *shader, unsigned num_components, unsigned bit_size) { - nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); + nir_load_const_instr *instr = + rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value)); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL); @@ -507,17 +517,28 @@ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) nir_call_instr * nir_call_instr_create(nir_shader *shader, nir_function *callee) { - nir_call_instr *instr = ralloc(shader, nir_call_instr); - instr_init(&instr->instr, nir_instr_type_call); + const unsigned num_params = callee->num_params; + nir_call_instr *instr = + rzalloc_size(shader, sizeof(*instr) + + num_params * sizeof(instr->params[0])); + instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; - instr->num_params = callee->num_params; - instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); - instr->return_deref = NULL; + instr->num_params = num_params; + for (unsigned i = 0; i < num_params; i++) + src_init(&instr->params[i]); return instr; } +static int8_t default_tg4_offsets[4][2] = +{ + { 0, 1 }, + { 1, 1 }, + { 1, 0 }, + { 0, 0 }, +}; + nir_tex_instr * nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) { @@ -532,14 +553,34 @@ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) src_init(&instr->src[i].src); instr->texture_index = 0; - instr->texture_array_size = 0; - instr->texture = NULL; instr->sampler_index = 0; - instr->sampler = NULL; + memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets)); return instr; } +void +nir_tex_instr_add_src(nir_tex_instr *tex, + nir_tex_src_type src_type, + nir_src src) +{ + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, + &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + 
tex->src[tex->num_srcs].src_type = src_type; + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src); + tex->num_srcs++; +} + void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx) { @@ -556,6 +597,15 @@ nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx) tex->num_srcs--; } +bool +nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex) +{ + if (tex->op != nir_texop_tg4) + return false; + return memcmp(tex->tg4_offsets, default_tg4_offsets, + sizeof(tex->tg4_offsets)) != 0; +} + nir_phi_instr * nir_phi_instr_create(nir_shader *shader) { @@ -591,271 +641,73 @@ nir_ssa_undef_instr_create(nir_shader *shader, return instr; } -nir_deref_var * -nir_deref_var_create(void *mem_ctx, nir_variable *var) -{ - nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); - deref->deref.deref_type = nir_deref_type_var; - deref->deref.child = NULL; - deref->deref.type = var->type; - deref->var = var; - return deref; -} - -nir_deref_array * -nir_deref_array_create(void *mem_ctx) -{ - nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); - deref->deref.deref_type = nir_deref_type_array; - deref->deref.child = NULL; - deref->deref_array_type = nir_deref_array_type_direct; - src_init(&deref->indirect); - deref->base_offset = 0; - return deref; -} - -nir_deref_struct * -nir_deref_struct_create(void *mem_ctx, unsigned field_index) +static nir_const_value +const_value_float(double d, unsigned bit_size) { - nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); - deref->deref.deref_type = nir_deref_type_struct; - deref->deref.child = NULL; - deref->index = field_index; - return deref; -} - -nir_deref_var * -nir_deref_var_clone(const nir_deref_var *deref, void *mem_ctx) -{ - if (deref == NULL) - return NULL; - - nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_deref_clone(deref->deref.child, ret); - return ret; -} - -static nir_deref_array * -deref_array_clone(const nir_deref_array *deref, void *mem_ctx) -{ - nir_deref_array *ret = nir_deref_array_create(mem_ctx); - ret->base_offset = deref->base_offset; - ret->deref_array_type = deref->deref_array_type; - if (deref->deref_array_type == nir_deref_array_type_indirect) { - nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); - } - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_deref_clone(deref->deref.child, ret); - return ret; -} - -static nir_deref_struct * -deref_struct_clone(const nir_deref_struct *deref, void *mem_ctx) -{ - nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_deref_clone(deref->deref.child, ret); - return ret; -} - -nir_deref * -nir_deref_clone(const nir_deref *deref, void *mem_ctx) -{ - if (deref == NULL) - return NULL; - - switch (deref->deref_type) { - case nir_deref_type_var: - return &nir_deref_var_clone(nir_deref_as_var(deref), mem_ctx)->deref; - case nir_deref_type_array: - return &deref_array_clone(nir_deref_as_array(deref), mem_ctx)->deref; - case nir_deref_type_struct: - return &deref_struct_clone(nir_deref_as_struct(deref), mem_ctx)->deref; - default: - unreachable("Invalid dereference type"); - } - - return NULL; -} - -/* This is the second step in the recursion. We've found the tail and made a - * copy. Now we need to iterate over all possible leaves and call the - * callback on each one. 
- */ -static bool -deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail, - nir_deref_foreach_leaf_cb cb, void *state) -{ - unsigned length; - union { - nir_deref_array arr; - nir_deref_struct str; - } tmp; - - assert(tail->child == NULL); - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: - if (glsl_type_is_vector_or_scalar(tail->type)) - return cb(deref, state); - /* Fall Through */ - - case GLSL_TYPE_ARRAY: - tmp.arr.deref.deref_type = nir_deref_type_array; - tmp.arr.deref.type = glsl_get_array_element(tail->type); - tmp.arr.deref_array_type = nir_deref_array_type_direct; - tmp.arr.indirect = NIR_SRC_INIT; - tail->child = &tmp.arr.deref; - - length = glsl_get_length(tail->type); - for (unsigned i = 0; i < length; i++) { - tmp.arr.deref.child = NULL; - tmp.arr.base_offset = i; - if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state)) - return false; - } - return true; - - case GLSL_TYPE_STRUCT: - tmp.str.deref.deref_type = nir_deref_type_struct; - tail->child = &tmp.str.deref; - - length = glsl_get_length(tail->type); - for (unsigned i = 0; i < length; i++) { - tmp.arr.deref.child = NULL; - tmp.str.deref.type = glsl_get_struct_field(tail->type, i); - tmp.str.index = i; - if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state)) - return false; - } - return true; - + nir_const_value v; + memset(&v, 0, sizeof(v)); + switch (bit_size) { + case 16: v.u16 = _mesa_float_to_half(d); break; + case 32: v.f32 = d; break; + case 64: v.f64 = d; break; default: - unreachable("Invalid type for dereference"); + unreachable("Invalid bit size"); } + return v; } -/* This is the first step of the foreach_leaf recursion. In this step we are - * walking to the end of the deref chain and making a copy in the stack as we - * go. This is because we don't want to mutate the deref chain that was - * passed in by the caller. The downside is that this deref chain is on the - * stack and , if the caller wants to do anything with it, they will have to - * make their own copy because this one will go away. - */ -static bool -deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail, - nir_deref_foreach_leaf_cb cb, void *state) -{ - union { - nir_deref_array arr; - nir_deref_struct str; - } c; - - if (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: - c.arr = *nir_deref_as_array(tail->child); - tail->child = &c.arr.deref; - return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state); - - case nir_deref_type_struct: - c.str = *nir_deref_as_struct(tail->child); - tail->child = &c.str.deref; - return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state); - - case nir_deref_type_var: - default: - unreachable("Invalid deref type for a child"); - } - } else { - /* We've gotten to the end of the original deref. Time to start - * building our own derefs. - */ - return deref_foreach_leaf_build_recur(deref, tail, cb, state); - } -} - -/** - * This function iterates over all of the possible derefs that can be created - * with the given deref as the head. It then calls the provided callback with - * a full deref for each one. - * - * The deref passed to the callback will be allocated on the stack. You will - * need to make a copy if you want it to hang around. 
- */ -bool -nir_deref_foreach_leaf(nir_deref_var *deref, - nir_deref_foreach_leaf_cb cb, void *state) +static nir_const_value +const_value_int(int64_t i, unsigned bit_size) { - nir_deref_var copy = *deref; - return deref_foreach_leaf_copy_recur(©, ©.deref, cb, state); -} - -/* Returns a load_const instruction that represents the constant - * initializer for the given deref chain. The caller is responsible for - * ensuring that there actually is a constant initializer. - */ -nir_load_const_instr * -nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) -{ - nir_constant *constant = deref->var->constant_initializer; - assert(constant); - - const nir_deref *tail = &deref->deref; - unsigned matrix_col = 0; - while (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *arr = nir_deref_as_array(tail->child); - assert(arr->deref_array_type == nir_deref_array_type_direct); - if (glsl_type_is_matrix(tail->type)) { - assert(arr->deref.child == NULL); - matrix_col = arr->base_offset; - } else { - constant = constant->elements[arr->base_offset]; - } - break; - } - - case nir_deref_type_struct: { - constant = constant->elements[nir_deref_as_struct(tail->child)->index]; - break; - } - - default: - unreachable("Invalid deref child type"); - } - - tail = tail->child; - } - - unsigned bit_size = glsl_get_bit_size(tail->type); - nir_load_const_instr *load = - nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type), - bit_size); - - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_UINT64: - case GLSL_TYPE_INT64: - case GLSL_TYPE_BOOL: - load->value = constant->values[matrix_col]; - break; + nir_const_value v; + memset(&v, 0, sizeof(v)); + switch (bit_size) { + case 1: v.b = i & 1; break; + case 8: v.i8 = i; break; + case 16: v.i16 = i; break; + case 32: v.i32 = i; break; + case 64: v.i64 = i; break; + default: + unreachable("Invalid bit size"); + } + return v; +} + +nir_const_value +nir_alu_binop_identity(nir_op binop, unsigned bit_size) +{ + const int64_t max_int = (1ull << (bit_size - 1)) - 1; + const int64_t min_int = -max_int - 1; + switch (binop) { + case nir_op_iadd: + return const_value_int(0, bit_size); + case nir_op_fadd: + return const_value_float(0, bit_size); + case nir_op_imul: + return const_value_int(1, bit_size); + case nir_op_fmul: + return const_value_float(1, bit_size); + case nir_op_imin: + return const_value_int(max_int, bit_size); + case nir_op_umin: + return const_value_int(~0ull, bit_size); + case nir_op_fmin: + return const_value_float(INFINITY, bit_size); + case nir_op_imax: + return const_value_int(min_int, bit_size); + case nir_op_umax: + return const_value_int(0, bit_size); + case nir_op_fmax: + return const_value_float(-INFINITY, bit_size); + case nir_op_iand: + return const_value_int(~0ull, bit_size); + case nir_op_ior: + return const_value_int(0, bit_size); + case nir_op_ixor: + return const_value_int(0, bit_size); default: - unreachable("Invalid immediate type"); + unreachable("Invalid reduction operation"); } - - return load; } nir_function_impl * @@ -1055,7 +907,7 @@ remove_defs_uses(nir_instr *instr) nir_foreach_src(instr, remove_use_cb, instr); } -void nir_instr_remove(nir_instr *instr) +void nir_instr_remove_v(nir_instr *instr) { remove_defs_uses(instr); exec_node_remove(&instr->node); @@ -1078,20 +930,16 @@ nir_index_local_regs(nir_function_impl *impl) impl->reg_alloc = index; } 
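
Usage sketch — hypothetical code, not part of this patch: the
nir_alu_binop_identity helper added above returns the identity element of a
reduction operator, i.e. the value an empty reduction must produce. A caller
lowering a reduction to a loop would typically materialize that value with
the existing nir_build_imm helper to seed the accumulator:

static nir_ssa_def *
build_reduction_identity(nir_builder *b, nir_op binop, unsigned bit_size)
{
   /* e.g. 0 for iadd/fadd, ~0 for iand/umin, -INFINITY for fmax */
   nir_const_value ident = nir_alu_binop_identity(binop, bit_size);
   return nir_build_imm(b, 1, bit_size, &ident);
}
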
-void -nir_index_global_regs(nir_shader *shader) +static bool +visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) { - unsigned index = 0; - foreach_list_typed(nir_register, reg, node, &shader->registers) { - reg->index = index++; - } - shader->reg_alloc = index; + return cb(&instr->dest.dest, state); } static bool -visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state) { - return cb(&instr->dest.dest, state); + return cb(&instr->dest, state); } static bool @@ -1135,6 +983,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) switch (instr->type) { case nir_instr_type_alu: return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_deref: + return visit_deref_dest(nir_instr_as_deref(instr), cb, state); case nir_instr_type_intrinsic: return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); case nir_instr_type_tex: @@ -1180,6 +1030,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) { switch (instr->type) { case nir_instr_type_alu: + case nir_instr_type_deref: case nir_instr_type_tex: case nir_instr_type_intrinsic: case nir_instr_type_phi: @@ -1200,6 +1051,53 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) } } +nir_ssa_def * +nir_instr_ssa_def(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); + return &nir_instr_as_alu(instr)->dest.dest.ssa; + + case nir_instr_type_deref: + assert(nir_instr_as_deref(instr)->dest.is_ssa); + return &nir_instr_as_deref(instr)->dest.ssa; + + case nir_instr_type_tex: + assert(nir_instr_as_tex(instr)->dest.is_ssa); + return &nir_instr_as_tex(instr)->dest.ssa; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (nir_intrinsic_infos[intrin->intrinsic].has_dest) { + assert(intrin->dest.is_ssa); + return &intrin->dest.ssa; + } else { + return NULL; + } + } + + case nir_instr_type_phi: + assert(nir_instr_as_phi(instr)->dest.is_ssa); + return &nir_instr_as_phi(instr)->dest.ssa; + + case nir_instr_type_parallel_copy: + unreachable("Parallel copies are unsupported by this function"); + + case nir_instr_type_load_const: + return &nir_instr_as_load_const(instr)->def; + + case nir_instr_type_ssa_undef: + return &nir_instr_as_ssa_undef(instr)->def; + + case nir_instr_type_call: + case nir_instr_type_jump: + return NULL; + } + + unreachable("Invalid instruction type"); +} + static bool visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) { @@ -1211,36 +1109,29 @@ visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) } static bool -visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, - void *state) +visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) { - if (deref->deref_array_type == nir_deref_array_type_indirect) - return visit_src(&deref->indirect, cb, state); + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + return true; } static bool -visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) +visit_deref_instr_src(nir_deref_instr *instr, + nir_foreach_src_cb cb, void *state) { - nir_deref *cur = &deref->deref; - while (cur != NULL) { - if (cur->deref_type == nir_deref_type_array) { - if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) - return false; 
- } - - cur = cur->child; + if (instr->deref_type != nir_deref_type_var) { + if (!visit_src(&instr->parent, cb, state)) + return false; } - return true; -} - -static bool -visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) -{ - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - if (!visit_src(&instr->src[i].src, cb, state)) + if (instr->deref_type == nir_deref_type_array || + instr->deref_type == nir_deref_type_ptr_as_array) { + if (!visit_src(&instr->arr.index, cb, state)) return false; + } return true; } @@ -1253,16 +1144,6 @@ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) return false; } - if (instr->texture != NULL) { - if (!visit_deref_src(instr->texture, cb, state)) - return false; - } - - if (instr->sampler != NULL) { - if (!visit_deref_src(instr->sampler, cb, state)) - return false; - } - return true; } @@ -1276,10 +1157,14 @@ visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, return false; } - unsigned num_vars = - nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - if (!visit_deref_src(instr->variables[i], cb, state)) + return true; +} + +static bool +visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) +{ + for (unsigned i = 0; i < instr->num_params; i++) { + if (!visit_src(&instr->params[i], cb, state)) return false; } @@ -1333,6 +1218,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) return false; break; + case nir_instr_type_deref: + if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state)) + return false; + break; case nir_instr_type_intrinsic: if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) return false; @@ -1342,7 +1231,8 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return false; break; case nir_instr_type_call: - /* Call instructions have no regular sources */ + if (!visit_call_src(nir_instr_as_call(instr), cb, state)) + return false; break; case nir_instr_type_load_const: /* Constant load instructions have no regular sources */ @@ -1371,6 +1261,67 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); } +bool +nir_foreach_phi_src_leaving_block(nir_block *block, + nir_foreach_src_cb cb, + void *state) +{ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (block->successors[i] == NULL) + continue; + + nir_foreach_instr(instr, block->successors[i]) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi_src, phi) { + if (phi_src->pred == block) { + if (!cb(&phi_src->src, state)) + return false; + } + } + } + } + + return true; +} + +nir_const_value +nir_const_value_for_float(double f, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 16: + v.u16 = _mesa_float_to_half(f); + break; + case 32: + v.f32 = f; + break; + case 64: + v.f64 = f; + break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +double +nir_const_value_as_float(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 16: return _mesa_half_to_float(value.u16); + case 32: return value.f32; + case 64: return value.f64; + default: + unreachable("Invalid bit size"); + } +} + nir_const_value * nir_src_as_const_value(nir_src src) { @@ -1382,7 +1333,7 @@ 
nir_src_as_const_value(nir_src src) nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - return &load->value; + return load->value; } /** @@ -1403,11 +1354,24 @@ nir_src_is_dynamically_uniform(nir_src src) /* As are uniform variables */ if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); - - if (intr->intrinsic == nir_intrinsic_load_uniform) + if (intr->intrinsic == nir_intrinsic_load_uniform && + nir_src_is_dynamically_uniform(intr->src[0])) return true; } + /* Operating together dynamically uniform expressions produces a + * dynamically uniform result + */ + if (src.ssa->parent_instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr); + for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if (!nir_src_is_dynamically_uniform(alu->src[i].src)) + return false; + } + + return true; + } + /* XXX: this could have many more tests, such as when a sampler function is * called with dynamically uniform arguments. */ @@ -1487,7 +1451,7 @@ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) { if (dest->is_ssa) { /* We can only overwrite an SSA destination if it has no uses. */ - assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses)); + assert(list_is_empty(&dest->ssa.uses) && list_is_empty(&dest->ssa.if_uses)); } else { list_del(&dest->reg.def_link); if (dest->reg.indirect) @@ -1518,6 +1482,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, list_inithead(&def->if_uses); def->num_components = num_components; def->bit_size = bit_size; + def->divergent = true; /* This is the safer default */ if (instr->block) { nir_function_impl *impl = @@ -1585,7 +1550,8 @@ void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_instr *after_me) { - assert(!new_src.is_ssa || def != new_src.ssa); + if (new_src.is_ssa && def == new_src.ssa) + return; nir_foreach_use_safe(use_src, def) { assert(use_src->parent_instr != def->parent_instr); @@ -1601,28 +1567,25 @@ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_if_rewrite_condition(use_src->parent_if, new_src); } -uint8_t -nir_ssa_def_components_read(nir_ssa_def *def) +nir_component_mask_t +nir_ssa_def_components_read(const nir_ssa_def *def) { - uint8_t read_mask = 0; + nir_component_mask_t read_mask = 0; nir_foreach_use(use, def) { if (use->parent_instr->type == nir_instr_type_alu) { nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr); nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src); int src_idx = alu_src - &alu->src[0]; assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs); - - for (unsigned c = 0; c < 4; c++) { - if (!nir_alu_instr_channel_used(alu, src_idx, c)) - continue; - - read_mask |= (1 << alu_src->swizzle[c]); - } + read_mask |= nir_alu_instr_src_read_mask(alu, src_idx); } else { return (1 << def->num_components) - 1; } } + if (!list_is_empty(&def->if_uses)) + read_mask |= 1; + return read_mask; } @@ -1651,8 +1614,8 @@ nir_block_cf_tree_next(nir_block *block) return nir_if_first_else_block(if_stmt); assert(block == nir_if_last_else_block(if_stmt)); - /* fall through */ } + /* fallthrough */ case nir_cf_node_loop: return nir_cf_node_as_block(nir_cf_node_next(parent)); @@ -1687,8 +1650,8 @@ nir_block_cf_tree_prev(nir_block *block) return nir_if_last_then_block(if_stmt); assert(block == nir_if_first_then_block(if_stmt)); - /* fall through */ } + /* fallthrough */ case nir_cf_node_loop: 
return nir_cf_node_as_block(nir_cf_node_prev(parent)); @@ -1811,7 +1774,10 @@ nir_index_blocks(nir_function_impl *impl) block->index = index++; } - impl->num_blocks = index; + /* The end_block isn't really part of the program, which is why its index + * is >= num_blocks. + */ + impl->num_blocks = impl->end_block->index = index; } static bool @@ -1857,6 +1823,172 @@ nir_index_instrs(nir_function_impl *impl) return index; } +unsigned +nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes) +{ + unsigned count = 0; + nir_foreach_variable_with_modes(var, shader, modes) + var->index = count++; + return count; +} + +unsigned +nir_function_impl_index_vars(nir_function_impl *impl) +{ + unsigned count = 0; + nir_foreach_function_temp_variable(var, impl) + var->index = count++; + return count; +} + +static nir_instr * +cursor_next_instr(nir_cursor cursor) +{ + switch (cursor.option) { + case nir_cursor_before_block: + for (nir_block *block = cursor.block; block; + block = nir_block_cf_tree_next(block)) { + nir_instr *instr = nir_block_first_instr(block); + if (instr) + return instr; + } + return NULL; + + case nir_cursor_after_block: + cursor.block = nir_block_cf_tree_next(cursor.block); + if (cursor.block == NULL) + return NULL; + + cursor.option = nir_cursor_before_block; + return cursor_next_instr(cursor); + + case nir_cursor_before_instr: + return cursor.instr; + + case nir_cursor_after_instr: + if (nir_instr_next(cursor.instr)) + return nir_instr_next(cursor.instr); + + cursor.option = nir_cursor_after_block; + cursor.block = cursor.instr->block; + return cursor_next_instr(cursor); + } + + unreachable("Inavlid cursor option"); +} + +ASSERTED static bool +dest_is_ssa(nir_dest *dest, void *_state) +{ + (void) _state; + return dest->is_ssa; +} + +bool +nir_function_impl_lower_instructions(nir_function_impl *impl, + nir_instr_filter_cb filter, + nir_lower_instr_cb lower, + void *cb_data) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_metadata preserved = nir_metadata_block_index | + nir_metadata_dominance; + + bool progress = false; + nir_cursor iter = nir_before_cf_list(&impl->body); + nir_instr *instr; + while ((instr = cursor_next_instr(iter)) != NULL) { + if (filter && !filter(instr, cb_data)) { + iter = nir_after_instr(instr); + continue; + } + + assert(nir_foreach_dest(instr, dest_is_ssa, NULL)); + nir_ssa_def *old_def = nir_instr_ssa_def(instr); + if (old_def == NULL) { + iter = nir_after_instr(instr); + continue; + } + + /* We're about to ask the callback to generate a replacement for instr. + * Save off the uses from instr's SSA def so we know what uses to + * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the + * case where the generated replacement code uses the result of instr + * itself. If we use nir_ssa_def_rewrite_uses_after (which is the + * normal solution to this problem), it doesn't work well if control- + * flow is inserted as part of the replacement, doesn't handle cases + * where the replacement is something consumed by instr, and suffers + * from performance issues. This is the only way to 100% guarantee + * that we rewrite the correct set efficiently. 
+ */ + struct list_head old_uses, old_if_uses; + list_replace(&old_def->uses, &old_uses); + list_inithead(&old_def->uses); + list_replace(&old_def->if_uses, &old_if_uses); + list_inithead(&old_def->if_uses); + + b.cursor = nir_after_instr(instr); + nir_ssa_def *new_def = lower(&b, instr, cb_data); + if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS) { + assert(old_def != NULL); + if (new_def->parent_instr->block != instr->block) + preserved = nir_metadata_none; + + nir_src new_src = nir_src_for_ssa(new_def); + list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link) + nir_if_rewrite_condition(use_src->parent_if, new_src); + + if (list_is_empty(&old_def->uses) && list_is_empty(&old_def->if_uses)) { + iter = nir_instr_remove(instr); + } else { + iter = nir_after_instr(instr); + } + progress = true; + } else { + /* We didn't end up lowering after all. Put the uses back */ + if (old_def) { + list_replace(&old_uses, &old_def->uses); + list_replace(&old_if_uses, &old_def->if_uses); + } + iter = nir_after_instr(instr); + + if (new_def == NIR_LOWER_INSTR_PROGRESS) + progress = true; + } + } + + if (progress) { + nir_metadata_preserve(impl, preserved); + } else { + nir_metadata_preserve(impl, nir_metadata_all); + } + + return progress; +} + +bool +nir_shader_lower_instructions(nir_shader *shader, + nir_instr_filter_cb filter, + nir_lower_instr_cb lower, + void *cb_data) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl && + nir_function_impl_lower_instructions(function->impl, + filter, lower, cb_data)) + progress = true; + } + + return progress; +} + nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val) { @@ -1871,10 +2003,20 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_base_instance; case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: return nir_intrinsic_load_vertex_id_zero_base; + case SYSTEM_VALUE_IS_INDEXED_DRAW: + return nir_intrinsic_load_is_indexed_draw; + case SYSTEM_VALUE_FIRST_VERTEX: + return nir_intrinsic_load_first_vertex; case SYSTEM_VALUE_BASE_VERTEX: return nir_intrinsic_load_base_vertex; case SYSTEM_VALUE_INVOCATION_ID: return nir_intrinsic_load_invocation_id; + case SYSTEM_VALUE_FRAG_COORD: + return nir_intrinsic_load_frag_coord; + case SYSTEM_VALUE_POINT_COORD: + return nir_intrinsic_load_point_coord; + case SYSTEM_VALUE_LINE_COORD: + return nir_intrinsic_load_line_coord; case SYSTEM_VALUE_FRONT_FACE: return nir_intrinsic_load_front_face; case SYSTEM_VALUE_SAMPLE_ID: @@ -1899,10 +2041,48 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_tess_level_outer; case SYSTEM_VALUE_TESS_LEVEL_INNER: return nir_intrinsic_load_tess_level_inner; + case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT: + return nir_intrinsic_load_tess_level_outer_default; + case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT: + return nir_intrinsic_load_tess_level_inner_default; case SYSTEM_VALUE_VERTICES_IN: return nir_intrinsic_load_patch_vertices_in; case SYSTEM_VALUE_HELPER_INVOCATION: return nir_intrinsic_load_helper_invocation; + case SYSTEM_VALUE_COLOR0: + return nir_intrinsic_load_color0; + case SYSTEM_VALUE_COLOR1: + return nir_intrinsic_load_color1; + case SYSTEM_VALUE_VIEW_INDEX: + return nir_intrinsic_load_view_index; + case SYSTEM_VALUE_SUBGROUP_SIZE: + return nir_intrinsic_load_subgroup_size; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return 
nir_intrinsic_load_subgroup_invocation; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + return nir_intrinsic_load_subgroup_eq_mask; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + return nir_intrinsic_load_subgroup_ge_mask; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + return nir_intrinsic_load_subgroup_gt_mask; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + return nir_intrinsic_load_subgroup_le_mask; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: + return nir_intrinsic_load_subgroup_lt_mask; + case SYSTEM_VALUE_NUM_SUBGROUPS: + return nir_intrinsic_load_num_subgroups; + case SYSTEM_VALUE_SUBGROUP_ID: + return nir_intrinsic_load_subgroup_id; + case SYSTEM_VALUE_LOCAL_GROUP_SIZE: + return nir_intrinsic_load_local_group_size; + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: + return nir_intrinsic_load_global_invocation_id; + case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: + return nir_intrinsic_load_global_invocation_index; + case SYSTEM_VALUE_WORK_DIM: + return nir_intrinsic_load_work_dim; + case SYSTEM_VALUE_USER_DATA_AMD: + return nir_intrinsic_load_user_data_amd; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1922,10 +2102,20 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_BASE_INSTANCE; case nir_intrinsic_load_vertex_id_zero_base: return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + case nir_intrinsic_load_first_vertex: + return SYSTEM_VALUE_FIRST_VERTEX; + case nir_intrinsic_load_is_indexed_draw: + return SYSTEM_VALUE_IS_INDEXED_DRAW; case nir_intrinsic_load_base_vertex: return SYSTEM_VALUE_BASE_VERTEX; case nir_intrinsic_load_invocation_id: return SYSTEM_VALUE_INVOCATION_ID; + case nir_intrinsic_load_frag_coord: + return SYSTEM_VALUE_FRAG_COORD; + case nir_intrinsic_load_point_coord: + return SYSTEM_VALUE_POINT_COORD; + case nir_intrinsic_load_line_coord: + return SYSTEM_VALUE_LINE_COORD; case nir_intrinsic_load_front_face: return SYSTEM_VALUE_FRONT_FACE; case nir_intrinsic_load_sample_id: @@ -1950,133 +2140,148 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_TESS_LEVEL_OUTER; case nir_intrinsic_load_tess_level_inner: return SYSTEM_VALUE_TESS_LEVEL_INNER; + case nir_intrinsic_load_tess_level_outer_default: + return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT; + case nir_intrinsic_load_tess_level_inner_default: + return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT; case nir_intrinsic_load_patch_vertices_in: return SYSTEM_VALUE_VERTICES_IN; case nir_intrinsic_load_helper_invocation: return SYSTEM_VALUE_HELPER_INVOCATION; + case nir_intrinsic_load_color0: + return SYSTEM_VALUE_COLOR0; + case nir_intrinsic_load_color1: + return SYSTEM_VALUE_COLOR1; + case nir_intrinsic_load_view_index: + return SYSTEM_VALUE_VIEW_INDEX; + case nir_intrinsic_load_subgroup_size: + return SYSTEM_VALUE_SUBGROUP_SIZE; + case nir_intrinsic_load_subgroup_invocation: + return SYSTEM_VALUE_SUBGROUP_INVOCATION; + case nir_intrinsic_load_subgroup_eq_mask: + return SYSTEM_VALUE_SUBGROUP_EQ_MASK; + case nir_intrinsic_load_subgroup_ge_mask: + return SYSTEM_VALUE_SUBGROUP_GE_MASK; + case nir_intrinsic_load_subgroup_gt_mask: + return SYSTEM_VALUE_SUBGROUP_GT_MASK; + case nir_intrinsic_load_subgroup_le_mask: + return SYSTEM_VALUE_SUBGROUP_LE_MASK; + case nir_intrinsic_load_subgroup_lt_mask: + return SYSTEM_VALUE_SUBGROUP_LT_MASK; + case nir_intrinsic_load_num_subgroups: + return SYSTEM_VALUE_NUM_SUBGROUPS; + case nir_intrinsic_load_subgroup_id: + return SYSTEM_VALUE_SUBGROUP_ID; + case nir_intrinsic_load_local_group_size: + return SYSTEM_VALUE_LOCAL_GROUP_SIZE; + case 
nir_intrinsic_load_global_invocation_id: + return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + case nir_intrinsic_load_user_data_amd: + return SYSTEM_VALUE_USER_DATA_AMD; default: unreachable("intrinsic doesn't produce a system value"); } } -nir_op -nir_type_conversion_op(nir_alu_type src, nir_alu_type dst) -{ - nir_alu_type src_base_type = (nir_alu_type) nir_alu_type_get_base_type(src); - nir_alu_type dst_base_type = (nir_alu_type) nir_alu_type_get_base_type(dst); - unsigned src_bitsize = nir_alu_type_get_type_size(src); - unsigned dst_bitsize = nir_alu_type_get_type_size(dst); - - if (src_bitsize == dst_bitsize) { - switch (src_base_type) { - case nir_type_int: - case nir_type_uint: - if (dst_base_type == nir_type_uint || dst_base_type == nir_type_int) - return nir_op_imov; - break; - case nir_type_float: - if (dst_base_type == nir_type_float) - return nir_op_fmov; - break; - case nir_type_bool: - if (dst_base_type == nir_type_bool) - return nir_op_imov; - break; - default: - unreachable("Invalid conversion"); - } - } +/* OpenGL utility method that remaps the location attributes if they are + * doubles. Not needed for vulkan due the differences on the input location + * count for doubles on vulkan vs OpenGL + * + * The bitfield returned in dual_slot is one bit for each double input slot in + * the original OpenGL single-slot input numbering. The mapping from old + * locations to new locations is as follows: + * + * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) + */ +void +nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot) +{ + assert(shader->info.stage == MESA_SHADER_VERTEX); - switch (src_base_type) { - case nir_type_int: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2i32 : nir_op_i2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2u32 : nir_op_i2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_i2f : nir_op_i2d; - case 64: - return (dst_bitsize == 32) ? nir_op_i642f : nir_op_i642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); + *dual_slot = 0; + nir_foreach_shader_in_variable(var, shader) { + if (glsl_type_is_dual_slot(glsl_without_array(var->type))) { + unsigned slots = glsl_count_attribute_slots(var->type, true); + *dual_slot |= BITFIELD64_MASK(slots) << var->data.location; } + } - case nir_type_uint: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_u2i32 : nir_op_u2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_u2u32 : nir_op_u2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_u2f : nir_op_u2d; - case 64: - return (dst_bitsize == 32) ? nir_op_u642f : nir_op_u642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); - } + nir_foreach_shader_in_variable(var, shader) { + var->data.location += + util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location)); + } +} - case nir_type_float: - switch (dst_base_type) { - case nir_type_int: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? 
nir_op_f2i : nir_op_f2i64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2i : nir_op_f2i64; - default: - unreachable("Invalid conversion"); - } - case nir_type_uint: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_f2u : nir_op_f2u64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2u : nir_op_f2u64; - default: - unreachable("Invalid conversion"); - } - case nir_type_float: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_d2f : nir_op_f2d; - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_f2b : nir_op_d2b; - default: - unreachable("Invalid conversion"); - } +/* Returns an attribute mask that has been re-compacted using the given + * dual_slot mask. + */ +uint64_t +nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot) +{ + while (dual_slot) { + unsigned loc = u_bit_scan64(&dual_slot); + /* mask of all bits up to and including loc */ + uint64_t mask = BITFIELD64_MASK(loc + 1); + attribs = (attribs & mask) | ((attribs & ~mask) >> 1); + } + return attribs; +} - case nir_type_bool: - switch (dst_base_type) { - case nir_type_int: - case nir_type_uint: - return (dst_bitsize == 32) ? nir_op_b2i : nir_op_b2i64; - case nir_type_float: - /* GLSL just emits f2d(b2f(x)) for b2d */ - assert(dst_bitsize == 32); - return nir_op_b2f; - default: - unreachable("Invalid conversion"); - } +void +nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, + bool bindless) +{ + enum gl_access_qualifier access = nir_intrinsic_access(intrin); + switch (intrin->intrinsic) { +#define CASE(op) \ + case nir_intrinsic_image_deref_##op: \ + intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \ + : nir_intrinsic_image_##op; \ + break; + CASE(load) + CASE(store) + CASE(atomic_add) + CASE(atomic_imin) + CASE(atomic_umin) + CASE(atomic_imax) + CASE(atomic_umax) + CASE(atomic_and) + CASE(atomic_or) + CASE(atomic_xor) + CASE(atomic_exchange) + CASE(atomic_comp_swap) + CASE(atomic_fadd) + CASE(atomic_inc_wrap) + CASE(atomic_dec_wrap) + CASE(size) + CASE(samples) + CASE(load_raw_intel) + CASE(store_raw_intel) +#undef CASE default: - unreachable("Invalid conversion"); + unreachable("Unhanded image intrinsic"); } + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type)); + nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type)); + nir_intrinsic_set_access(intrin, access | var->data.access); + nir_intrinsic_set_format(intrin, var->data.image.format); + + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(src)); +} + +unsigned +nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr) +{ + enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + int coords = glsl_get_sampler_dim_coordinate_components(dim); + if (dim == GLSL_SAMPLER_DIM_CUBE) + return coords; + else + return coords + nir_intrinsic_image_array(instr); }
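
Usage notes for the helpers introduced above — hypothetical sketches under
stated assumptions, not part of this patch.

nir_shader_lower_instructions drives a lowering pass through two callbacks: a
filter that picks the instructions to touch, and a lower callback that either
returns the replacement SSA value, returns NULL to decline, or returns
NIR_LOWER_INSTR_PROGRESS after mutating the instruction in place. A minimal
example pass (the pass name and the fneg rewrite are illustrative only):

#include "nir.h"
#include "nir_builder.h"

static bool
is_fneg(const nir_instr *instr, const void *data)
{
   return instr->type == nir_instr_type_alu &&
          nir_instr_as_alu(instr)->op == nir_op_fneg;
}

static nir_ssa_def *
lower_fneg(nir_builder *b, nir_instr *instr, void *data)
{
   /* The framework has already placed b->cursor after instr. */
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   nir_ssa_def *x = nir_ssa_for_alu_src(b, alu, 0);

   /* Rewrite fneg(x) as 0.0 - x, purely for demonstration. */
   return nir_fsub(b, nir_imm_floatN_t(b, 0.0, x->bit_size), x);
}

bool
example_lower_fneg(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, is_fneg, lower_fneg, NULL);
}

nir_tex_instr_add_src reallocates the texture instruction's source array and
moves the existing sources into it, so the new source's value must already
exist (or be built at a suitable cursor) before the call. A hypothetical
helper that attaches an explicit LOD of zero:

static void
force_lod_zero(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   nir_tex_instr_add_src(tex, nir_tex_src_lod,
                         nir_src_for_ssa(nir_imm_int(b, 0)));
}

For nir_remap_dual_slot_attributes, the formula
new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) shifts each
location up by the number of dual-slot (double-typed) inputs below it: with
only bit 2 set in dual_slot, an input at old location 5 lands at location 6.
nir_get_single_slot_attribs_mask performs the inverse compaction on an
attribute mask.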