X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.c;h=d759dfdce2101df9fe54b38606b453204f29e539;hb=909d1f50f3ffc62d5a23669ad0bc8eedb9416af0;hp=92bbc378ec1181647d6fcf2a744d336659ccd18f;hpb=d800b7daa5440f6b49b5e0ae6e404d240c6a4ddc;p=mesa.git diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 92bbc378ec1..d759dfdce21 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -27,14 +27,21 @@ #include "nir.h" #include "nir_control_flow_private.h" +#include "util/half_float.h" +#include #include +#include +#include "util/u_math.h" + +#include "main/menums.h" /* BITFIELD64_MASK */ nir_shader * nir_shader_create(void *mem_ctx, gl_shader_stage stage, - const nir_shader_compiler_options *options) + const nir_shader_compiler_options *options, + shader_info *si) { - nir_shader *shader = ralloc(mem_ctx, nir_shader); + nir_shader *shader = rzalloc(mem_ctx, nir_shader); exec_list_make_empty(&shader->uniforms); exec_list_make_empty(&shader->inputs); @@ -42,7 +49,13 @@ nir_shader_create(void *mem_ctx, exec_list_make_empty(&shader->shared); shader->options = options; - memset(&shader->info, 0, sizeof(shader->info)); + + if (si) { + assert(si->stage == stage); + shader->info = *si; + } else { + shader->info.stage = stage; + } exec_list_make_empty(&shader->functions); exec_list_make_empty(&shader->registers); @@ -55,8 +68,6 @@ nir_shader_create(void *mem_ctx, shader->num_uniforms = 0; shader->num_shared = 0; - shader->stage = stage; - return shader; } @@ -114,15 +125,11 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) assert(!"invalid mode"); break; - case nir_var_local: + case nir_var_function_temp: assert(!"nir_shader_add_variable cannot be used for local variables"); break; - case nir_var_param: - assert(!"nir_shader_add_variable cannot be used for function parameters"); - break; - - case nir_var_global: + case nir_var_shader_temp: exec_list_push_tail(&shader->globals, &var->node); break; @@ -135,15 +142,20 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) break; case nir_var_uniform: - case nir_var_shader_storage: + case nir_var_mem_ubo: + case nir_var_mem_ssbo: exec_list_push_tail(&shader->uniforms, &var->node); break; - case nir_var_shared: - assert(shader->stage == MESA_SHADER_COMPUTE); + case nir_var_mem_shared: + assert(gl_shader_stage_is_compute(shader->info.stage)); exec_list_push_tail(&shader->shared, &var->node); break; + case nir_var_mem_global: + assert(!"nir_shader_add_variable cannot be used for global memory"); + break; + case nir_var_system_value: exec_list_push_tail(&shader->system_values, &var->node); break; @@ -158,10 +170,13 @@ nir_variable_create(nir_shader *shader, nir_variable_mode mode, var->name = ralloc_strdup(var, name); var->type = type; var->data.mode = mode; + var->data.how_declared = nir_var_declared_normally; - if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || - (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) - var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + if ((mode == nir_var_shader_in && + shader->info.stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && + shader->info.stage != MESA_SHADER_FRAGMENT)) + var->data.interpolation = INTERP_MODE_SMOOTH; if (mode == nir_var_shader_in || mode == nir_var_uniform) var->data.read_only = true; @@ -178,7 +193,7 @@ nir_local_variable_create(nir_function_impl *impl, nir_variable *var = rzalloc(impl->function->shader, nir_variable); var->name = ralloc_strdup(var, name); var->type = 
type; - var->data.mode = nir_var_local; + var->data.mode = nir_var_function_temp; nir_function_impl_add_variable(impl, var); @@ -196,12 +211,15 @@ nir_function_create(nir_shader *shader, const char *name) func->shader = shader; func->num_params = 0; func->params = NULL; - func->return_type = glsl_void_type(); func->impl = NULL; + func->is_entrypoint = false; return func; } +/* NOTE: if the instruction you are copying a src to is already added + * to the IR, use nir_instr_rewrite_src() instead. + */ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) { dest->is_ssa = src->is_ssa; @@ -243,7 +261,7 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, nir_src_copy(&dest->src, &src->src, &instr->instr); dest->abs = src->abs; dest->negate = src->negate; - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) dest->swizzle[i] = src->swizzle[i]; } @@ -277,9 +295,6 @@ nir_function_impl_create_bare(nir_shader *shader) exec_list_make_empty(&impl->body); exec_list_make_empty(&impl->registers); exec_list_make_empty(&impl->locals); - impl->num_params = 0; - impl->params = NULL; - impl->return_var = NULL; impl->reg_alloc = 0; impl->ssa_alloc = 0; impl->valid_metadata = nir_metadata_none; @@ -308,50 +323,28 @@ nir_function_impl_create(nir_function *function) function->impl = impl; impl->function = function; - impl->num_params = function->num_params; - impl->params = ralloc_array(function->shader, - nir_variable *, impl->num_params); - - for (unsigned i = 0; i < impl->num_params; i++) { - impl->params[i] = rzalloc(function->shader, nir_variable); - impl->params[i]->type = function->params[i].type; - impl->params[i]->data.mode = nir_var_param; - impl->params[i]->data.location = i; - } - - if (!glsl_type_is_void(function->return_type)) { - impl->return_var = rzalloc(function->shader, nir_variable); - impl->return_var->type = function->return_type; - impl->return_var->data.mode = nir_var_param; - impl->return_var->data.location = -1; - } else { - impl->return_var = NULL; - } - return impl; } nir_block * nir_block_create(nir_shader *shader) { - nir_block *block = ralloc(shader, nir_block); + nir_block *block = rzalloc(shader, nir_block); cf_init(&block->cf_node, nir_cf_node_block); block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); + block->predecessors = _mesa_pointer_set_create(block); block->imm_dom = NULL; /* XXX maybe it would be worth it to defer allocation? This - * way it doesn't get allocated for shader ref's that never run + * way it doesn't get allocated for shader refs that never run * nir_calc_dominance? For example, state-tracker creates an * initial IR, clones that, runs appropriate lowering pass, passes * to driver which does common lowering/opt, and then stores ref * which is later used to do state specific lowering and futher * opt. Do any of the references not need dominance metadata? 
*/ - block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); + block->dom_frontier = _mesa_pointer_set_create(block); exec_list_make_empty(&block->instr_list); @@ -391,7 +384,7 @@ nir_if_create(nir_shader *shader) nir_loop * nir_loop_create(nir_shader *shader) { - nir_loop *loop = ralloc(shader, nir_loop); + nir_loop *loop = rzalloc(shader, nir_loop); cf_init(&loop->cf_node, nir_cf_node_loop); @@ -436,19 +429,18 @@ alu_src_init(nir_alu_src *src) { src_init(&src->src); src->abs = src->negate = false; - src->swizzle[0] = 0; - src->swizzle[1] = 1; - src->swizzle[2] = 2; - src->swizzle[3] = 3; + for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) + src->swizzle[i] = i; } nir_alu_instr * nir_alu_instr_create(nir_shader *shader, nir_op op) { unsigned num_srcs = nir_op_infos[op].num_inputs; + /* TODO: don't use rzalloc */ nir_alu_instr *instr = - ralloc_size(shader, - sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); + rzalloc_size(shader, + sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); instr_init(&instr->instr, nir_instr_type_alu); instr->op = op; @@ -459,6 +451,27 @@ nir_alu_instr_create(nir_shader *shader, nir_op op) return instr; } +nir_deref_instr * +nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) +{ + nir_deref_instr *instr = + rzalloc_size(shader, sizeof(nir_deref_instr)); + + instr_init(&instr->instr, nir_instr_type_deref); + + instr->deref_type = deref_type; + if (deref_type != nir_deref_type_var) + src_init(&instr->parent); + + if (deref_type == nir_deref_type_array || + deref_type == nir_deref_type_ptr_as_array) + src_init(&instr->arr.index); + + dest_init(&instr->dest); + + return instr; +} + nir_jump_instr * nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { @@ -472,7 +485,7 @@ nir_load_const_instr * nir_load_const_instr_create(nir_shader *shader, unsigned num_components, unsigned bit_size) { - nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); + nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL); @@ -484,8 +497,9 @@ nir_intrinsic_instr * nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) { unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; + /* TODO: don't use rzalloc */ nir_intrinsic_instr *instr = - ralloc_size(shader, + rzalloc_size(shader, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); instr_init(&instr->instr, nir_instr_type_intrinsic); @@ -503,13 +517,16 @@ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) nir_call_instr * nir_call_instr_create(nir_shader *shader, nir_function *callee) { - nir_call_instr *instr = ralloc(shader, nir_call_instr); - instr_init(&instr->instr, nir_instr_type_call); + const unsigned num_params = callee->num_params; + nir_call_instr *instr = + rzalloc_size(shader, sizeof(*instr) + + num_params * sizeof(instr->params[0])); + instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; - instr->num_params = callee->num_params; - instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); - instr->return_deref = NULL; + instr->num_params = num_params; + for (unsigned i = 0; i < num_params; i++) + src_init(&instr->params[i]); return instr; } @@ -529,13 +546,49 @@ nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) instr->texture_index = 0; instr->texture_array_size = 0; - instr->texture = NULL; instr->sampler_index = 0; - 
instr->sampler = NULL; return instr; } +void +nir_tex_instr_add_src(nir_tex_instr *tex, + nir_tex_src_type src_type, + nir_src src) +{ + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, + &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + tex->src[tex->num_srcs].src_type = src_type; + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src); + tex->num_srcs++; +} + +void +nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx) +{ + assert(src_idx < tex->num_srcs); + + /* First rewrite the source to NIR_SRC_INIT */ + nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT); + + /* Now, move all of the other sources down */ + for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) { + tex->src[i-1].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); + } + tex->num_srcs--; +} + nir_phi_instr * nir_phi_instr_create(nir_shader *shader) { @@ -571,155 +624,71 @@ nir_ssa_undef_instr_create(nir_shader *shader, return instr; } -nir_deref_var * -nir_deref_var_create(void *mem_ctx, nir_variable *var) -{ - nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); - deref->deref.deref_type = nir_deref_type_var; - deref->deref.child = NULL; - deref->deref.type = var->type; - deref->var = var; - return deref; -} - -nir_deref_array * -nir_deref_array_create(void *mem_ctx) -{ - nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); - deref->deref.deref_type = nir_deref_type_array; - deref->deref.child = NULL; - deref->deref_array_type = nir_deref_array_type_direct; - src_init(&deref->indirect); - deref->base_offset = 0; - return deref; -} - -nir_deref_struct * -nir_deref_struct_create(void *mem_ctx, unsigned field_index) -{ - nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); - deref->deref.deref_type = nir_deref_type_struct; - deref->deref.child = NULL; - deref->index = field_index; - return deref; -} - -static nir_deref_var * -copy_deref_var(void *mem_ctx, nir_deref_var *deref) +static nir_const_value +const_value_float(double d, unsigned bit_size) { - nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, deref->deref.child); - return ret; -} - -static nir_deref_array * -copy_deref_array(void *mem_ctx, nir_deref_array *deref) -{ - nir_deref_array *ret = nir_deref_array_create(mem_ctx); - ret->base_offset = deref->base_offset; - ret->deref_array_type = deref->deref_array_type; - if (deref->deref_array_type == nir_deref_array_type_indirect) { - nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); + nir_const_value v; + switch (bit_size) { + case 16: v.u16[0] = _mesa_float_to_half(d); break; + case 32: v.f32[0] = d; break; + case 64: v.f64[0] = d; break; + default: + unreachable("Invalid bit size"); } - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, deref->deref.child); - return ret; + return v; } -static nir_deref_struct * -copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) +static nir_const_value +const_value_int(int64_t i, unsigned bit_size) { - nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, 
deref->deref.child); - return ret; -} - -nir_deref * -nir_copy_deref(void *mem_ctx, nir_deref *deref) -{ - switch (deref->deref_type) { - case nir_deref_type_var: - return ©_deref_var(mem_ctx, nir_deref_as_var(deref))->deref; - case nir_deref_type_array: - return ©_deref_array(mem_ctx, nir_deref_as_array(deref))->deref; - case nir_deref_type_struct: - return ©_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref; + nir_const_value v; + switch (bit_size) { + case 1: v.b[0] = i & 1; break; + case 8: v.i8[0] = i; break; + case 16: v.i16[0] = i; break; + case 32: v.i32[0] = i; break; + case 64: v.i64[0] = i; break; default: - unreachable("Invalid dereference type"); + unreachable("Invalid bit size"); } - - return NULL; -} - -/* Returns a load_const instruction that represents the constant - * initializer for the given deref chain. The caller is responsible for - * ensuring that there actually is a constant initializer. - */ -nir_load_const_instr * -nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) -{ - nir_constant *constant = deref->var->constant_initializer; - assert(constant); - - const nir_deref *tail = &deref->deref; - unsigned matrix_offset = 0; - while (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *arr = nir_deref_as_array(tail->child); - assert(arr->deref_array_type == nir_deref_array_type_direct); - if (glsl_type_is_matrix(tail->type)) { - assert(arr->deref.child == NULL); - matrix_offset = arr->base_offset; - } else { - constant = constant->elements[arr->base_offset]; - } - break; - } - - case nir_deref_type_struct: { - constant = constant->elements[nir_deref_as_struct(tail->child)->index]; - break; - } - - default: - unreachable("Invalid deref child type"); - } - - tail = tail->child; - } - - unsigned bit_size = glsl_get_bit_size(glsl_get_base_type(tail->type)); - nir_load_const_instr *load = - nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type), - bit_size); - - matrix_offset *= load->def.num_components; - for (unsigned i = 0; i < load->def.num_components; i++) { - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - load->value.u32[i] = constant->value.u[matrix_offset + i]; - break; - case GLSL_TYPE_DOUBLE: - load->value.f64[i] = constant->value.d[matrix_offset + i]; - break; - case GLSL_TYPE_BOOL: - load->value.u32[i] = constant->value.b[matrix_offset + i] ? 
- NIR_TRUE : NIR_FALSE; - break; - default: - unreachable("Invalid immediate type"); - } + return v; +} + +nir_const_value +nir_alu_binop_identity(nir_op binop, unsigned bit_size) +{ + const int64_t max_int = (1ull << (bit_size - 1)) - 1; + const int64_t min_int = -max_int - 1; + switch (binop) { + case nir_op_iadd: + return const_value_int(0, bit_size); + case nir_op_fadd: + return const_value_float(0, bit_size); + case nir_op_imul: + return const_value_int(1, bit_size); + case nir_op_fmul: + return const_value_float(1, bit_size); + case nir_op_imin: + return const_value_int(max_int, bit_size); + case nir_op_umin: + return const_value_int(~0ull, bit_size); + case nir_op_fmin: + return const_value_float(INFINITY, bit_size); + case nir_op_imax: + return const_value_int(min_int, bit_size); + case nir_op_umax: + return const_value_int(0, bit_size); + case nir_op_fmax: + return const_value_float(-INFINITY, bit_size); + case nir_op_iand: + return const_value_int(~0ull, bit_size); + case nir_op_ior: + return const_value_int(0, bit_size); + case nir_op_ixor: + return const_value_int(0, bit_size); + default: + unreachable("Invalid reduction operation"); } - - return load; } nir_function_impl * @@ -893,6 +862,8 @@ src_is_valid(const nir_src *src) static bool remove_use_cb(nir_src *src, void *state) { + (void) state; + if (src_is_valid(src)) list_del(&src->use_link); @@ -902,6 +873,8 @@ remove_use_cb(nir_src *src, void *state) static bool remove_def_cb(nir_dest *dest, void *state) { + (void) state; + if (!dest->is_ssa) list_del(&dest->reg.def_link); @@ -915,7 +888,7 @@ remove_defs_uses(nir_instr *instr) nir_foreach_src(instr, remove_use_cb, instr); } -void nir_instr_remove(nir_instr *instr) +void nir_instr_remove_v(nir_instr *instr) { remove_defs_uses(instr); exec_node_remove(&instr->node); @@ -954,6 +927,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) return cb(&instr->dest.dest, state); } +static bool +visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest, state); +} + static bool visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, void *state) @@ -981,7 +960,7 @@ static bool visit_parallel_copy_dest(nir_parallel_copy_instr *instr, nir_foreach_dest_cb cb, void *state) { - nir_foreach_parallel_copy_entry(instr, entry) { + nir_foreach_parallel_copy_entry(entry, instr) { if (!cb(&entry->dest, state)) return false; } @@ -995,6 +974,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) switch (instr->type) { case nir_instr_type_alu: return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_deref: + return visit_deref_dest(nir_instr_as_deref(instr), cb, state); case nir_instr_type_intrinsic: return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); case nir_instr_type_tex: @@ -1040,6 +1021,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) { switch (instr->type) { case nir_instr_type_alu: + case nir_instr_type_deref: case nir_instr_type_tex: case nir_instr_type_intrinsic: case nir_instr_type_phi: @@ -1071,36 +1053,29 @@ visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) } static bool -visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, - void *state) +visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) { - if (deref->deref_array_type == nir_deref_array_type_indirect) - return visit_src(&deref->indirect, cb, state); + for (unsigned i = 0; i < 
nir_op_infos[instr->op].num_inputs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + return true; } static bool -visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) +visit_deref_instr_src(nir_deref_instr *instr, + nir_foreach_src_cb cb, void *state) { - nir_deref *cur = &deref->deref; - while (cur != NULL) { - if (cur->deref_type == nir_deref_type_array) { - if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) - return false; - } - - cur = cur->child; + if (instr->deref_type != nir_deref_type_var) { + if (!visit_src(&instr->parent, cb, state)) + return false; } - return true; -} - -static bool -visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) -{ - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - if (!visit_src(&instr->src[i].src, cb, state)) + if (instr->deref_type == nir_deref_type_array || + instr->deref_type == nir_deref_type_ptr_as_array) { + if (!visit_src(&instr->arr.index, cb, state)) return false; + } return true; } @@ -1113,16 +1088,6 @@ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) return false; } - if (instr->texture != NULL) { - if (!visit_deref_src(instr->texture, cb, state)) - return false; - } - - if (instr->sampler != NULL) { - if (!visit_deref_src(instr->sampler, cb, state)) - return false; - } - return true; } @@ -1136,33 +1101,24 @@ visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, return false; } - unsigned num_vars = - nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - if (!visit_deref_src(instr->variables[i], cb, state)) - return false; - } - return true; } static bool visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) { - return true; -} + for (unsigned i = 0; i < instr->num_params; i++) { + if (!visit_src(&instr->params[i], cb, state)) + return false; + } -static bool -visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb, - void *state) -{ return true; } static bool visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state) { - nir_foreach_phi_src(instr, src) { + nir_foreach_phi_src(src, instr) { if (!visit_src(&src->src, cb, state)) return false; } @@ -1174,7 +1130,7 @@ static bool visit_parallel_copy_src(nir_parallel_copy_instr *instr, nir_foreach_src_cb cb, void *state) { - nir_foreach_parallel_copy_entry(instr, entry) { + nir_foreach_parallel_copy_entry(entry, instr) { if (!visit_src(&entry->src, cb, state)) return false; } @@ -1206,6 +1162,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) return false; break; + case nir_instr_type_deref: + if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state)) + return false; + break; case nir_instr_type_intrinsic: if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) return false; @@ -1219,8 +1179,7 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return false; break; case nir_instr_type_load_const: - if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state)) - return false; + /* Constant load instructions have no regular sources */ break; case nir_instr_type_phi: if (!visit_phi_src(nir_instr_as_phi(instr), cb, state)) @@ -1246,6 +1205,98 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); } +int64_t +nir_src_comp_as_int(nir_src src, unsigned comp) +{ + 
assert(nir_src_is_const(src)); + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + assert(comp < load->def.num_components); + switch (load->def.bit_size) { + /* int1_t uses 0/-1 convention */ + case 1: return -(int)load->value.b[comp]; + case 8: return load->value.i8[comp]; + case 16: return load->value.i16[comp]; + case 32: return load->value.i32[comp]; + case 64: return load->value.i64[comp]; + default: + unreachable("Invalid bit size"); + } +} + +uint64_t +nir_src_comp_as_uint(nir_src src, unsigned comp) +{ + assert(nir_src_is_const(src)); + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + assert(comp < load->def.num_components); + switch (load->def.bit_size) { + case 1: return load->value.b[comp]; + case 8: return load->value.u8[comp]; + case 16: return load->value.u16[comp]; + case 32: return load->value.u32[comp]; + case 64: return load->value.u64[comp]; + default: + unreachable("Invalid bit size"); + } +} + +bool +nir_src_comp_as_bool(nir_src src, unsigned comp) +{ + int64_t i = nir_src_comp_as_int(src, comp); + + /* Booleans of any size use 0/-1 convention */ + assert(i == 0 || i == -1); + + return i; +} + +double +nir_src_comp_as_float(nir_src src, unsigned comp) +{ + assert(nir_src_is_const(src)); + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + assert(comp < load->def.num_components); + switch (load->def.bit_size) { + case 16: return _mesa_half_to_float(load->value.u16[comp]); + case 32: return load->value.f32[comp]; + case 64: return load->value.f64[comp]; + default: + unreachable("Invalid bit size"); + } +} + +int64_t +nir_src_as_int(nir_src src) +{ + assert(nir_src_num_components(src) == 1); + return nir_src_comp_as_int(src, 0); +} + +uint64_t +nir_src_as_uint(nir_src src) +{ + assert(nir_src_num_components(src) == 1); + return nir_src_comp_as_uint(src, 0); +} + +bool +nir_src_as_bool(nir_src src) +{ + assert(nir_src_num_components(src) == 1); + return nir_src_comp_as_bool(src, 0); +} + +double +nir_src_as_float(nir_src src) +{ + assert(nir_src_num_components(src) == 1); + return nir_src_comp_as_float(src, 0); +} + nir_const_value * nir_src_as_const_value(nir_src src) { @@ -1419,10 +1470,10 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) { assert(!new_src.is_ssa || def != new_src.ssa); - nir_foreach_use_safe(def, use_src) + nir_foreach_use_safe(use_src, def) nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); - nir_foreach_if_use_safe(def, use_src) + nir_foreach_if_use_safe(use_src, def) nir_if_rewrite_condition(use_src->parent_if, new_src); } @@ -1460,9 +1511,10 @@ void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_instr *after_me) { - assert(!new_src.is_ssa || def != new_src.ssa); + if (new_src.is_ssa && def == new_src.ssa) + return; - nir_foreach_use_safe(def, use_src) { + nir_foreach_use_safe(use_src, def) { assert(use_src->parent_instr != def->parent_instr); /* Since def already dominates all of its uses, the only way a use can * not be dominated by after_me is if it is between def and after_me in @@ -1472,138 +1524,169 @@ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); } - nir_foreach_if_use_safe(def, use_src) + nir_foreach_if_use_safe(use_src, def) nir_if_rewrite_condition(use_src->parent_if, new_src); } -uint8_t -nir_ssa_def_components_read(nir_ssa_def *def) +nir_component_mask_t +nir_ssa_def_components_read(const nir_ssa_def *def) { - 
uint8_t read_mask = 0; - nir_foreach_use(def, use) { + nir_component_mask_t read_mask = 0; + nir_foreach_use(use, def) { if (use->parent_instr->type == nir_instr_type_alu) { nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr); nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src); int src_idx = alu_src - &alu->src[0]; assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs); - - for (unsigned c = 0; c < 4; c++) { - if (!nir_alu_instr_channel_used(alu, src_idx, c)) - continue; - - read_mask |= (1 << alu_src->swizzle[c]); - } + read_mask |= nir_alu_instr_src_read_mask(alu, src_idx); } else { return (1 << def->num_components) - 1; } } + if (!list_empty(&def->if_uses)) + read_mask |= 1; + return read_mask; } -static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - bool reverse, void *state); - -static inline bool -foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) +nir_block * +nir_block_cf_tree_next(nir_block *block) { - if (reverse) { - foreach_list_typed_reverse_safe(nir_cf_node, node, node, - &if_stmt->else_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } + if (block == NULL) { + /* nir_foreach_block_safe() will call this function on a NULL block + * after the last iteration, but it won't use the result so just return + * NULL here. + */ + return NULL; + } - foreach_list_typed_reverse_safe(nir_cf_node, node, node, - &if_stmt->then_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } else { - foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } + nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node); + if (cf_next) + return nir_cf_node_cf_tree_first(cf_next); - foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } + nir_cf_node *parent = block->cf_node.parent; + + switch (parent->type) { + case nir_cf_node_if: { + /* Are we at the end of the if? Go to the beginning of the else */ + nir_if *if_stmt = nir_cf_node_as_if(parent); + if (block == nir_if_last_then_block(if_stmt)) + return nir_if_first_else_block(if_stmt); + + assert(block == nir_if_last_else_block(if_stmt)); + /* fall through */ } - return true; + case nir_cf_node_loop: + return nir_cf_node_as_block(nir_cf_node_next(parent)); + + case nir_cf_node_function: + return NULL; + + default: + unreachable("unknown cf node type"); + } } -static inline bool -foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) +nir_block * +nir_block_cf_tree_prev(nir_block *block) { - if (reverse) { - foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } else { - foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } + if (block == NULL) { + /* do this for consistency with nir_block_cf_tree_next() */ + return NULL; } - return true; -} + nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node); + if (cf_prev) + return nir_cf_node_cf_tree_last(cf_prev); + + nir_cf_node *parent = block->cf_node.parent; + + switch (parent->type) { + case nir_cf_node_if: { + /* Are we at the beginning of the else? 
Go to the end of the if */ + nir_if *if_stmt = nir_cf_node_as_if(parent); + if (block == nir_if_first_else_block(if_stmt)) + return nir_if_last_then_block(if_stmt); + + assert(block == nir_if_first_then_block(if_stmt)); + /* fall through */ + } -static bool -foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - bool reverse, void *state) -{ - switch (node->type) { - case nir_cf_node_block: - return cb(nir_cf_node_as_block(node), state); - case nir_cf_node_if: - return foreach_if(nir_cf_node_as_if(node), cb, reverse, state); case nir_cf_node_loop: - return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state); - break; + return nir_cf_node_as_block(nir_cf_node_prev(parent)); + + case nir_cf_node_function: + return NULL; default: - unreachable("Invalid CFG node type"); - break; + unreachable("unknown cf node type"); } - - return false; } -bool -nir_foreach_block_in_cf_node_call(nir_cf_node *node, nir_foreach_block_cb cb, - void *state) +nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node) { - return foreach_cf_node(node, cb, false, state); -} + switch (node->type) { + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + return nir_start_block(impl); + } -bool -nir_foreach_block_call(nir_function_impl *impl, nir_foreach_block_cb cb, void *state) -{ - foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { - if (!foreach_cf_node(node, cb, false, state)) - return false; + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + return nir_if_first_then_block(if_stmt); + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + return nir_loop_first_block(loop); } - return cb(impl->end_block, state); + case nir_cf_node_block: { + return nir_cf_node_as_block(node); + } + + default: + unreachable("unknown node type"); + } } -bool -nir_foreach_block_reverse_call(nir_function_impl *impl, nir_foreach_block_cb cb, - void *state) +nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node) { - if (!cb(impl->end_block, state)) - return false; + switch (node->type) { + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + return nir_impl_last_block(impl); + } - foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) { - if (!foreach_cf_node(node, cb, true, state)) - return false; + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + return nir_if_last_else_block(if_stmt); } - return true; + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + return nir_loop_last_block(loop); + } + + case nir_cf_node_block: { + return nir_cf_node_as_block(node); + } + + default: + unreachable("unknown node type"); + } +} + +nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_block_cf_tree_next(nir_cf_node_as_block(node)); + else if (node->type == nir_cf_node_function) + return NULL; + else + return nir_cf_node_as_block(nir_cf_node_next(node)); } nir_if * @@ -1639,13 +1722,6 @@ nir_block_get_following_loop(nir_block *block) return nir_cf_node_as_loop(next_node); } -static bool -index_block(nir_block *block, void *state) -{ - unsigned *index = state; - block->index = (*index)++; - return true; -} void nir_index_blocks(nir_function_impl *impl) @@ -1655,9 +1731,14 @@ nir_index_blocks(nir_function_impl *impl) if (impl->valid_metadata & nir_metadata_block_index) return; - nir_foreach_block_call(impl, index_block, &index); + nir_foreach_block(block, impl) { + block->index = index++; + } - 
impl->num_blocks = index; + /* The end_block isn't really part of the program, which is why its index + * is >= num_blocks. + */ + impl->num_blocks = impl->end_block->index = index; } static bool @@ -1669,15 +1750,6 @@ index_ssa_def_cb(nir_ssa_def *def, void *state) return true; } -static bool -index_ssa_block(nir_block *block, void *state) -{ - nir_foreach_instr(block, instr) - nir_foreach_ssa_def(instr, index_ssa_def_cb, state); - - return true; -} - /** * The indices are applied top-to-bottom which has the very nice property * that, if A dominates B, then A->index <= B->index. @@ -1686,18 +1758,13 @@ void nir_index_ssa_defs(nir_function_impl *impl) { unsigned index = 0; - nir_foreach_block_call(impl, index_ssa_block, &index); - impl->ssa_alloc = index; -} -static bool -index_instrs_block(nir_block *block, void *state) -{ - unsigned *index = state; - nir_foreach_instr(block, instr) - instr->index = (*index)++; + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) + nir_foreach_ssa_def(instr, index_ssa_def_cb, &index); + } - return true; + impl->ssa_alloc = index; } /** @@ -1708,7 +1775,12 @@ unsigned nir_index_instrs(nir_function_impl *impl) { unsigned index = 0; - nir_foreach_block_call(impl, index_instrs_block, &index); + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) + instr->index = index++; + } + return index; } @@ -1726,10 +1798,16 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_base_instance; case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: return nir_intrinsic_load_vertex_id_zero_base; + case SYSTEM_VALUE_IS_INDEXED_DRAW: + return nir_intrinsic_load_is_indexed_draw; + case SYSTEM_VALUE_FIRST_VERTEX: + return nir_intrinsic_load_first_vertex; case SYSTEM_VALUE_BASE_VERTEX: return nir_intrinsic_load_base_vertex; case SYSTEM_VALUE_INVOCATION_ID: return nir_intrinsic_load_invocation_id; + case SYSTEM_VALUE_FRAG_COORD: + return nir_intrinsic_load_frag_coord; case SYSTEM_VALUE_FRONT_FACE: return nir_intrinsic_load_front_face; case SYSTEM_VALUE_SAMPLE_ID: @@ -1740,6 +1818,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_sample_mask_in; case SYSTEM_VALUE_LOCAL_INVOCATION_ID: return nir_intrinsic_load_local_invocation_id; + case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: + return nir_intrinsic_load_local_invocation_index; case SYSTEM_VALUE_WORK_GROUP_ID: return nir_intrinsic_load_work_group_id; case SYSTEM_VALUE_NUM_WORK_GROUPS: @@ -1756,6 +1836,32 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_patch_vertices_in; case SYSTEM_VALUE_HELPER_INVOCATION: return nir_intrinsic_load_helper_invocation; + case SYSTEM_VALUE_VIEW_INDEX: + return nir_intrinsic_load_view_index; + case SYSTEM_VALUE_SUBGROUP_SIZE: + return nir_intrinsic_load_subgroup_size; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return nir_intrinsic_load_subgroup_invocation; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + return nir_intrinsic_load_subgroup_eq_mask; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + return nir_intrinsic_load_subgroup_ge_mask; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + return nir_intrinsic_load_subgroup_gt_mask; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + return nir_intrinsic_load_subgroup_le_mask; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: + return nir_intrinsic_load_subgroup_lt_mask; + case SYSTEM_VALUE_NUM_SUBGROUPS: + return nir_intrinsic_load_num_subgroups; + case SYSTEM_VALUE_SUBGROUP_ID: + return nir_intrinsic_load_subgroup_id; + case SYSTEM_VALUE_LOCAL_GROUP_SIZE: + return 
nir_intrinsic_load_local_group_size; + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: + return nir_intrinsic_load_global_invocation_id; + case SYSTEM_VALUE_WORK_DIM: + return nir_intrinsic_load_work_dim; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1775,10 +1881,16 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_BASE_INSTANCE; case nir_intrinsic_load_vertex_id_zero_base: return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + case nir_intrinsic_load_first_vertex: + return SYSTEM_VALUE_FIRST_VERTEX; + case nir_intrinsic_load_is_indexed_draw: + return SYSTEM_VALUE_IS_INDEXED_DRAW; case nir_intrinsic_load_base_vertex: return SYSTEM_VALUE_BASE_VERTEX; case nir_intrinsic_load_invocation_id: return SYSTEM_VALUE_INVOCATION_ID; + case nir_intrinsic_load_frag_coord: + return SYSTEM_VALUE_FRAG_COORD; case nir_intrinsic_load_front_face: return SYSTEM_VALUE_FRONT_FACE; case nir_intrinsic_load_sample_id: @@ -1789,6 +1901,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_SAMPLE_MASK_IN; case nir_intrinsic_load_local_invocation_id: return SYSTEM_VALUE_LOCAL_INVOCATION_ID; + case nir_intrinsic_load_local_invocation_index: + return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; case nir_intrinsic_load_num_work_groups: return SYSTEM_VALUE_NUM_WORK_GROUPS; case nir_intrinsic_load_work_group_id: @@ -1805,7 +1919,75 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_VERTICES_IN; case nir_intrinsic_load_helper_invocation: return SYSTEM_VALUE_HELPER_INVOCATION; + case nir_intrinsic_load_view_index: + return SYSTEM_VALUE_VIEW_INDEX; + case nir_intrinsic_load_subgroup_size: + return SYSTEM_VALUE_SUBGROUP_SIZE; + case nir_intrinsic_load_subgroup_invocation: + return SYSTEM_VALUE_SUBGROUP_INVOCATION; + case nir_intrinsic_load_subgroup_eq_mask: + return SYSTEM_VALUE_SUBGROUP_EQ_MASK; + case nir_intrinsic_load_subgroup_ge_mask: + return SYSTEM_VALUE_SUBGROUP_GE_MASK; + case nir_intrinsic_load_subgroup_gt_mask: + return SYSTEM_VALUE_SUBGROUP_GT_MASK; + case nir_intrinsic_load_subgroup_le_mask: + return SYSTEM_VALUE_SUBGROUP_LE_MASK; + case nir_intrinsic_load_subgroup_lt_mask: + return SYSTEM_VALUE_SUBGROUP_LT_MASK; + case nir_intrinsic_load_num_subgroups: + return SYSTEM_VALUE_NUM_SUBGROUPS; + case nir_intrinsic_load_subgroup_id: + return SYSTEM_VALUE_SUBGROUP_ID; + case nir_intrinsic_load_local_group_size: + return SYSTEM_VALUE_LOCAL_GROUP_SIZE; + case nir_intrinsic_load_global_invocation_id: + return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; default: unreachable("intrinsic doesn't produce a system value"); } } + +/* OpenGL utility method that remaps the location attributes if they are + * doubles. Not needed for vulkan due the differences on the input location + * count for doubles on vulkan vs OpenGL + * + * The bitfield returned in dual_slot is one bit for each double input slot in + * the original OpenGL single-slot input numbering. 
The mapping from old + * locations to new locations is as follows: + * + * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) + */ +void +nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot) +{ + assert(shader->info.stage == MESA_SHADER_VERTEX); + + *dual_slot = 0; + nir_foreach_variable(var, &shader->inputs) { + if (glsl_type_is_dual_slot(glsl_without_array(var->type))) { + unsigned slots = glsl_count_attribute_slots(var->type, true); + *dual_slot |= BITFIELD64_MASK(slots) << var->data.location; + } + } + + nir_foreach_variable(var, &shader->inputs) { + var->data.location += + util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location)); + } +} + +/* Returns an attribute mask that has been re-compacted using the given + * dual_slot mask. + */ +uint64_t +nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot) +{ + while (dual_slot) { + unsigned loc = u_bit_scan64(&dual_slot); + /* mask of all bits up to and including loc */ + uint64_t mask = BITFIELD64_MASK(loc + 1); + attribs = (attribs & mask) | ((attribs & ~mask) >> 1); + } + return attribs; +}
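
Usage notes on the interfaces changed above (editorial sketches only, not part of the patch):

The constant-source helpers added here (nir_src_is_const(), nir_src_as_int/uint/bool/float() and their per-component variants) replace open-coded inspection of load_const parent instructions. A minimal pass-side sketch; the choice of load_ubo and of src[1] as its byte-offset source are assumptions about the NIR of this era, used for illustration only:

/* Sketch only: is this load_ubo's offset a small compile-time constant? */
static bool
ubo_offset_is_small_const(const nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic != nir_intrinsic_load_ubo)
      return false;

   nir_src offset = intrin->src[1];   /* assumed: src[1] is the byte offset */

   /* Only an SSA source whose parent instruction is a load_const counts. */
   if (!nir_src_is_const(offset))
      return false;

   /* nir_src_as_uint() asserts a scalar constant source and returns its
    * value zero-extended to 64 bits regardless of bit size.
    */
   return nir_src_as_uint(offset) < 256;
}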
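
nir_alu_binop_identity() returns the identity element of a reduction operator as a nir_const_value; a typical consumer materializes it as a scalar load_const, for example to pad inactive channels when lowering subgroup reductions. A sketch assuming a nir_builder *b from nir_builder.h (that helper is not part of this patch):

static nir_ssa_def *
build_reduction_identity(nir_builder *b, nir_op op, unsigned bit_size)
{
   nir_const_value ident = nir_alu_binop_identity(op, bit_size);

   /* Build a one-component load_const carrying the identity value. */
   nir_load_const_instr *load =
      nir_load_const_instr_create(b->shader, 1, bit_size);
   load->value = ident;
   nir_builder_instr_insert(b, &load->instr);

   return &load->def;
}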
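
nir_tex_instr_add_src() and nir_tex_instr_remove_src() grow and shrink the source array of an existing texture instruction in place, keeping the use lists consistent. A sketch of the source bookkeeping for a bias-to-explicit-LOD style rewrite; the instruction is assumed to already be in the IR, "lod" is assumed to be an existing SSA def, and folding the bias value into the LOD (which a real lowering would also do) is omitted:

static void
replace_bias_with_lod(nir_tex_instr *tex, nir_ssa_def *lod)
{
   /* Remove the bias source, searching by source type. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_bias) {
         nir_tex_instr_remove_src(tex, i);
         break;
      }
   }

   /* Append an explicit LOD source instead. */
   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}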
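
The callback-based walkers removed above (nir_foreach_block_call() and friends) are superseded by the iterator style built on nir_block_cf_tree_next()/nir_block_cf_tree_prev(); the nir_foreach_block() macro used by the new nir_index_blocks() and nir_index_instrs() expands to essentially the loop below:

static unsigned
count_body_blocks(nir_function_impl *impl)
{
   unsigned count = 0;

   /* Forward walk over every block in the impl's body; the end block is
    * not part of the body and is not visited.
    */
   for (nir_block *block = nir_start_block(impl); block != NULL;
        block = nir_block_cf_tree_next(block))
      count++;

   return count;
}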
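
As a worked instance of the remapping formula documented in nir_remap_dual_slot_attributes(), new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)): with dual_slot = 0x2 (a dual-slot input recorded at original location 1), an input at original location 3 moves to 3 + util_bitcount(0x2 & 0x7) = 4, while inputs at original locations 0 and 1 keep their locations because no lower dual-slot bits precede them.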