X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fcompiler%2Fglsl%2Fglsl_to_nir.cpp;h=2dc0df32a93ddb183652d23d3c855aba40dd335c;hp=8a19399d8907f8036365f19486381782a87e90db;hb=5e922fbc160bcda9b38ccf5704cbd7276a748094;hpb=d3b47e073e002a01261dacf11e04e37b812736e6 diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 8a19399d890..2dc0df32a93 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -25,14 +25,20 @@ * */ +#include "float64_glsl.h" #include "glsl_to_nir.h" #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" #include "ir.h" +#include "ir_optimization.h" +#include "program.h" #include "compiler/nir/nir_control_flow.h" #include "compiler/nir/nir_builder.h" -#include "main/imports.h" +#include "compiler/nir/nir_builtin_builder.h" +#include "compiler/nir/nir_deref.h" +#include "main/errors.h" #include "main/mtypes.h" +#include "main/shaderobj.h" #include "util/u_math.h" /* @@ -48,7 +54,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader); + nir_visitor(gl_context *ctx, nir_shader *shader); ~nir_visitor(); virtual void visit(ir_variable *); @@ -57,6 +63,7 @@ public: virtual void visit(ir_loop *); virtual void visit(ir_if *); virtual void visit(ir_discard *); + virtual void visit(ir_demote *); virtual void visit(ir_loop_jump *); virtual void visit(ir_return *); virtual void visit(ir_call *); @@ -85,7 +92,7 @@ private: nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3); - bool supports_ints; + bool supports_std430; nir_shader *shader; nir_function_impl *impl; @@ -129,31 +136,109 @@ private: nir_visitor *visitor; }; +/* glsl_to_nir can only handle converting certain function paramaters + * to NIR. This visitor checks for parameters it can't currently handle. + */ +class ir_function_param_visitor : public ir_hierarchical_visitor +{ +public: + ir_function_param_visitor() + : unsupported(false) + { + } + + virtual ir_visitor_status visit_enter(ir_function_signature *ir) + { + + if (ir->is_intrinsic()) + return visit_continue; + + foreach_in_list(ir_variable, param, &ir->parameters) { + if (!param->type->is_vector() || !param->type->is_scalar()) { + unsupported = true; + return visit_stop; + } + + if (param->data.mode == ir_var_function_inout) { + unsupported = true; + return visit_stop; + } + } + + if (!glsl_type_is_vector_or_scalar(ir->return_type) && + !ir->return_type->is_void()) { + unsupported = true; + return visit_stop; + } + + return visit_continue; + } + + bool unsupported; +}; + } /* end of anonymous namespace */ + +static bool +has_unsupported_function_param(exec_list *ir) +{ + ir_function_param_visitor visitor; + visit_list_elements(&visitor, ir); + return visitor.unsupported; +} + nir_shader * -glsl_to_nir(const struct gl_shader_program *shader_prog, +glsl_to_nir(struct gl_context *ctx, + const struct gl_shader_program *shader_prog, gl_shader_stage stage, const nir_shader_compiler_options *options) { struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage]; + const struct gl_shader_compiler_options *gl_options = + &ctx->Const.ShaderCompilerOptions[stage]; + + /* glsl_to_nir can only handle converting certain function paramaters + * to NIR. If we find something we can't handle then we get the GLSL IR + * opts to remove it before we continue on. + * + * TODO: add missing glsl ir to nir support and remove this loop. 
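+    *
+    * A rough illustration (hypothetical GLSL, not taken from this
+    * change): a signature such as
+    *
+    *    struct Light { vec4 pos; vec4 color; };
+    *    void shade(inout Light l) { ... }
+    *
+    * is flagged, both because the struct parameter is not a vector or
+    * scalar and because it is declared inout. do_common_optimization()
+    * runs function inlining among its passes, so the loop below
+    * converges once every such function has been inlined away.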
+ */ + while (has_unsupported_function_param(sh->ir)) { + do_common_optimization(sh->ir, true, true, gl_options, + ctx->Const.NativeIntegers); + } + nir_shader *shader = nir_shader_create(NULL, stage, options, &sh->Program->info); - nir_visitor v1(shader); + nir_visitor v1(ctx, shader); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); - nir_lower_constant_initializers(shader, (nir_variable_mode)~0); + nir_validate_shader(shader, "after glsl to nir, before function inline"); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + nir_lower_variable_initializers(shader, (nir_variable_mode)~0); + nir_lower_returns(shader); + nir_inline_functions(shader); + nir_opt_deref(shader); - /* Remap the locations to slots so those requiring two slots will occupy - * two locations. For instance, if we have in the IR code a dvec3 attr0 in - * location 0 and vec4 attr1 in location 1, in NIR attr0 will use - * locations/slots 0 and 1, and attr1 will use location/slot 2 */ - if (shader->info.stage == MESA_SHADER_VERTEX) - nir_remap_dual_slot_attributes(shader, &sh->Program->DualSlotInputs); + nir_validate_shader(shader, "after function inlining and return lowering"); + + /* Now that we have inlined everything remove all of the functions except + * main(). + */ + foreach_list_typed_safe(nir_function, function, node, &(shader)->functions){ + if (strcmp("main", function->name) != 0) { + exec_node_remove(&function->node); + } + } shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) @@ -168,20 +253,25 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader->info.has_transform_feedback_varyings |= shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0; + if (shader->info.stage == MESA_SHADER_FRAGMENT) { + shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer; + shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left; + } + return shader; } -nir_visitor::nir_visitor(nir_shader *shader) +nir_visitor::nir_visitor(gl_context *ctx, nir_shader *shader) { - this->supports_ints = shader->options->native_integers; + this->supports_std430 = ctx->Const.UseSTD430AsDefaultPacking; this->shader = shader; this->is_global = true; - this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + this->var_table = _mesa_pointer_hash_table_create(NULL); + this->overload_table = _mesa_pointer_hash_table_create(NULL); this->result = NULL; this->impl = NULL; + this->deref = NULL; + this->sig = NULL; memset(&this->b, 0, sizeof(this->b)); } @@ -217,11 +307,16 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - if (supports_ints) - ret->values[0].u32[r] = ir->value.u[r]; - else - ret->values[0].f32[r] = ir->value.u[r]; + ret->values[r].u32 = ir->value.u[r]; + + break; + + case GLSL_TYPE_UINT16: + /* Only float base types can be matrices. 
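+       * GLSL only defines matrix types with float and double components
+       * (there is no imat/umat), so an integer, bool or 16-bit constant
+       * reaching this point is always a single column.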
*/ + assert(cols == 1); + for (unsigned r = 0; r < rows; r++) + ret->values[r].u16 = ir->value.u16[r]; break; case GLSL_TYPE_INT: @@ -229,24 +324,68 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - if (supports_ints) - ret->values[0].i32[r] = ir->value.i[r]; - else - ret->values[0].f32[r] = ir->value.i[r]; + ret->values[r].i32 = ir->value.i[r]; break; - case GLSL_TYPE_FLOAT: - for (unsigned c = 0; c < cols; c++) { - for (unsigned r = 0; r < rows; r++) - ret->values[c].f32[r] = ir->value.f[c * rows + r]; - } + case GLSL_TYPE_INT16: + /* Only float base types can be matrices. */ + assert(cols == 1); + + for (unsigned r = 0; r < rows; r++) + ret->values[r].i16 = ir->value.i16[r]; break; + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_FLOAT16: case GLSL_TYPE_DOUBLE: - for (unsigned c = 0; c < cols; c++) { - for (unsigned r = 0; r < rows; r++) - ret->values[c].f64[r] = ir->value.d[c * rows + r]; + if (cols > 1) { + ret->elements = ralloc_array(mem_ctx, nir_constant *, cols); + ret->num_elements = cols; + for (unsigned c = 0; c < cols; c++) { + nir_constant *col_const = rzalloc(mem_ctx, nir_constant); + col_const->num_elements = 0; + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + for (unsigned r = 0; r < rows; r++) + col_const->values[r].f32 = ir->value.f[c * rows + r]; + break; + + case GLSL_TYPE_FLOAT16: + for (unsigned r = 0; r < rows; r++) + col_const->values[r].u16 = ir->value.f16[c * rows + r]; + break; + + case GLSL_TYPE_DOUBLE: + for (unsigned r = 0; r < rows; r++) + col_const->values[r].f64 = ir->value.d[c * rows + r]; + break; + + default: + unreachable("Cannot get here from the first level switch"); + } + ret->elements[c] = col_const; + } + } else { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + for (unsigned r = 0; r < rows; r++) + ret->values[r].f32 = ir->value.f[r]; + break; + + case GLSL_TYPE_FLOAT16: + for (unsigned r = 0; r < rows; r++) + ret->values[r].u16 = ir->value.f16[r]; + break; + + case GLSL_TYPE_DOUBLE: + for (unsigned r = 0; r < rows; r++) + ret->values[r].f64 = ir->value.d[r]; + break; + + default: + unreachable("Cannot get here from the first level switch"); + } } break; @@ -255,7 +394,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - ret->values[0].u64[r] = ir->value.u64[r]; + ret->values[r].u64 = ir->value.u64[r]; break; case GLSL_TYPE_INT64: @@ -263,7 +402,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - ret->values[0].i64[r] = ir->value.i64[r]; + ret->values[r].i64 = ir->value.i64[r]; break; case GLSL_TYPE_BOOL: @@ -271,7 +410,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) assert(cols == 1); for (unsigned r = 0; r < rows; r++) - ret->values[0].b[r] = ir->value.b[r]; + ret->values[r].b = ir->value.b[r]; break; @@ -292,6 +431,26 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx) return ret; } +static const glsl_type * +wrap_type_in_array(const glsl_type *elem_type, const glsl_type *array_type) +{ + if (!array_type->is_array()) + return elem_type; + + elem_type = wrap_type_in_array(elem_type, array_type->fields.array); + + return glsl_type::get_array_instance(elem_type, array_type->length); +} + +static unsigned +get_nir_how_declared(unsigned how_declared) +{ + if (how_declared == ir_var_hidden) + return nir_var_hidden; + + return nir_var_declared_normally; +} + void nir_visitor::visit(ir_variable *ir) { @@ -317,33 
+476,36 @@ nir_visitor::visit(ir_variable *ir) var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; var->data.patch = ir->data.patch; + var->data.how_declared = get_nir_how_declared(ir->data.how_declared); var->data.invariant = ir->data.invariant; var->data.location = ir->data.location; var->data.stream = ir->data.stream; + if (ir->data.stream & (1u << 31)) + var->data.stream |= NIR_STREAM_PACKED; + + var->data.precision = ir->data.precision; + var->data.explicit_location = ir->data.explicit_location; + var->data.matrix_layout = ir->data.matrix_layout; + var->data.from_named_ifc_block = ir->data.from_named_ifc_block; var->data.compact = false; switch(ir->data.mode) { case ir_var_auto: case ir_var_temporary: if (is_global) - var->data.mode = nir_var_private; + var->data.mode = nir_var_shader_temp; else - var->data.mode = nir_var_function; + var->data.mode = nir_var_function_temp; break; case ir_var_function_in: case ir_var_const_in: - var->data.mode = nir_var_function; + var->data.mode = nir_var_function_temp; break; case ir_var_shader_in: - if (shader->info.stage == MESA_SHADER_FRAGMENT && - ir->data.location == VARYING_SLOT_FACE) { - /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ - var->data.location = SYSTEM_VALUE_FRONT_FACE; - var->data.mode = nir_var_system_value; - } else if (shader->info.stage == MESA_SHADER_GEOMETRY && - ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { + if (shader->info.stage == MESA_SHADER_GEOMETRY && + ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; var->data.mode = nir_var_system_value; @@ -355,6 +517,12 @@ nir_visitor::visit(ir_variable *ir) ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) { var->data.compact = ir->type->without_array()->is_scalar(); } + + if (shader->info.stage > MESA_SHADER_VERTEX && + ir->data.location >= VARYING_SLOT_CLIP_DIST0 && + ir->data.location <= VARYING_SLOT_CULL_DIST1) { + var->data.compact = ir->type->without_array()->is_scalar(); + } } break; @@ -365,17 +533,23 @@ nir_visitor::visit(ir_variable *ir) ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) { var->data.compact = ir->type->without_array()->is_scalar(); } + + if (shader->info.stage <= MESA_SHADER_GEOMETRY && + ir->data.location >= VARYING_SLOT_CLIP_DIST0 && + ir->data.location <= VARYING_SLOT_CULL_DIST1) { + var->data.compact = ir->type->without_array()->is_scalar(); + } break; case ir_var_uniform: if (ir->get_interface_type()) - var->data.mode = nir_var_ubo; + var->data.mode = nir_var_mem_ubo; else var->data.mode = nir_var_uniform; break; case ir_var_shader_storage: - var->data.mode = nir_var_ssbo; + var->data.mode = nir_var_mem_ssbo; break; case ir_var_system_value: @@ -386,16 +560,63 @@ nir_visitor::visit(ir_variable *ir) unreachable("not reached"); } - var->data.interpolation = ir->data.interpolation; - var->data.origin_upper_left = ir->data.origin_upper_left; - var->data.pixel_center_integer = ir->data.pixel_center_integer; - var->data.location_frac = ir->data.location_frac; + unsigned mem_access = 0; + if (ir->data.memory_read_only) + mem_access |= ACCESS_NON_WRITEABLE; + if (ir->data.memory_write_only) + mem_access |= ACCESS_NON_READABLE; + if (ir->data.memory_coherent) + mem_access |= ACCESS_COHERENT; + if (ir->data.memory_volatile) + mem_access |= ACCESS_VOLATILE; + if (ir->data.memory_restrict) + mem_access |= ACCESS_RESTRICT; + + var->interface_type = ir->get_interface_type(); + + /* For UBO and SSBO 
variables, we need explicit types */ + if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) { + const glsl_type *explicit_ifc_type = + ir->get_interface_type()->get_explicit_interface_type(supports_std430); - if (var->data.pixel_center_integer) { - assert(shader->info.stage == MESA_SHADER_FRAGMENT); - shader->info.fs.pixel_center_integer = true; + var->interface_type = explicit_ifc_type; + + if (ir->type->without_array()->is_interface()) { + /* If the type contains the interface, wrap the explicit type in the + * right number of arrays. + */ + var->type = wrap_type_in_array(explicit_ifc_type, ir->type); + } else { + /* Otherwise, this variable is one entry in the interface */ + UNUSED bool found = false; + for (unsigned i = 0; i < explicit_ifc_type->length; i++) { + const glsl_struct_field *field = + &explicit_ifc_type->fields.structure[i]; + if (strcmp(ir->name, field->name) != 0) + continue; + + var->type = field->type; + if (field->memory_read_only) + mem_access |= ACCESS_NON_WRITEABLE; + if (field->memory_write_only) + mem_access |= ACCESS_NON_READABLE; + if (field->memory_coherent) + mem_access |= ACCESS_COHERENT; + if (field->memory_volatile) + mem_access |= ACCESS_VOLATILE; + if (field->memory_restrict) + mem_access |= ACCESS_RESTRICT; + + found = true; + break; + } + assert(found); + } } + var->data.interpolation = ir->data.interpolation; + var->data.location_frac = ir->data.location_frac; + switch (ir->data.depth_layout) { case ir_depth_layout_none: var->data.depth_layout = nir_depth_layout_none; @@ -422,26 +643,18 @@ nir_visitor::visit(ir_variable *ir) var->data.explicit_binding = ir->data.explicit_binding; var->data.bindless = ir->data.bindless; var->data.offset = ir->data.offset; + var->data.access = (gl_access_qualifier)mem_access; - unsigned image_access = 0; - if (ir->data.memory_read_only) - image_access |= ACCESS_NON_WRITEABLE; - if (ir->data.memory_write_only) - image_access |= ACCESS_NON_READABLE; - if (ir->data.memory_coherent) - image_access |= ACCESS_COHERENT; - if (ir->data.memory_volatile) - image_access |= ACCESS_VOLATILE; - if (ir->data.memory_restrict) - image_access |= ACCESS_RESTRICT; - var->data.image.access = (gl_access_qualifier)image_access; - var->data.image.format = ir->data.image_format; + if (var->type->without_array()->is_image()) { + var->data.image.format = ir->data.image_format; + } else if (var->data.mode == nir_var_shader_out) { + var->data.xfb.buffer = ir->data.xfb_buffer; + var->data.xfb.stride = ir->data.xfb_stride; + } var->data.fb_fetch_output = ir->data.fb_fetch_output; var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer; var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride; - var->data.xfb_buffer = ir->data.xfb_buffer; - var->data.xfb_stride = ir->data.xfb_stride; var->num_state_slots = ir->get_num_state_slots(); if (var->num_state_slots > 0) { @@ -460,9 +673,7 @@ nir_visitor::visit(ir_variable *ir) var->constant_initializer = constant_copy(ir->constant_initializer, var); - var->interface_type = ir->get_interface_type(); - - if (var->data.mode == nir_var_function) + if (var->data.mode == nir_var_function_temp) nir_function_impl_add_variable(impl, var); else nir_shader_add_variable(shader, var); @@ -613,6 +824,15 @@ nir_visitor::visit(ir_discard *ir) nir_builder_instr_insert(&b, &discard->instr); } +void +nir_visitor::visit(ir_demote *ir) +{ + nir_intrinsic_instr *demote = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_demote); + + nir_builder_instr_insert(&b, &demote->instr); +} + void 
nir_visitor::visit(ir_emit_vertex *ir) { @@ -656,7 +876,7 @@ nir_visitor::visit(ir_return *ir) if (ir->value != NULL) { nir_deref_instr *ret_deref = nir_build_deref_cast(&b, nir_load_param(&b, 0), - nir_var_function, ir->value->type, 0); + nir_var_function_temp, ir->value->type, 0); nir_ssa_def *val = evaluate_rvalue(ir->value); nir_store_deref(&b, ret_deref, val, ~0); @@ -674,6 +894,45 @@ intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type) nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0); } +/* Accumulate any qualifiers along the deref chain to get the actual + * load/store qualifier. + */ + +static enum gl_access_qualifier +deref_get_qualifier(nir_deref_instr *deref) +{ + nir_deref_path path; + nir_deref_path_init(&path, deref, NULL); + + unsigned qualifiers = path.path[0]->var->data.access; + + const glsl_type *parent_type = path.path[0]->type; + for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) { + nir_deref_instr *cur = *cur_ptr; + + if (parent_type->is_interface()) { + const struct glsl_struct_field *field = + &parent_type->fields.structure[cur->strct.index]; + if (field->memory_read_only) + qualifiers |= ACCESS_NON_WRITEABLE; + if (field->memory_write_only) + qualifiers |= ACCESS_NON_READABLE; + if (field->memory_coherent) + qualifiers |= ACCESS_COHERENT; + if (field->memory_volatile) + qualifiers |= ACCESS_VOLATILE; + if (field->memory_restrict) + qualifiers |= ACCESS_RESTRICT; + } + + parent_type = cur->type; + } + + nir_deref_path_finish(&path); + + return (gl_access_qualifier) qualifiers; +} + void nir_visitor::visit(ir_call *ir) { @@ -681,6 +940,49 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_op op; switch (ir->callee->intrinsic_id) { + case ir_intrinsic_generic_atomic_add: + op = ir->return_deref->type->is_integer_32_64() + ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd; + break; + case ir_intrinsic_generic_atomic_and: + op = nir_intrinsic_deref_atomic_and; + break; + case ir_intrinsic_generic_atomic_or: + op = nir_intrinsic_deref_atomic_or; + break; + case ir_intrinsic_generic_atomic_xor: + op = nir_intrinsic_deref_atomic_xor; + break; + case ir_intrinsic_generic_atomic_min: + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_deref_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_deref_atomic_umin; + else if (ir->return_deref->type == glsl_type::float_type) + op = nir_intrinsic_deref_atomic_fmin; + else + unreachable("Invalid type"); + break; + case ir_intrinsic_generic_atomic_max: + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_deref_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_deref_atomic_umax; + else if (ir->return_deref->type == glsl_type::float_type) + op = nir_intrinsic_deref_atomic_fmax; + else + unreachable("Invalid type"); + break; + case ir_intrinsic_generic_atomic_exchange: + op = nir_intrinsic_deref_atomic_exchange; + break; + case ir_intrinsic_generic_atomic_comp_swap: + op = ir->return_deref->type->is_integer_32_64() + ? 
nir_intrinsic_deref_atomic_comp_swap + : nir_intrinsic_deref_atomic_fcomp_swap; + break; case ir_intrinsic_atomic_counter_read: op = nir_intrinsic_atomic_counter_read_deref; break; @@ -726,10 +1028,20 @@ nir_visitor::visit(ir_call *ir) : nir_intrinsic_image_deref_atomic_fadd; break; case ir_intrinsic_image_atomic_min: - op = nir_intrinsic_image_deref_atomic_min; + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_image_deref_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_image_deref_atomic_umin; + else + unreachable("Invalid type"); break; case ir_intrinsic_image_atomic_max: - op = nir_intrinsic_image_deref_atomic_max; + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_image_deref_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_image_deref_atomic_umax; + else + unreachable("Invalid type"); break; case ir_intrinsic_image_atomic_and: op = nir_intrinsic_image_deref_atomic_and; @@ -746,6 +1058,12 @@ nir_visitor::visit(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: op = nir_intrinsic_image_deref_atomic_comp_swap; break; + case ir_intrinsic_image_atomic_inc_wrap: + op = nir_intrinsic_image_deref_atomic_inc_wrap; + break; + case ir_intrinsic_image_atomic_dec_wrap: + op = nir_intrinsic_image_deref_atomic_dec_wrap; + break; case ir_intrinsic_memory_barrier: op = nir_intrinsic_memory_barrier; break; @@ -756,54 +1074,21 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_image_deref_samples; break; case ir_intrinsic_ssbo_store: - op = nir_intrinsic_store_ssbo; - break; case ir_intrinsic_ssbo_load: - op = nir_intrinsic_load_ssbo; - break; case ir_intrinsic_ssbo_atomic_add: - op = ir->return_deref->type->is_integer_32_64() - ? nir_intrinsic_ssbo_atomic_add : nir_intrinsic_ssbo_atomic_fadd; - break; case ir_intrinsic_ssbo_atomic_and: - op = nir_intrinsic_ssbo_atomic_and; - break; case ir_intrinsic_ssbo_atomic_or: - op = nir_intrinsic_ssbo_atomic_or; - break; case ir_intrinsic_ssbo_atomic_xor: - op = nir_intrinsic_ssbo_atomic_xor; - break; case ir_intrinsic_ssbo_atomic_min: - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_ssbo_atomic_imin; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_ssbo_atomic_umin; - else if (ir->return_deref->type == glsl_type::float_type) - op = nir_intrinsic_ssbo_atomic_fmin; - else - unreachable("Invalid type"); - break; case ir_intrinsic_ssbo_atomic_max: - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_ssbo_atomic_imax; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_ssbo_atomic_umax; - else if (ir->return_deref->type == glsl_type::float_type) - op = nir_intrinsic_ssbo_atomic_fmax; - else - unreachable("Invalid type"); - break; case ir_intrinsic_ssbo_atomic_exchange: - op = nir_intrinsic_ssbo_atomic_exchange; - break; case ir_intrinsic_ssbo_atomic_comp_swap: - op = ir->return_deref->type->is_integer_32_64() - ? nir_intrinsic_ssbo_atomic_comp_swap - : nir_intrinsic_ssbo_atomic_fcomp_swap; - break; + /* SSBO store/loads should only have been lowered in GLSL IR for + * non-nir drivers, NIR drivers make use of gl_nir_lower_buffers() + * instead. 
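+       * (NIR drivers instead run gl_nir_lower_buffers() on the derefs
+       * glsl_to_nir emits, turning UBO/SSBO access into explicit buffer
+       * intrinsics later in the pipeline; reaching one of these cases
+       * therefore means the GLSL IR lowering ran for a NIR driver by
+       * mistake.)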
+ */ + unreachable("Invalid operation nir doesn't want lowered ssbo " + "store/loads"); case ir_intrinsic_shader_clock: op = nir_intrinsic_shader_clock; break; @@ -896,6 +1181,9 @@ nir_visitor::visit(ir_call *ir) case ir_intrinsic_read_first_invocation: op = nir_intrinsic_read_first_invocation; break; + case ir_intrinsic_helper_invocation: + op = nir_intrinsic_is_helper_invocation; + break; default: unreachable("not reached"); } @@ -904,6 +1192,65 @@ nir_visitor::visit(ir_call *ir) nir_ssa_def *ret = &instr->dest.ssa; switch (op) { + case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_imin: + case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_imax: + case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_and: + case nir_intrinsic_deref_atomic_or: + case nir_intrinsic_deref_atomic_xor: + case nir_intrinsic_deref_atomic_exchange: + case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fadd: + case nir_intrinsic_deref_atomic_fmin: + case nir_intrinsic_deref_atomic_fmax: + case nir_intrinsic_deref_atomic_fcomp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Deref */ + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *rvalue = (ir_rvalue *) param; + ir_dereference *deref = rvalue->as_dereference(); + ir_swizzle *swizzle = NULL; + if (!deref) { + /* We may have a swizzle to pick off a single vec4 component */ + swizzle = rvalue->as_swizzle(); + assert(swizzle && swizzle->type->vector_elements == 1); + deref = swizzle->val->as_dereference(); + assert(deref); + } + nir_deref_instr *nir_deref = evaluate_deref(deref); + if (swizzle) { + nir_deref = nir_build_deref_array_imm(&b, nir_deref, + swizzle->mask.x); + } + instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa); + + nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref)); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_deref_atomic_comp_swap || + op == nir_intrinsic_deref_atomic_fcomp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, 32, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } case nir_intrinsic_atomic_counter_read_deref: case nir_intrinsic_atomic_counter_inc_deref: case nir_intrinsic_atomic_counter_pre_dec_deref: @@ -946,8 +1293,10 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_deref_atomic_xor: @@ -955,7 +1304,9 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_atomic_fadd: case 
nir_intrinsic_image_deref_samples: - case nir_intrinsic_image_deref_size: { + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_atomic_inc_wrap: + case nir_intrinsic_image_deref_atomic_dec_wrap: { nir_ssa_undef_instr *instr_undef = nir_ssa_undef_instr_create(shader, 1, 32); nir_builder_instr_insert(&b, &instr_undef->instr); @@ -963,10 +1314,12 @@ nir_visitor::visit(ir_call *ir) /* Set the image variable dereference. */ exec_node *param = ir->actual_parameters.get_head(); ir_dereference *image = (ir_dereference *)param; - const glsl_type *type = - image->variable_referenced()->type->without_array(); + nir_deref_instr *deref = evaluate_deref(image); + const glsl_type *type = deref->type; + + nir_intrinsic_set_access(instr, deref_get_qualifier(deref)); - instr->src[0] = nir_src_for_ssa(&evaluate_deref(image)->dest.ssa); + instr->src[0] = nir_src_for_ssa(&deref->dest.ssa); param = param->get_next(); /* Set the intrinsic destination. */ @@ -1022,13 +1375,18 @@ nir_visitor::visit(ir_call *ir) instr->src[3] = nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); param = param->get_next(); + } else if (op == nir_intrinsic_image_deref_load) { + instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */ } if (!param->is_tail_sentinel()) { instr->src[4] = nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); param = param->get_next(); + } else if (op == nir_intrinsic_image_deref_store) { + instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */ } + nir_builder_instr_insert(&b, &instr->instr); break; } @@ -1042,7 +1400,7 @@ nir_visitor::visit(ir_call *ir) break; case nir_intrinsic_shader_clock: nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL); - instr->num_components = 2; + nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP); nir_builder_instr_insert(&b, &instr->instr); break; case nir_intrinsic_begin_invocation_interlock: @@ -1079,87 +1437,6 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } - case nir_intrinsic_load_ssbo: { - exec_node *param = ir->actual_parameters.get_head(); - ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - - const glsl_type *type = ir->return_deref->var->type; - instr->num_components = type->vector_elements; - intrinsic_set_std430_align(instr, type); - - /* Setup destination register */ - unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type); - nir_ssa_dest_init(&instr->instr, &instr->dest, - type->vector_elements, bit_size, NULL); - - /* Insert the created nir instruction now since in the case of boolean - * result we will need to emit another instruction after it - */ - nir_builder_instr_insert(&b, &instr->instr); - - /* - * In SSBO/UBO's, a true boolean value is any non-zero value, but we - * consider a true boolean to be ~0. Fix this up with a != 0 - * comparison. 
- */ - if (type->is_boolean()) - ret = nir_i2b(&b, &instr->dest.ssa); - break; - } - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_ssbo_atomic_fadd: - case nir_intrinsic_ssbo_atomic_fmin: - case nir_intrinsic_ssbo_atomic_fmax: - case nir_intrinsic_ssbo_atomic_fcomp_swap: { - int param_count = ir->actual_parameters.length(); - assert(param_count == 3 || param_count == 4); - - /* Block index */ - exec_node *param = ir->actual_parameters.get_head(); - ir_instruction *inst = (ir_instruction *) param; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* Offset */ - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data1 parameter (this is always present) */ - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data2 parameter (only with atomic_comp_swap) */ - if (param_count == 4) { - assert(op == nir_intrinsic_ssbo_atomic_comp_swap || - op == nir_intrinsic_ssbo_atomic_fcomp_swap); - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - } - - /* Atomic result */ - assert(ir->return_deref); - nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, 32, NULL); - nir_builder_instr_insert(&b, &instr->instr); - break; - } case nir_intrinsic_load_shared: { exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); @@ -1180,7 +1457,7 @@ nir_visitor::visit(ir_call *ir) /* The value in shared memory is a 32-bit value */ if (type->is_boolean()) - ret = nir_i2b(&b, &instr->dest.ssa); + ret = nir_b2b1(&b, &instr->dest.ssa); break; } case nir_intrinsic_store_shared: { @@ -1202,7 +1479,7 @@ nir_visitor::visit(ir_call *ir) nir_ssa_def *nir_val = evaluate_rvalue(val); /* The value in shared memory is a 32-bit value */ if (val->type->is_boolean()) - nir_val = nir_b2i32(&b, nir_val); + nir_val = nir_b2b32(&b, nir_val); instr->src[0] = nir_src_for_ssa(nir_val); instr->num_components = val->type->vector_elements; @@ -1257,11 +1534,12 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_vote_ieq: + instr->num_components = 1; + /* fall-through */ case nir_intrinsic_vote_any: - case nir_intrinsic_vote_all: - case nir_intrinsic_vote_ieq: { + case nir_intrinsic_vote_all: { nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); - instr->num_components = 1; ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); @@ -1306,6 +1584,11 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_is_helper_invocation: { + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } @@ -1369,12 +1652,18 @@ nir_visitor::visit(ir_assignment *ir) if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && (ir->write_mask == (1 << 
num_components) - 1 || ir->write_mask == 0)) { + nir_deref_instr *lhs = evaluate_deref(ir->lhs); + nir_deref_instr *rhs = evaluate_deref(ir->rhs); + enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs); + enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs); if (ir->condition) { nir_push_if(&b, evaluate_rvalue(ir->condition)); - nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs)); + nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers, + rhs_qualifiers); nir_pop_if(&b, NULL); } else { - nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs)); + nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers, + rhs_qualifiers); } return; } @@ -1396,15 +1685,18 @@ nir_visitor::visit(ir_assignment *ir) for (unsigned i = 0; i < 4; i++) { swiz[i] = ir->write_mask & (1 << i) ? component++ : 0; } - src = nir_swizzle(&b, src, swiz, num_components, !supports_ints); + src = nir_swizzle(&b, src, swiz, num_components); } + enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref); if (ir->condition) { nir_push_if(&b, evaluate_rvalue(ir->condition)); - nir_store_deref(&b, lhs_deref, src, ir->write_mask); + nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask, + qualifiers); nir_pop_if(&b, NULL); } else { - nir_store_deref(&b, lhs_deref, src, ir->write_mask); + nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask, + qualifiers); } } @@ -1471,7 +1763,8 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) * must emit a variable load. */ - this->result = nir_load_deref(&b, this->deref); + enum gl_access_qualifier access = deref_get_qualifier(this->deref); + this->result = nir_load_deref_with_access(&b, this->deref, access); } return this->result; @@ -1496,28 +1789,6 @@ nir_visitor::visit(ir_expression *ir) { /* Some special cases */ switch (ir->operation) { - case ir_binop_ubo_load: { - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); - unsigned bit_size = ir->type->is_boolean() ? 32 : - glsl_get_bit_size(ir->type); - load->num_components = ir->type->vector_elements; - load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); - intrinsic_set_std430_align(load, ir->type); - add_instr(&load->instr, ir->type->vector_elements, bit_size); - - /* - * In UBO's, a true boolean value is any non-zero value, but we consider - * a true boolean to be ~0. Fix this up with a != 0 comparison. - */ - - if (ir->type->is_boolean()) - this->result = nir_i2b(&b, &load->dest.ssa); - - return; - } - case ir_unop_interpolate_at_centroid: case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: { @@ -1557,7 +1828,7 @@ nir_visitor::visit(ir_expression *ir) * sense, we'll just turn it into a load which will probably * eventually end up as an SSA definition. 
*/ - assert(this->deref->mode == nir_var_private); + assert(this->deref->mode == nir_var_shader_temp); op = nir_intrinsic_load_deref; } @@ -1578,12 +1849,29 @@ nir_visitor::visit(ir_expression *ir) }; result = nir_swizzle(&b, result, swiz, - swizzle->type->vector_elements, false); + swizzle->type->vector_elements); } return; } + case ir_unop_ssbo_unsized_array_length: { + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_deref_buffer_array_length); + + ir_dereference *deref = ir->operands[0]->as_dereference(); + intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa); + + add_instr(&intrin->instr, 1, 32); + return; + } + + case ir_binop_ubo_load: + /* UBO loads should only have been lowered in GLSL IR for non-nir drivers, + * NIR drivers make use of gl_nir_lower_buffers() instead. + */ + unreachable("Invalid operation nir doesn't want lowered ubo loads"); default: break; } @@ -1594,21 +1882,14 @@ nir_visitor::visit(ir_expression *ir) glsl_base_type types[4]; for (unsigned i = 0; i < ir->num_operands; i++) - if (supports_ints) - types[i] = ir->operands[i]->type->base_type; - else - types[i] = GLSL_TYPE_FLOAT; + types[i] = ir->operands[i]->type->base_type; - glsl_base_type out_type; - if (supports_ints) - out_type = ir->type->base_type; - else - out_type = GLSL_TYPE_FLOAT; + glsl_base_type out_type = ir->type->base_type; switch (ir->operation) { case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; case ir_unop_logic_not: - result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]); + result = nir_inot(&b, srcs[0]); break; case ir_unop_neg: result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0]) @@ -1618,6 +1899,9 @@ nir_visitor::visit(ir_expression *ir) result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0]) : nir_iabs(&b, srcs[0]); break; + case ir_unop_clz: + result = nir_uclz(&b, srcs[0]); + break; case ir_unop_saturate: assert(type_is_float(types[0])); result = nir_fsat(&b, srcs[0]); @@ -1634,14 +1918,8 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; case ir_unop_i2f: - result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; case ir_unop_u2f: - result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; case ir_unop_b2f: - result = supports_ints ? 
nir_b2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; case ir_unop_f2i: case ir_unop_f2u: case ir_unop_f2b: @@ -1650,6 +1928,12 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_b2i64: case ir_unop_d2f: case ir_unop_f2d: + case ir_unop_f162f: + case ir_unop_f2f16: + case ir_unop_f162b: + case ir_unop_b2f16: + case ir_unop_i2i: + case ir_unop_u2u: case ir_unop_d2i: case ir_unop_d2u: case ir_unop_d2b: @@ -1688,6 +1972,21 @@ nir_visitor::visit(ir_expression *ir) break; } + case ir_unop_f2fmp: { + result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL); + break; + } + + case ir_unop_i2imp: { + result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL); + break; + } + + case ir_unop_u2ump: { + result = nir_build_alu(&b, nir_op_u2ump, srcs[0], NULL, NULL, NULL); + break; + } + case ir_unop_bitcast_i2f: case ir_unop_bitcast_f2i: case ir_unop_bitcast_u2f: @@ -1698,7 +1997,7 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_bitcast_d2u64: case ir_unop_subroutine_to_int: /* no-op */ - result = nir_imov(&b, srcs[0]); + result = nir_mov(&b, srcs[0]); break; case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; @@ -1781,48 +2080,6 @@ nir_visitor::visit(ir_expression *ir) result = nir_find_lsb(&b, srcs[0]); break; - case ir_unop_noise: - switch (ir->type->vector_elements) { - case 1: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise1_1(&b, srcs[0]); break; - case 2: result = nir_fnoise1_2(&b, srcs[0]); break; - case 3: result = nir_fnoise1_3(&b, srcs[0]); break; - case 4: result = nir_fnoise1_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 2: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise2_1(&b, srcs[0]); break; - case 2: result = nir_fnoise2_2(&b, srcs[0]); break; - case 3: result = nir_fnoise2_3(&b, srcs[0]); break; - case 4: result = nir_fnoise2_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 3: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise3_1(&b, srcs[0]); break; - case 2: result = nir_fnoise3_2(&b, srcs[0]); break; - case 3: result = nir_fnoise3_3(&b, srcs[0]); break; - case 4: result = nir_fnoise3_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 4: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise4_1(&b, srcs[0]); break; - case 2: result = nir_fnoise4_2(&b, srcs[0]); break; - case 3: result = nir_fnoise4_3(&b, srcs[0]); break; - case 4: result = nir_fnoise4_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - default: - unreachable("not reached"); - } - break; case ir_unop_get_buffer_size: { nir_intrinsic_instr *load = nir_intrinsic_instr_create( this->shader, @@ -1834,17 +2091,58 @@ nir_visitor::visit(ir_expression *ir) return; } + case ir_unop_atan: + result = nir_atan(&b, srcs[0]); + break; + case ir_binop_add: result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1]) : nir_iadd(&b, srcs[0], srcs[1]); break; + case ir_binop_add_sat: + result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1]) + : nir_uadd_sat(&b, srcs[0], srcs[1]); + break; case ir_binop_sub: result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1]) : nir_isub(&b, srcs[0], srcs[1]); break; + case ir_binop_sub_sat: + result = type_is_signed(out_type) ? 
nir_isub_sat(&b, srcs[0], srcs[1]) + : nir_usub_sat(&b, srcs[0], srcs[1]); + break; + case ir_binop_abs_sub: + /* out_type is always unsigned for ir_binop_abs_sub, so we have to key + * on the type of the sources. + */ + result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1]) + : nir_uabs_usub(&b, srcs[0], srcs[1]); + break; + case ir_binop_avg: + result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1]) + : nir_uhadd(&b, srcs[0], srcs[1]); + break; + case ir_binop_avg_round: + result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1]) + : nir_urhadd(&b, srcs[0], srcs[1]); + break; + case ir_binop_mul_32x16: + result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1]) + : nir_umul_32x16(&b, srcs[0], srcs[1]); + break; case ir_binop_mul: - result = type_is_float(out_type) ? nir_fmul(&b, srcs[0], srcs[1]) - : nir_imul(&b, srcs[0], srcs[1]); + if (type_is_float(out_type)) + result = nir_fmul(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT64 && + (ir->operands[0]->type->base_type == GLSL_TYPE_INT || + ir->operands[1]->type->base_type == GLSL_TYPE_INT)) + result = nir_imul_2x32_64(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_UINT64 && + (ir->operands[0]->type->base_type == GLSL_TYPE_UINT || + ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) + result = nir_umul_2x32_64(&b, srcs[0], srcs[1]); + else + result = nir_imul(&b, srcs[0], srcs[1]); break; case ir_binop_div: if (type_is_float(out_type)) @@ -1879,16 +2177,13 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; case ir_binop_logic_and: - result = supports_ints ? nir_iand(&b, srcs[0], srcs[1]) - : nir_fand(&b, srcs[0], srcs[1]); + result = nir_iand(&b, srcs[0], srcs[1]); break; case ir_binop_logic_or: - result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) - : nir_for(&b, srcs[0], srcs[1]); + result = nir_ior(&b, srcs[0], srcs[1]); break; case ir_binop_logic_xor: - result = supports_ints ? 
nir_ixor(&b, srcs[0], srcs[1]) - : nir_fxor(&b, srcs[0], srcs[1]); + result = nir_ixor(&b, srcs[0], srcs[1]); break; case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; case ir_binop_rshift: @@ -1902,108 +2197,70 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; case ir_binop_less: - if (supports_ints) { - if (type_is_float(types[0])) - result = nir_flt(&b, srcs[0], srcs[1]); - else if (type_is_signed(types[0])) - result = nir_ilt(&b, srcs[0], srcs[1]); - else - result = nir_ult(&b, srcs[0], srcs[1]); - } else { - result = nir_slt(&b, srcs[0], srcs[1]); - } + if (type_is_float(types[0])) + result = nir_flt(&b, srcs[0], srcs[1]); + else if (type_is_signed(types[0])) + result = nir_ilt(&b, srcs[0], srcs[1]); + else + result = nir_ult(&b, srcs[0], srcs[1]); break; case ir_binop_gequal: - if (supports_ints) { - if (type_is_float(types[0])) - result = nir_fge(&b, srcs[0], srcs[1]); - else if (type_is_signed(types[0])) - result = nir_ige(&b, srcs[0], srcs[1]); - else - result = nir_uge(&b, srcs[0], srcs[1]); - } else { - result = nir_sge(&b, srcs[0], srcs[1]); - } + if (type_is_float(types[0])) + result = nir_fge(&b, srcs[0], srcs[1]); + else if (type_is_signed(types[0])) + result = nir_ige(&b, srcs[0], srcs[1]); + else + result = nir_uge(&b, srcs[0], srcs[1]); break; case ir_binop_equal: - if (supports_ints) { - if (type_is_float(types[0])) - result = nir_feq(&b, srcs[0], srcs[1]); - else - result = nir_ieq(&b, srcs[0], srcs[1]); - } else { - result = nir_seq(&b, srcs[0], srcs[1]); - } + if (type_is_float(types[0])) + result = nir_feq(&b, srcs[0], srcs[1]); + else + result = nir_ieq(&b, srcs[0], srcs[1]); break; case ir_binop_nequal: - if (supports_ints) { - if (type_is_float(types[0])) - result = nir_fne(&b, srcs[0], srcs[1]); - else - result = nir_ine(&b, srcs[0], srcs[1]); - } else { - result = nir_sne(&b, srcs[0], srcs[1]); - } + if (type_is_float(types[0])) + result = nir_fne(&b, srcs[0], srcs[1]); + else + result = nir_ine(&b, srcs[0], srcs[1]); break; case ir_binop_all_equal: - if (supports_ints) { - if (type_is_float(types[0])) { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } + if (type_is_float(types[0])) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); } } else { switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_seq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_fall_equal3(&b, 
srcs[0], srcs[1]); break; - case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break; + case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; default: unreachable("not reached"); } } break; case ir_binop_any_nequal: - if (supports_ints) { - if (type_is_float(types[0])) { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; - case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; - case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } + if (type_is_float(types[0])) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); } } else { switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_sne(&b, srcs[0], srcs[1]); break; - case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break; + case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; default: unreachable("not reached"); } @@ -2028,6 +2285,10 @@ nir_visitor::visit(ir_expression *ir) break; } + case ir_binop_atan2: + result = nir_atan2(&b, srcs[0], srcs[1]); + break; + case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; case ir_triop_fma: result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); @@ -2036,18 +2297,17 @@ nir_visitor::visit(ir_expression *ir) result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); break; case ir_triop_csel: - if (supports_ints) - result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); - else - result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]); + result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); break; case ir_triop_bitfield_extract: - result = (out_type == GLSL_TYPE_INT) ? - nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) : - nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]); + result = ir->type->is_int_16_32() ? 
+ nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) : + nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])); break; case ir_quadop_bitfield_insert: - result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]); + result = nir_bitfield_insert(&b, + nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]), + nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3])); break; case ir_quadop_vector: result = nir_vec(&b, srcs, ir->type->vector_elements); @@ -2063,7 +2323,7 @@ nir_visitor::visit(ir_swizzle *ir) { unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, - ir->type->vector_elements, !supports_ints); + ir->type->vector_elements); } void @@ -2142,7 +2402,8 @@ nir_visitor::visit(ir_texture *ir) num_srcs++; if (ir->shadow_comparator != NULL) num_srcs++; - if (ir->offset != NULL) + /* offsets are constants we store inside nir_tex_intrs.offsets */ + if (ir->offset != NULL && !ir->offset->type->is_array()) num_srcs++; /* Add one for the texture deref */ @@ -2161,6 +2422,15 @@ nir_visitor::visit(ir_texture *ir) case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_FLOAT16: + instr->dest_type = nir_type_float16; + break; + case GLSL_TYPE_INT16: + instr->dest_type = nir_type_int16; + break; + case GLSL_TYPE_UINT16: + instr->dest_type = nir_type_uint16; + break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; @@ -2173,10 +2443,21 @@ nir_visitor::visit(ir_texture *ir) } nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler); - instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa); - instr->src[0].src_type = nir_tex_src_texture_deref; - instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa); - instr->src[1].src_type = nir_tex_src_sampler_deref; + + /* check for bindless handles */ + if (sampler_deref->mode != nir_var_uniform || + nir_deref_instr_get_variable(sampler_deref)->data.bindless) { + nir_ssa_def *load = nir_load_deref(&b, sampler_deref); + instr->src[0].src = nir_src_for_ssa(load); + instr->src[0].src_type = nir_tex_src_texture_handle; + instr->src[1].src = nir_src_for_ssa(load); + instr->src[1].src_type = nir_tex_src_sampler_handle; + } else { + instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa); + instr->src[0].src_type = nir_tex_src_texture_deref; + instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa); + instr->src[1].src_type = nir_tex_src_sampler_deref; + } unsigned src_number = 2; @@ -2203,13 +2484,25 @@ nir_visitor::visit(ir_texture *ir) } if (ir->offset != NULL) { - /* we don't support multiple offsets yet */ - assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); + if (ir->offset->type->is_array()) { + for (int i = 0; i < ir->offset->type->array_size(); i++) { + const ir_constant *c = + ir->offset->as_constant()->get_array_element(i); + + for (unsigned j = 0; j < 2; ++j) { + int val = c->get_int_component(j); + assert(val <= 31 && val >= -32); + instr->tg4_offsets[i][j] = val; + } + } + } else { + assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->offset)); - instr->src[src_number].src_type = nir_tex_src_offset; - src_number++; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } } switch (ir->op) { @@ -2294,7 +2587,7 @@ 
nir_visitor::visit(ir_dereference_variable *ir) } this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i), - nir_var_function, ir->type, 0); + nir_var_function_temp, ir->type, 0); return; } @@ -2332,7 +2625,74 @@ nir_visitor::visit(ir_dereference_array *ir) void nir_visitor::visit(ir_barrier *) { + if (shader->info.stage == MESA_SHADER_COMPUTE) { + nir_intrinsic_instr *shared_barrier = + nir_intrinsic_instr_create(this->shader, + nir_intrinsic_memory_barrier_shared); + nir_builder_instr_insert(&b, &shared_barrier->instr); + } else if (shader->info.stage == MESA_SHADER_TESS_CTRL) { + nir_intrinsic_instr *patch_barrier = + nir_intrinsic_instr_create(this->shader, + nir_intrinsic_memory_barrier_tcs_patch); + nir_builder_instr_insert(&b, &patch_barrier->instr); + } + nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); + nir_intrinsic_instr_create(this->shader, nir_intrinsic_control_barrier); nir_builder_instr_insert(&b, &instr->instr); } + +nir_shader * +glsl_float64_funcs_to_nir(struct gl_context *ctx, + const nir_shader_compiler_options *options) +{ + /* We pretend it's a vertex shader. Ultimately, the stage shouldn't + * matter because we're not optimizing anything here. + */ + struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX); + sh->Source = float64_source; + sh->CompileStatus = COMPILE_FAILURE; + _mesa_glsl_compile_shader(ctx, sh, false, false, true); + + if (!sh->CompileStatus) { + if (sh->InfoLog) { + _mesa_problem(ctx, + "fp64 software impl compile failed:\n%s\nsource:\n%s\n", + sh->InfoLog, float64_source); + } + return NULL; + } + + nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL); + + nir_visitor v1(ctx, nir); + nir_function_visitor v2(&v1); + v2.run(sh->ir); + visit_exec_list(sh->ir, &v1); + + /* _mesa_delete_shader will try to free sh->Source but it's static const */ + sh->Source = NULL; + _mesa_delete_shader(ctx, sh); + + nir_validate_shader(nir, "float64_funcs_to_nir"); + + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_opt_deref); + + /* Do some optimizations to clean up the shader now. By optimizing the + * functions in the library, we avoid having to re-do that work every + * time we inline a copy of a function. Reducing basic blocks also helps + * with compile times. + */ + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + NIR_PASS_V(nir, nir_opt_gcm, true); + NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false); + NIR_PASS_V(nir, nir_opt_dce); + + return nir; +}
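
A note on the interface-block retyping in visit(ir_variable) above: UBO and
SSBO variables now have their implicit interface type replaced with an
explicit std140/std430 layout via get_explicit_interface_type(), and
wrap_type_in_array() rebuilds any surrounding array dimensions around the
new element type. Below is a minimal, self-contained sketch of that
recursion; Type, make_array() and main() are a hypothetical mock standing
in for glsl_type and glsl_type::get_array_instance(), not Mesa API, so the
sketch compiles and runs on its own:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Type {
       std::string name;              // e.g. "block_std430"
       const Type *element = nullptr; // non-null for array types
       unsigned length = 0;           // array length when element is set

       bool is_array() const { return element != nullptr; }
    };

    // Arena so the demo owns the types it creates; the real
    // glsl_type::get_array_instance() caches instances instead.
    static std::vector<std::unique_ptr<Type>> arena;

    static const Type *make_array(const Type *elem, unsigned len)
    {
       arena.push_back(std::make_unique<Type>(
          Type{elem->name + "[" + std::to_string(len) + "]", elem, len}));
       return arena.back().get();
    }

    // Same shape as the patch's wrap_type_in_array(): peel the array
    // dimensions off array_type down to the innermost element, then
    // rebuild them around the replacement element type.
    static const Type *wrap_type_in_array(const Type *elem_type,
                                          const Type *array_type)
    {
       if (!array_type->is_array())
          return elem_type;

       elem_type = wrap_type_in_array(elem_type, array_type->element);
       return make_array(elem_type, array_type->length);
    }

    int main()
    {
       Type implicit_t{"block_std140"}; // variable's current element type
       Type explicit_t{"block_std430"}; // explicit-layout replacement

       // A two-dimensional array of the block keeps its array shape but
       // swaps the element type; this prints "block_std430[2][4]".
       const Type *var = make_array(make_array(&implicit_t, 2), 4);
       std::cout << wrap_type_in_array(&explicit_t, var)->name << "\n";
    }

Related detail from the same patch: shared-memory booleans are now
converted with nir_b2b32/nir_b2b1 instead of nir_b2i32/nir_i2b, keeping
the 0/~0 boolean convention across the 32-bit in-memory representation
rather than round-tripping through integer semantics.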