From: Jason Ekstrand Date: Fri, 11 Dec 2015 00:58:24 +0000 (-0800) Subject: Merge remote-tracking branch 'mesa-public/master' into vulkan X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=d5c9955d3eaa7311e2b2350b6964bae516c7b7b2;hp=-c Merge remote-tracking branch 'mesa-public/master' into vulkan This pulls in nir_intrinsic_load/store changes and the switch of all uniforms in i965 to bytes. This accounts for the Vulkan changes. --- d5c9955d3eaa7311e2b2350b6964bae516c7b7b2 diff --combined configure.ac index ebea7fcb61a,b6680d04fd1..c26dbf2e126 --- a/configure.ac +++ b/configure.ac @@@ -99,7 -99,6 +99,7 @@@ AM_PROG_CC_C_ AM_PROG_AS AX_CHECK_GNU_MAKE AC_CHECK_PROGS([PYTHON2], [python2 python]) +AC_CHECK_PROGS([PYTHON3], [python3]) AC_PROG_SED AC_PROG_MKDIR_P @@@ -198,6 -197,13 +198,13 @@@ if test "x$GCC" = xyes -a "x$acv_mesa_C fi fi + dnl We don't support building Mesa with Sun C compiler + dnl https://bugs.freedesktop.org/show_bug.cgi?id=93189 + AC_CHECK_DECL([__SUNPRO_C], [SUNCC=yes], [SUNCC=no]) + if test "x$SUNCC" = xyes; then + AC_MSG_ERROR([Building with Sun C compiler is not supported, use GCC instead.]) + fi + dnl Check for compiler builtins AX_GCC_BUILTIN([__builtin_bswap32]) AX_GCC_BUILTIN([__builtin_bswap64]) @@@ -1559,8 -1565,6 +1566,8 @@@ GBM_PC_LIB_PRIV="$DLOPEN_LIBS AC_SUBST([GBM_PC_REQ_PRIV]) AC_SUBST([GBM_PC_LIB_PRIV]) +AM_CONDITIONAL(HAVE_VULKAN, true) + dnl dnl EGL configuration dnl @@@ -2332,13 -2336,6 +2339,13 @@@ AC_SUBST([XA_MINOR], $XA_MINOR AC_SUBST([XA_TINY], $XA_TINY) AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY") +PKG_CHECK_MODULES(VALGRIND, [valgrind], + [have_valgrind=yes], [have_valgrind=no]) +if test "x$have_valgrind" = "xyes"; then + AC_DEFINE([HAVE_VALGRIND], 1, + [Use valgrind intrinsics to suppress false warnings]) +fi + dnl Restore LDFLAGS and CPPFLAGS LDFLAGS="$_SAVE_LDFLAGS" CPPFLAGS="$_SAVE_CPPFLAGS" @@@ -2451,9 -2448,6 +2458,9 @@@ AC_CONFIG_FILES([Makefil src/mesa/drivers/osmesa/osmesa.pc src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile + src/vulkan/Makefile + src/vulkan/anv_icd.json + src/vulkan/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile]) @@@ -2576,7 -2570,6 +2583,7 @@@ if test "x$MESA_LLVM" = x1; the echo "" fi echo " PYTHON2: $PYTHON2" +echo " PYTHON3: $PYTHON3" echo "" echo " Run '${MAKE-make}' to build Mesa" diff --combined src/glsl/Makefile.sources index 0c9fd75d206,fc10f14f4c1..e64c31e17c6 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@@ -34,7 -34,6 +34,7 @@@ NIR_FILES = nir/nir_control_flow_private.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ + nir/nir_gather_info.c \ nir/nir_gs_count_vertices.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ @@@ -77,7 -76,6 +77,7 @@@ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ nir/nir_search.h \ + nir/nir_spirv.h \ nir/nir_split_var_copies.c \ nir/nir_sweep.c \ nir/nir_to_ssa.c \ @@@ -88,9 -86,7 +88,9 @@@ nir/nir_worklist.h \ nir/nir_types.cpp \ nir/shader_enums.h \ - nir/shader_enums.c + nir/shader_enums.c \ + nir/spirv_to_nir.c \ + nir/spirv_glsl450_to_nir.c # libglsl @@@ -160,6 -156,8 +160,8 @@@ LIBGLSL_FILES = loop_analysis.h \ loop_controls.cpp \ loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ lower_clip_distance.cpp \ lower_const_arrays_to_uniforms.cpp \ lower_discard.cpp \ @@@ -184,6 -182,7 +186,7 @@@ lower_vector_insert.cpp \ lower_vertex_id.cpp \ lower_output_reads.cpp \ + lower_shared_reference.cpp \ lower_ubo_reference.cpp \ opt_algebraic.cpp \ opt_array_splitting.cpp \ diff 
--combined src/glsl/ast_to_hir.cpp index 52881a4da7a,35a1e134dfa..fc6bb3e31f1 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@@ -1088,7 -1088,6 +1088,7 @@@ do_comparison(void *mem_ctx, int operat case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_SUBROUTINE: /* I assume a comparison of a struct containing a sampler just @@@ -1825,7 -1824,7 +1825,7 @@@ ast_expression::do_hir(exec_list *instr * tree. This particular use must be at location specified in the grammar * as 'variable_identifier'. */ - ir_variable *var = + ir_variable *var = state->symbols->get_variable(this->primary_expression.identifier); if (var != NULL) { @@@ -2650,7 -2649,9 +2650,9 @@@ apply_explicit_binding(struct _mesa_gls return; } - } else if (state->is_version(420, 310) && base_type->is_image()) { + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); if (max_index >= ctx->Const.MaxImageUnits) { _mesa_glsl_error(loc, state, "Image binding %d exceeds the " @@@ -3737,7 -3738,7 +3739,7 @@@ process_initializer(ir_variable *var, a * expressions. Const-qualified global variables must still be * initialized with constant expressions. */ - if (!state->ARB_shading_language_420pack_enable + if (!state->has_420pack() || state->current_function == NULL) { _mesa_glsl_error(& initializer_loc, state, "initializer of %s variable `%s' must be a " @@@ -5366,7 -5367,7 +5368,7 @@@ ast_jump_statement::hir(exec_list *inst if (state->current_function->return_type != ret_type) { YYLTYPE loc = this->get_location(); - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { if (!apply_implicit_conversion(state->current_function->return_type, ret, state)) { _mesa_glsl_error(& loc, state, @@@ -5558,8 -5559,8 +5560,8 @@@ ast_switch_statement::hir(exec_list *in /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec: * - * "The type of init-expression in a switch statement must be a - * scalar integer." + * "The type of init-expression in a switch statement must be a + * scalar integer." */ if (!test_expression->type->is_scalar() || !test_expression->type->is_integer()) { diff --combined src/glsl/glsl_parser_extras.cpp index b41b64af2c1,29cf0c633be..3988376ea9d --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@@ -87,8 -87,6 +87,8 @@@ _mesa_glsl_parse_state::_mesa_glsl_pars this->extensions = &ctx->Extensions; + this->ARB_compute_shader_enable = true; + this->Const.MaxLights = ctx->Const.MaxLights; this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; @@@ -479,7 -477,7 +479,7 @@@ _mesa_glsl_msg(const YYLTYPE *locp, _me struct gl_context *ctx = state->ctx; /* Report the error via GL_ARB_debug_output. 
*/ - _mesa_shader_debug(ctx, type, &msg_id, msg, strlen(msg)); + _mesa_shader_debug(ctx, type, &msg_id, msg); ralloc_strcat(&state->info_log, "\n"); } @@@ -876,7 -874,7 +876,7 @@@ voi _mesa_ast_process_interface_block(YYLTYPE *locp, _mesa_glsl_parse_state *state, ast_interface_block *const block, - const struct ast_type_qualifier q) + const struct ast_type_qualifier &q) { if (q.flags.q.buffer) { if (!state->has_shader_storage_buffer_objects()) { @@@ -1088,7 -1086,7 +1088,7 @@@ voi ast_compound_statement::print(void) const { printf("{\n"); - + foreach_list_typed(ast_node, ast, link, &this->statements) { ast->print(); } @@@ -1414,7 -1412,6 +1414,6 @@@ ast_selection_statement::print(void) co printf("else "); else_statement->print(); } - } diff --combined src/glsl/nir/glsl_to_nir.cpp index a26300d1d26,db8b0cae814..9a25f2fc905 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@@ -46,7 -46,7 +46,7 @@@ namespace class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader); + nir_visitor(nir_shader *shader, gl_shader *sh); ~nir_visitor(); virtual void visit(ir_variable *); @@@ -86,8 -86,6 +86,8 @@@ private bool supports_ints; + struct gl_shader *sh; + nir_shader *shader; nir_function_impl *impl; nir_builder b; @@@ -141,7 -139,7 +141,7 @@@ glsl_to_nir(const struct gl_shader_prog nir_shader *shader = nir_shader_create(NULL, stage, options); - nir_visitor v1(shader); + nir_visitor v1(shader, sh); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); @@@ -207,11 -205,10 +207,11 @@@ return shader; } -nir_visitor::nir_visitor(nir_shader *shader) +nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh) { this->supports_ints = shader->options->native_integers; this->shader = shader; + this->sh = sh; this->is_global = true; this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); @@@ -393,7 -390,6 +393,7 @@@ nir_visitor::visit(ir_variable *ir } var->data.index = ir->data.index; + var->data.descriptor_set = 0; var->data.binding = ir->data.binding; var->data.atomic.offset = ir->data.atomic.offset; var->data.image.read_only = ir->data.image_read_only; @@@ -691,15 -687,15 +691,15 @@@ nir_visitor::visit(ir_call *ir op = nir_intrinsic_store_ssbo; } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { op = nir_intrinsic_load_ssbo; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imin; @@@ -707,7 -703,7 +707,7 @@@ op = nir_intrinsic_ssbo_atomic_umin; else unreachable("Invalid type"); - } 
else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imax; @@@ -715,9 -711,9 +715,9 @@@ op = nir_intrinsic_ssbo_atomic_umax; else unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_comp_swap; } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { op = nir_intrinsic_shader_clock; @@@ -731,6 -727,38 +731,38 @@@ op = nir_intrinsic_memory_barrier_image; } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { op = nir_intrinsic_memory_barrier_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { + op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umin; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; } else { unreachable("not reached"); } @@@ -857,24 -885,12 +889,12 @@@ ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_store_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - - instr->const_index[1] = write_mask->value.u[0]; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = 
write_mask->value.u[0]; instr->num_components = val->type->vector_elements; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); nir_builder_instr_insert(&b, &instr->instr); break; } @@@ -885,20 -901,8 +905,8 @@@ param = param->get_next(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_load_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - dest = &instr->dest; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; instr->num_components = type->vector_elements; @@@ -978,6 -982,84 +986,84 @@@ nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_load_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_shared_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + 
ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } @@@ -1178,21 -1260,11 +1264,11 @@@ nir_visitor::visit(ir_expression *ir /* Some special cases */ switch (ir->operation) { case ir_binop_ubo_load: { - ir_constant *const_index = ir->operands[1]->as_constant(); - - nir_intrinsic_op op; - if (const_index) { - op = nir_intrinsic_load_ubo; - } else { - op = nir_intrinsic_load_ubo_indirect; - } - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - if (!const_index) - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); add_instr(&load->instr, ir->type->vector_elements); /* diff --combined src/glsl/nir/glsl_types.cpp index 64b5c0cb106,3cf2f03f9ba..bc8677ba6fc --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@@ -22,7 -22,7 +22,7 @@@ */ #include - #include "main/core.h" /* for Elements, MAX2 */ + #include "main/macros.h" #include "glsl_parser_extras.h" #include "glsl_types.h" #include "util/hash_table.h" @@@ -32,7 -32,6 +32,7 @@@ mtx_t glsl_type::mutex = _MTX_INITIALIZ hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; hash_table *glsl_type::interface_types = NULL; +hash_table *glsl_type::function_types = NULL; hash_table *glsl_type::subroutine_types = NULL; void *glsl_type::mem_ctx = NULL; @@@ -170,39 -169,6 +170,39 @@@ glsl_type::glsl_type(const glsl_struct_ mtx_unlock(&glsl_type::mutex); } +glsl_type::glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) : + gl_type(0), + base_type(GLSL_TYPE_FUNCTION), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_params) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + + this->fields.parameters = rzalloc_array(this->mem_ctx, + glsl_function_param, num_params + 1); + + /* We store the return type as the first parameter */ + this->fields.parameters[0].type = return_type; + this->fields.parameters[0].in = false; + this->fields.parameters[0].out = true; + + /* We store the i'th parameter in slot i+1 */ + for (i = 0; i < length; i++) { + this->fields.parameters[i + 1].type = params[i].type; + this->fields.parameters[i + 1].in = params[i].in; + this->fields.parameters[i + 1].out = params[i].out; + } + + mtx_unlock(&glsl_type::mutex); +} + glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), @@@ -714,93 -680,6 +714,93 @@@ glsl_type::get_sampler_instance(enum gl unreachable("switch statement above should be complete"); } +const glsl_type * +glsl_type::get_image_instance(enum glsl_sampler_dim dim, + bool array, glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? image1DArray_type : image1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? image2DArray_type : image2D_type); + case GLSL_SAMPLER_DIM_3D: + return image3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? 
imageCubeArray_type : imageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + else + return image2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + else + return imageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? image2DMSArray_type : image2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_INT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? iimage1DArray_type : iimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? iimage2DArray_type : iimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return iimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? iimageCubeArray_type : iimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return iimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return iimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? iimage2DMSArray_type : iimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? uimage1DArray_type : uimage1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? uimage2DArray_type : uimage2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return uimage3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? uimageCubeArray_type : uimageCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return uimage2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return uimageBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? uimage2DMSArray_type : uimage2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); +} + const glsl_type * glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) { @@@ -1045,74 -924,6 +1045,74 @@@ glsl_type::get_subroutine_instance(cons } +static bool +function_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + if (key1->length != key2->length) + return 1; + + return memcmp(key1->fields.parameters, key2->fields.parameters, + (key1->length + 1) * sizeof(*key1->fields.parameters)); +} + + +static uint32_t +function_key_hash(const void *a) +{ + const glsl_type *const key = (glsl_type *) a; + char hash_key[128]; + unsigned size = 0; + + size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); + + for (unsigned i = 0; i < key->length; i++) { + if (size >= sizeof(hash_key)) + break; + + size += snprintf(& hash_key[size], sizeof(hash_key) - size, + "%p", (void *) key->fields.structure[i].type); + } + + return _mesa_hash_string(hash_key); +} + +const glsl_type * +glsl_type::get_function_instance(const glsl_type *return_type, + const glsl_function_param *params, + unsigned num_params) +{ + const glsl_type key(return_type, params, num_params); + + mtx_lock(&glsl_type::mutex); + + if (function_types == NULL) { + function_types = _mesa_hash_table_create(NULL, function_key_hash, + function_key_compare); + } + + struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(return_type, params, num_params); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(function_types, t, (void *) t); 
+ } + + const glsl_type *t = (const glsl_type *)entry->data; + + assert(t->base_type == GLSL_TYPE_FUNCTION); + assert(t->length == num_params); + + mtx_unlock(&glsl_type::mutex); + + return t; +} + + const glsl_type * glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) { @@@ -1242,8 -1053,6 +1242,8 @@@ glsl_type::component_slots() cons return 1; case GLSL_TYPE_SUBROUTINE: return 1; + + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_VOID: @@@ -1874,7 -1683,6 +1874,7 @@@ glsl_type::count_attribute_slots() cons case GLSL_TYPE_ARRAY: return this->length * this->fields.array->count_attribute_slots(); + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --combined src/glsl/nir/nir.c index 79df6d3df94,35fc1de2e01..94bb76034a2 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@@ -500,10 -500,8 +500,10 @@@ nir_tex_instr_create(nir_shader *shader for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); + instr->texture_index = 0; + instr->texture_array_size = 0; + instr->texture = NULL; instr->sampler_index = 0; - instr->sampler_array_size = 0; instr->sampler = NULL; return instr; @@@ -1021,10 -1019,6 +1021,10 @@@ visit_tex_src(nir_tex_instr *instr, nir if (!visit_src(&instr->src[i].src, cb, state)) return false; + if (instr->texture != NULL) + if (!visit_deref_src(instr->texture, cb, state)) + return false; + if (instr->sampler != NULL) if (!visit_deref_src(instr->sampler, cb, state)) return false; @@@ -1382,13 -1376,13 +1382,13 @@@ static inline boo foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->else_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->then_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; @@@ -1412,7 -1406,7 +1412,7 @@@ static inline boo foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } @@@ -1472,7 -1466,7 +1472,7 @@@ nir_foreach_block_reverse(nir_function_ if (!cb(impl->end_block, state)) return false; - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) { if (!foreach_cf_node(node, cb, true, state)) return false; } diff --combined src/glsl/nir/nir.h index b7374e17407,2e72e66699c..021c4280557 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@@ -291,11 -291,6 +291,11 @@@ typedef struct */ int index; + /** + * Descriptor set binding for sampler or UBO. + */ + int descriptor_set; + /** * Initial binding point for a sampler or UBO. 
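 * (In the Vulkan binding model described for vulkan_resource_index in
 * nir_intrinsics.h, a resource is addressed by the descriptor_set/binding
 * pair together; the GLSL-to-NIR path sets descriptor_set to 0 and uses
 * only the binding.)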
* @@@ -520,11 -515,7 +520,11 @@@ typedef struct nir_src bool is_ssa; } nir_src; -#define NIR_SRC_INIT (nir_src) { { NULL } } +#ifdef __cplusplus +# define NIR_SRC_INIT nir_src() +#else +# define NIR_SRC_INIT (nir_src) { { NULL } } +#endif #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@@ -547,11 -538,7 +547,11 @@@ typedef struct bool is_ssa; } nir_dest; -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#ifdef __cplusplus +# define NIR_DEST_INIT nir_dest() +#else +# define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#endif #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) @@@ -948,7 -935,6 +948,7 @@@ typedef enum nir_tex_src_ms_index, /* MSAA sample index */ nir_tex_src_ddx, nir_tex_src_ddy, + nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ nir_num_tex_src_types } nir_tex_src_type; @@@ -999,24 -985,6 +999,24 @@@ typedef struct /* gather component selector */ unsigned component : 2; + /** The texture index + * + * If this texture instruction has a nir_tex_src_texture_offset source, + * then the texture index is given by texture_index + texture_offset. + */ + unsigned texture_index; + + /** The size of the texture array or 0 if it's not an array */ + unsigned texture_array_size; + + /** The texture deref + * + * If both this and `sampler` are both NULL, use texture_index instead. + * If `texture` is NULL, but `sampler` is non-NULL, then the texture is + * implied from the sampler. + */ + nir_deref_var *texture; + /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, @@@ -1024,11 -992,10 +1024,11 @@@ */ unsigned sampler_index; - /** The size of the sampler array or 0 if it's not an array */ - unsigned sampler_array_size; - - nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ + /** The sampler deref + * + * If this is null, use sampler_index instead. 
+ */ + nir_deref_var *sampler; } nir_tex_instr; static inline unsigned @@@ -1309,8 -1276,8 +1309,8 @@@ nir_block_last_instr(nir_block *block foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) #define nir_foreach_instr_safe(block, instr) \ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) - #define nir_foreach_instr_safe_reverse(block, instr) \ - foreach_list_typed_safe_reverse(nir_instr, instr, node, &(block)->instr_list) + #define nir_foreach_instr_reverse_safe(block, instr) \ + foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) typedef struct nir_if { nir_cf_node cf_node; @@@ -1494,7 -1461,6 +1494,7 @@@ typedef struct nir_function typedef struct nir_shader_compiler_options { bool lower_ffma; + bool lower_fdiv; bool lower_flrp; bool lower_fpow; bool lower_fsat; @@@ -1765,17 -1731,6 +1765,17 @@@ typedef struct }; } nir_cursor; +static inline nir_block * +nir_cursor_current_block(nir_cursor cursor) +{ + if (cursor.option == nir_cursor_before_instr || + cursor.option == nir_cursor_after_instr) { + return cursor.instr->block; + } else { + return cursor.block; + } +} + static inline nir_cursor nir_before_block(nir_block *block) { @@@ -2007,10 -1962,6 +2007,10 @@@ bool nir_lower_locals_to_regs(nir_shade void nir_lower_outputs_to_temporaries(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader); + +void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); @@@ -2018,7 -1969,7 +2018,7 @@@ void nir_lower_io(nir_shader *shader, nir_variable_mode mode, int (*type_size)(const struct glsl_type *)); - nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr); + nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); diff --combined src/glsl/nir/nir_intrinsics.h index de30db61eea,9811fb391de..5086e297e8e --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@@ -175,25 -175,6 +175,25 @@@ INTRINSIC(image_size, 0, ARR(), true, 4 INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* + * Vulkan descriptor set intrinsic + * + * The Vulkan API uses a different binding model from GL. In the Vulkan + * API, all external resources are represented by a tripple: + * + * (descriptor set, binding, array index) + * + * where the array index is the only thing allowed to be indirect. The + * vulkan_surface_index intrinsic takes the descriptor set and binding as + * its first two indices and the array index as its source. The third + * index is a nir_variable_mode in case that's useful to the backend. + * + * The intended usage is that the shader will call vulkan_surface_index to + * get an index and then pass that as the buffer index ubo/ssbo calls. 
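 *
 * A rough, illustrative sketch of that usage for a vec4 UBO load (the
 * builder "b", the "shader" pointer, the "set"/"binding"/"mode" constants
 * and the "array_index"/"byte_offset" SSA defs are placeholders, not names
 * taken from this tree):
 *
 *    nir_intrinsic_instr *res =
 *       nir_intrinsic_instr_create(shader, nir_intrinsic_vulkan_resource_index);
 *    res->src[0] = nir_src_for_ssa(array_index);
 *    res->const_index[0] = set;
 *    res->const_index[1] = binding;
 *    res->const_index[2] = mode;
 *    nir_ssa_dest_init(&res->instr, &res->dest, 1, NULL);
 *    nir_builder_instr_insert(&b, &res->instr);
 *
 *    nir_intrinsic_instr *load =
 *       nir_intrinsic_instr_create(shader, nir_intrinsic_load_ubo);
 *    load->num_components = 4;
 *    load->src[0] = nir_src_for_ssa(&res->dest.ssa);
 *    load->src[1] = nir_src_for_ssa(byte_offset);
 *    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
 *    nir_builder_instr_insert(&b, &load->instr);
 *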
+ */ +INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* * SSBO atomic intrinsics * @@@ -222,6 -203,33 +222,33 @@@ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1 INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) + /* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. + */ + INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + #define SYSTEM_VALUE(name, components, num_indices) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@@ -247,56 -255,60 +274,62 @@@ SYSTEM_VALUE(num_work_groups, 3, 0 SYSTEM_VALUE(helper_invocation, 1, 0) /* - * The format of the indices depends on the type of the load. For uniforms, - * the first index is the base address and the second index is an offset that - * should be added to the base address. (This way you can determine in the - * back-end which variable is being accessed even in an array.) For inputs, - * the one and only index corresponds to the attribute slot. UBO loads - * have two indices the first of which is the descriptor set and the second - * is the base address to load from. + * Load operations pull data from some piece of GPU memory. All load + * operations operate in terms of offsets into some piece of theoretical + * memory. Loads from externally visible memory (UBO and SSBO) simply take a + * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) + * take a base+offset pair where the base (const_index[0]) gives the location + * of the start of the variable being loaded and and the offset source is a + * offset into that variable. * - * UBO loads have a (possibly constant) source which is the UBO buffer index. - * For each type of load, the _indirect variant has one additional source - * (the second in the case of UBO's) that is the is an indirect to be added to - * the constant address or base offset to compute the final offset. + * Some load operations such as UBO/SSBO load and per_vertex loads take an + * additional source to specify which UBO/SSBO/vertex to load from. * - * For vector backends, the address is in terms of one vec4, and so each array - * element is +4 scalar components from the previous array element. 
For scalar - * backends, the address is in terms of a single 4-byte float/int and arrays - * elements begin immediately after the previous array element. + * The exact address type depends on the lowering pass that generates the + * load/store intrinsics. Typically, this is vec4 units for things such as + * varying slots and float units for fragment shader inputs. UBO and SSBO + * offsets are always in bytes. */ - #define LOAD(name, extra_srcs, indices, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \ - INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, indices, flags) + #define LOAD(name, srcs, indices, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) - LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) - LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) - LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) - LOAD(push_constant, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { buffer_index, offset }. No const_index */ + LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { vertex, offset }. const_index[] = { base } */ + LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { buffer_index, offset }. No const_index */ + LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { vertex, offset }. const_index[] = { base } */ + LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) ++/* src[] = { offset }. const_index[] = { base, size } */ ++LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* - * Stores work the same way as loads, except now the first register input is - * the value or array to store and the optional second input is the indirect - * offset. SSBO stores are similar, but they accept an extra source for the - * block index and an extra index with the writemask to use. + * Stores work the same way as loads, except now the first source is the value + * to store and the second (and possibly third) source specify where to store + * the value. SSBO and shared memory stores also have a write mask as + * const_index[0]. 
*/ - #define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ - INTRINSIC(store_##name, 1 + extra_srcs, \ - ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) \ - INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ - ARR(0, 1, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) + #define STORE(name, srcs, indices, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) - STORE(output, 0, 0, 0, 0) - STORE(per_vertex_output, 1, 1, 0, 0) - STORE(ssbo, 1, 1, 1, 0) + /* src[] = { value, offset }. const_index[] = { base } */ + STORE(output, 2, 1, 0) + /* src[] = { value, vertex, offset }. const_index[] = { base } */ + STORE(per_vertex_output, 3, 1, 0) + /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ + STORE(ssbo, 3, 1, 0) + /* src[] = { value, offset }. const_index[] = { base, write_mask } */ + STORE(shared, 2, 1, 0) - LAST_INTRINSIC(store_ssbo_indirect) + LAST_INTRINSIC(store_shared) diff --combined src/glsl/nir/nir_lower_io.c index 5683e69d865,3d646eb14b4..ec6d09d5b6d --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@@ -86,18 -86,11 +86,11 @@@ is_per_vertex_output(struct lower_io_st stage == MESA_SHADER_TESS_CTRL; } - static unsigned - get_io_offset(nir_deref_var *deref, nir_instr *instr, + static nir_ssa_def * + get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - nir_ssa_def **out_indirect, - struct lower_io_state *state) + int (*type_size)(const struct glsl_type *)) { - nir_ssa_def *indirect = NULL; - unsigned base_offset = 0; - - nir_builder *b = &state->builder; - b->cursor = nir_before_instr(instr); - nir_deref *tail = &deref->deref; /* For per-vertex input arrays (i.e. geometry shader inputs), keep the @@@ -115,64 -108,57 +108,57 @@@ *vertex_index = vtx; } + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); + while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; tail = tail->child; if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = state->type_size(tail->type); + unsigned size = type_size(tail->type); - base_offset += size * deref_array->base_offset; + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_ssa_def *mul = nir_imul(b, nir_imm_int(b, size), nir_ssa_for_src(b, deref_array->indirect, 1)); - indirect = indirect ? nir_iadd(b, indirect, mul) : mul; + offset = nir_iadd(b, offset, mul); } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + unsigned field_offset = 0; for (unsigned i = 0; i < deref_struct->index; i++) { - base_offset += - state->type_size(glsl_get_struct_field(parent_type, i)); + field_offset += type_size(glsl_get_struct_field(parent_type, i)); } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); } } - *out_indirect = indirect; - return base_offset; + return offset; } static nir_intrinsic_op load_op(struct lower_io_state *state, - nir_variable_mode mode, bool per_vertex, bool has_indirect) + nir_variable_mode mode, bool per_vertex) { nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - if (per_vertex) { - op = has_indirect ? 
nir_intrinsic_load_per_vertex_input_indirect : - nir_intrinsic_load_per_vertex_input; - } else { - op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; break; case nir_var_shader_out: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect : - nir_intrinsic_load_per_vertex_output; - } else { - op = has_indirect ? nir_intrinsic_load_output_indirect : - nir_intrinsic_load_output; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; break; case nir_var_uniform: - op = has_indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + op = nir_intrinsic_load_uniform; break; default: unreachable("Unknown variable mode"); @@@ -185,6 -171,8 +171,8 @@@ nir_lower_io_block(nir_block *block, vo { struct lower_io_state *state = void_state; + nir_builder *b = &state->builder; + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; @@@ -205,38 -193,33 +193,33 @@@ mode != nir_var_uniform) continue; + b->cursor = nir_before_instr(instr); + switch (intrin->intrinsic) { case nir_intrinsic_load_var: { bool per_vertex = is_per_vertex_input(state, intrin->variables[0]->var) || is_per_vertex_output(state, intrin->variables[0]->var); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(state, mode, per_vertex, - indirect)); + load_op(state, mode, per_vertex)); load->num_components = intrin->num_components; - unsigned location = intrin->variables[0]->var->data.driver_location; - if (mode == nir_var_uniform) { - load->const_index[0] = location; - load->const_index[1] = offset; - } else { - load->const_index[0] = location + offset; - } + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); - if (indirect) - load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@@ -255,38 -238,33 +238,33 @@@ case nir_intrinsic_store_var: { assert(mode == nir_var_shader_out); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; bool per_vertex = is_per_vertex_output(state, intrin->variables[0]->var); - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); - offset += intrin->variables[0]->var->data.driver_location; + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); - nir_intrinsic_op store_op; - if (per_vertex) { - store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect - : nir_intrinsic_store_per_vertex_output; - } else { - store_op = indirect ? nir_intrinsic_store_output_indirect - : nir_intrinsic_store_output; - } + nir_intrinsic_op store_op = + per_vertex ? 
nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, store_op); store->num_components = intrin->num_components; - store->const_index[0] = offset; nir_src_copy(&store->src[0], &intrin->src[0], store); + store->const_index[0] = + intrin->variables[0]->var->data.driver_location; + if (per_vertex) store->src[1] = nir_src_for_ssa(vertex_index); - if (indirect) - store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect); + store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); nir_instr_insert_before(&intrin->instr, &store->instr); nir_instr_remove(&intrin->instr); @@@ -330,21 -308,21 +308,24 @@@ nir_lower_io(nir_shader *shader, nir_va } /** - * Return the indirect source for a load/store indirect intrinsic. + * Return the offset soruce for a load/store intrinsic. */ nir_src * - nir_get_io_indirect_src(nir_intrinsic_instr *instr) + nir_get_io_offset_src(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - case nir_intrinsic_load_output_indirect: - case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_input: + case nir_intrinsic_load_output: + case nir_intrinsic_load_uniform: return &instr->src[0]; - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: - case nir_intrinsic_store_output_indirect: ++ case nir_intrinsic_load_ubo: ++ case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: return &instr->src[1]; - case nir_intrinsic_store_per_vertex_output_indirect: ++ case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_per_vertex_output: return &instr->src[2]; default: return NULL; @@@ -360,11 -338,8 +341,8 @@@ nir_get_io_vertex_index_src(nir_intrins switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: return &instr->src[0]; case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: return &instr->src[1]; default: return NULL; diff --combined src/glsl/nir/nir_lower_samplers.c index 19deafab37a,2aab305e6cc..858088237e3 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@@ -25,7 -25,6 +25,6 @@@ #include "nir.h" #include "nir_builder.h" - #include "../program.h" #include "program/hash_table.h" #include "ir_uniform.h" @@@ -95,9 -94,6 +94,9 @@@ lower_sampler(nir_tex_instr *instr, con if (instr->sampler == NULL) return; + /* GLSL only has combined textures/samplers */ + assert(instr->texture == NULL); + instr->sampler_index = 0; unsigned location = instr->sampler->var->data.location; unsigned array_elements = 1; @@@ -110,7 -106,7 +109,7 @@@ if (indirect) { /* First, we have to resize the array of texture sources */ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); + instr->num_srcs + 2); for (unsigned i = 0; i < instr->num_srcs; i++) { new_srcs[i].src_type = instr->src[i].src_type; @@@ -124,19 -120,13 +123,19 @@@ /* Now we can go ahead and move the source over to being a * first-class texture source. 
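 * Since GLSL only exposes combined texture/samplers (asserted above), the
 * same indirect value is installed twice below, once as
 * nir_tex_src_texture_offset and once as nir_tex_src_sampler_offset, and
 * the array size is recorded in texture_array_size.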
*/ + instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; instr->num_srcs++; nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, nir_src_for_ssa(indirect)); - instr->sampler_array_size = array_elements; + instr->texture_array_size = array_elements; } if (location > shader_program->NumUniformStorage - 1 || @@@ -149,8 -139,6 +148,8 @@@ shader_program->UniformStorage[location].opaque[stage].index; instr->sampler = NULL; + + instr->texture_index = instr->sampler_index; } typedef struct { diff --combined src/glsl/nir/nir_opt_algebraic.py index 30ede52b146,cb715c0b2c1..3843f21c0ee --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@@ -70,7 -70,6 +70,7 @@@ optimizations = (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), + (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), (('ffma', 0.0, a, b), b), (('ffma', a, 0.0, b), b), (('ffma', a, b, 0.0), ('fmul', a, b)), @@@ -185,8 -184,10 +185,10 @@@ (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), # Boolean simplifications - (('ine', 'a@bool', 0), 'a'), - (('ieq', 'a@bool', 0), ('inot', 'a')), + (('ieq', 'a@bool', True), a), + (('ine', 'a@bool', True), ('inot', a)), + (('ine', 'a@bool', False), a), + (('ieq', 'a@bool', False), ('inot', 'a')), (('bcsel', a, True, False), ('ine', a, 0)), (('bcsel', a, False, True), ('ieq', a, 0)), (('bcsel', True, b, c), b), diff --combined src/glsl/nir/nir_print.c index 76bfc47c2a0,1a4cc695d5a..10f46cef1de --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@@ -439,21 -439,15 +439,15 @@@ print_intrinsic_instr(nir_intrinsic_ins switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: var_list = &state->shader->uniforms; break; case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_input_indirect: var_list = &state->shader->inputs; break; case nir_intrinsic_load_output: - case nir_intrinsic_load_output_indirect: case nir_intrinsic_store_output: - case nir_intrinsic_store_output_indirect: case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: var_list = &state->shader->outputs; break; default: @@@ -553,9 -547,6 +547,9 @@@ print_tex_instr(nir_tex_instr *instr, p case nir_tex_src_ddy: fprintf(fp, "(ddy)"); break; + case nir_tex_src_texture_offset: + fprintf(fp, "(texture_offset)"); + break; case nir_tex_src_sampler_offset: fprintf(fp, "(sampler_offset)"); break; @@@ -586,18 -577,13 +580,18 @@@ fprintf(fp, "%u (gather_component), ", instr->component); } + if (instr->texture) { + assert(instr->sampler); + fprintf(fp, " (texture)"); + } if (instr->sampler) { print_deref(instr->sampler, state); + fprintf(fp, " (sampler)"); } else { - fprintf(fp, "%u", instr->sampler_index); + assert(instr->texture == NULL); + fprintf(fp, "%u (texture) %u (sampler)", + instr->texture_index, instr->sampler_index); } - - fprintf(fp, " (sampler)"); } static void diff --combined src/glsl/nir/spirv_to_nir.c index d014f3cd811,00000000000..68edea09309 mode 100644,000000..100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@@ -1,3788 -1,0 +1,3778 
@@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "spirv_to_nir_private.h" +#include "nir_vla.h" +#include "nir_control_flow.h" + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = 
vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); +} + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (opcode == SpvOpNop) { + w++; + continue; + } + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->member < 0) { + member = parent_member; + } else { + assert(parent_member == -1); + member = dec->member; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. 
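 *
 * For example, a decoration attached to an OpDecorationGroup target and
 * then applied to several values via OpGroupDecorate is reported once per
 * value, exactly as if each value had been decorated directly.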
+ */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_undef); + break; + + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpDecorate: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[target]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element = src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + ctx->type->members[member]->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. */ + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I 
*/ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: + val->type->type = glsl_int_type(); + break; + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + const struct glsl_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base)); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + /* A length of 0 is used to denote unsized arrays */ + unsigned length = (opcode == SpvOpTypeArray) ? w[3] : 0; + + val->type->type = glsl_array_type(array_element->type, length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + /* TODO: Handle decorators */ + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation = 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + } + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? 
val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + assert(!multisampled && "FIXME: Handl multi-sampled textures"); + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + } else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + * + * TODO: Eventually we should consider adding a "bare sampler" type + * to glsl_types. 
+ */ + val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, + GLSL_TYPE_FLOAT); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) +{ + switch (b->shader->stage) { + case MESA_SHADER_VERTEX: + assert(mode == nir_var_shader_out); + break; + case MESA_SHADER_GEOMETRY: + assert(mode == nir_var_shader_out || mode == nir_var_shader_in); + break; + default: + assert(!"Invalid shader stage"); + } +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ + set_mode_system_value(mode); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + /* these are constants, need to be handled specially */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + /* these are computed values, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationNoPerspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonWritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + nir_variable_mode mode = var->data.mode; + vtn_get_builtin_location(b, builtin, &var->data.location, &mode); + var->data.explicit_location = true; + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[dec->literals[0]].out = var; + else + b->builtins[dec->literals[0]].in = var; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var; + if (mode == nir_var_shader_out) + var = b->builtins[builtin].out; + else + var = b->builtins[builtin].in; + + if (!var) { + int location; + vtn_get_builtin_location(b, builtin, &location, &mode); + + var = nir_variable_create(b->shader, mode, type, "builtin"); + + var->data.location = location; + var->data.explicit_location = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[builtin].out = var; + else + b->builtins[builtin].in = var; + } + + return var; +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + src_deref_tail->child = NULL; + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value *src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + dest_deref_tail->child = NULL; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +static nir_ssa_def * +nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *array_index) +{ + if (array_index == NULL) + array_index = nir_imm_int(b, 0); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + instr->const_index[0] = set; + instr->const_index[1] = binding; + instr->const_index[2] = mode; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(b, &instr->instr); + + return &instr->dest.ssa; +} + +static struct 
vtn_ssa_value * +_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, unsigned binding, nir_variable_mode mode, - nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, - struct vtn_type *type) ++ nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); - load->const_index[0] = offset; + + switch (op) { - case nir_intrinsic_load_ubo_indirect: - case nir_intrinsic_load_ssbo_indirect: - load->src[1] = nir_src_for_ssa(indirect); - /* fall through */ + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: { + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + load->src[0] = nir_src_for_ssa(res_index); ++ load->src[1] = nir_src_for_ssa(offset); + break; + } + + case nir_intrinsic_load_push_constant: - break; /* Nothing to do */ - case nir_intrinsic_load_push_constant_indirect: - load->src[0] = nir_src_for_ssa(indirect); ++ load->src[0] = nir_src_for_ssa(offset); + break; + + default: + unreachable("Invalid block load intrinsic"); + } + + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *child_offset = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + type->offsets[i], - indirect, type->members[i]); ++ child_offset, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *child_offset = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + i * type->stride, - indirect, type->array_element); ++ child_offset,type->array_element); + } + } + } + + return val; +} + +static void +vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type **type, nir_deref *src_tail, - nir_ssa_def **index, - unsigned *offset, nir_ssa_def **indirect) ++ nir_ssa_def **index, nir_ssa_def **offset) +{ + nir_deref *deref = &src->deref; + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + *type = (*type)->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + *index = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); + } else { + *index = nir_imm_int(&b->nb, 0); + } + - *offset = 0; - *indirect = NULL; ++ *offset = nir_imm_int(&b->nb, 0); + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); - if (deref_array->deref_array_type == nir_deref_array_type_direct) { - *offset += (*type)->stride * deref_array->base_offset; - } else { - nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa, - nir_imm_int(&b->nb, (*type)->stride)); - *indirect = *indirect ? 
nir_iadd(&b->nb, *indirect, off) : off; - } ++ nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); ++ ++ if (deref_array->deref_array_type == nir_deref_array_type_indirect) ++ off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); ++ ++ off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); ++ *offset = nir_iadd(&b->nb, *offset, off); ++ + *type = (*type)->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - *offset += (*type)->offsets[deref_struct->index]; ++ ++ unsigned elem_off = (*type)->offsets[deref_struct->index]; ++ *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); ++ + *type = (*type)->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect); ++ nir_ssa_def *offset; ++ vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); + + nir_intrinsic_op op; + if (src->var->data.mode == nir_var_uniform) { + if (src->var->data.descriptor_set >= 0) { + /* UBO load */ + assert(src->var->data.binding >= 0); + - op = indirect ? nir_intrinsic_load_ubo_indirect - : nir_intrinsic_load_ubo; ++ op = nir_intrinsic_load_ubo; + } else { + /* Push constant load */ + assert(src->var->data.descriptor_set == -1 && + src->var->data.binding == -1); + - op = indirect ? nir_intrinsic_load_push_constant_indirect - : nir_intrinsic_load_push_constant; ++ op = nir_intrinsic_load_push_constant; + } + } else { + assert(src->var->data.mode == nir_var_shader_storage); - op = indirect ? nir_intrinsic_load_ssbo_indirect - : nir_intrinsic_load_ssbo; ++ op = nir_intrinsic_load_ssbo; + } + + return _vtn_block_load(b, op, src->var->data.descriptor_set, + src->var->data.binding, src->var->data.mode, - index, offset, indirect, type); ++ index, offset, type); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. 
+ */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + glsl_type_is_struct(var->interface_type) && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) +{ + nir_deref *src_tail = get_deref_tail(src); + + struct vtn_ssa_value *val; + if (variable_is_external_block(src->var)) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static void +_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, + struct vtn_ssa_value *src, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *index, unsigned offset, - nir_ssa_def *indirect, struct vtn_type *type) ++ nir_variable_mode mode, nir_ssa_def *index, ++ nir_ssa_def *offset, struct vtn_type *type) +{ + assert(src->type == type->type); + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); + store->num_components = glsl_get_vector_elements(type->type); - store->const_index[0] = offset; + store->const_index[1] = (1 << store->num_components) - 1; + store->src[0] = nir_src_for_ssa(src->def); + + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + store->src[1] = nir_src_for_ssa(res_index); - - if (op == nir_intrinsic_store_ssbo_indirect) - store->src[2] = nir_src_for_ssa(indirect); ++ store->src[2] = nir_src_for_ssa(offset); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else { + unsigned elems = glsl_get_length(type->type); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *child_offset = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); + _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + type->offsets[i], indirect, - type->members[i]); ++ index, child_offset, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *child_offset = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); + _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + i * type->stride, indirect, - type->array_element); ++ index, child_offset, type->array_element); + } + } + } +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *type, + nir_deref *dest_tail) +{ + nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect); ++ nir_ssa_def *offset; ++ 
vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); + - nir_intrinsic_op op = indirect ? nir_intrinsic_store_ssbo_indirect - : nir_intrinsic_store_ssbo; ++ nir_intrinsic_op op = nir_intrinsic_store_ssbo; + + return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, + dest->var->data.binding, dest->var->data.mode, - index, offset, indirect, type); ++ index, offset, type); +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (variable_is_external_block(dest->var)) { + assert(dest->var->data.mode == nir_var_shader_storage); + vtn_block_store(b, src, dest, dest_type, dest_tail); + } else { + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest, struct vtn_type *type) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child || src->var->interface_type) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +static bool +is_interface_type(struct vtn_type *type) +{ + return type->block || type->buffer_block || + glsl_type_is_sampler(type->type) || + glsl_type_is_image(type->type); +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = type->type; + var->name = ralloc_strdup(var, val->name); + + struct vtn_type *interface_type; + if (is_interface_type(type)) { + interface_type = type; + } else if (glsl_type_is_array(type->type) && + is_interface_type(type->array_element)) { + interface_type = type->array_element; + } else { + interface_type = NULL; + } + + if (interface_type) + var->interface_type = interface_type->type; + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (interface_type && interface_type->buffer_block) { + var->data.mode = nir_var_shader_storage; + b->shader->info.num_ssbos++; + } else { + /* UBO's and samplers */ + var->data.mode = nir_var_uniform; + var->data.read_only = true; + if (interface_type) { + if (glsl_type_is_image(interface_type->type)) { + b->shader->info.num_images++; + var->data.image.format = interface_type->image_format; + } else if (glsl_type_is_sampler(interface_type->type)) { + b->shader->info.num_textures++; + } else { + assert(glsl_type_is_struct(interface_type->type)); + b->shader->info.num_ubos++; + } + } + } + break; + case SpvStorageClassPushConstant: + assert(interface_type && interface_type->block); + var->data.mode = nir_var_uniform; + var->data.read_only = true; + var->data.descriptor_set = -1; + var->data.binding = -1; + + /* We have exactly one push constant block */ + assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type); ++ b->shader->num_uniforms = vtn_type_block_size(type) * 4; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivate: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassWorkgroup: + case SpvStorageClassCrossWorkgroup: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + var->constant_initializer = 
+ vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + val->deref = nir_deref_var_create(b, var); + val->deref_type = type; + + /* We handle decorations first because decorations might give us + * location information. We use the data.explicit_location field to + * note that the location provided is the "final" location. If + * data.explicit_location == false, this means that it's relative to + * whatever the base location is. + */ + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (!var->data.explicit_location) { + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + } + + /* Interface block variables aren't actually going to be referenced + * by the generated NIR, so we don't put them in the list + */ + if (interface_type && glsl_type_is_struct(interface_type->type)) + break; + + if (var->data.mode == nir_var_local) { + nir_function_impl_add_variable(b->impl, var); + } else { + nir_shader_add_variable(b->shader, var); + } + + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + nir_deref_var *base; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. + * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. 
+ */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_deref); + base = base_val->deref; + } + + nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + nir_deref *tail = &deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + assert(glsl_type_is_scalar(idx_val->ssa->type)); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the ress of the + * access chain, jump to the builtin, and keep building. + */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(base->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(base->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(deref->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(deref->deref.child); + } + + nir_variable *builtin = get_builtin_variable(b, + base->var->data.mode, + builtin_type, + deref_type->builtin); + deref = nir_deref_var_create(b, builtin); + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. + */ + deref->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &deref->deref; + } + } else { + tail = tail->child; + } + } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. 
+ */ + if (variable_is_external_block(base->var)) + deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = deref; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + val->deref = deref; + val->deref_type = deref_type; + } + + break; + } + + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + + vtn_variable_copy(b, src, dest, type); + break; + } + + case SpvOpLoad: { + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (src->var->interface_type && + (glsl_type_is_sampler(src->var->interface_type) || + glsl_type_is_image(src->var->interface_type))) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src, src_type); + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest, dest_type); + break; + } + + case SpvOpCopyMemorySized: + case SpvOpArrayLength: + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->sampled_image->sampler = + 
vtn_value(b, w[4], vtn_value_type_deref)->deref; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_deref); + sampled.image = NULL; + sampled.sampler = sampled_val->deref; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + default: + break; + } + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + texop = nir_texop_txf; + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; + break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txs); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & 
SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = + nir_deref_tail(&sampled.sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = + nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); + if (sampled.image) { + instr->texture = + nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); + } else { + instr->texture = NULL; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageRead: + image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + case SpvOpImageWrite: + image.deref = vtn_value(b, 
w[1], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); + intrin->src[0] = nir_src_for_ssa(image.coord); + intrin->src[1] = nir_src_for_ssa(image.sample); + + switch (opcode) { + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + glsl_get_vector_elements(type->type), NULL); + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = &intrin->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = 
create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3])->def; + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_bany2; break; + case 3: op = nir_op_bany3; break; + case 4: op = nir_op_bany4; break; + } + break; + + case SpvOpAll: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_ball2; break; + case 3: op = nir_op_ball3; break; + case 4: op = nir_op_ball4; break; + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? */ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) 
*/ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. */ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. 
*/ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(type), val->name); + instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + 
for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. 
+ */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + intrin->const_index[0] = w[1]; + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + 
vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_phi_node_create(b, type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + b->nb.cursor = nir_before_block(block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + +static unsigned +gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ + case SpvExecutionModeTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; + } +} + +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case 
SpvExecutionModeTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. */ + break; + + case SpvOpCapability: + switch ((SpvCapability)w[1]) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + /* All shaders support these */ + break; + case SpvCapabilityGeometry: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + break; + default: + assert(!"Unsupported capability"); + } + break; + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExecutionMode: + assert(b->entry_point == &b->values[w[1]]); + + SpvExecutionMode mode = w[2]; + switch(mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = w[3]; + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + b->shader->info.cs.local_size[0] = w[3]; + b->shader->info.cs.local_size[1] = w[4]; + b->shader->info.cs.local_size[2] = w[5]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing do do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = w[3]; + break; + + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeQuads: + case SpvExecutionModeIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode); + break; + + case SpvExecutionModeSpacingEqual: + case 
SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + break; /* OpenCL */ + } + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /* Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + b->func->overload = overload; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: + 
break; /* Does nothing */ + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. + */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + + block->block = nir_cursor_current_block(b->nb.cursor); + break; + } + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: + vtn_push_value(b, w[2], vtn_value_type_undef); + break; + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(!"Atomic buffers not yet implemented"); + } + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case 
SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalOr: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalAnd: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +static void +vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) +{ + struct vtn_block *block = start; + while (block != end_block) { + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + /* Reset the merge_op to prerevent infinite recursion */ + block->merge_op = SpvOpNop; + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + block = new_break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + b->block = block; + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); + assert(cur_block == block->block); + _mesa_hash_table_insert(b->block_table, cur_block, block); + + switch (branch_op) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ + return; + } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. + */ + block = branch_block; + continue; + } + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. 
+ */ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + block = merge_block; + continue; + } + + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. + */ + continue; + } + + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + + case SpvOpSwitch: + case SpvOpReturnValue: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] >= 0x10000); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + + /* XXX: We shouldn't need these defaults */ + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = 3; + b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ + } + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + nir_builder_init(&b->nb, b->impl); + b->nb.cursor = nir_after_cf_list(&b->impl->body); 
+ vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + } + + /* Because we can still have output reads in NIR, we need to lower + * outputs to temporaries before we are truely finished. + */ + nir_lower_outputs_to_temporaries(shader); + + ralloc_free(b); + + return shader; +} diff --combined src/glsl/standalone_scaffolding.cpp index 7d59c787aed,1f69d0dbd2a..84266b0cb58 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@@ -35,12 -35,6 +35,12 @@@ #include "util/ralloc.h" #include "util/strtod.h" +extern "C" void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} + void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) { @@@ -69,7 -63,7 +69,7 @@@ _mesa_reference_shader(struct gl_contex void _mesa_shader_debug(struct gl_context *, GLenum, GLuint *, - const char *, int) + const char *) { } diff --combined src/mesa/drivers/dri/i965/Makefile.sources index 595903dd572,3ab143d4a16..7a2f43b1fe8 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@@ -56,7 -56,6 +56,7 @@@ i965_compiler_FILES = brw_shader.cpp \ brw_shader.h \ brw_surface_formats.c \ + brw_surface_formats.h \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ @@@ -163,7 -162,6 +163,6 @@@ i965_FILES = brw_wm_state.c \ brw_wm_surface_state.c \ gen6_blorp.cpp \ - gen6_blorp.h \ gen6_cc.c \ gen6_clip_state.c \ gen6_depth_state.c \ @@@ -181,13 -179,15 +180,15 @@@ gen6_vs_state.c \ gen6_wm_state.c \ gen7_blorp.cpp \ - gen7_blorp.h \ gen7_cs_state.c \ - gen7_disable.c \ + gen7_ds_state.c \ gen7_gs_state.c \ + gen7_hs_state.c \ + gen7_l3_state.c \ gen7_misc_state.c \ gen7_sf_state.c \ gen7_sol_state.c \ + gen7_te_state.c \ gen7_urb.c \ gen7_viewport_state.c \ gen7_vs_state.c \ @@@ -197,7 -197,9 +198,9 @@@ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ + gen8_ds_state.c \ gen8_gs_state.c \ + gen8_hs_state.c \ gen8_misc_state.c \ gen8_multisample_state.c \ gen8_ps_state.c \ diff --combined src/mesa/drivers/dri/i965/brw_compiler.h index 7af8036b4ee,218d9c7b53a..0e25ae78a69 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@@ -94,9 -94,6 +94,9 @@@ struct brw_compiler struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; }; +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + /** * Program key structures. 
@@@ -303,6 -300,7 +303,7 @@@ struct brw_stage_prog_data unsigned curb_read_length; unsigned total_scratch; + unsigned total_shared; /** * Register where the thread expects to find input data from the URB @@@ -551,6 -549,24 +552,24 @@@ struct brw_vs_prog_data bool uses_instanceid; }; + struct brw_tcs_prog_data + { + struct brw_vue_prog_data base; + + /** Number vertices in output patch */ + int instances; + }; + + + struct brw_tes_prog_data + { + struct brw_vue_prog_data base; + + enum brw_tess_partitioning partitioning; + enum brw_tess_output_topology output_topology; + enum brw_tess_domain domain; + }; + struct brw_gs_prog_data { struct brw_vue_prog_data base; @@@ -685,10 -701,6 +704,10 @@@ brw_compile_cs(const struct brw_compile unsigned *final_assembly_size, char **error_str); +void +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, + void *buffer, uint32_t threads, uint32_t stride); + #ifdef __cplusplus } /* extern "C" */ #endif diff --combined src/mesa/drivers/dri/i965/brw_cs.h index f0b3f97dedc,9ce39fb18ac..890a0c8a807 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@@ -25,8 -25,6 +25,6 @@@ #ifndef BRW_CS_H #define BRW_CS_H - #include "brw_program.h" - #ifdef __cplusplus extern "C" { #endif @@@ -34,6 -32,10 +32,6 @@@ void brw_upload_cs_prog(struct brw_context *brw); -void -brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, - void *buffer, uint32_t threads, uint32_t stride); - #ifdef __cplusplus } #endif diff --combined src/mesa/drivers/dri/i965/brw_defines.h index 36d9f716e03,4a184cf72f3..7d8723efd5e --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@@ -41,6 -41,12 +41,12 @@@ #define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) #define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) + /** + * For use with masked MMIO registers where the upper 16 bits control which + * of the lower bits are committed to the register. + */ + #define REG_MASK(value) ((value) << 16) + #ifndef BRW_DEFINES_H #define BRW_DEFINES_H @@@ -57,7 -63,6 +63,7 @@@ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */ #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@@ -81,7 -86,6 +87,7 @@@ #define _3DPRIM_TRIFAN_NOSTIPPLE 0x16 #define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); }) +#endif /* bdw_pack.h */ /* We use this offset to be able to pass native primitive types in struct * _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET + @@@ -1722,6 -1726,19 +1728,19 @@@ enum brw_message_target #define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12 #define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13 + /* Dataport special binding table indices: */ + #define BRW_BTI_STATELESS 255 + #define GEN7_BTI_SLM 254 + /* Note that on Gen8+ BTI 255 was redefined to be IA-coherent according to the + * hardware spec, however because the DRM sets bit 4 of HDC_CHICKEN0 on BDW, + * CHV and at least some pre-production steppings of SKL due to + * WaForceEnableNonCoherent, HDC memory access may have been overridden by the + * kernel to be non-coherent (matching the behavior of the same BTI on + * pre-Gen8 hardware) and BTI 255 may actually be an alias for BTI 253. 
+ */ + #define GEN8_BTI_STATELESS_IA_COHERENT 255 + #define GEN8_BTI_STATELESS_NON_COHERENT 253 + /* dataport atomic operations. */ #define BRW_AOP_AND 1 #define BRW_AOP_OR 2 @@@ -2554,6 -2571,25 +2573,25 @@@ enum brw_wm_barycentric_interp_mode #define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */ #define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */ + /* Resource streamer gather constants */ + #define _3DSTATE_GATHER_POOL_ALLOC 0x791A /* GEN7.5+ */ + #define HSW_GATHER_POOL_ALLOC_MUST_BE_ONE (3 << 4) /* GEN7.5 only */ + + #define _3DSTATE_GATHER_CONSTANT_VS 0x7834 /* GEN7.5+ */ + #define _3DSTATE_GATHER_CONSTANT_GS 0x7835 + #define _3DSTATE_GATHER_CONSTANT_HS 0x7836 + #define _3DSTATE_GATHER_CONSTANT_DS 0x7837 + #define _3DSTATE_GATHER_CONSTANT_PS 0x7838 + #define HSW_GATHER_CONSTANT_ENABLE (1 << 11) + #define HSW_GATHER_CONSTANT_BUFFER_VALID_SHIFT 16 + #define HSW_GATHER_CONSTANT_BUFFER_VALID_MASK INTEL_MASK(31, 16) + #define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_SHIFT 12 + #define HSW_GATHER_CONSTANT_BINDING_TABLE_BLOCK_MASK INTEL_MASK(15, 12) + #define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_SHIFT 8 + #define HSW_GATHER_CONSTANT_CONST_BUFFER_OFFSET_MASK INTEL_MASK(15, 8) + #define HSW_GATHER_CONSTANT_CHANNEL_MASK_SHIFT 4 + #define HSW_GATHER_CONSTANT_CHANNEL_MASK_MASK INTEL_MASK(7, 4) + #define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */ /* DW1 */ # define SO_FUNCTION_ENABLE (1 << 31) @@@ -2848,6 -2884,8 +2886,8 @@@ /* GEN7 DW5, GEN8+ DW6 */ # define MEDIA_BARRIER_ENABLE_SHIFT 21 # define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21) + # define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16 + # define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16) # define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 # define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 diff --combined src/mesa/drivers/dri/i965/brw_device_info.c index 541c7958d5e,4bfc83186bb..db9acd64f4e --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@@ -28,6 -28,7 +28,7 @@@ static const struct brw_device_info brw_device_info_i965 = { .gen = 4, .has_negative_rhw_bug = true, + .num_slices = 1, .max_vs_threads = 16, .max_gs_threads = 2, .max_wm_threads = 8 * 4, @@@ -42,6 -43,7 +43,7 @@@ static const struct brw_device_info brw .has_compr4 = true, .has_surface_tile_offset = true, .is_g4x = true, + .num_slices = 1, .max_vs_threads = 32, .max_gs_threads = 2, .max_wm_threads = 10 * 5, @@@ -55,6 -57,7 +57,7 @@@ static const struct brw_device_info brw .has_pln = true, .has_compr4 = true, .has_surface_tile_offset = true, + .num_slices = 1, .max_vs_threads = 72, .max_gs_threads = 32, .max_wm_threads = 12 * 6, @@@ -71,6 -74,7 +74,7 @@@ static const struct brw_device_info brw .has_pln = true, .has_surface_tile_offset = true, .needs_unlit_centroid_workaround = true, + .num_slices = 1, .max_vs_threads = 24, .max_gs_threads = 21, /* conservative; 24 if rendering disabled. 
*/ .max_wm_threads = 40, @@@ -90,6 -94,7 +94,7 @@@ static const struct brw_device_info brw .has_pln = true, .has_surface_tile_offset = true, .needs_unlit_centroid_workaround = true, + .num_slices = 1, .max_vs_threads = 60, .max_gs_threads = 60, .max_wm_threads = 80, @@@ -112,6 -117,7 +117,7 @@@ static const struct brw_device_info brw_device_info_ivb_gt1 = { GEN7_FEATURES, .is_ivybridge = true, .gt = 1, .needs_unlit_centroid_workaround = true, + .num_slices = 1, .max_vs_threads = 36, .max_hs_threads = 36, .max_ds_threads = 36, @@@ -123,6 -129,7 +129,7 @@@ .min_vs_entries = 32, .max_vs_entries = 512, .max_hs_entries = 32, + .min_ds_entries = 10, .max_ds_entries = 288, .max_gs_entries = 192, }, @@@ -131,6 -138,7 +138,7 @@@ static const struct brw_device_info brw_device_info_ivb_gt2 = { GEN7_FEATURES, .is_ivybridge = true, .gt = 2, .needs_unlit_centroid_workaround = true, + .num_slices = 1, .max_vs_threads = 128, .max_hs_threads = 128, .max_ds_threads = 128, @@@ -142,6 -150,7 +150,7 @@@ .min_vs_entries = 32, .max_vs_entries = 704, .max_hs_entries = 64, + .min_ds_entries = 10, .max_ds_entries = 448, .max_gs_entries = 320, }, @@@ -150,6 -159,7 +159,7 @@@ static const struct brw_device_info brw_device_info_byt = { GEN7_FEATURES, .is_baytrail = true, .gt = 1, .needs_unlit_centroid_workaround = true, + .num_slices = 1, .has_llc = false, .max_vs_threads = 36, .max_hs_threads = 36, @@@ -162,6 -172,7 +172,7 @@@ .min_vs_entries = 32, .max_vs_entries = 512, .max_hs_entries = 32, + .min_ds_entries = 10, .max_ds_entries = 288, .max_gs_entries = 192, }, @@@ -175,6 -186,7 +186,7 @@@ static const struct brw_device_info brw_device_info_hsw_gt1 = { HSW_FEATURES, .gt = 1, + .num_slices = 1, .max_vs_threads = 70, .max_hs_threads = 70, .max_ds_threads = 70, @@@ -186,6 -198,7 +198,7 @@@ .min_vs_entries = 32, .max_vs_entries = 640, .max_hs_entries = 64, + .min_ds_entries = 10, .max_ds_entries = 384, .max_gs_entries = 256, }, @@@ -193,6 -206,7 +206,7 @@@ static const struct brw_device_info brw_device_info_hsw_gt2 = { HSW_FEATURES, .gt = 2, + .num_slices = 1, .max_vs_threads = 280, .max_hs_threads = 256, .max_ds_threads = 280, @@@ -204,6 -218,7 +218,7 @@@ .min_vs_entries = 64, .max_vs_entries = 1664, .max_hs_entries = 128, + .min_ds_entries = 10, .max_ds_entries = 960, .max_gs_entries = 640, }, @@@ -211,6 -226,7 +226,7 @@@ static const struct brw_device_info brw_device_info_hsw_gt3 = { HSW_FEATURES, .gt = 3, + .num_slices = 2, .max_vs_threads = 280, .max_hs_threads = 256, .max_ds_threads = 280, @@@ -222,6 -238,7 +238,7 @@@ .min_vs_entries = 64, .max_vs_entries = 1664, .max_hs_entries = 128, + .min_ds_entries = 10, .max_ds_entries = 960, .max_gs_entries = 640, }, @@@ -243,12 -260,14 +260,14 @@@ static const struct brw_device_info brw_device_info_bdw_gt1 = { GEN8_FEATURES, .gt = 1, + .num_slices = 1, .max_cs_threads = 42, .urb = { .size = 192, .min_vs_entries = 64, .max_vs_entries = 2560, .max_hs_entries = 504, + .min_ds_entries = 34, .max_ds_entries = 1536, .max_gs_entries = 960, } @@@ -256,12 -275,14 +275,14 @@@ static const struct brw_device_info brw_device_info_bdw_gt2 = { GEN8_FEATURES, .gt = 2, + .num_slices = 1, .max_cs_threads = 56, .urb = { .size = 384, .min_vs_entries = 64, .max_vs_entries = 2560, .max_hs_entries = 504, + .min_ds_entries = 34, .max_ds_entries = 1536, .max_gs_entries = 960, } @@@ -269,12 -290,14 +290,14 @@@ static const struct brw_device_info brw_device_info_bdw_gt3 = { GEN8_FEATURES, .gt = 3, + .num_slices = 2, .max_cs_threads = 56, .urb = { .size = 384, .min_vs_entries = 64, .max_vs_entries 
= 2560, .max_hs_entries = 504, + .min_ds_entries = 34, .max_ds_entries = 1536, .max_gs_entries = 960, } @@@ -283,6 -306,7 +306,7 @@@ static const struct brw_device_info brw_device_info_chv = { GEN8_FEATURES, .is_cherryview = 1, .gt = 1, .has_llc = false, + .num_slices = 1, .max_vs_threads = 80, .max_hs_threads = 80, .max_ds_threads = 80, @@@ -294,6 -318,7 +318,7 @@@ .min_vs_entries = 34, .max_vs_entries = 640, .max_hs_entries = 80, + .min_ds_entries = 34, .max_ds_entries = 384, .max_gs_entries = 256, } @@@ -318,25 -343,30 +343,30 @@@ .min_vs_entries = 64, \ .max_vs_entries = 1856, \ .max_hs_entries = 672, \ + .min_ds_entries = 34, \ .max_ds_entries = 1120, \ .max_gs_entries = 640, \ } static const struct brw_device_info brw_device_info_skl_gt1 = { GEN9_FEATURES, .gt = 1, + .num_slices = 1, .urb.size = 192, }; static const struct brw_device_info brw_device_info_skl_gt2 = { GEN9_FEATURES, .gt = 2, + .num_slices = 1, }; static const struct brw_device_info brw_device_info_skl_gt3 = { GEN9_FEATURES, .gt = 3, + .num_slices = 2, }; static const struct brw_device_info brw_device_info_skl_gt4 = { GEN9_FEATURES, .gt = 4, + .num_slices = 3, /* From the "L3 Allocation and Programming" documentation: * * "URB is limited to 1008KB due to programming restrictions. This is not a @@@ -355,6 -385,7 +385,7 @@@ static const struct brw_device_info brw .has_llc = false, /* XXX: These are preliminary thread counts and URB sizes. */ + .num_slices = 1, .max_vs_threads = 56, .max_hs_threads = 56, .max_ds_threads = 56, @@@ -387,15 -418,3 +418,15 @@@ brw_get_device_info(int devid return devinfo; } + +const char * +brw_get_device_name(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) case id: return name; +#include "pci_ids/i965_pci_ids.h" + default: + return NULL; + } +} diff --combined src/mesa/drivers/dri/i965/brw_device_info.h index 6f4a250e874,73d682083a1..48e0dee9084 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@@ -68,6 -68,11 +68,11 @@@ struct brw_device_inf * GPU Limits: * @{ */ + /** + * Total number of slices present on the device whether or not they've been + * fused off. + */ + unsigned num_slices; unsigned max_vs_threads; unsigned max_hs_threads; unsigned max_ds_threads; @@@ -76,10 -81,16 +81,16 @@@ unsigned max_cs_threads; struct { + /** + * Hardware default URB size. The units this is expressed in are + * somewhat inconsistent: 512b units on Gen4-5, KB on Gen6-7, and KB + * times the slice count on Gen8+. 
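(A rough standalone sketch of the unit convention just described, assuming only the rules stated in that comment; the helper name is hypothetical and not part of i965.)

   #include <assert.h>

   /* Hypothetical helper: convert the raw urb.size field to bytes using the
    * rules above: 512B units on Gen4-5, KB on Gen6-7, KB per slice on Gen8+. */
   static unsigned
   urb_size_bytes(unsigned gen, unsigned urb_size, unsigned num_slices)
   {
      if (gen <= 5)
         return urb_size * 512;
      else if (gen <= 7)
         return urb_size * 1024;
      else
         return urb_size * 1024 * num_slices;
   }

   int main(void)
   {
      /* made-up example values: a Gen9 part with size = 384 and 2 slices */
      assert(urb_size_bytes(9, 384, 2) == 384 * 1024 * 2);
      return 0;
   }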
+ */ unsigned size; unsigned min_vs_entries; unsigned max_vs_entries; unsigned max_hs_entries; + unsigned min_ds_entries; unsigned max_ds_entries; unsigned max_gs_entries; } urb; @@@ -87,4 -98,3 +98,4 @@@ }; const struct brw_device_info *brw_get_device_info(int devid); +const char *brw_get_device_name(int devid); diff --combined src/mesa/drivers/dri/i965/brw_draw.c index a2eaf8fb1e0,8398471d221..bc013963eee --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@@ -25,7 -25,6 +25,6 @@@ #include - #include "main/glheader.h" #include "main/context.h" #include "main/condrender.h" #include "main/samplerobj.h" @@@ -55,6 -54,23 +54,6 @@@ #define FILE_DEBUG_FLAG DEBUG_PRIMS -static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - [GL_POINTS] =_3DPRIM_POINTLIST, - [GL_LINES] = _3DPRIM_LINELIST, - [GL_LINE_LOOP] = _3DPRIM_LINELOOP, - [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, - [GL_TRIANGLES] = _3DPRIM_TRILIST, - [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [GL_QUADS] = _3DPRIM_QUADLIST, - [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [GL_POLYGON] = _3DPRIM_POLYGON, - [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -}; - static const GLenum reduced_prim[GL_POLYGON+1] = { [GL_POINTS] = GL_POINTS, @@@ -69,6 -85,18 +68,6 @@@ [GL_POLYGON] = GL_TRIANGLES }; -uint32_t -get_hw_prim_for_gl_prim(int mode) -{ - if (mode >= BRW_PRIM_OFFSET) - return mode - BRW_PRIM_OFFSET; - else { - assert(mode < ARRAY_SIZE(prim_to_hw_prim)); - return prim_to_hw_prim[mode]; - } -} - - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the * programs be immune to the active primitive (ie. cope with all @@@ -116,14 -144,17 +115,17 @@@ gen6_set_prim(struct brw_context *brw, DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); - if (prim->mode == GL_PATCHES) + if (prim->mode == GL_PATCHES) { hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices); - else + } else { hw_prim = get_hw_prim_for_gl_prim(prim->mode); + } if (hw_prim != brw->primitive) { brw->primitive = hw_prim; brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; + if (prim->mode == GL_PATCHES) + brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE; } } @@@ -397,6 -428,10 +399,10 @@@ brw_try_draw_prims(struct gl_context *c _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed); brw->gs.base.sampler_count = ctx->GeometryProgram._Current ? _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0; + brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ? + _mesa_fls(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0; + brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ? + _mesa_fls(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0; brw->vs.base.sampler_count = _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed); diff --combined src/mesa/drivers/dri/i965/brw_fs.cpp index 419168966de,5e8acec2759..8dc260c51b9 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@@ -28,29 -28,17 +28,17 @@@ * from the LIR. 
*/ - #include - - #include "util/hash_table.h" #include "main/macros.h" - #include "main/shaderobj.h" - #include "main/fbobject.h" - #include "program/prog_parameter.h" - #include "program/prog_print.h" - #include "util/register_allocate.h" - #include "program/hash_table.h" #include "brw_context.h" #include "brw_eu.h" - #include "brw_wm.h" #include "brw_fs.h" #include "brw_cs.h" #include "brw_nir.h" #include "brw_vec4_gs_visitor.h" #include "brw_cfg.h" + #include "brw_program.h" #include "brw_dead_control_flow.h" - #include "main/uniforms.h" - #include "brw_fs_live_variables.h" #include "glsl/nir/glsl_types.h" - #include "program/sampler.h" using namespace brw; @@@ -187,7 -175,7 +175,7 @@@ fs_visitor::VARYING_PULL_CONSTANT_LOAD( * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@@ -219,7 -207,7 +207,7 @@@ inst->mlen = 1 + bld.dispatch_width() / 8; } - bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); + bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); } /** @@@ -300,6 -288,71 +288,71 @@@ fs_inst::is_send_from_grf() cons } } + /** + * Returns true if this instruction's sources and destinations cannot + * safely be the same register. + * + * In most cases, a register can be written over safely by the same + * instruction that is its last use. For a single instruction, the + * sources are dereferenced before writing of the destination starts + * (naturally). + * + * However, there are a few cases where this can be problematic: + * + * - Virtual opcodes that translate to multiple instructions in the + * code generator: if src == dst and one instruction writes the + * destination before a later instruction reads the source, then + * src will have been clobbered. + * + * - SIMD16 compressed instructions with certain regioning (see below). + * + * The register allocator uses this information to set up conflicts between + * GRF sources and the destination. + */ + bool + fs_inst::has_source_and_destination_hazard() const + { + switch (opcode) { + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + /* Multiple partial writes to the destination */ + return true; + default: + /* The SIMD16 compressed instruction + * + * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F + * + * is actually decoded in hardware as: + * + * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F + * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F + * + * Which is safe. However, if we have uniform accesses + * happening, we get into trouble: + * + * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F + * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F + * + * Now our destination for the first instruction overwrote the + * second instruction's src0, and we get garbage for those 8 + * pixels. There's a similar issue for the pre-gen6 + * pixel_x/pixel_y, which are registers of 16-bit values and thus + * would get stomped by the first decode as well. 
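(A rough, standalone C model of the clobbering scenario described above, purely illustrative and not driver code: the SIMD16 add executes as two SIMD8 halves, and with a stride-0 uniform source that aliases the destination, the second half reads a value the first half already overwrote.)

   #include <stdio.h>

   int main(void)
   {
      /* pretend g4..g5 hold 16 floats and are also the destination */
      float grf[16], src1[16];
      for (int i = 0; i < 16; i++) { grf[i] = (float)i; src1[i] = 100.0f; }

      /* first SIMD8 half: sources are read before the destination is written */
      float u = grf[0];                    /* the <0,1,0> uniform source */
      for (int i = 0; i < 8; i++)
         grf[i] = u + src1[i];

      /* second SIMD8 half re-reads grf[0], which was just clobbered */
      u = grf[0];
      for (int i = 8; i < 16; i++)
         grf[i] = u + src1[i];

      /* every lane should be 0.0 + 100.0 = 100.0, but the upper half picked
       * up the already-written 100.0 and produced 200.0 */
      printf("lane 8 = %f (expected 100.0)\n", grf[8]);
      return 0;
   }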
+ */ + if (exec_size == 16) { + for (int i = 0; i < sources; i++) { + if (src[i].file == VGRF && (src[i].stride == 0 || + src[i].type == BRW_REGISTER_TYPE_UW || + src[i].type == BRW_REGISTER_TYPE_W || + src[i].type == BRW_REGISTER_TYPE_UB || + src[i].type == BRW_REGISTER_TYPE_B)) { + return true; + } + } + } + return false; + } + } + bool fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const { @@@ -375,7 -428,7 +428,7 @@@ fs_reg::fs_reg( this->file = BAD_FILE; } - fs_reg::fs_reg(struct brw_reg reg) : + fs_reg::fs_reg(struct ::brw_reg reg) : backend_reg(reg) { this->reg_offset = 0; @@@ -393,8 -446,7 +446,7 @@@ bool fs_reg::equals(const fs_reg &r) const { - return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 && - reg_offset == r.reg_offset && + return (this->backend_reg::equals(r) && subreg_offset == r.subreg_offset && !reladdr && !r.reladdr && stride == r.stride); @@@ -458,7 -510,6 +510,7 @@@ type_size_scalar(const struct glsl_typ case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@@ -688,15 -739,15 +740,15 @@@ fs_inst::components_read(unsigned i) co case SHADER_OPCODE_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - assert(src[8].file == IMM && src[9].file == IMM); + assert(src[9].file == IMM && src[10].file == IMM); /* Texture coordinates. */ if (i == 0) - return src[8].ud; + return src[9].ud; /* Texture derivatives. */ else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL) - return src[9].ud; + return src[10].ud; /* Texture offset. */ - else if (i == 7) + else if (i == 8) return 2; /* MCS */ else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) @@@ -1057,33 -1108,19 +1109,19 @@@ fs_visitor::emit_linterp(const fs_reg & } void - fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, + fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, const glsl_type *type, glsl_interp_qualifier interpolation_mode, - int location, bool mod_centroid, + int *location, bool mod_centroid, bool mod_sample) { - attr.type = brw_type_for_base_type(type->get_scalar_type()); - assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - unsigned int array_elements; - - if (type->is_array()) { - array_elements = type->arrays_of_arrays_size(); - if (array_elements == 0) { - fail("dereferenced array '%s' has length 0\n", name); - } - type = type->without_array(); - } else { - array_elements = 1; - } - if (interpolation_mode == INTERP_QUALIFIER_NONE) { bool is_gl_Color = - location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; + *location == VARYING_SLOT_COL0 || *location == VARYING_SLOT_COL1; if (key->flat_shade && is_gl_Color) { interpolation_mode = INTERP_QUALIFIER_FLAT; } else { @@@ -1091,71 -1128,86 +1129,86 @@@ } } - for (unsigned int i = 0; i < array_elements; i++) { - for (unsigned int j = 0; j < type->matrix_columns; j++) { - if (prog_data->urb_setup[location] == -1) { - /* If there's no incoming setup data for this slot, don't - * emit interpolation for it. - */ - attr = offset(attr, bld, type->vector_elements); - location++; - continue; - } + if (type->is_array() || type->is_matrix()) { + const glsl_type *elem_type = glsl_get_array_element(type); + const unsigned length = glsl_get_length(type); - if (interpolation_mode == INTERP_QUALIFIER_FLAT) { - /* Constant interpolation (flat shading) case. 
The SF has - * handed us defined values in only the constant offset - * field of the setup reg. - */ - for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - interp = suboffset(interp, 3); - interp.type = attr.type; - bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); - attr = offset(attr, bld, 1); - } - } else { - /* Smooth/noperspective interpolation case. */ - for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { - /* Get the pixel/sample mask into f0 so that we know - * which pixels are lit. Then, for each channel that is - * unlit, replace the centroid data with non-centroid - * data. - */ - bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); - - fs_inst *inst; - inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, - false, false); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - if (devinfo->has_pln) - inst->no_dd_clear = true; - - inst = emit_linterp(attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = false; - if (devinfo->has_pln) - inst->no_dd_check = true; + for (unsigned i = 0; i < length; i++) { + emit_general_interpolation(attr, name, elem_type, interpolation_mode, + location, mod_centroid, mod_sample); + } + } else if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + emit_general_interpolation(attr, name, field_type, interpolation_mode, + location, mod_centroid, mod_sample); + } + } else { + assert(type->is_scalar() || type->is_vector()); - } else { - emit_linterp(attr, fs_reg(interp), interpolation_mode, - mod_centroid && !key->persample_shading, - mod_sample || key->persample_shading); - } - if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { - bld.MUL(attr, attr, this->pixel_w); - } - attr = offset(attr, bld, 1); - } + if (prog_data->urb_setup[*location] == -1) { + /* If there's no incoming setup data for this slot, don't + * emit interpolation for it. + */ + *attr = offset(*attr, bld, type->vector_elements); + (*location)++; + return; + } - } - location++; + attr->type = brw_type_for_base_type(type->get_scalar_type()); + + if (interpolation_mode == INTERP_QUALIFIER_FLAT) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ + for (unsigned int i = 0; i < type->vector_elements; i++) { + struct brw_reg interp = interp_reg(*location, i); + interp = suboffset(interp, 3); + interp.type = attr->type; + bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp)); + *attr = offset(*attr, bld, 1); + } + } else { + /* Smooth/noperspective interpolation case. */ + for (unsigned int i = 0; i < type->vector_elements; i++) { + struct brw_reg interp = interp_reg(*location, i); + if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { + /* Get the pixel/sample mask into f0 so that we know + * which pixels are lit. Then, for each channel that is + * unlit, replace the centroid data with non-centroid + * data. 
+ */ + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + + fs_inst *inst; + inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode, + false, false); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + if (devinfo->has_pln) + inst->no_dd_clear = true; + + inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode, + mod_centroid && !key->persample_shading, + mod_sample || key->persample_shading); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = false; + if (devinfo->has_pln) + inst->no_dd_check = true; + + } else { + emit_linterp(*attr, fs_reg(interp), interpolation_mode, + mod_centroid && !key->persample_shading, + mod_sample || key->persample_shading); + } + if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { + bld.MUL(*attr, *attr, this->pixel_w); + } + *attr = offset(*attr, bld, 1); + } } + (*location)++; } } @@@ -2003,7 -2055,7 +2056,7 @@@ fs_visitor::demote_pull_constants( VARYING_PULL_CONSTANT_LOAD(ibld, dst, brw_imm_ud(index), *inst->src[i].reladdr, - pull_index); + pull_index * 4); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { @@@ -2039,7 -2091,8 +2092,8 @@@ fs_visitor::opt_algebraic( if (inst->dst.type != inst->src[0].type) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) { + if (brw_saturate_immediate(inst->dst.type, + &inst->src[0].as_brw_reg())) { inst->saturate = false; progress = true; } @@@ -2727,23 -2780,10 +2781,23 @@@ fs_visitor::emit_repclear_shader( brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; int base_mrf = 1; int color_mrf = base_mrf + 2; + fs_inst *mov; - fs_inst *mov = bld.exec_all().group(4, 0) - .MOV(brw_message_reg(color_mrf), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + if (uniforms == 1) { + mov = bld.exec_all().group(4, 0) + .MOV(brw_message_reg(color_mrf), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + } else { + struct brw_reg reg = + brw_reg(BRW_GENERAL_REGISTER_FILE, + 2, 3, 0, 0, BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + + mov = bld.exec_all().group(4, 0) + .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)); + } fs_inst *write; if (key->nr_color_regions == 1) { @@@ -2772,10 -2812,8 +2826,10 @@@ assign_curb_setup(); /* Now that we have the uniform assigned, go ahead and force it to a vec4. */ - assert(mov->src[0].file == FIXED_GRF); - mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); + if (uniforms == 1) { + assert(mov->src[0].file == FIXED_GRF); + mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); + } } /** @@@ -3054,13 -3092,11 +3108,11 @@@ fs_visitor::lower_uniform_pull_constant continue; if (devinfo->gen >= 7) { - /* The offset arg before was a vec4-aligned byte offset. We need to - * turn it into a dword offset. - */ + /* The offset arg is a vec4-aligned immediate byte offset. 
*/ fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.ud /= 4; + assert(const_offset_reg.ud % 16 == 0); fs_reg payload, offset; if (devinfo->gen >= 9) { @@@ -3604,7 -3640,6 +3656,7 @@@ lower_sampler_logical_send_gen4(const f const fs_reg &coordinate, const fs_reg &shadow_c, const fs_reg &lod, const fs_reg &lod2, + const fs_reg &surface, const fs_reg &sampler, unsigned coord_components, unsigned grad_components) @@@ -3697,9 -3732,8 +3749,9 @@@ inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = msg_begin.nr; inst->mlen = msg_end.nr - msg_begin.nr; inst->header_size = 1; @@@ -3711,7 -3745,6 +3763,7 @@@ lower_sampler_logical_send_gen5(const f const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, + const fs_reg &surface, const fs_reg &sampler, const fs_reg &offset_value, unsigned coord_components, @@@ -3794,9 -3827,8 +3846,9 @@@ inst->opcode = op; inst->src[0] = reg_undef; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = message.nr; inst->mlen = msg_end.nr - message.nr; inst->header_size = header_size; @@@ -3820,9 -3852,7 +3872,9 @@@ lower_sampler_logical_send_gen7(const f const fs_reg &shadow_c, fs_reg lod, fs_reg lod2, const fs_reg &sample_index, - const fs_reg &mcs, const fs_reg &sampler, + const fs_reg &mcs, + const fs_reg &surface, + const fs_reg &sampler, fs_reg offset_value, unsigned coord_components, unsigned grad_components) @@@ -4025,9 -4055,8 +4077,9 @@@ /* Generate the SEND. */ inst->opcode = op; inst->src[0] = src_payload; - inst->src[1] = sampler; - inst->resize_sources(2); + inst->src[1] = surface; + inst->src[2] = sampler; + inst->resize_sources(3); inst->base_mrf = -1; inst->mlen = mlen; inst->header_size = header_size; @@@ -4046,27 -4075,25 +4098,27 @@@ lower_sampler_logical_send(const fs_bui const fs_reg &lod2 = inst->src[3]; const fs_reg &sample_index = inst->src[4]; const fs_reg &mcs = inst->src[5]; - const fs_reg &sampler = inst->src[6]; - const fs_reg &offset_value = inst->src[7]; - assert(inst->src[8].file == IMM && inst->src[9].file == IMM); - const unsigned coord_components = inst->src[8].ud; - const unsigned grad_components = inst->src[9].ud; + const fs_reg &surface = inst->src[6]; + const fs_reg &sampler = inst->src[7]; + const fs_reg &offset_value = inst->src[8]; + assert(inst->src[9].file == IMM && inst->src[10].file == IMM); + const unsigned coord_components = inst->src[9].ud; + const unsigned grad_components = inst->src[10].ud; if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - mcs, sampler, offset_value, + mcs, surface, sampler, offset_value, coord_components, grad_components); } else if (devinfo->gen >= 5) { lower_sampler_logical_send_gen5(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - sampler, offset_value, + surface, sampler, offset_value, coord_components, grad_components); } else { lower_sampler_logical_send_gen4(bld, inst, op, coordinate, - shadow_c, lod, lod2, sampler, + shadow_c, lod, lod2, + surface, sampler, coord_components, grad_components); } } @@@ -5542,42 -5569,6 +5594,6 @@@ brw_compile_fs(const struct brw_compile return g.get_assembly(final_assembly_size); } - void - brw_cs_fill_local_id_payload(const 
struct brw_cs_prog_data *prog_data, - void *buffer, uint32_t threads, uint32_t stride) - { - if (prog_data->local_invocation_id_regs == 0) - return; - - /* 'stride' should be an integer number of registers, that is, a multiple - * of 32 bytes. - */ - assert(stride % 32 == 0); - - unsigned x = 0, y = 0, z = 0; - for (unsigned t = 0; t < threads; t++) { - uint32_t *param = (uint32_t *) buffer + stride * t / 4; - - for (unsigned i = 0; i < prog_data->simd_size; i++) { - param[0 * prog_data->simd_size + i] = x; - param[1 * prog_data->simd_size + i] = y; - param[2 * prog_data->simd_size + i] = z; - - x++; - if (x == prog_data->local_size[0]) { - x = 0; - y++; - if (y == prog_data->local_size[1]) { - y = 0; - z++; - if (z == prog_data->local_size[2]) - z = 0; - } - } - } - } - } - fs_reg * fs_visitor::emit_cs_local_invocation_id_setup() { diff --combined src/mesa/drivers/dri/i965/brw_fs.h index 658608f9951,f2e384129cb..dff86a97a14 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@@ -30,28 -30,8 +30,8 @@@ #include "brw_shader.h" #include "brw_ir_fs.h" #include "brw_fs_builder.h" - - extern "C" { - - #include - - #include "main/macros.h" - #include "main/shaderobj.h" - #include "main/uniforms.h" - #include "program/prog_parameter.h" - #include "program/prog_print.h" - #include "program/prog_optimize.h" - #include "util/register_allocate.h" - #include "program/hash_table.h" - #include "brw_context.h" - #include "brw_eu.h" - #include "brw_wm.h" - #include "intel_asm_annotation.h" - } - #include "glsl/nir/glsl_types.h" #include "glsl/ir.h" #include "glsl/nir/nir.h" - #include "program/sampler.h" struct bblock_t; namespace { @@@ -205,10 -185,10 +185,10 @@@ public fs_reg *emit_frontfacing_interpolation(); fs_reg *emit_samplepos_setup(); fs_reg *emit_sampleid_setup(); - void emit_general_interpolation(fs_reg attr, const char *name, + void emit_general_interpolation(fs_reg *attr, const char *name, const glsl_type *type, glsl_interp_qualifier interpolation_mode, - int location, bool mod_centroid, + int *location, bool mod_centroid, bool mod_sample); fs_reg *emit_vs_system_value(int location); void emit_interpolation_setup_gen4(); @@@ -224,8 -204,6 +204,8 @@@ fs_reg mcs, int gather_component, bool is_cube_array, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, @@@ -245,6 -223,8 +225,8 @@@ void emit_nir_code(); void nir_setup_inputs(); + void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type, + unsigned *location); void nir_setup_outputs(); void nir_setup_uniforms(); void nir_emit_system_values(); @@@ -271,6 -251,8 +253,8 @@@ nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); + void nir_emit_shared_atomic(const brw::fs_builder &bld, + int op, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, @@@ -298,7 -280,7 +282,7 @@@ unsigned stream_id); void emit_gs_thread_end(); void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - const fs_reg &indirect_offset, unsigned imm_offset, + unsigned base_offset, const nir_src &offset_src, unsigned num_components); void emit_cs_terminate(); fs_reg *emit_cs_local_invocation_id_setup(); @@@ -458,7 -440,6 +442,7 @@@ private void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct 
brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, struct brw_reg src, diff --combined src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8528f391941,c25da07c4ba..68c6443306e --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@@ -27,11 -27,10 +27,10 @@@ * native instructions. */ - #include "main/macros.h" - #include "brw_context.h" #include "brw_eu.h" #include "brw_fs.h" #include "brw_cfg.h" + #include "brw_program.h" static enum brw_reg_file brw_file_from_reg(fs_reg *reg) @@@ -92,7 -91,7 +91,7 @@@ brw_reg_from_fs_reg(fs_inst *inst, fs_r case ARF: case FIXED_GRF: case IMM: - brw_reg = *static_cast(reg); + brw_reg = reg->as_brw_reg(); break; case BAD_FILE: /* Probably unused. */ @@@ -679,7 -678,6 +678,7 @@@ fs_generator::generate_get_buffer_size( void fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { int msg_type = -1; @@@ -927,16 -925,14 +926,16 @@@ ? prog_data->binding_table.gather_texture_start : prog_data->binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, rlen, @@@ -945,24 -941,19 +944,24 @@@ simd_mode, return_format); - brw_mark_surface_used(prog_data, sampler + base_binding_table_index); + brw_mark_surface_used(prog_data, surface + base_binding_table_index); } else { /* Non-const sampler index */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@@ -2071,7 -2062,7 +2070,7 @@@ fs_generator::generate_code(const cfg_ case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(inst, dst, src[0], src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: diff --combined src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6b0c4a5b36e,db38f619272..fdf2f5be2a6 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@@ -22,16 -22,11 +22,11 @@@ */ #include "glsl/ir.h" - #include "glsl/ir_optimization.h" - #include "glsl/nir/glsl_to_nir.h" #include "main/shaderimage.h" - #include "program/prog_to_nir.h" #include "brw_fs.h" #include "brw_fs_surface_builder.h" - #include "brw_vec4_gs_visitor.h" #include "brw_nir.h" - #include "brw_fs_surface_builder.h" - #include 
"brw_vec4_gs_visitor.h" + #include "brw_program.h" using namespace brw; using namespace brw::surface_access; @@@ -81,14 -76,41 +76,41 @@@ fs_visitor::nir_setup_inputs( reg.type = BRW_REGISTER_TYPE_D; bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); } else { - emit_general_interpolation(input, var->name, var->type, + int location = var->data.location; + emit_general_interpolation(&input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, - var->data.location, var->data.centroid, + &location, var->data.centroid, var->data.sample); } } } + void + fs_visitor::nir_setup_single_output_varying(fs_reg *reg, + const glsl_type *type, + unsigned *location) + { + if (type->is_array() || type->is_matrix()) { + const struct glsl_type *elem_type = glsl_get_array_element(type); + const unsigned length = glsl_get_length(type); + + for (unsigned i = 0; i < length; i++) { + nir_setup_single_output_varying(reg, elem_type, location); + } + } else if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + const struct glsl_type *field_type = type->fields.structure[i].type; + nir_setup_single_output_varying(reg, field_type, location); + } + } else { + assert(type->is_scalar() || type->is_vector()); + this->outputs[*location] = *reg; + this->output_components[*location] = type->vector_elements; + *reg = offset(*reg, bld, 4); + (*location)++; + } + } + void fs_visitor::nir_setup_outputs() { @@@ -99,17 -121,13 +121,13 @@@ nir_foreach_variable(var, &nir->outputs) { fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); - int vector_elements = var->type->without_array()->vector_elements; - switch (stage) { case MESA_SHADER_VERTEX: - case MESA_SHADER_GEOMETRY: - for (int i = 0; i < type_size_vec4(var->type); i++) { - int output = var->data.location + i; - this->outputs[output] = offset(reg, bld, 4 * i); - this->output_components[output] = vector_elements; - } + case MESA_SHADER_GEOMETRY: { + unsigned location = var->data.location; + nir_setup_single_output_varying(®, var->type, &location); break; + } case MESA_SHADER_FRAGMENT: if (var->data.index > 0) { assert(var->data.location == FRAG_RESULT_DATA0); @@@ -129,6 -147,8 +147,8 @@@ } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { this->sample_mask = reg; } else { + int vector_elements = var->type->without_array()->vector_elements; + /* gl_FragData or a user-defined FS output */ assert(var->data.location >= FRAG_RESULT_DATA0 && var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS); @@@ -153,7 -173,7 +173,7 @@@ fs_visitor::nir_setup_uniforms( if (dispatch_width != 8) return; - uniforms = nir->num_uniforms; + uniforms = nir->num_uniforms / 4; nir_foreach_variable(var, &nir->uniforms) { /* UBO's and atomics don't take up space in the uniform file */ @@@ -161,7 -181,7 +181,7 @@@ continue; if (type_size_scalar(var->type) > 0) - param_size[var->data.driver_location] = type_size_scalar(var->type); + param_size[var->data.driver_location / 4] = type_size_scalar(var->type); } } @@@ -1076,28 -1096,6 +1096,6 @@@ fs_visitor::nir_emit_undef(const fs_bui instr->def.num_components); } - static fs_reg - fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, - unsigned base_offset, nir_src *indirect) - { - fs_reg reg; - - assert(!nir_reg->is_global); - - reg = v->nir_locals[nir_reg->index]; - - reg = offset(reg, v->bld, base_offset * nir_reg->num_components); - if (indirect) { - int multiplier = nir_reg->num_components * (v->dispatch_width / 8); - - reg.reladdr = new(v->mem_ctx) 
fs_reg(v->vgrf(glsl_type::int_type)); - v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect), - brw_imm_d(multiplier)); - } - - return reg; - } - fs_reg fs_visitor::get_nir_src(nir_src src) { @@@ -1105,8 -1103,10 +1103,10 @@@ if (src.is_ssa) { reg = nir_ssa_values[src.ssa->index]; } else { - reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, - src.reg.indirect); + /* We don't handle indirects on locals */ + assert(src.reg.indirect == NULL); + reg = offset(nir_locals[src.reg.reg->index], bld, + src.reg.base_offset * src.reg.reg->num_components); } /* to avoid floating-point denorm flushing problems, set the type by @@@ -1123,16 -1123,18 +1123,18 @@@ fs_visitor::get_nir_dest(nir_dest dest nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F, dest.ssa.num_components); return nir_ssa_values[dest.ssa.index]; + } else { + /* We don't handle indirects on locals */ + assert(dest.reg.indirect == NULL); + return offset(nir_locals[dest.reg.reg->index], bld, + dest.reg.base_offset * dest.reg.reg->num_components); } - - return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, - dest.reg.indirect); } fs_reg fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { - fs_reg image(UNIFORM, deref->var->data.driver_location, + fs_reg image(UNIFORM, deref->var->data.driver_location / 4, BRW_REGISTER_TYPE_UD); for (const nir_deref *tail = &deref->deref; tail->child; @@@ -1163,7 -1165,7 +1165,7 @@@ bld.MOV(tmp, get_nir_src(deref_array->indirect)); } - bld.MUL(tmp, tmp, brw_imm_ud(element_size)); + bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4)); if (image.reladdr) bld.ADD(*image.reladdr, *image.reladdr, tmp); else @@@ -1601,28 -1603,30 +1603,30 @@@ fs_visitor::emit_gs_vertex(const nir_sr void fs_visitor::emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - const fs_reg &indirect_offset, - unsigned imm_offset, + unsigned base_offset, + const nir_src &offset_src, unsigned num_components) { struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data; + nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); + nir_const_value *offset_const = nir_src_as_const_value(offset_src); + const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; + /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y], * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only * gl_PointSize is available as a GS input, however, so it must be that. */ - const bool is_point_size = - indirect_offset.file == BAD_FILE && imm_offset == 0; + const bool is_point_size = (base_offset == 0); - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); - const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; - - if (indirect_offset.file == BAD_FILE && vertex_const != NULL && - 4 * imm_offset < push_reg_count) { - imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count; + if (offset_const != NULL && vertex_const != NULL && + 4 * (base_offset + offset_const->u[0]) < push_reg_count) { + int imm_offset = (base_offset + offset_const->u[0]) * 4 + + vertex_const->u[0] * push_reg_count; /* This input was pushed into registers. */ if (is_point_size) { /* gl_PointSize comes in .w */ + assert(imm_offset == 0); bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type)); } else { for (unsigned i = 0; i < num_components; i++) { @@@ -1681,21 -1685,21 +1685,21 @@@ } fs_inst *inst; - if (indirect_offset.file == BAD_FILE) { + if (offset_const) { /* Constant indexing - use global offset. 
*/ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); - inst->offset = imm_offset; + inst->offset = base_offset + offset_const->u[0]; inst->base_mrf = -1; inst->mlen = 1; inst->regs_written = num_components; } else { /* Indirect indexing - use per-slot offsets as well. */ - const fs_reg srcs[] = { icp_handle, indirect_offset }; + const fs_reg srcs[] = { icp_handle, get_nir_src(offset_src) }; fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); - inst->offset = imm_offset; + inst->offset = base_offset; inst->base_mrf = -1; inst->mlen = 2; inst->regs_written = num_components; @@@ -1761,17 -1765,12 +1765,12 @@@ fs_visitor::nir_emit_gs_intrinsic(cons retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); break; - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_input: unreachable("load_input intrinsics are invalid for the GS stage"); - case nir_intrinsic_load_per_vertex_input_indirect: - indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D); - /* fallthrough */ case nir_intrinsic_load_per_vertex_input: - emit_gs_input_load(dest, instr->src[0], - indirect_offset, instr->const_index[0], - instr->num_components); + emit_gs_input_load(dest, instr->src[0], instr->const_index[0], + instr->src[1], instr->num_components); break; case nir_intrinsic_emit_vertex_with_counter: @@@ -2091,6 -2090,37 +2090,37 @@@ fs_visitor::nir_emit_cs_intrinsic(cons break; } + case nir_intrinsic_shared_atomic_add: + nir_emit_shared_atomic(bld, BRW_AOP_ADD, instr); + break; + case nir_intrinsic_shared_atomic_imin: + nir_emit_shared_atomic(bld, BRW_AOP_IMIN, instr); + break; + case nir_intrinsic_shared_atomic_umin: + nir_emit_shared_atomic(bld, BRW_AOP_UMIN, instr); + break; + case nir_intrinsic_shared_atomic_imax: + nir_emit_shared_atomic(bld, BRW_AOP_IMAX, instr); + break; + case nir_intrinsic_shared_atomic_umax: + nir_emit_shared_atomic(bld, BRW_AOP_UMAX, instr); + break; + case nir_intrinsic_shared_atomic_and: + nir_emit_shared_atomic(bld, BRW_AOP_AND, instr); + break; + case nir_intrinsic_shared_atomic_or: + nir_emit_shared_atomic(bld, BRW_AOP_OR, instr); + break; + case nir_intrinsic_shared_atomic_xor: + nir_emit_shared_atomic(bld, BRW_AOP_XOR, instr); + break; + case nir_intrinsic_shared_atomic_exchange: + nir_emit_shared_atomic(bld, BRW_AOP_MOV, instr); + break; + case nir_intrinsic_shared_atomic_comp_swap: + nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr); + break; + default: nir_emit_intrinsic(bld, instr); break; @@@ -2104,8 -2134,6 +2134,6 @@@ fs_visitor::nir_emit_intrinsic(const fs if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - bool has_indirect = false; - switch (instr->intrinsic) { case nir_intrinsic_atomic_counter_inc: case nir_intrinsic_atomic_counter_dec: @@@ -2294,27 -2322,27 +2322,27 @@@ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; - case nir_intrinsic_load_uniform_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_uniform: { - fs_reg uniform_reg(UNIFORM, instr->const_index[0]); - uniform_reg.reg_offset = instr->const_index[1]; + /* Offsets are in bytes but they should always be multiples of 4 */ + assert(instr->const_index[0] % 4 == 0); - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), bld, j); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + fs_reg 
src(UNIFORM, instr->const_index[0] / 4, dest.type); - bld.MOV(dest, src); - dest = offset(dest, bld, 1); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + /* Offsets are in bytes but they should always be multiples of 4 */ + assert(const_offset->u[0] % 4 == 0); + src.reg_offset = const_offset->u[0] / 4; + } else { + src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + } + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); } break; } - case nir_intrinsic_load_ubo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ubo: { nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; @@@ -2342,27 -2370,24 +2370,24 @@@ nir->info.num_ubos - 1); } - if (has_indirect) { - /* Turn the byte offset into a dword offset. */ - fs_reg base_offset = vgrf(glsl_type::int_type); - bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D), - brw_imm_d(2)); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset == NULL) { + fs_reg base_offset = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D); - unsigned vec4_offset = instr->const_index[0] / 4; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, - base_offset, vec4_offset + i); + base_offset, i * 4); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15); + struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); for (unsigned i = 0; i < instr->num_components; i++) { - packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i); + packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i); /* The std140 packing rules don't allow vectors to cross 16-byte * boundaries, and a reg is 32 bytes. 
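(A small standalone sketch of the offset arithmetic used here, illustrative only with a hypothetical helper name: the byte offset is split into a 16-byte-aligned read plus a dword "smear" component, which is why vectors that obey std140's 16-byte rule never straddle the read.)

   #include <assert.h>

   /* Hypothetical helper mirroring the expressions above. */
   static void
   split_pull_constant_offset(unsigned byte_offset,
                              unsigned *aligned_base, unsigned *component)
   {
      *aligned_base = byte_offset & ~15u;   /* const_offset->u[0] & ~15     */
      *component = (byte_offset % 16) / 4;  /* const_offset->u[0] % 16 / 4  */
   }

   int main(void)
   {
      unsigned base, comp;
      split_pull_constant_offset(20, &base, &comp);
      assert(base == 16 && comp == 1);   /* byte 20 = dword 1 of the 16B block */
      return 0;
   }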
@@@ -2376,9 -2401,6 +2401,6 @@@ break; } - case nir_intrinsic_load_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); @@@ -2404,12 -2426,41 +2426,41 @@@ nir->info.num_ssbos - 1); } - /* Get the offset to read from */ fs_reg offset_reg; - if (has_indirect) { + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); + } else { offset_reg = get_nir_src(instr->src[1]); + } + + /* Read the vector */ + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + + break; + } + + case nir_intrinsic_load_shared: { + assert(devinfo->gen >= 7); + + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Get the offset to read from */ + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); } else { - offset_reg = brw_imm_ud(instr->const_index[0]); + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); } /* Read the vector */ @@@ -2424,32 -2475,72 +2475,72 @@@ break; } - case nir_intrinsic_load_input_indirect: - has_indirect = true; - /* fallthrough */ - case nir_intrinsic_load_input: { - unsigned index = 0; - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src; - if (stage == MESA_SHADER_VERTEX) { - src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index); + case nir_intrinsic_store_shared: { + assert(devinfo->gen >= 7); + + /* Block index */ + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + + /* Value */ + fs_reg val_reg = get_nir_src(instr->src[0]); + + /* Writemask */ + unsigned writemask = instr->const_index[1]; + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + + 4 * first_component); } else { - src = offset(retype(nir_inputs, dest.type), bld, - instr->const_index[0] + index); + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0] + 4 * first_component)); } - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); - index++; - bld.MOV(dest, src); - dest = offset(dest, bld, 1); + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. 
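(The same ffs-based walk appears in both the shared and SSBO store paths; here is a standalone, illustrative sketch of how it decomposes a writemask into contiguous runs of enabled channels.)

   #include <stdio.h>
   #include <strings.h>   /* ffs() */

   int main(void)
   {
      unsigned writemask = 0xb;   /* 0b1011: components x, y and w enabled */

      while (writemask) {
         unsigned first_component = ffs(writemask) - 1;
         unsigned length = ffs(~(writemask >> first_component)) - 1;

         printf("one message: %u component(s) starting at %u\n",
                length, first_component);

         /* clear the bits just handled, exactly as the code above does */
         writemask &= (15 << (first_component + length));
      }
      return 0;
   }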
+ */ + writemask &= (15 << (first_component + length)); + } + + break; + } + + case nir_intrinsic_load_input: { + fs_reg src; + if (stage == MESA_SHADER_VERTEX) { + src = fs_reg(ATTR, instr->const_index[0], dest.type); + } else { + src = offset(retype(nir_inputs, dest.type), bld, + instr->const_index[0]); + } + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset && "Indirect input loads not allowed"); + src = offset(src, bld, const_offset->u[0]); + + for (unsigned j = 0; j < instr->num_components; j++) { + bld.MOV(offset(dest, bld, j), offset(src, bld, j)); } break; } - case nir_intrinsic_store_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); @@@ -2476,7 -2567,7 +2567,7 @@@ fs_reg val_reg = get_nir_src(instr->src[0]); /* Writemask */ - unsigned writemask = instr->const_index[1]; + unsigned writemask = instr->const_index[0]; /* Combine groups of consecutive enabled channels in one write * message. We use ffs to find the first enabled channel and then ffs on @@@ -2486,10 -2577,11 +2577,11 @@@ while (writemask) { unsigned first_component = ffs(writemask) - 1; unsigned length = ffs(~(writemask >> first_component)) - 1; - fs_reg offset_reg; - if (!has_indirect) { - offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component); + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, @@@ -2510,20 -2602,17 +2602,17 @@@ break; } - case nir_intrinsic_store_output_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); - unsigned index = 0; + fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, + instr->const_index[0]); + + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset && "Indirect output stores not allowed"); + new_dest = offset(new_dest, bld, const_offset->u[0]); + for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, - instr->const_index[0] + index); - if (has_indirect) - src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); - index++; - bld.MOV(new_dest, src); - src = offset(src, bld, 1); + bld.MOV(offset(new_dest, bld, j), offset(src, bld, j)); } break; } @@@ -2646,12 -2735,37 +2735,39 @@@ fs_visitor::nir_emit_ssbo_atomic(const bld.MOV(dest, atomic_result); } + void + fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, + int op, nir_intrinsic_instr *instr) + { + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); + fs_reg offset = get_nir_src(instr->src[0]); + fs_reg data1 = get_nir_src(instr->src[1]); + fs_reg data2; + if (op == BRW_AOP_CMPWR) + data2 = get_nir_src(instr->src[2]); + + /* Emit the actual atomic operation operation */ + + fs_reg atomic_result = + surface_access::emit_untyped_atomic(bld, surface, offset, + data1, data2, + 1 /* dims */, 1 /* rsize */, + op, + BRW_PREDICATE_NONE); + dest.type = atomic_result.type; + bld.MOV(dest, atomic_result); + } + void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + fs_reg texture_reg(brw_imm_ud(texture)); fs_reg 
sampler_reg(brw_imm_ud(sampler)); int gather_component = instr->component; @@@ -2718,9 -2832,9 +2834,9 @@@ case nir_tex_src_projector: unreachable("should be lowered"); - case nir_tex_src_sampler_offset: { - /* Figure out the highest possible sampler index and mark it as used */ - uint32_t max_used = sampler + instr->sampler_array_size - 1; + case nir_tex_src_texture_offset: { + /* Figure out the highest possible texture index and mark it as used */ + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { max_used += stage_prog_data->binding_table.gather_texture_start; } else { @@@ -2728,14 -2842,6 +2844,14 @@@ } brw_mark_surface_used(prog_data, max_used); + /* Emit code to evaluate the actual indexing expression */ + texture_reg = vgrf(glsl_type::uint_type); + bld.ADD(texture_reg, src, brw_imm_ud(texture)); + texture_reg = bld.emit_uniformize(texture_reg); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); bld.ADD(sampler_reg, src, brw_imm_ud(sampler)); @@@ -2751,8 -2857,8 +2867,8 @@@ if (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical) { if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg); + key_tex->compressed_multisample_layout_mask & (1 << texture)) { + mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { mcs = brw_imm_ud(0u); } @@@ -2789,7 -2895,7 +2905,7 @@@ fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D); fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst, bld.vgrf(BRW_REGISTER_TYPE_D, 1), - sampler_reg); + texture_reg, texture_reg); inst->mlen = 1; inst->header_size = 1; inst->base_mrf = -1; @@@ -2803,7 -2909,7 +2919,7 @@@ emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, sampler, sampler_reg); + is_cube_array, texture, texture_reg, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; @@@ -2824,12 -2930,6 +2940,12 @@@ fs_visitor::nir_emit_jump(const fs_buil bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: + /* This has to be the last block in the shader. We don't handle + * early returns. + */ + assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && + instr->instr.block->cf_node.parent->type == nir_cf_node_function); + break; default: unreachable("unknown jump"); } diff --combined src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index e82acd141f3,68f2548d2bc..76f0185fdd7 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@@ -27,26 -27,8 +27,8 @@@ * makes it easier to do backend-specific optimizations than doing so * in the GLSL IR or in the native code. 
*/ - #include - - #include "main/macros.h" - #include "main/shaderobj.h" - #include "program/prog_parameter.h" - #include "program/prog_print.h" - #include "program/prog_optimize.h" - #include "util/register_allocate.h" - #include "program/hash_table.h" - #include "brw_context.h" - #include "brw_eu.h" - #include "brw_wm.h" - #include "brw_cs.h" - #include "brw_vec4.h" - #include "brw_vec4_gs_visitor.h" #include "brw_fs.h" - #include "main/uniforms.h" #include "glsl/nir/glsl_types.h" - #include "glsl/ir_optimization.h" - #include "program/sampler.h" using namespace brw; @@@ -82,12 -64,12 +64,12 @@@ fs_visitor::emit_vs_system_value(int lo /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &sampler) + const fs_reg &texture) { const fs_reg dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - sampler, fs_reg(), brw_imm_ud(components), brw_imm_d(0) + texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@@ -111,8 -93,6 +93,8 @@@ fs_visitor::emit_texture(ir_texture_opc fs_reg mcs, int gather_component, bool is_cube_array, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg) { @@@ -152,7 -132,7 +134,7 @@@ fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1)); const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, - sample_index, mcs, sampler_reg, offset_value, + sample_index, mcs, surface_reg, sampler_reg, offset_value, brw_imm_d(coord_components), brw_imm_d(grad_components) }; enum opcode opcode; @@@ -205,7 -185,7 +187,7 @@@ if (op == ir_tg4) { if (gather_component == 1 && - key_tex->gather_channel_quirk_mask & (1 << sampler)) { + key_tex->gather_channel_quirk_mask & (1 << surface)) { /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. */ @@@ -215,7 -195,7 +197,7 @@@ } if (devinfo->gen == 6) - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); } /* fixup #layers for cube map arrays */ @@@ -768,7 -748,6 +750,6 @@@ fs_visitor::emit_urb_writes(const fs_re const int output_vertex_size_owords = gs_prog_data->output_vertex_size_hwords * 2; - fs_reg offset; if (gs_vertex_count.file == IMM) { per_slot_offsets = brw_imm_ud(output_vertex_size_owords * gs_vertex_count.ud); diff --combined src/mesa/drivers/dri/i965/brw_nir.c index 10e92735210,14ad172a2c3..fdfc4f661d1 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@@ -23,33 -23,50 +23,50 @@@ #include "brw_nir.h" #include "brw_shader.h" - #include "glsl/glsl_parser_extras.h" #include "glsl/nir/glsl_to_nir.h" + #include "glsl/nir/nir_builder.h" #include "program/prog_to_nir.h" + struct remap_vs_attrs_state { + nir_builder b; + uint64_t inputs_read; + }; + static bool - remap_vs_attrs(nir_block *block, void *closure) + remap_vs_attrs(nir_block *block, void *void_state) { - GLbitfield64 inputs_read = *((GLbitfield64 *) closure); + struct remap_vs_attrs_state *state = void_state; - nir_foreach_instr(block, instr) { + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - /* We set EmitNoIndirect for VS inputs, so there are no indirects. 
*/ - assert(intrin->intrinsic != nir_intrinsic_load_input_indirect); - if (intrin->intrinsic == nir_intrinsic_load_input) { /* Attributes come in a contiguous block, ordered by their * gl_vert_attrib value. That means we can compute the slot * number for an attribute by masking out the enabled attributes * before it and counting the bits. */ - int attr = intrin->const_index[0]; - int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); + nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]); + + /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ + assert(const_offset); + + int attr = intrin->const_index[0] + const_offset->u[0]; + int slot = _mesa_bitcount_64(state->inputs_read & + BITFIELD64_MASK(attr)); + + /* The NIR -> FS pass will just add the base and offset together, so + * there's no reason to keep them separate. Just put it all in + * const_index[0] and set the offset src[0] to load_const(0). + */ intrin->const_index[0] = 4 * slot; + + state->b.cursor = nir_before_instr(&intrin->instr); + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(nir_imm_int(&state->b, 0))); } } return true; @@@ -62,13 -79,6 +79,6 @@@ brw_nir_lower_inputs(nir_shader *nir { switch (nir->stage) { case MESA_SHADER_VERTEX: - /* For now, leave the vec4 backend doing the old method. */ - if (!is_scalar) { - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - type_size_vec4); - break; - } - /* Start with the location of the variable's base. */ foreach_list_typed(nir_variable, var, node, &nir->inputs) { var->data.driver_location = var->data.location; @@@ -80,15 -90,25 +90,25 @@@ */ nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - /* Finally, translate VERT_ATTRIB_* values into the actual registers. - * - * Note that we can use nir->info.inputs_read instead of key->inputs_read - * since the two are identical aside from Gen4-5 edge flag differences. - */ - GLbitfield64 inputs_read = nir->info.inputs_read; - nir_foreach_overload(nir, overload) { - if (overload->impl) { - nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); + if (is_scalar) { + /* Finally, translate VERT_ATTRIB_* values into the actual registers. + * + * Note that we can use nir->info.inputs_read instead of + * key->inputs_read since the two are identical aside from Gen4-5 + * edge flag differences. 
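(A standalone sketch of the attribute-to-slot mapping described above; illustrative only, using a GCC/Clang builtin in place of _mesa_bitcount_64: the slot is simply the number of enabled attributes below the one being remapped.)

   #include <stdint.h>
   #include <stdio.h>

   static int
   slot_for_attr(uint64_t inputs_read, int attr)
   {
      /* mask off the enabled attributes below 'attr' and count them, like
       * _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)) */
      uint64_t below = inputs_read & ((UINT64_C(1) << attr) - 1);
      return __builtin_popcountll(below);
   }

   int main(void)
   {
      /* suppose only attributes 0, 3 and 7 are read by the shader */
      uint64_t inputs_read = (1u << 0) | (1u << 3) | (1u << 7);

      printf("attr 7 -> slot %d\n", slot_for_attr(inputs_read, 7));   /* 2 */
      return 0;
   }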
+ */ + struct remap_vs_attrs_state remap_state = { + .inputs_read = nir->info.inputs_read, + }; + + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); + + nir_foreach_overload(nir, overload) { + if (overload->impl) { + nir_builder_init(&remap_state.b, overload->impl); + nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state); + } } } break; @@@ -171,12 -191,40 +191,40 @@@ brw_nir_lower_outputs(nir_shader *nir, } } + static int + type_size_scalar_bytes(const struct glsl_type *type) + { + return type_size_scalar(type) * 4; + } + + static int + type_size_vec4_bytes(const struct glsl_type *type) + { + return type_size_vec4(type) * 16; + } + + static void + brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) + { + if (is_scalar) { + nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, + type_size_scalar_bytes); + nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes); + } else { + nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, + type_size_vec4_bytes); + nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes); + } + } + + #include "util/debug.h" + static bool should_clone_nir() { static int should_clone = -1; if (should_clone < 1) - should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false); + should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); return should_clone; } @@@ -298,9 -346,7 +346,7 @@@ brw_lower_nir(nir_shader *nir OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); OPT_V(brw_nir_lower_outputs, is_scalar); - //nir_assign_var_locations(&nir->uniforms, - // &nir->num_uniforms, - // is_scalar ? type_size_scalar : type_size_vec4); - OPT_V(brw_nir_lower_uniforms, is_scalar); ++ //OPT_V(brw_nir_lower_uniforms, is_scalar); OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); if (shader_prog) { diff --combined src/mesa/drivers/dri/i965/brw_program.c index 3da8e9e8a97,20d4e0d6c4a..94ceb526762 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@@ -31,8 -31,6 +31,6 @@@ #include #include "main/imports.h" - #include "main/enums.h" - #include "main/shaderobj.h" #include "program/prog_parameter.h" #include "program/prog_print.h" #include "program/program.h" @@@ -41,10 -39,10 +39,10 @@@ #include "util/ralloc.h" #include "glsl/ir.h" + #include "brw_program.h" #include "brw_context.h" #include "brw_shader.h" #include "brw_nir.h" - #include "brw_wm.h" #include "intel_batchbuffer.h" static unsigned @@@ -90,6 -88,28 +88,28 @@@ static struct gl_program *brwNewProgram struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program); if (prog) { prog->id = get_new_program_id(brw->intelScreen); + + return _mesa_init_gl_program(&prog->program.Base, target, id); + } else { + return NULL; + } + } + + case GL_TESS_CONTROL_PROGRAM_NV: { + struct brw_tess_ctrl_program *prog = CALLOC_STRUCT(brw_tess_ctrl_program); + if (prog) { + prog->id = get_new_program_id(brw->intelScreen); + + return _mesa_init_gl_program(&prog->program.Base, target, id); + } else { + return NULL; + } + } + + case GL_TESS_EVALUATION_PROGRAM_NV: { + struct brw_tess_eval_program *prog = CALLOC_STRUCT(brw_tess_eval_program); + if (prog) { + prog->id = get_new_program_id(brw->intelScreen); return _mesa_init_gl_program(&prog->program.Base, target, id); } else { @@@ -259,7 -279,7 +279,7 @@@ brw_get_scratch_bo(struct brw_context * void brwInitFragProgFuncs( struct dd_function_table *functions ) { - assert(functions->ProgramStringNotify == _tnl_program_string); + /* assert(functions->ProgramStringNotify == 
_tnl_program_string); */ functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; diff --combined src/mesa/drivers/dri/i965/brw_shader.cpp index 2f0e8b680ab,7a6751bc71b..d051e124584 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@@ -21,16 -21,14 +21,14 @@@ * IN THE SOFTWARE. */ - #include "main/macros.h" #include "brw_context.h" - #include "brw_vs.h" - #include "brw_gs.h" - #include "brw_fs.h" #include "brw_cfg.h" + #include "brw_eu.h" #include "brw_nir.h" - #include "glsl/ir_optimization.h" #include "glsl/glsl_parser_extras.h" - #include "main/shaderapi.h" + #include "main/shaderobj.h" + #include "main/uniforms.h" + #include "util/debug.h" static void shader_debug_log_mesa(void *data, const char *fmt, ...) @@@ -87,7 -85,7 +85,7 @@@ brw_compiler_create(void *mem_ctx, cons compiler->scalar_stage[MESA_SHADER_VERTEX] = devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); compiler->scalar_stage[MESA_SHADER_GEOMETRY] = - devinfo->gen >= 8 && brw_env_var_as_boolean("INTEL_SCALAR_GS", false); + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; @@@ -100,8 -98,6 +98,8 @@@ */ nir_options->lower_ffma = true; nir_options->lower_sub = true; + nir_options->lower_fdiv = true; + /* In the vec4 backend, our dpN instruction replicates its result to all * the components of a vec4. We would like NIR to give us replicated fdot * instructions because it can optimize better for us. @@@ -142,10 -138,13 +140,13 @@@ if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] + .LowerShaderSharedVariables = true; + return compiler; } - struct gl_shader * + extern "C" struct gl_shader * brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) { struct brw_shader *shader; @@@ -161,7 -160,7 +162,7 @@@ return &shader->base; } - void + extern "C" void brw_mark_surface_used(struct brw_stage_prog_data *prog_data, unsigned surf_index) { @@@ -199,7 -198,6 +200,7 @@@ brw_type_for_base_type(const struct gls case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@@ -685,6 -683,13 +686,13 @@@ backend_shader::backend_shader(const st stage_abbrev = _mesa_shader_stage_to_abbrev(stage); } + bool + backend_reg::equals(const backend_reg &r) const + { + return memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 && + reg_offset == r.reg_offset; + } + bool backend_reg::is_zero() const { diff --combined src/mesa/drivers/dri/i965/brw_shader.h index 9555406c777,8c5778f9048..0b77dc24539 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@@ -21,28 -21,41 +21,41 @@@ * IN THE SOFTWARE. 
*/ + #pragma once + #include #include "brw_reg.h" #include "brw_defines.h" #include "brw_context.h" - #include "main/compiler.h" - #include "glsl/ir.h" - #include "program/prog_parameter.h" #ifdef __cplusplus #include "brw_ir_allocator.h" #endif - #pragma once - #define MAX_SAMPLER_MESSAGE_SIZE 11 #define MAX_VGRF_SIZE 16 #ifdef __cplusplus - struct backend_reg : public brw_reg + struct backend_reg : private brw_reg { backend_reg() {} - backend_reg(struct brw_reg reg) : brw_reg(reg) {} + backend_reg(const struct brw_reg &reg) : brw_reg(reg) {} + + const brw_reg &as_brw_reg() const + { + assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); + assert(reg_offset == 0); + return static_cast<const brw_reg &>(*this); + } + + brw_reg &as_brw_reg() + { + assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); + assert(reg_offset == 0); + return static_cast<brw_reg &>(*this); + } + + bool equals(const backend_reg &r) const; bool is_zero() const; bool is_one() const; @@@ -61,6 -74,25 +74,25 @@@ * For uniforms, this is in units of 1 float. */ uint16_t reg_offset; + + using brw_reg::type; + using brw_reg::file; + using brw_reg::negate; + using brw_reg::abs; + using brw_reg::address_mode; + using brw_reg::subnr; + using brw_reg::nr; + + using brw_reg::swizzle; + using brw_reg::writemask; + using brw_reg::indirect_offset; + using brw_reg::vstride; + using brw_reg::width; + using brw_reg::hstride; + + using brw_reg::f; + using brw_reg::d; + using brw_reg::ud; }; #endif @@@ -227,6 -259,9 +259,6 @@@ struct brw_gs_compil unsigned control_data_header_size_bits; }; -struct brw_compiler * -brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); - void brw_assign_common_binding_table_offsets(gl_shader_stage stage, const struct brw_device_info *devinfo, @@@ -248,6 -283,9 +280,9 @@@ bool brw_cs_precompile(struct gl_contex struct gl_shader_program *shader_prog, struct gl_program *prog); + GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + int type_size_scalar(const struct glsl_type *type); int type_size_vec4(const struct glsl_type *type); int type_size_vec4_times_4(const struct glsl_type *type); diff --combined src/mesa/drivers/dri/i965/brw_surface_formats.c index 69eed4bc629,ff8aac2c0e5..b7d9078d87d --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@@ -20,14 -20,26 +20,13 @@@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ - #include "main/context.h" #include "main/mtypes.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - -struct surface_format_info { - bool exists; - int sampling; - int filtering; - int shadow_compare; - int chroma_key; - int render_target; - int alpha_blend; - int input_vb; - int streamed_output_vb; - int color_processing; - int lossless_compression; - const char *name; -}; +#include "brw_wm.h" +#include "brw_surface_formats.h" /* This macro allows us to write the table almost as it appears in the PRM, * while restructuring it to turn it into the C code we want. @@@ -74,7 -86,7 +73,7 @@@ * - VOL4_Part1 section 3.9.11 Render Target Write.
* - Render Target Surface Types [SKL+] */ -const struct surface_format_info surface_formats[] = { +const struct brw_surface_format_info surface_formats[] = { /* smpl filt shad CK RT AB VB SO color ccs_e */ SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) @@@ -153,8 -165,8 +152,8 @@@ SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT) SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT) SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, 90, B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, Y, 80, 80, x, x, 60, 90, B8G8R8X8_UNORM) + SF( Y, Y, x, x, 80, 80, x, x, x, x, B8G8R8X8_UNORM_SRGB) SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) @@@ -605,7 -617,7 +604,7 @@@ brw_init_surface_formats(struct brw_con for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) { uint32_t texture, render; - const struct surface_format_info *rinfo, *tinfo; + const struct brw_surface_format_info *rinfo, *tinfo; bool is_integer = _mesa_is_format_integer_color(format); render = texture = brw_format_for_mesa_format(format); @@@ -656,9 -668,10 +655,10 @@@ * mask writes to alpha (ala glColorMask) and reconfigure the * alpha blending hardware to use GL_ONE (or GL_ZERO) for * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is - * used. + * used. On Gen8+ BGRX is actually allowed (but not RGBX). */ - render = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (gen < tinfo->render_target) + render = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case BRW_SURFACEFORMAT_R8G8B8X8_UNORM: render = BRW_SURFACEFORMAT_R8G8B8A8_UNORM; @@@ -781,7 -794,7 +781,7 @@@ boo brw_losslessly_compressible_format(struct brw_context *brw, uint32_t brw_format) { - const struct surface_format_info * const sinfo = + const struct brw_surface_format_info * const sinfo = &surface_formats[brw_format]; const int gen = brw->gen * 10; diff --combined src/mesa/drivers/dri/i965/brw_util.c index e4533f34f1f,bf7f9c61c84..934b6b8d627 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@@ -30,10 -30,6 +30,6 @@@ */ - #include - - #include "main/mtypes.h" - #include "program/prog_parameter.h" #include "brw_util.h" #include "brw_defines.h" @@@ -102,31 -98,3 +98,31 @@@ GLuint brw_translate_blend_factor( GLen unreachable("not reached"); } } + +static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { + [GL_POINTS] =_3DPRIM_POINTLIST, + [GL_LINES] = _3DPRIM_LINELIST, + [GL_LINE_LOOP] = _3DPRIM_LINELOOP, + [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, + [GL_TRIANGLES] = _3DPRIM_TRILIST, + [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [GL_QUADS] = _3DPRIM_QUADLIST, + [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, + [GL_POLYGON] = _3DPRIM_POLYGON, + [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +}; + +uint32_t +get_hw_prim_for_gl_prim(int mode) +{ + if (mode >= BRW_PRIM_OFFSET) + return mode - BRW_PRIM_OFFSET; + else { + assert(mode < ARRAY_SIZE(prim_to_hw_prim)); + return prim_to_hw_prim[mode]; + } +} diff --combined src/mesa/drivers/dri/i965/brw_vec4.h index f94f7128a07,ae5bf6939f5..27c72766f2d --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@@ -24,24 -24,11 +24,11 @@@ #ifndef 
BRW_VEC4_H #define BRW_VEC4_H - #include #include "brw_shader.h" - #include "main/compiler.h" - #include "program/hash_table.h" #include "brw_program.h" #ifdef __cplusplus #include "brw_ir_vec4.h" - - extern "C" { - #endif - - #include "brw_context.h" - #include "brw_eu.h" - #include "intel_asm_annotation.h" - - #ifdef __cplusplus - }; /* extern "C" */ #endif #include "glsl/ir.h" @@@ -273,7 -260,6 +260,7 @@@ public src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, @@@ -328,7 -314,6 +315,6 @@@ bool is_high_sampler(src_reg sampler); virtual void emit_nir_code(); - virtual void nir_setup_inputs(); virtual void nir_setup_uniforms(); virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); virtual void nir_setup_system_values(); @@@ -361,7 -346,6 +347,6 @@@ dst_reg *nir_locals; dst_reg *nir_ssa_values; - src_reg *nir_inputs; dst_reg *nir_system_values; protected: diff --combined src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 432ccb77cc3,c3426ddd1c8..205a86d597a --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@@ -23,6 -23,8 +23,8 @@@ #include "glsl/glsl_parser_extras.h" #include "brw_vec4.h" #include "brw_cfg.h" + #include "brw_eu.h" + #include "brw_program.h" using namespace brw; @@@ -108,7 -110,6 +110,7 @@@ generate_tex(struct brw_codegen *p vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { const struct brw_device_info *devinfo = p->devinfo; @@@ -264,16 -265,14 +266,16 @@@ ? prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, dst, inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, 1, /* response length */ @@@ -287,19 -286,14 +289,19 @@@ /* Non-constant sampler index. 
*/ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@@ -364,7 -358,7 +366,7 @@@ generate_gs_urb_write_allocate(struct b /* We pass the temporary passed in src0 as the writeback register */ brw_urb_WRITE(p, - inst->src[0], /* dest */ + inst->src[0].as_brw_reg(), /* dest */ inst->base_mrf, /* starting mrf reg nr */ src, BRW_URB_WRITE_ALLOCATE_COMPLETE, @@@ -377,8 -371,8 +379,8 @@@ brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, get_element_ud(inst->dst, 0), - get_element_ud(inst->src[0], 0)); + brw_MOV(p, get_element_ud(inst->dst.as_brw_reg(), 0), + get_element_ud(inst->src[0].as_brw_reg(), 0)); brw_pop_insn_state(p); } @@@ -808,7 -802,7 +810,7 @@@ generate_scratch_read(struct brw_codege if (devinfo->gen < 6) brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, - 255, /* binding table index: stateless access */ + brw_scratch_surface_idx(p), BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE, @@@ -881,7 -875,7 +883,7 @@@ generate_scratch_write(struct brw_codeg if (devinfo->gen < 6) brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); brw_set_dp_write_message(p, send, - 255, /* binding table index: stateless access */ + brw_scratch_surface_idx(p), BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, 3, /* mlen */ @@@ -909,8 -903,21 +911,21 @@@ generate_pull_constant_load(struct brw_ gen6_resolve_implied_move(p, &header, inst->base_mrf); - brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D), - offset); + if (devinfo->gen >= 6) { + if (offset.file == BRW_IMMEDIATE_VALUE) { + brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), + BRW_REGISTER_TYPE_D), + brw_imm_d(offset.ud >> 4)); + } else { + brw_SHR(p, retype(brw_message_reg(inst->base_mrf + 1), + BRW_REGISTER_TYPE_D), + offset, brw_imm_d(4)); + } + } else { + brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), + BRW_REGISTER_TYPE_D), + offset); + } uint32_t msg_type; @@@ -1067,9 -1074,9 +1082,9 @@@ generate_code(struct brw_codegen *p annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { - src[i] = inst->src[i]; + src[i] = inst->src[i].as_brw_reg(); } - dst = inst->dst; + dst = inst->dst.as_brw_reg(); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); @@@ -1326,7 -1333,7 +1341,7 @@@ case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(p, prog_data, inst, dst, src[0], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1], src[1]); break; case VS_OPCODE_URB_WRITE: diff --combined 
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 260b515ad42,f965b39360f..cf1f82f9d78 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@@ -25,7 -25,7 +25,7 @@@ #include "brw_vec4.h" #include "brw_vec4_builder.h" #include "brw_vec4_surface_builder.h" - #include "glsl/ir_uniform.h" + #include "brw_program.h" using namespace brw; using namespace brw::surface_access; @@@ -35,9 -35,6 +35,6 @@@ namespace brw void vec4_visitor::emit_nir_code() { - if (nir->num_inputs > 0) - nir_setup_inputs(); - if (nir->num_uniforms > 0) nir_setup_uniforms(); @@@ -117,28 -114,10 +114,10 @@@ vec4_visitor::nir_setup_system_values( } } - void - vec4_visitor::nir_setup_inputs() - { - nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); - for (unsigned i = 0; i < nir->num_inputs; i++) { - nir_inputs[i] = src_reg(); - } - - nir_foreach_variable(var, &nir->inputs) { - int offset = var->data.driver_location; - unsigned size = type_size_vec4(var->type); - for (unsigned i = 0; i < size; i++) { - src_reg src = src_reg(ATTR, var->data.location + i, var->type); - nir_inputs[offset + i] = src; - } - } - } - void vec4_visitor::nir_setup_uniforms() { - uniforms = nir->num_uniforms; + uniforms = nir->num_uniforms / 16; nir_foreach_variable(var, &nir->uniforms) { /* UBO's and atomics don't take up space in the uniform file */ @@@ -146,7 -125,7 +125,7 @@@ continue; if (type_size_vec4(var->type) > 0) - uniform_size[var->data.driver_location] = type_size_vec4(var->type); + uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type); } } @@@ -390,22 -369,17 +369,17 @@@ vec4_visitor::nir_emit_intrinsic(nir_in dst_reg dest; src_reg src; - bool has_indirect = false; - switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_input: { - int offset = instr->const_index[0]; - src = nir_inputs[offset]; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + + /* We set EmitNoIndirectInput for VS */ + assert(const_offset); + + src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0], + glsl_type::uvec4_type); - if (has_indirect) { - dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0], - BRW_REGISTER_TYPE_D, - 1)); - } dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); @@@ -413,22 -387,16 +387,16 @@@ break; } - case nir_intrinsic_store_output_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_output: { - int varying = instr->const_index[0]; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset); + + int varying = instr->const_index[0] + const_offset->u[0]; src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); - dest = dst_reg(src); - if (has_indirect) { - dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1], - BRW_REGISTER_TYPE_D, - 1)); - } - output_reg[varying] = dest; + output_reg[varying] = dst_reg(src); break; } @@@ -458,9 -426,6 +426,6 @@@ break; } - case nir_intrinsic_store_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); @@@ -485,20 -450,19 +450,19 @@@ } /* Offset */ - src_reg offset_reg = src_reg(this, glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1))); + src_reg offset_reg; + nir_const_value *const_offset = 
nir_src_as_const_value(instr->src[2]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); } else { - const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes))); + offset_reg = get_nir_src(instr->src[2], 1); } /* Value */ src_reg val_reg = get_nir_src(instr->src[0], 4); /* Writemask */ - unsigned write_mask = instr->const_index[1]; + unsigned write_mask = instr->const_index[0]; /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped * writes will use SIMD8 mode. In order to hide this and keep symmetry across @@@ -564,9 -528,8 +528,8 @@@ * write at to skip the channels we skipped, if any. */ if (skipped_channels > 0) { - if (!has_indirect) { - const_offset_bytes += 4 * skipped_channels; - offset_reg = brw_imm_ud(const_offset_bytes); + if (offset_reg.file == IMM) { + offset_reg.ud += 4 * skipped_channels; } else { emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(4 * skipped_channels))); @@@ -601,9 -564,6 +564,6 @@@ break; } - case nir_intrinsic_load_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); @@@ -631,13 -591,12 +591,12 @@@ nir->info.num_ssbos - 1); } - src_reg offset_reg = src_reg(this, glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1))); + src_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); } else { - const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes)))); + offset_reg = get_nir_src(instr->src[1], 1); } /* Read the vector */ @@@ -700,16 -659,21 +659,21 @@@ break; } - case nir_intrinsic_load_uniform_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_uniform: { + /* Offsets are in bytes but they should always be multiples of 16 */ + assert(instr->const_index[0] % 16 == 0); + dest = get_nir_dest(instr->dest); - src = src_reg(dst_reg(UNIFORM, instr->const_index[0])); - src.reg_offset = instr->const_index[1]; + src = src_reg(dst_reg(UNIFORM, instr->const_index[0] / 16)); + src.type = dest.type; - if (has_indirect) { + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + /* Offsets are in bytes but they should always be multiples of 16 */ + assert(const_offset->u[0] % 16 == 0); + src.reg_offset = const_offset->u[0] / 16; + } else { src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); src.reladdr = new(mem_ctx) src_reg(tmp); } @@@ -747,9 -711,6 +711,6 @@@ break; } - case nir_intrinsic_load_ubo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ubo: { nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg surf_index; @@@ -783,15 -744,12 +744,12 @@@ nir->info.num_ubos - 1); } - unsigned const_offset = instr->const_index[0]; src_reg offset; - - if (!has_indirect) { - offset = brw_imm_ud(const_offset / 16); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset = brw_imm_ud(const_offset->u[0] & ~15); } else { - offset = src_reg(this, glsl_type::uint_type); - emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1), - brw_imm_ud(4u))); + offset = get_nir_src(instr->src[1], nir_type_int, 1); } src_reg packed_consts = src_reg(this, glsl_type::vec4_type); @@@ -803,10 -761,12 +761,12 @@@ NULL, 
NULL /* before_block/inst */); packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); - packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); + if (const_offset) { + packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4); + } emit(MOV(dest, packed_consts)); break; @@@ -1522,13 -1482,7 +1482,13 @@@ vec4_visitor::nir_emit_jump(nir_jump_in break; case nir_jump_return: - /* fall through */ + /* This has to be the last block in the shader. We don't handle + * early returns. + */ + assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && + instr->instr.block->cf_node.parent->type == nir_cf_node_function); + break; + default: unreachable("unknown jump"); } @@@ -1581,9 -1535,7 +1541,9 @@@ glsl_type_for_nir_alu_type(nir_alu_typ void vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; + src_reg texture_reg = brw_imm_ud(texture); src_reg sampler_reg = brw_imm_ud(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; @@@ -1659,12 -1611,13 +1619,12 @@@ offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); break; - case nir_tex_src_sampler_offset: { - /* The highest sampler which may be used by this operation is + case nir_tex_src_texture_offset: { + /* The highest texture which may be used by this operation is * the last element of the array. Mark it here, because the generator * doesn't have enough information to determine the bound. */ - uint32_t array_size = instr->sampler_array_size; - uint32_t max_used = sampler + array_size - 1; + uint32_t max_used = texture + instr->texture_array_size - 1; if (instr->op == nir_texop_tg4) { max_used += prog_data->base.binding_table.gather_texture_start; } else { @@@ -1673,15 -1626,6 +1633,15 @@@ brw_mark_surface_used(&prog_data->base, max_used); + /* Emit code to evaluate the actual indexing expression */ + src_reg src = get_nir_src(instr->src[i].src, 1); + src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), src, brw_imm_ud(texture))); + texture_reg = emit_uniformize(temp); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); @@@ -1723,7 -1667,7 +1683,7 @@@ /* Stuff the channel select bits in the top of the texture offset */ if (instr->op == nir_texop_tg4) { if (instr->component == 1 && - (key_tex->gather_channel_quirk_mask & (1 << sampler))) { + (key_tex->gather_channel_quirk_mask & (1 << texture))) { /* gather4 sampler is broken for green channel on RG32F -- * we must ask for blue instead. 
*/ @@@ -1744,8 -1688,7 +1704,8 @@@ shadow_comparitor, lod, lod2, sample_index, constant_offset, offset_value, - mcs, is_cube_array, sampler, sampler_reg); + mcs, is_cube_array, + texture, texture_reg, sampler, sampler_reg); } void diff --combined src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index caf1ee02bf0,443d0eb5387..529498be77a --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@@ -23,8 -23,8 +23,8 @@@ #include "brw_vec4.h" #include "brw_cfg.h" - #include "glsl/ir_uniform.h" - #include "program/sampler.h" + #include "brw_eu.h" + #include "brw_program.h" namespace brw { @@@ -622,7 -622,6 +622,7 @@@ type_size_vec4(const struct glsl_type * case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@@ -878,8 -877,6 +878,8 @@@ vec4_visitor::emit_texture(ir_texture_o src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, + src_reg surface_reg, uint32_t sampler, src_reg sampler_reg) { @@@ -945,8 -942,7 +945,8 @@@ inst->dst.writemask = WRITEMASK_XYZW; inst->shadow_compare = shadow_comparitor.file != BAD_FILE; - inst->src[1] = sampler_reg; + inst->src[1] = surface_reg; + inst->src[2] = sampler_reg; /* MRF for the first parameter */ int param_base = inst->base_mrf + inst->header_size; @@@ -1072,7 -1068,7 +1072,7 @@@ } if (devinfo->gen == 6 && op == ir_tg4) { - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } if (op == ir_query_levels) { @@@ -1476,24 -1472,16 +1476,16 @@@ vec4_visitor::get_pull_constant_offset( src_reg index = src_reg(this, glsl_type::int_type); emit_before(block, inst, ADD(dst_reg(index), *reladdr, - brw_imm_d(reg_offset))); - - /* Pre-gen6, the message header uses byte offsets instead of vec4 - * (16-byte) offset units. - */ - if (devinfo->gen < 6) { - emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16))); - } + brw_imm_d(reg_offset * 16))); return index; } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ src_reg offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset))); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); return offset; } else { - int message_header_scale = devinfo->gen < 6 ? 16 : 1; - return brw_imm_d(reg_offset * message_header_scale); + return brw_imm_d(reg_offset * 16); } } diff --combined src/mesa/main/mtypes.h index 4a849fb090d,48309bf65ec..fabe9913c11 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@@ -1275,6 -1275,10 +1275,10 @@@ struct gl_buffer_objec GLboolean Immutable; /**< GL_ARB_buffer_storage */ gl_buffer_usage UsageHistory; /**< How has this buffer been used so far? */ + /** Counters used for buffer usage warnings */ + GLuint NumSubDataCalls; + GLuint NumMapBufferWriteCalls; + struct gl_buffer_mapping Mappings[MAP_COUNT]; }; @@@ -1419,6 -1423,9 +1423,9 @@@ struct gl_vertex_array_objec /** Vertex buffer bindings */ struct gl_vertex_buffer_binding VertexBinding[VERT_ATTRIB_MAX]; + /** Mask indicating which vertex arrays have vertex buffer associated. */ + GLbitfield64 VertexAttribBufferMask; + /** Mask of VERT_BIT_* values indicating which arrays are enabled */ GLbitfield64 _Enabled; @@@ -2479,11 -2486,6 +2486,11 @@@ struct gl_uniform_bloc */ GLuint Binding; + /** + * Vulkan descriptor set qualifier for this block. 
+ */ + GLuint Set; + /** * Minimum size (in bytes) of a buffer object to back this uniform buffer * (GL_UNIFORM_BLOCK_DATA_SIZE). @@@ -2672,6 -2674,10 +2679,10 @@@ struct gl_shader_progra * local_size_{x,y,z}. Otherwise undefined. */ unsigned LocalSize[3]; + /** + * Size of shared variables accessed by the compute shader. + */ + unsigned SharedSize; } Comp; /* post-link info: */ @@@ -2883,6 -2889,9 +2894,9 @@@ struct gl_shader_compiler_option GLboolean LowerBufferInterfaceBlocks; /**< Lower UBO and SSBO access to intrinsics. */ + GLboolean LowerShaderSharedVariables; /**< Lower compute shader shared + * variable access to intrinsics. */ + const struct nir_shader_compiler_options *NirOptions; }; @@@ -3402,7 -3411,7 +3416,7 @@@ struct gl_constant */ GLuint MaxUserAssignableUniformLocations; - /** GL_ARB_geometry_shader4 */ + /** geometry shader */ GLuint MaxGeometryOutputVertices; GLuint MaxGeometryTotalOutputComponents; @@@ -3454,6 -3463,9 +3468,9 @@@ /** GL_EXT_provoking_vertex */ GLboolean QuadsFollowProvokingVertexConvention; + /** GL_ARB_viewport_array */ + GLenum LayerAndVPIndexProvokingVertex; + /** OpenGL version 3.0 */ GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */ @@@ -3687,7 -3699,6 +3704,6 @@@ struct gl_extension GLboolean ARB_enhanced_layouts; GLboolean ARB_explicit_attrib_location; GLboolean ARB_explicit_uniform_location; - GLboolean ARB_geometry_shader4; GLboolean ARB_gpu_shader5; GLboolean ARB_gpu_shader_fp64; GLboolean ARB_half_float_vertex; diff --combined src/vulkan/anv_cmd_buffer.c index d34d53dcbb3,00000000000..5a56bb53c5e mode 100644,000000..100644 --- a/src/vulkan/anv_cmd_buffer.c +++ b/src/vulkan/anv_cmd_buffer.c @@@ -1,1096 -1,0 +1,1088 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. + */ + +/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + +static void +anv_cmd_state_init(struct anv_cmd_state *state) +{ + memset(&state->descriptors, 0, sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); + + state->dirty = ~0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->push_constants_dirty = 0; + state->pipeline = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; + + state->gen7.index_buffer = NULL; +} + +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else if ((*ptr)->size < size) { + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + (*ptr)->size = size; + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, + VkCommandBuffer* pCommandBuffer) +{ + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + 
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->level = level; + cmd_buffer->usage_flags = 0; + + anv_cmd_state_init(&cmd_buffer->state); + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } + + *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + + return result; +} + +VkResult anv_AllocateCommandBuffers( + VkDevice _device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->bufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } +} + +VkResult anv_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + else + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + case 8: + return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + case 9: + return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_BeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + cmd_buffer->usage_flags = pBeginInfo->flags; + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + 
anv_render_pass_from_handle(pBeginInfo->renderPass); + + struct anv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass]; + + anv_cmd_buffer_begin_subpass(cmd_buffer, subpass); + } + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->state.current_pipeline = UINT32_MAX; + + return VK_SUCCESS; +} + +VkResult anv_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. + */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.viewport.count = viewportCount; + memcpy(cmd_buffer->state.dynamic.viewport.viewports, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void anv_CmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.scissor.count = scissorCount; + memcpy(cmd_buffer->state.dynamic.scissor.scissors, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; +} + +void anv_CmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void anv_CmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + 
cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; +} + +void anv_CmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConstants, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void anv_CmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void anv_CmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void anv_CmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void anv_CmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void anv_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + descriptorSetCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = 
pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = (desc++)->range; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } + } +} + +void anv_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + } +} + +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8; + + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +static void +fill_descriptor_buffer_surface_state(struct anv_device *device, void *state, + gl_shader_stage stage, + VkDescriptorType type, + uint32_t offset, uint32_t range) +{ + VkFormat format; - uint32_t stride; - + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - if (device->instance->physicalDevice.compiler->scalar_stage[stage]) { - stride = 4; - } else { - stride = 16; - } + format = VK_FORMAT_R32G32B32A32_SFLOAT; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - stride = 1; + format = VK_FORMAT_UNDEFINED; + break; + + default: + unreachable("Invalid descriptor type"); + } + + anv_fill_buffer_surface_state(device, state, + anv_format_for_vk_format(format), - offset, range, stride); ++ offset, range, 1); +} + +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t color_count, bias, state_offset; + + if (stage == MESA_SHADER_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + if (stage == MESA_SHADER_FRAGMENT) { + bias = MAX_RTS; + color_count = subpass->color_count; + } else { + bias = 0; + color_count = 0; + } + + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; + + if (color_count + surface_count == 0) + return VK_SUCCESS; + + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + surface_count, + &state_offset); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + + if (layout == NULL) + goto out; + + if (layout->stage[stage].image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; + for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + bo = desc->buffer->bo; + bo_offset = desc->buffer->offset + desc->offset; + + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + fill_descriptor_buffer_surface_state(cmd_buffer->device, + surface_state.map, + stage, desc->type, + bo_offset, desc->range); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(surface_state); + + break; + } + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->nonrt_surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + assert(!"Unsupported descriptor type"); + break; + + default: + assert(!"Invalid descriptor type"); + continue; + } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + assert(image == layout->stage[stage].image_count); + + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, struct anv_state *state) +{ + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == MESA_SHADER_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout 
? layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) { + struct anv_pipeline_binding *binding = + &layout->stage[stage].sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + struct anv_sampler *sampler = desc->sampler; + + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. + */ + if (sampler == NULL) + continue; + + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); + + return state; +} + +struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +void +anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; + case 8: + gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; + case 9: + gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } +} + +void anv_CmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void anv_CmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void anv_CmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memBarrierCount, + const void* const* ppMemBarriers) +{ + stub(); +} + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. 
*/ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = - (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value); ++ (local_id_dwords + prog_data->nr_params) * sizeof(union gl_constant_value *); + const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + total_push_constants_size, + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +void anv_CmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; +} + +void anv_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBuffersCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_assert(primary->state.subpass == 
&primary->state.pass->subpasses[0]); + + for (uint32_t i = 0; i < commandBuffersCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void anv_DestroyCommandPool( + VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + anv_ResetCommandPool(_device, commandPool, 0); + + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + return VK_SUCCESS; +} + +/** + * Return NULL if the current subpass has no depthstencil attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_image_view *iview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(anv_format_is_depth_or_stencil(iview->format)); + + return iview; +} diff --combined src/vulkan/anv_nir_apply_dynamic_offsets.c index c500ab3e03c,00000000000..16b29b46fc9 mode 100644,000000..100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@@ -1,243 -1,0 +1,171 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" +#include "glsl/nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + unsigned block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_ssbo_indirect: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo: - case nir_intrinsic_store_ssbo_indirect: + block_idx_src = 1; + break; + default: + continue; /* the loop */ + } + + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; + + set_layout = state->layout->set[set].layout; + if (set_layout->binding[binding].dynamic_offset_index < 0) + continue; + + b->cursor = nir_before_instr(&intrin->instr); + - int indirect_src; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo_indirect: - case nir_intrinsic_load_ssbo_indirect: - indirect_src = 1; - break; - case nir_intrinsic_store_ssbo_indirect: - indirect_src = 2; - break; - default: - indirect_src = -1; - break; - } - + /* First, we need to generate the uniform load for the buffer offset */ + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; + - nir_const_value *const_arr_idx = - nir_src_as_const_value(res_intrin->src[0]); - - nir_intrinsic_op offset_load_op; - if (const_arr_idx) - offset_load_op = nir_intrinsic_load_uniform; - else - offset_load_op = nir_intrinsic_load_uniform_indirect; - + nir_intrinsic_instr *offset_load = - nir_intrinsic_instr_create(state->shader, offset_load_op); ++ nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 2; - offset_load->const_index[0] = state->indices_start + index * 2; - - if (const_arr_idx) { - offset_load->const_index[1] = const_arr_idx->u[0] * 2; - } else { - offset_load->const_index[1] = 0; - offset_load->src[0] = nir_src_for_ssa( - nir_imul(b, nir_ssa_for_src(b, res_intrin->src[0], 1), - nir_imm_int(b, 2))); - } ++ offset_load->const_index[0] = state->indices_start + index * 8; ++ offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, ++ nir_imm_int(b, 8))); + + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); + nir_builder_instr_insert(b, &offset_load->instr); + - /* We calculate the full offset and don't bother with the base - * offset. We need the full offset for the predicate anyway. 
- */ - nir_ssa_def *rel_offset = nir_imm_int(b, intrin->const_index[0]); - if (indirect_src >= 0) { - assert(intrin->src[indirect_src].is_ssa); - rel_offset = nir_iadd(b, intrin->src[indirect_src].ssa, rel_offset); - } - nir_ssa_def *global_offset = nir_iadd(b, rel_offset, - &offset_load->dest.ssa); - - /* Now we replace the load/store intrinsic */ - - nir_intrinsic_op indirect_op; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - indirect_op = nir_intrinsic_load_ubo_indirect; - break; - case nir_intrinsic_load_ssbo: - indirect_op = nir_intrinsic_load_ssbo_indirect; - break; - case nir_intrinsic_store_ssbo: - indirect_op = nir_intrinsic_store_ssbo_indirect; - break; - default: - unreachable("Invalid direct load/store intrinsic"); - } - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(state->shader, indirect_op); - copy->num_components = intrin->num_components; - - /* The indirect is always the last source */ - indirect_src = nir_intrinsic_infos[indirect_op].num_srcs - 1; - - for (unsigned i = 0; i < (unsigned)indirect_src; i++) - nir_src_copy(&copy->src[i], &intrin->src[i], &copy->instr); - - copy->src[indirect_src] = nir_src_for_ssa(global_offset); - nir_ssa_dest_init(&copy->instr, &copy->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.name); ++ nir_src *offset_src = nir_get_io_offset_src(intrin); ++ nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, ++ &offset_load->dest.ssa); + + /* In order to avoid out-of-bounds access, we predicate */ - nir_ssa_def *pred = nir_fge(b, nir_channel(b, &offset_load->dest.ssa, 1), - rel_offset); ++ nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), ++ offset_src->ssa); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + - nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr); ++ nir_instr_remove(&intrin->instr); ++ *offset_src = nir_src_for_ssa(new_offset); ++ nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); + - if (indirect_op != nir_intrinsic_store_ssbo) { ++ if (intrin->intrinsic != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); - src1->src = nir_src_for_ssa(&copy->dest.ssa); ++ src1->src = nir_src_for_ssa(&intrin->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = nir_src_for_ssa(zero); + exec_list_push_tail(&phi->srcs, &src2->node); + - nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); - + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&phi->dest.ssa)); - } + - nir_instr_remove(&intrin->instr); ++ nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); ++ } + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct 
apply_dynamic_offsets_state state = { + .shader = shader, + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder_init(&state.builder, overload->impl); + nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = - (const gl_constant_value *)&null_data->dynamic[i].offset; ++ (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = - (const gl_constant_value *)&null_data->dynamic[i].range; ++ (const union gl_constant_value *)&null_data->dynamic[i].range; + } + - shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 2; ++ shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --combined src/vulkan/anv_nir_apply_pipeline_layout.c index 8632dc74e57,00000000000..5a31b02ae4f mode 100644,000000..100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@@ -1,316 -1,0 +1,317 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" ++#include "program/prog_parameter.h" +#include "glsl/nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + const struct anv_pipeline_layout *layout; + + bool progress; +}; + +static uint32_t +get_surface_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + gl_shader_stage stage = state->shader->stage; + + assert(binding < set_layout->binding_count); + + assert(set_layout->binding[binding].stage[stage].surface_index >= 0); + + uint32_t surface_index = + state->layout->set[set].stage[stage].surface_start + + set_layout->binding[binding].stage[stage].surface_index; + + assert(surface_index < state->layout->stage[stage].surface_count); + + return surface_index; +} + +static uint32_t +get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + if (set_layout->binding[binding].stage[stage].sampler_index < 0) { + assert(tex_op == nir_texop_txf); + return 0; + } + + uint32_t sampler_index = + state->layout->set[set].stage[stage].sampler_start + + set_layout->binding[binding].stage[stage].sampler_index; + + assert(sampler_index < state->layout->stage[stage].sampler_count); + + return sampler_index; +} + +static uint32_t +get_image_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + assert(set_layout->binding[binding].stage[stage].image_index >= 0); + + uint32_t image_index = + state->layout->set[set].stage[stage].image_start + + set_layout->binding[binding].stage[stage].image_index; + + assert(image_index < state->layout->stage[stage].image_count); + + return image_index; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = intrin->const_index[0]; + uint32_t binding = intrin->const_index[1]; + + uint32_t surface_index = get_surface_index(set, binding, state); + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if (deref_array->deref_array_type == 
nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->sampler); + + nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler; + tex->texture_index = + get_surface_index(tex_deref->var->data.descriptor_set, + tex_deref->var->data.binding, state); + lower_tex_deref(tex, tex_deref, &tex->texture_index, + nir_tex_src_texture_offset, state); + + tex->sampler_index = + get_sampler_index(tex->sampler->var->data.descriptor_set, + tex->sampler->var->data.binding, tex->op, state); + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + + if (tex->texture) + cleanup_tex_deref(tex, tex->texture); + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + state->progress = true; + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + /* All texture instructions need lowering */ + state->progress = true; + break; + default: + continue; + } + } + + return true; +} + +static void - setup_vec4_uniform_value(const gl_constant_value **params, - const gl_constant_value *values, ++setup_vec4_uniform_value(const union gl_constant_value **params, ++ const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +bool +anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, + const struct anv_pipeline_layout *layout) +{ + struct apply_pipeline_layout_state state = { + .shader = shader, + .layout = layout, + }; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder_init(&state.builder, overload->impl); + nir_foreach_block(overload->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (layout->stage[shader->stage].image_count > 0) { + nir_foreach_variable(var, 
&shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * stored in the uniform. + */ + unsigned image_index = get_image_index(var->data.descriptor_set, + var->data.binding, &state); + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const gl_constant_value *)&image_param->surface_idx, 1); ++ (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const gl_constant_value *)image_param->offset, 2); ++ (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const gl_constant_value *)image_param->size, 3); ++ (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const gl_constant_value *)image_param->stride, 4); ++ (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const gl_constant_value *)image_param->tiling, 3); ++ (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const gl_constant_value *)image_param->swizzling, 2); ++ (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += layout->stage[shader->stage].image_count * + BRW_IMAGE_PARAM_SIZE; + } + + return state.progress; +} diff --combined src/vulkan/anv_pipeline.c index 9b6eef8074b,00000000000..05d84feba68 mode 100644,000000..100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@@ -1,1135 -1,0 +1,1135 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "glsl/nir/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + nir = spirv_to_nir(spirv, module->size / 4, stage, nir_options); + } + nir_validate_shader(nir); + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + /* Make sure the provided shader has exactly one entrypoint and that the + * name matches the name that came in from the VkShader. 
+ */ + nir_function_impl *entrypoint = NULL; + nir_foreach_overload(nir, overload) { + if (strcmp(entrypoint_name, overload->function->name) == 0 && + overload->impl) { + assert(entrypoint == NULL); + entrypoint = overload->impl; + } + } + assert(entrypoint != NULL); + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entrypoint); + + return nir; +} + +VkResult anv_CreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + *pPipelineCache = (VkPipelineCache)1; + + stub_return(VK_SUCCESS); +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + size_t* pDataSize, + void* pData) +{ + *pDataSize = 0; + stub_return(VK_SUCCESS); +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_SUCCESS); +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + anv_state_stream_finish(&pipeline->program_stream); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. 
(Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. + */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0) + prog_data->nr_params += pipeline->layout->stage[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ - prog_data->param = (const gl_constant_value **) - malloc(prog_data->nr_params * sizeof(gl_constant_value *)); ++ prog_data->param = (const union gl_constant_value **) ++ malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); 
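(Editor's aside, not part of the patch: a minimal standalone C sketch of the nr_params sizing logic above. The function name count_params and the constants 128, 8 and 24 are assumptions standing in for MAX_PUSH_CONSTANTS_SIZE, MAX_DYNAMIC_BUFFERS and BRW_IMAGE_PARAM_SIZE; they are not values taken from this diff.)

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the bookkeeping in anv_pipeline_compile(): worst-case push
 * constants, two uniform slots per dynamic buffer, one image-param block
 * per storage image. */
static unsigned
count_params(unsigned num_uniforms, bool has_dynamic_offsets, unsigned image_count)
{
   unsigned nr_params = 0;
   if (num_uniforms > 0)
      nr_params += 128 / sizeof(float);   /* whole push constant range */
   if (has_dynamic_offsets)
      nr_params += 8 * 2;                 /* offset + range per dynamic buffer */
   nr_params += image_count * 24;         /* one image param block per image */
   return nr_params;
}

int
main(void)
{
   /* A shader with push constants, dynamic offsets and two storage images
    * gets 32 + 16 + 48 = 96 parameter slots under these assumed limits. */
   printf("nr_params = %u\n", count_params(16, true, 2));
   return 0;
}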
+ + /* We now set the param values to be offsets into an + * anv_push_constants structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) - prog_data->param[i] = (const gl_constant_value *) ++ prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the binding table (plus MAX_RTS for the fragment shader). + */ + unsigned bias = stage == MESA_SHADER_FRAGMENT ? MAX_RTS : 0; + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + nir = brw_lower_nir(nir, &pipeline->device->info, NULL, + compiler->scalar_stage[stage]); + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. + */ - nir->num_uniforms = prog_data->nr_params; ++ nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static uint32_t +anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, + const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + if (!pipeline->device->info.has_llc) + anv_state_clflush(state); + + return state.offset; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + + populate_vs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, + 
&prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + pipeline->writes_point_size = nir->info.outputs_written & VARYING_SLOT_PSIZ; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + const uint32_t offset = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + + populate_gs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + pipeline->gs_vec4 = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + pipeline->gs_vertex_count = nir->info.gs.vertices_in; + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + + populate_wm_prog_key(&pipeline->device->info, info, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if 
(module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t offset = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + + populate_cs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 
8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. 
+ */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. 
+ */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. + */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + assert(info->pMultisampleState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + anv_reloc_list_init(&pipeline->batch_relocs, alloc); + /* TODO: Handle allocation fail */ + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + 
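(Editor's aside, not part of the patch: a minimal standalone C sketch of the dynamic-state bookkeeping that anv_pipeline_init_dynamic_state() above performs. Every state starts out baked into the pipeline, each entry in pDynamicStates clears its bit, and only states whose bit survives are copied from the create info. The enum names and values below are placeholders, not the real VkDynamicState values.)

#include <stdint.h>
#include <stdio.h>

/* Placeholder state ids; the real code indexes the mask by VkDynamicState. */
enum { STATE_VIEWPORT, STATE_SCISSOR, STATE_LINE_WIDTH, STATE_COUNT };

int
main(void)
{
   uint32_t states = (1u << STATE_COUNT) - 1;    /* all states baked in */
   const int dynamic[] = { STATE_SCISSOR };      /* declared dynamic by the app */

   for (unsigned s = 0; s < sizeof(dynamic) / sizeof(dynamic[0]); s++)
      states &= ~(1u << dynamic[s]);             /* same mask update as above */

   if (states & (1u << STATE_VIEWPORT))
      printf("viewport is copied from the create info\n");
   if (!(states & (1u << STATE_SCISSOR)))
      printf("scissor is left for vkCmdSetScissor at record time\n");
   return 0;
}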
+ if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_vec4 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + const char *entrypoint = pCreateInfo->pStages[i].pName; + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, pCreateInfo, module, entrypoint); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, pCreateInfo, module, entrypoint); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, pCreateInfo, module, entrypoint); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
*/ + switch (desc->inputRate) { + default: + case VK_VERTEX_INPUT_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + else + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + case 9: + return gen9_graphics_pipeline_create(_device, pCreateInfo, extra, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + else + return gen7_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, pCreateInfo, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} diff --combined src/vulkan/gen8_cmd_buffer.c index fccc2f4d084,00000000000..4e5db676722 mode 100644,000000..100644 --- a/src/vulkan/gen8_cmd_buffer.c +++ b/src/vulkan/gen8_cmd_buffer.c @@@ -1,1016 -1,0 +1,1016 @@@ +/* + * Copyright © 
2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "gen8_pack.h" +#include "gen9_pack.h" + +static void +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; +} + +#if ANV_GEN == 8 +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. 
*/ + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, + }; + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport); + } + + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + } +} +#endif + +static void +cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). 
If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + +#if ANV_GEN >= 9 + /* On SKL+ the new constants don't take effect until the next corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure + * that is sent. As it is, we re-emit binding tables but we could hold on + * to the offset of the most recent binding table and only re-emit the + * 3DSTATE_BINDING_TABLE_POINTER_* command. + */ + cmd_buffer->state.descriptors_dirty |= + cmd_buffer->state.push_constants_dirty & + cmd_buffer->state.pipeline->active_stages; +#endif + + if (cmd_buffer->state.descriptors_dirty) + gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); + } + + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. 
+ */ +#if ANV_GEN == 8 + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length, 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length, 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + 
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen9.wm_depth_stencil); + } +#endif + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdDraw)( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + 
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, + }; + + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); ++ (prog_data->nr_params + local_id_dwords) * sizeof(union gl_constant_value *); + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .NumberofThreadsinGPGPUThreadGroup = 0); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & 
VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + cmd_buffer_flush_compute_state(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview 
      ? iview->image : NULL;
+   const bool has_depth = iview && iview->format->depth_format;
+   const bool has_stencil = iview && iview->format->has_stencil;
+
+   /* FIXME: Implement the PMA stall W/A */
+   /* FIXME: Width and Height are wrong */
+
+   /* Emit 3DSTATE_DEPTH_BUFFER */
+   if (has_depth) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
+         .SurfaceType = SURFTYPE_2D,
+         .DepthWriteEnable = iview->format->depth_format,
+         .StencilWriteEnable = has_stencil,
+         .HierarchicalDepthBufferEnable = false,
+         .SurfaceFormat = iview->format->depth_format,
+         .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
+         .SurfaceBaseAddress = {
+            .bo = image->bo,
+            .offset = image->depth_surface.offset,
+         },
+         .Height = fb->height - 1,
+         .Width = fb->width - 1,
+         .LOD = 0,
+         .Depth = 1 - 1,
+         .MinimumArrayElement = 0,
+         .DepthBufferObjectControlState = GENX(MOCS),
+         .RenderTargetViewExtent = 1 - 1,
+         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2);
+   } else {
+      /* Even when no depth buffer is present, the hardware requires that
+       * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
+       *
+       *    If a null depth buffer is bound, the driver must instead bind depth as:
+       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
+       *       3DSTATE_DEPTH.Width = 1
+       *       3DSTATE_DEPTH.Height = 1
+       *       3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
+       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
+       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
+       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
+       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
+       *
+       * The PRM is wrong, though. The width and height must be programmed to
+       * the actual framebuffer's width and height, even when neither depth
+       * buffer nor stencil buffer is present.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
+         .SurfaceType = SURFTYPE_2D,
+         .SurfaceFormat = D16_UNORM,
+         .Width = fb->width - 1,
+         .Height = fb->height - 1,
+         .StencilWriteEnable = has_stencil);
+   }
+
+   /* Emit 3DSTATE_STENCIL_BUFFER */
+   if (has_stencil) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
+         .StencilBufferEnable = true,
+         .StencilBufferObjectControlState = GENX(MOCS),
+
+         /* Stencil buffers have strange pitch. The PRM says:
+          *
+          *    The pitch must be set to 2x the value computed based on width,
+          *    as the stencil buffer is stored with two rows interleaved.
+          */
+         .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
+
+         .SurfaceBaseAddress = {
+            .bo = image->bo,
+            .offset = image->offset + image->stencil_surface.offset,
+         },
+         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2);
+   } else {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER));
+   }
+
+   /* Disable hierarchical depth buffers. */
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER));
+
+   /* Clear the clear params.
*/ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +void +genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pClearValues); + + genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t entry, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + entry * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t entry) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + entry * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t entry) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, entry * 8 }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, entry * 8 + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { &pool->bo, entry * 8 }); + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct 
                      anv_batch *batch, uint32_t reg,
+                      struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
+                  .RegisterAddress = reg,
+                  .MemoryAddress = { bo, offset });
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
+                  .RegisterAddress = reg + 4,
+                  .MemoryAddress = { bo, offset + 4 });
+}
+
+void genX(CmdCopyQueryPoolResults)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    startQuery,
+    uint32_t                                    queryCount,
+    VkBuffer                                    destBuffer,
+    VkDeviceSize                                destOffset,
+    VkDeviceSize                                destStride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
+   uint32_t slot_offset, dst_offset;
+
+   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+      /* Where is the availability info supposed to go? */
+      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
+      return;
+   }
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
+
+   /* FIXME: If we're not waiting, should we just do this on the CPU? */
+   if (flags & VK_QUERY_RESULT_WAIT_BIT)
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .CommandStreamerStallEnable = true,
+                     .StallAtPixelScoreboard = true);
+
+   dst_offset = buffer->offset + destOffset;
+   for (uint32_t i = 0; i < queryCount; i++) {
+
+      slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);
+
+      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
+      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
+
+      /* FIXME: We need to clamp the result for 32 bit. */
+
+      uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
+      dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
+      dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
+      dw[3] = alu(OPCODE_SUB, 0, 0);
+      dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
+
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
+                     .RegisterAddress = CS_GPR(2),
+                     /* FIXME: This is only lower 32 bits */
+                     .MemoryAddress = { buffer->bo, dst_offset });
+
+      if (flags & VK_QUERY_RESULT_64_BIT)
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
+                        .RegisterAddress = CS_GPR(2) + 4,
+                        /* FIXME: This is only lower 32 bits */
+                        .MemoryAddress = { buffer->bo, dst_offset + 4 });
+
+      dst_offset += destStride;
+   }
+}
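A closing note on genX(CmdCopyQueryPoolResults): the MI_MATH sequence above computes the occlusion result as end minus begin of the two PS depth counts held in each query-pool slot, entirely on the GPU. As a rough CPU-side illustration of the same arithmetic, along the lines the "should we just do this on the CPU?" FIXME hints at, here is a minimal sketch; the slot struct merely mirrors the begin/end layout implied by the slot_offset and slot_offset + 8 loads, and none of these names exist in the driver:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical mirror of the per-query slot layout used above: the begin
 * counter sits at byte 0 of the slot and the end counter at byte 8.
 */
struct occlusion_slot {
   uint64_t begin;   /* PS depth count written at vkCmdBeginQuery time */
   uint64_t end;     /* PS depth count written at vkCmdEndQuery time */
};

/* CPU-side equivalent of the GPR0/GPR1 loads plus the MI_MATH subtraction. */
static uint64_t
occlusion_samples(const struct occlusion_slot *slot, bool want_64bit)
{
   uint64_t samples = slot->end - slot->begin;

   /* This is the clamp the "result for 32 bit" FIXME above refers to. */
   if (!want_64bit && samples > UINT32_MAX)
      samples = UINT32_MAX;

   return samples;
}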