From: Jason Ekstrand
Date: Fri, 5 Feb 2016 23:03:04 +0000 (-0800)
Subject: Merge commit mesa-public/master into vulkan
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=741744f691d6ef63e9f9a4c03136f969f2ffb0bf;p=mesa.git

Merge commit mesa-public/master into vulkan

This pulls in the patches that move all of the compiler stuff around
---

741744f691d6ef63e9f9a4c03136f969f2ffb0bf
diff --cc src/compiler/Makefile.am
index 00000000000,0bc8e48efa6..e3d297fe299
mode 000000,100644..100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@@ -1,0 -1,323 +1,333 @@@
+ #
+ # Copyright © 2012 Jon TURNEY
+ # Copyright (C) 2015 Intel Corporation
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+ # to deal in the Software without restriction, including without limitation
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ # and/or sell copies of the Software, and to permit persons to whom the
+ # Software is furnished to do so, subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice (including the next
+ # paragraph) shall be included in all copies or substantial portions of the
+ # Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ # IN THE SOFTWARE.
+ + include Makefile.sources + + AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa/ \ + -I$(top_builddir)/src/compiler/glsl\ + -I$(top_srcdir)/src/compiler/glsl\ + -I$(top_srcdir)/src/compiler/glsl/glcpp\ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gtest/include \ + $(DEFINES) + + AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + $(MSVC2013_COMPAT_CFLAGS) + + AM_CXXFLAGS = \ + $(VISIBILITY_CXXFLAGS) \ + $(MSVC2013_COMPAT_CXXFLAGS) + + noinst_LTLIBRARIES = libcompiler.la + + libcompiler_la_SOURCES = $(LIBCOMPILER_FILES) + + check_PROGRAMS = + TESTS = + BUILT_SOURCES = + CLEANFILES = + EXTRA_DIST = SConscript + + + EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \ + glsl/TODO glsl/glcpp/README \ + glsl/glsl_lexer.ll \ + glsl/glsl_parser.yy \ + glsl/glcpp/glcpp-lex.l \ + glsl/glcpp/glcpp-parse.y \ + glsl/Makefile.sources \ + glsl/SConscript + + TESTS += glsl/glcpp/tests/glcpp-test \ + glsl/glcpp/tests/glcpp-test-cr-lf \ + glsl/tests/blob-test \ + glsl/tests/general-ir-test \ + glsl/tests/optimization-test \ + glsl/tests/sampler-types-test \ + glsl/tests/uniform-initializer-test + + TESTS_ENVIRONMENT= \ + export PYTHON2=$(PYTHON2); \ + export PYTHON_FLAGS=$(PYTHON_FLAGS); + + check_PROGRAMS += \ + glsl/glcpp/glcpp \ + glsl/glsl_test \ + glsl/tests/blob-test \ + glsl/tests/general-ir-test \ + glsl/tests/sampler-types-test \ + glsl/tests/uniform-initializer-test + -noinst_PROGRAMS = glsl_compiler ++noinst_PROGRAMS = glsl_compiler spirv2nir + + glsl_tests_blob_test_SOURCES = \ + glsl/tests/blob_test.c + glsl_tests_blob_test_LDADD = \ + glsl/libglsl.la + + glsl_tests_general_ir_test_SOURCES = \ + glsl/standalone_scaffolding.cpp \ + glsl/tests/builtin_variable_test.cpp \ + glsl/tests/invalidate_locations_test.cpp \ + glsl/tests/general_ir_test.cpp \ + glsl/tests/varyings_test.cpp + glsl_tests_general_ir_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + glsl_tests_general_ir_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + glsl_tests_uniform_initializer_test_SOURCES = \ + glsl/tests/copy_constant_to_storage_tests.cpp \ + glsl/tests/set_uniform_initializer_tests.cpp \ + glsl/tests/uniform_initializer_utils.cpp \ + glsl/tests/uniform_initializer_utils.h + glsl_tests_uniform_initializer_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + glsl_tests_uniform_initializer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + glsl_tests_sampler_types_test_SOURCES = \ + glsl/tests/sampler_types_test.cpp + glsl_tests_sampler_types_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + glsl_tests_sampler_types_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la + + glsl_libglcpp_la_LIBADD = \ + $(top_builddir)/src/util/libmesautil.la + glsl_libglcpp_la_SOURCES = \ + glsl/glcpp/glcpp-lex.c \ + glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-parse.h \ + $(LIBGLCPP_FILES) + + glsl_glcpp_glcpp_SOURCES = \ + glsl/glcpp/glcpp.c + glsl_glcpp_glcpp_LDADD = \ + glsl/libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ + -lm + + glsl_libglsl_la_LIBADD = \ + nir/libnir.la \ + glsl/libglcpp.la + + glsl_libglsl_la_SOURCES = \ + glsl/glsl_lexer.cpp \ + glsl/glsl_parser.cpp \ + glsl/glsl_parser.h \ + $(LIBGLSL_FILES) + + + 
glsl_compiler_SOURCES = \ + $(GLSL_COMPILER_CXX_FILES) + + glsl_compiler_LDADD = \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + + glsl_glsl_test_SOURCES = \ + glsl/standalone_scaffolding.cpp \ + glsl/test.cpp \ + glsl/test_optpass.cpp \ + glsl/test_optpass.h + + glsl_glsl_test_LDADD = \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + ++spirv2nir_SOURCES = \ ++ nir/spirv2nir.c ++ ++spirv2nir_LDADD = \ ++ nir/libnir.la \ ++ $(top_builddir)/src/util/libmesautil.la \ ++ -lm -lstdc++ \ ++ $(PTHREAD_LIBS) ++ + # We write our own rules for yacc and lex below. We'd rather use automake, + # but automake makes it especially difficult for a number of reasons: + # + # * < automake-1.12 generates .h files from .yy and .ypp files, but + # >=automake-1.12 generates .hh and .hpp files respectively. There's no + # good way of making a project that uses C++ yacc files compatible with + # both versions of automake. Strong work automake developers. + # + # * Since we're generating code from .l/.y files in a subdirectory (glcpp/) + # we'd like the resulting generated code to also go in glcpp/ for purposes + # of distribution. Automake gives no way to do this. + # + # * Since we're building multiple yacc parsers into one library (and via one + # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes + # automake to name the resulting generated code as _filename.c. + # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file. + + # In order to make build output print "LEX" and "YACC", we reproduce the + # automake variables below. + + AM_V_LEX = $(am__v_LEX_$(V)) + am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY)) + am__v_LEX_0 = @echo " LEX " $@; + am__v_LEX_1 = + + AM_V_YACC = $(am__v_YACC_$(V)) + am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY)) + am__v_YACC_0 = @echo " YACC " $@; + am__v_YACC_1 = + + MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) + YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) + LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) + + glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy + $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy + + glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll + $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll + + glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y + $(MKDIR_GEN) + $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y + + glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l + $(MKDIR_GEN) + $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l + + # Only the parsers (specifically the header files generated at the same time) + # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is + # called for the .c/.cpp file and the .h files. By listing the .c/.cpp files + # YACC is only executed once for each parser. The rest of the generated code + # will be created at the appropriate times according to standard automake + # dependency rules. 
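# As an illustration of the BUILT_SOURCES comment above (a sketch only, not
# part of this build): if the generated headers were listed instead, e.g.
#
#   BUILT_SOURCES += glsl/glsl_parser.h glsl/glcpp/glcpp-parse.h
#
# make could schedule the bison recipe once for the header and again for the
# matching .cpp/.c file, because both outputs come from one multi-target
# rule.  Listing the .cpp/.c files, as below, runs bison once per parser and
# the headers fall out of that same invocation.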
+ BUILT_SOURCES += \ + glsl/glsl_parser.cpp \ + glsl/glsl_lexer.cpp \ + glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-lex.c + CLEANFILES += \ + glsl/glcpp/glcpp-parse.h \ + glsl/glsl_parser.h \ + glsl/glsl_parser.cpp \ + glsl/glsl_lexer.cpp \ + glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-lex.c + + clean-local: + $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr + + dist-hook: + $(RM) glsl/glcpp/tests/*.out + $(RM) glsl/glcpp/tests/subtest*/*.out + + noinst_LTLIBRARIES += nir/libnir.la + + nir_libnir_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + + nir_libnir_la_LIBADD = \ + libcompiler.la + + nir_libnir_la_SOURCES = \ + $(NIR_FILES) \ ++ $(SPIRV_FILES) \ + $(NIR_GENERATED_FILES) + + PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) + + nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false) + + nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false) + + nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false) + + nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false) + + nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false) + + + check_PROGRAMS += nir/tests/control_flow_tests + + nir_tests_control_flow_tests_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + + nir_tests_control_flow_tests_SOURCES = \ + nir/tests/control_flow_tests.cpp + nir_tests_control_flow_tests_CFLAGS = \ + $(PTHREAD_CFLAGS) + nir_tests_control_flow_tests_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + nir/libnir.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + + + TESTS += nir/tests/control_flow_tests + + + BUILT_SOURCES += $(NIR_GENERATED_FILES) + CLEANFILES += $(NIR_GENERATED_FILES) + + EXTRA_DIST += \ + nir/nir_algebraic.py \ + nir/nir_builder_opcodes_h.py \ + nir/nir_constant_expressions.py \ + nir/nir_opcodes.py \ + nir/nir_opcodes_c.py \ + nir/nir_opcodes_h.py \ + nir/nir_opt_algebraic.py \ + nir/tests \ + nir/Makefile.sources diff --cc src/compiler/Makefile.sources index 00000000000,c9780d6d6f7..2a4568aa679 mode 000000,100644..100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@@ -1,0 -1,226 +1,242 @@@ + LIBCOMPILER_FILES = \ + builtin_type_macros.h \ + glsl_types.cpp \ + glsl_types.h \ + nir_types.cpp \ + nir_types.h \ + shader_enums.c \ + shader_enums.h + + # libglsl + + LIBGLSL_FILES = \ + glsl/ast.h \ + glsl/ast_array_index.cpp \ + glsl/ast_expr.cpp \ + glsl/ast_function.cpp \ + glsl/ast_to_hir.cpp \ + glsl/ast_type.cpp \ + glsl/blob.c \ + glsl/blob.h \ + glsl/builtin_functions.cpp \ + glsl/builtin_types.cpp \ + glsl/builtin_variables.cpp \ + glsl/glsl_parser_extras.cpp \ + glsl/glsl_parser_extras.h \ + glsl/glsl_symbol_table.cpp \ + glsl/glsl_symbol_table.h \ + glsl/hir_field_selection.cpp \ + glsl/ir_basic_block.cpp \ + glsl/ir_basic_block.h \ + glsl/ir_builder.cpp \ + glsl/ir_builder.h \ + glsl/ir_clone.cpp \ + glsl/ir_constant_expression.cpp \ + glsl/ir.cpp \ + glsl/ir.h \ + glsl/ir_equals.cpp \ + 
glsl/ir_expression_flattening.cpp \ + glsl/ir_expression_flattening.h \ + glsl/ir_function_can_inline.cpp \ + glsl/ir_function_detect_recursion.cpp \ + glsl/ir_function_inlining.h \ + glsl/ir_function.cpp \ + glsl/ir_hierarchical_visitor.cpp \ + glsl/ir_hierarchical_visitor.h \ + glsl/ir_hv_accept.cpp \ + glsl/ir_import_prototypes.cpp \ + glsl/ir_optimization.h \ + glsl/ir_print_visitor.cpp \ + glsl/ir_print_visitor.h \ + glsl/ir_reader.cpp \ + glsl/ir_reader.h \ + glsl/ir_rvalue_visitor.cpp \ + glsl/ir_rvalue_visitor.h \ + glsl/ir_set_program_inouts.cpp \ + glsl/ir_uniform.h \ + glsl/ir_validate.cpp \ + glsl/ir_variable_refcount.cpp \ + glsl/ir_variable_refcount.h \ + glsl/ir_visitor.h \ + glsl/linker.cpp \ + glsl/linker.h \ + glsl/link_atomics.cpp \ + glsl/link_functions.cpp \ + glsl/link_interface_blocks.cpp \ + glsl/link_uniforms.cpp \ + glsl/link_uniform_initializers.cpp \ + glsl/link_uniform_block_active_visitor.cpp \ + glsl/link_uniform_block_active_visitor.h \ + glsl/link_uniform_blocks.cpp \ + glsl/link_varyings.cpp \ + glsl/link_varyings.h \ + glsl/list.h \ + glsl/loop_analysis.cpp \ + glsl/loop_analysis.h \ + glsl/loop_controls.cpp \ + glsl/loop_unroll.cpp \ + glsl/lower_buffer_access.cpp \ + glsl/lower_buffer_access.h \ + glsl/lower_clip_distance.cpp \ + glsl/lower_const_arrays_to_uniforms.cpp \ + glsl/lower_discard.cpp \ + glsl/lower_discard_flow.cpp \ + glsl/lower_if_to_cond_assign.cpp \ + glsl/lower_instructions.cpp \ + glsl/lower_jumps.cpp \ + glsl/lower_mat_op_to_vec.cpp \ + glsl/lower_noise.cpp \ + glsl/lower_offset_array.cpp \ + glsl/lower_packed_varyings.cpp \ + glsl/lower_named_interface_blocks.cpp \ + glsl/lower_packing_builtins.cpp \ + glsl/lower_subroutine.cpp \ + glsl/lower_tess_level.cpp \ + glsl/lower_texture_projection.cpp \ + glsl/lower_variable_index_to_cond_assign.cpp \ + glsl/lower_vec_index_to_cond_assign.cpp \ + glsl/lower_vec_index_to_swizzle.cpp \ + glsl/lower_vector.cpp \ + glsl/lower_vector_derefs.cpp \ + glsl/lower_vector_insert.cpp \ + glsl/lower_vertex_id.cpp \ + glsl/lower_output_reads.cpp \ + glsl/lower_shared_reference.cpp \ + glsl/lower_ubo_reference.cpp \ + glsl/opt_algebraic.cpp \ + glsl/opt_array_splitting.cpp \ + glsl/opt_conditional_discard.cpp \ + glsl/opt_constant_folding.cpp \ + glsl/opt_constant_propagation.cpp \ + glsl/opt_constant_variable.cpp \ + glsl/opt_copy_propagation.cpp \ + glsl/opt_copy_propagation_elements.cpp \ + glsl/opt_dead_builtin_variables.cpp \ + glsl/opt_dead_builtin_varyings.cpp \ + glsl/opt_dead_code.cpp \ + glsl/opt_dead_code_local.cpp \ + glsl/opt_dead_functions.cpp \ + glsl/opt_flatten_nested_if_blocks.cpp \ + glsl/opt_flip_matrices.cpp \ + glsl/opt_function_inlining.cpp \ + glsl/opt_if_simplification.cpp \ + glsl/opt_minmax.cpp \ + glsl/opt_noop_swizzle.cpp \ + glsl/opt_rebalance_tree.cpp \ + glsl/opt_redundant_jumps.cpp \ + glsl/opt_structure_splitting.cpp \ + glsl/opt_swizzle_swizzle.cpp \ + glsl/opt_tree_grafting.cpp \ + glsl/opt_vectorize.cpp \ + glsl/program.h \ + glsl/s_expression.cpp \ + glsl/s_expression.h + + # glsl_compiler + + GLSL_COMPILER_CXX_FILES = \ + glsl/standalone_scaffolding.cpp \ + glsl/standalone_scaffolding.h \ + glsl/main.cpp + + # libglsl generated sources + LIBGLSL_GENERATED_CXX_FILES = \ + glsl/glsl_lexer.cpp \ + glsl/glsl_parser.cpp + + # libglcpp + + LIBGLCPP_FILES = \ + glsl/glcpp/glcpp.h \ + glsl/glcpp/pp.c + + LIBGLCPP_GENERATED_FILES = \ + glsl/glcpp/glcpp-lex.c \ + glsl/glcpp/glcpp-parse.c + + NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c 
\ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + + NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h \ + nir/nir.c \ + nir/nir.h \ + nir/nir_array.h \ + nir/nir_builder.h \ + nir/nir_clone.c \ + nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ ++ nir/nir_gather_info.c \ + nir/nir_gs_count_vertices.c \ ++ nir/nir_inline_functions.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ + nir/nir_liveness.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_clip.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ ++ nir/nir_lower_indirect_derefs.c \ + nir/nir_lower_load_const_to_scalar.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ + nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ + nir/nir_lower_phis_to_scalar.c \ ++ nir/nir_lower_returns.c \ + nir/nir_lower_samplers.c \ + nir/nir_lower_system_values.c \ + nir/nir_lower_tex.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_two_sided_color.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c \ + nir/nir_metadata.c \ + nir/nir_move_vec_src_uses_to_dest.c \ + nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_dead_cf.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_opt_undef.c \ ++ nir/nir_phi_builder.c \ ++ nir/nir_phi_builder.h \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ ++ nir/nir_repair_ssa.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ + nir/nir_to_ssa.c \ + nir/nir_validate.c \ + nir/nir_vla.h \ + nir/nir_worklist.c \ + nir/nir_worklist.h ++ ++SPIRV_FILES = \ ++ nir/spirv/nir_spirv.h \ ++ nir/spirv/spirv_to_nir.c \ ++ nir/spirv/vtn_alu.c \ ++ nir/spirv/vtn_cfg.c \ ++ nir/spirv/vtn_glsl450.c \ ++ nir/spirv/vtn_private.h \ ++ nir/spirv/vtn_variables.c diff --cc src/compiler/glsl/.gitignore index 00000000000,dda423f83db..e80f8af6bfc mode 000000,100644..100644 --- a/src/compiler/glsl/.gitignore +++ b/src/compiler/glsl/.gitignore @@@ -1,0 -1,10 +1,11 @@@ + glsl_compiler + glsl_lexer.cpp + glsl_parser.cpp + glsl_parser.h + glsl_parser.output + glsl_test ++spirv2nir + subtest-cr/ + subtest-lf/ + subtest-cr-lf/ + subtest-lf-cr/ diff --cc src/compiler/glsl/Makefile.am index 00000000000,9954b812403..d6b1f9ed695 mode 000000,100644..100644 --- a/src/compiler/glsl/Makefile.am +++ b/src/compiler/glsl/Makefile.am @@@ -1,0 -1,228 +1,237 @@@ + # Copyright © 2012 Jon TURNEY + # + # Permission is hereby granted, free of charge, to any person obtaining a + # copy of this software and associated documentation files (the "Software"), + # to deal in the Software without restriction, including without limitation + # the rights to use, copy, modify, merge, publish, distribute, sublicense, + # and/or sell copies of the Software, and to permit persons to whom the + # Software is furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice (including the next + # paragraph) shall be included in all copies or substantial portions of the + # Software. 
+ # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + + AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa/ \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/glsl/glcpp \ + -I$(top_srcdir)/src/gtest/include \ + $(DEFINES) + AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + $(MSVC2013_COMPAT_CFLAGS) + AM_CXXFLAGS = \ + $(VISIBILITY_CXXFLAGS) \ + $(MSVC2013_COMPAT_CXXFLAGS) + + EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ + glsl_lexer.ll \ + glsl_parser.yy \ + glcpp/glcpp-lex.l \ + glcpp/glcpp-parse.y \ + SConscript + + include Makefile.sources + + TESTS = glcpp/tests/glcpp-test \ + glcpp/tests/glcpp-test-cr-lf \ + tests/blob-test \ + tests/general-ir-test \ + tests/optimization-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + + TESTS_ENVIRONMENT= \ + export PYTHON2=$(PYTHON2); \ + export PYTHON_FLAGS=$(PYTHON_FLAGS); + + noinst_LTLIBRARIES = libglsl.la libglcpp.la + check_PROGRAMS = \ + glcpp/glcpp \ + glsl_test \ + tests/blob-test \ + tests/general-ir-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + -noinst_PROGRAMS = glsl_compiler ++noinst_PROGRAMS = glsl_compiler spirv2nir + + tests_blob_test_SOURCES = \ + tests/blob_test.c + tests_blob_test_LDADD = \ + $(top_builddir)/src/glsl/libglsl.la + + tests_general_ir_test_SOURCES = \ + standalone_scaffolding.cpp \ + tests/builtin_variable_test.cpp \ + tests/invalidate_locations_test.cpp \ + tests/general_ir_test.cpp \ + tests/varyings_test.cpp + tests_general_ir_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + tests_general_ir_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + tests_uniform_initializer_test_SOURCES = \ + tests/copy_constant_to_storage_tests.cpp \ + tests/set_uniform_initializer_tests.cpp \ + tests/uniform_initializer_utils.cpp \ + tests/uniform_initializer_utils.h + tests_uniform_initializer_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + tests_uniform_initializer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + tests_sampler_types_test_SOURCES = \ + tests/sampler_types_test.cpp + tests_sampler_types_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + tests_sampler_types_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + libglcpp_la_LIBADD = \ + $(top_builddir)/src/util/libmesautil.la + libglcpp_la_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-parse.h \ + $(LIBGLCPP_FILES) + + glcpp_glcpp_SOURCES = \ + glcpp/glcpp.c + glcpp_glcpp_LDADD = \ + libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ + -lm + + libglsl_la_LIBADD = \ + $(top_builddir)/src/compiler/nir/libnir.la \ + libglcpp.la + + libglsl_la_SOURCES = \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser.h \ + $(LIBGLSL_FILES) + - + 
glsl_compiler_SOURCES = \ + $(GLSL_COMPILER_CXX_FILES) + + glsl_compiler_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + ++spirv2nir_SOURCES = \ ++ standalone_scaffolding.cpp \ ++ standalone_scaffolding.h \ ++ nir/spirv2nir.c ++ ++spirv2nir_LDADD = \ ++ libglsl.la \ ++ $(top_builddir)/src/libglsl_util.la \ ++ $(PTHREAD_LIBS) ++ + glsl_test_SOURCES = \ + standalone_scaffolding.cpp \ + test.cpp \ + test_optpass.cpp \ + test_optpass.h + + glsl_test_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + + # We write our own rules for yacc and lex below. We'd rather use automake, + # but automake makes it especially difficult for a number of reasons: + # + # * < automake-1.12 generates .h files from .yy and .ypp files, but + # >=automake-1.12 generates .hh and .hpp files respectively. There's no + # good way of making a project that uses C++ yacc files compatible with + # both versions of automake. Strong work automake developers. + # + # * Since we're generating code from .l/.y files in a subdirectory (glcpp/) + # we'd like the resulting generated code to also go in glcpp/ for purposes + # of distribution. Automake gives no way to do this. + # + # * Since we're building multiple yacc parsers into one library (and via one + # Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes + # automake to name the resulting generated code as _filename.c. + # Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file. + + # In order to make build output print "LEX" and "YACC", we reproduce the + # automake variables below. + + AM_V_LEX = $(am__v_LEX_$(V)) + am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY)) + am__v_LEX_0 = @echo " LEX " $@; + am__v_LEX_1 = + + AM_V_YACC = $(am__v_YACC_$(V)) + am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY)) + am__v_YACC_0 = @echo " YACC " $@; + am__v_YACC_1 = + + MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) + YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) + LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) + + glsl_parser.cpp glsl_parser.h: glsl_parser.yy + $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy + + glsl_lexer.cpp: glsl_lexer.ll + $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll + + glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y + $(MKDIR_GEN) + $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y + + glcpp/glcpp-lex.c: glcpp/glcpp-lex.l + $(MKDIR_GEN) + $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l + + # Only the parsers (specifically the header files generated at the same time) + # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is + # called for the .c/.cpp file and the .h files. By listing the .c/.cpp files + # YACC is only executed once for each parser. The rest of the generated code + # will be created at the appropriate times according to standard automake + # dependency rules. 
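# A minimal sketch of how the hand-written generator rules above would be
# reused for another parser; "foo_parser" is a hypothetical placeholder, not
# a real file in this tree:
#
#   foo_parser.cpp foo_parser.h: foo_parser.yy
#   	$(YACC_GEN) -o $@ -p "_foo_" --defines=$(builddir)/foo_parser.h $(srcdir)/foo_parser.yy
#
# With V=0 the AM_V_YACC indirection above prints "  YACC   foo_parser.cpp"
# and hides the full bison command line; with V=1 the command is echoed
# verbatim.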
+ BUILT_SOURCES = \ + glsl_parser.cpp \ + glsl_lexer.cpp \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-lex.c + CLEANFILES = \ + glcpp/glcpp-parse.h \ + glsl_parser.h \ + $(BUILT_SOURCES) + + clean-local: + $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr + + dist-hook: + $(RM) glcpp/tests/*.out + $(RM) glcpp/tests/subtest*/*.out diff --cc src/compiler/glsl/Makefile.sources index 00000000000,08b40c5cc8f..3f537d5b37a mode 000000,100644..100644 --- a/src/compiler/glsl/Makefile.sources +++ b/src/compiler/glsl/Makefile.sources @@@ -1,0 -1,222 +1,238 @@@ + # shared source lists for Makefile, SConscript, and Android.mk + + # libglcpp + + LIBGLCPP_FILES = \ + glcpp/glcpp.h \ + glcpp/pp.c + + LIBGLCPP_GENERATED_FILES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c + + NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + + NIR_FILES = \ + nir/nir.c \ + nir/nir.h \ + nir/nir_array.h \ + nir/nir_builder.h \ + nir/nir_clone.c \ + nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ ++ nir/nir_gather_info.c \ + nir/nir_gs_count_vertices.c \ ++ nir/nir_inline_functions.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ + nir/nir_liveness.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_clip.c \ ++ nir/nir_lower_returns.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ ++ nir/nir_lower_indirect_derefs.c \ + nir/nir_lower_load_const_to_scalar.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ + nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ + nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_samplers.c \ + nir/nir_lower_system_values.c \ + nir/nir_lower_tex.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_two_sided_color.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c \ + nir/nir_metadata.c \ + nir/nir_move_vec_src_uses_to_dest.c \ + nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_dead_cf.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_opt_undef.c \ ++ nir/nir_phi_builder.c \ ++ nir/nir_phi_builder.h \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ ++ nir/nir_repair_ssa.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ + nir/nir_to_ssa.c \ + nir/nir_validate.c \ + nir/nir_vla.h \ + nir/nir_worklist.c \ + nir/nir_worklist.h + ++SPIRV_FILES = \ ++ nir/spirv/nir_spirv.h \ ++ nir/spirv/spirv_to_nir.c \ ++ nir/spirv/vtn_alu.c \ ++ nir/spirv/vtn_cfg.c \ ++ nir/spirv/vtn_glsl450.c \ ++ nir/spirv/vtn_private.h \ ++ nir/spirv/vtn_variables.c ++ + # libglsl + + LIBGLSL_FILES = \ + ast.h \ + ast_array_index.cpp \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + blob.c \ + blob.h \ + builtin_functions.cpp \ + builtin_types.cpp \ + builtin_variables.cpp \ + glsl_parser_extras.cpp \ + glsl_parser_extras.h \ + glsl_symbol_table.cpp \ + glsl_symbol_table.h \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_basic_block.h \ + ir_builder.cpp \ + ir_builder.h \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir.cpp \ + 
ir.h \ + ir_equals.cpp \ + ir_expression_flattening.cpp \ + ir_expression_flattening.h \ + ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ + ir_function_inlining.h \ + ir_function.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hierarchical_visitor.h \ + ir_hv_accept.cpp \ + ir_import_prototypes.cpp \ + ir_optimization.h \ + ir_print_visitor.cpp \ + ir_print_visitor.h \ + ir_reader.cpp \ + ir_reader.h \ + ir_rvalue_visitor.cpp \ + ir_rvalue_visitor.h \ + ir_set_program_inouts.cpp \ + ir_uniform.h \ + ir_validate.cpp \ + ir_variable_refcount.cpp \ + ir_variable_refcount.h \ + ir_visitor.h \ + linker.cpp \ + linker.h \ + link_atomics.cpp \ + link_functions.cpp \ + link_interface_blocks.cpp \ + link_uniforms.cpp \ + link_uniform_initializers.cpp \ + link_uniform_block_active_visitor.cpp \ + link_uniform_block_active_visitor.h \ + link_uniform_blocks.cpp \ + link_varyings.cpp \ + link_varyings.h \ + list.h \ + loop_analysis.cpp \ + loop_analysis.h \ + loop_controls.cpp \ + loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ + lower_clip_distance.cpp \ + lower_const_arrays_to_uniforms.cpp \ + lower_discard.cpp \ + lower_discard_flow.cpp \ + lower_if_to_cond_assign.cpp \ + lower_instructions.cpp \ + lower_jumps.cpp \ + lower_mat_op_to_vec.cpp \ + lower_noise.cpp \ + lower_offset_array.cpp \ + lower_packed_varyings.cpp \ + lower_named_interface_blocks.cpp \ + lower_packing_builtins.cpp \ + lower_subroutine.cpp \ + lower_tess_level.cpp \ + lower_texture_projection.cpp \ + lower_variable_index_to_cond_assign.cpp \ + lower_vec_index_to_cond_assign.cpp \ + lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ + lower_vector_derefs.cpp \ + lower_vector_insert.cpp \ + lower_vertex_id.cpp \ + lower_output_reads.cpp \ + lower_shared_reference.cpp \ + lower_ubo_reference.cpp \ + opt_algebraic.cpp \ + opt_array_splitting.cpp \ + opt_conditional_discard.cpp \ + opt_constant_folding.cpp \ + opt_constant_propagation.cpp \ + opt_constant_variable.cpp \ + opt_copy_propagation.cpp \ + opt_copy_propagation_elements.cpp \ + opt_dead_builtin_variables.cpp \ + opt_dead_builtin_varyings.cpp \ + opt_dead_code.cpp \ + opt_dead_code_local.cpp \ + opt_dead_functions.cpp \ + opt_flatten_nested_if_blocks.cpp \ + opt_flip_matrices.cpp \ + opt_function_inlining.cpp \ + opt_if_simplification.cpp \ + opt_minmax.cpp \ + opt_noop_swizzle.cpp \ + opt_rebalance_tree.cpp \ + opt_redundant_jumps.cpp \ + opt_structure_splitting.cpp \ + opt_swizzle_swizzle.cpp \ + opt_tree_grafting.cpp \ + opt_vectorize.cpp \ + program.h \ + s_expression.cpp \ + s_expression.h + + # glsl to nir pass + GLSL_TO_NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h + + # glsl_compiler + + GLSL_COMPILER_CXX_FILES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + main.cpp + + # libglsl generated sources + LIBGLSL_GENERATED_CXX_FILES = \ + glsl_lexer.cpp \ + glsl_parser.cpp diff --cc src/compiler/glsl/ast_to_hir.cpp index 00000000000,dfd31966eb0..98d8bc5f268 mode 000000,100644..100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@@ -1,0 -1,7583 +1,7584 @@@ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + /** + * \file ast_to_hir.c + * Convert abstract syntax to to high-level intermediate reprensentation (HIR). + * + * During the conversion to HIR, the majority of the symantic checking is + * preformed on the program. This includes: + * + * * Symbol table management + * * Type checking + * * Function binding + * + * The majority of this work could be done during parsing, and the parser could + * probably generate HIR directly. However, this results in frequent changes + * to the parser code. Since we do not assume that every system this complier + * is built on will have Flex and Bison installed, we have to store the code + * generated by these tools in our version control system. In other parts of + * the system we've seen problems where a parser was changed but the generated + * code was not committed, merge conflicts where created because two developers + * had slightly different versions of Bison installed, etc. + * + * I have also noticed that running Bison generated parsers in GDB is very + * irritating. When you get a segfault on '$$ = $1->foo', you can't very + * well 'print $1' in GDB. + * + * As a result, my preference is to put as little C code as possible in the + * parser (and lexer) sources. + */ + + #include "glsl_symbol_table.h" + #include "glsl_parser_extras.h" + #include "ast.h" + #include "compiler/glsl_types.h" + #include "program/hash_table.h" + #include "main/shaderobj.h" + #include "ir.h" + #include "ir_builder.h" + + using namespace ir_builder; + + static void + detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + static void + remove_per_vertex_blocks(exec_list *instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode); + + /** + * Visitor class that finds the first instance of any write-only variable that + * is ever read, if any + */ + class read_from_write_only_variable_visitor : public ir_hierarchical_visitor + { + public: + read_from_write_only_variable_visitor() : found(NULL) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (this->in_assignee) + return visit_continue; + + ir_variable *var = ir->variable_referenced(); + /* We can have image_write_only set on both images and buffer variables, + * but in the former there is a distinction between reads from + * the variable itself (write_only) and from the memory they point to + * (image_write_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. 
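 *
 * For illustration (hypothetical shader code, not something in this tree):
 *
 *    buffer Block { writeonly float value; };
 *    ...
 *    float f = value;   // a read this visitor is meant to flag
 *
 * Image variables are skipped here because for them write_only (the
 * variable itself) and image_write_only (the memory it points to) are
 * tracked separately, as noted above.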
+ */ + if (!var || var->data.mode != ir_var_shader_storage) + return visit_continue; + + if (var->data.image_write_only) { + found = var; + return visit_stop; + } + + return visit_continue; + } + + ir_variable *get_variable() { + return found; + } + + virtual ir_visitor_status visit_enter(ir_expression *ir) + { + /* .length() doesn't actually read anything */ + if (ir->operation == ir_unop_ssbo_unsized_array_length) + return visit_continue_with_parent; + + return visit_continue; + } + + private: + ir_variable *found; + }; + + void + _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) + { + _mesa_glsl_initialize_variables(instructions, state); + + state->symbols->separate_function_namespace = state->language_version == 110; + + state->current_function = NULL; + + state->toplevel_ir = instructions; + + state->gs_input_prim_type_specified = false; + state->tcs_output_vertices_specified = false; + state->cs_input_local_size_specified = false; + + /* Section 4.2 of the GLSL 1.20 specification states: + * "The built-in functions are scoped in a scope outside the global scope + * users declare global variables in. That is, a shader's global scope, + * available for user-defined functions and global variables, is nested + * inside the scope containing the built-in functions." + * + * Since built-in functions like ftransform() access built-in variables, + * it follows that those must be in the outer scope as well. + * + * We push scope here to create this nesting effect...but don't pop. + * This way, a shader's globals are still in the symbol table for use + * by the linker. + */ + state->symbols->push_scope(); + + foreach_list_typed (ast_node, ast, link, & state->translation_unit) + ast->hir(instructions, state); + + detect_recursion_unlinked(state, instructions); + detect_conflicting_assignments(state, instructions); + + state->toplevel_ir = NULL; + + /* Move all of the variable declarations to the front of the IR list, and + * reverse the order. This has the (intended!) side effect that vertex + * shader inputs and fragment shader outputs will appear in the IR in the + * same order that they appeared in the shader code. This results in the + * locations being assigned in the declared order. Many (arguably buggy) + * applications depend on this behavior, and it matches what nearly all + * other drivers do. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + + if (var == NULL) + continue; + + var->remove(); + instructions->push_head(var); + } + + /* Figure out if gl_FragCoord is actually used in fragment shader */ + ir_variable *const var = state->symbols->get_variable("gl_FragCoord"); + if (var != NULL) + state->fs_uses_gl_fragcoord = var->data.used; + + /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec: + * + * If multiple shaders using members of a built-in block belonging to + * the same interface are linked together in the same program, they + * must all redeclare the built-in block in the same way, as described + * in section 4.3.7 "Interface Blocks" for interface block matching, or + * a link error will result. + * + * The phrase "using members of a built-in block" implies that if two + * shaders are linked together and one of them *does not use* any members + * of the built-in block, then that shader does not need to have a matching + * redeclaration of the built-in block. 
+ * + * This appears to be a clarification to the behaviour established for + * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL + * version. + * + * The definition of "interface" in section 4.3.7 that applies here is as + * follows: + * + * The boundary between adjacent programmable pipeline stages: This + * spans all the outputs in all compilation units of the first stage + * and all the inputs in all compilation units of the second stage. + * + * Therefore this rule applies to both inter- and intra-stage linking. + * + * The easiest way to implement this is to check whether the shader uses + * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply + * remove all the relevant variable declaration from the IR, so that the + * linker won't see them and complain about mismatches. + */ + remove_per_vertex_blocks(instructions, state, ir_var_shader_in); + remove_per_vertex_blocks(instructions, state, ir_var_shader_out); + + /* Check that we don't have reads from write-only variables */ + read_from_write_only_variable_visitor v; + v.run(instructions); + ir_variable *error_var = v.get_variable(); + if (error_var) { + /* It would be nice to have proper location information, but for that + * we would need to check this as we process each kind of AST node + */ + YYLTYPE loc; + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'", + error_var->name); + } + } + + + static ir_expression_operation + get_conversion_operation(const glsl_type *to, const glsl_type *from, + struct _mesa_glsl_parse_state *state) + { + switch (to->base_type) { + case GLSL_TYPE_FLOAT: + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2f; + case GLSL_TYPE_UINT: return ir_unop_u2f; + case GLSL_TYPE_DOUBLE: return ir_unop_d2f; + default: return (ir_expression_operation)0; + } + + case GLSL_TYPE_UINT: + if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) + return (ir_expression_operation)0; + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2u; + default: return (ir_expression_operation)0; + } + + case GLSL_TYPE_DOUBLE: + if (!state->has_double()) + return (ir_expression_operation)0; + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2d; + case GLSL_TYPE_UINT: return ir_unop_u2d; + case GLSL_TYPE_FLOAT: return ir_unop_f2d; + default: return (ir_expression_operation)0; + } + + default: return (ir_expression_operation)0; + } + } + + + /** + * If a conversion is available, convert one operand to a different type + * + * The \c from \c ir_rvalue is converted "in place". + * + * \param to Type that the operand it to be converted to + * \param from Operand that is being converted + * \param state GLSL compiler state + * + * \return + * If a conversion is possible (or unnecessary), \c true is returned. + * Otherwise \c false is returned. + */ + bool + apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + if (to->base_type == from->type->base_type) + return true; + + /* Prior to GLSL 1.20, there are no implicit conversions */ + if (!state->is_version(120, 0)) + return false; + + /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec: + * + * "There are no implicit array or structure conversions. For + * example, an array of int cannot be implicitly converted to an + * array of float. 
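 *
 * Concretely (an illustrative example, not spec text): in desktop GLSL
 * 1.20 and later `float f = 1;` succeeds here because
 * get_conversion_operation() returns ir_unop_i2f for the int operand,
 * whereas `float a[2] = int[2](1, 2);` fails the is_numeric() check
 * below, since array types are not numeric.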
+ */ + if (!to->is_numeric() || !from->type->is_numeric()) + return false; + + /* We don't actually want the specific type `to`, we want a type + * with the same base type as `to`, but the same vector width as + * `from`. + */ + to = glsl_type::get_instance(to->base_type, from->type->vector_elements, + from->type->matrix_columns); + + ir_expression_operation op = get_conversion_operation(to, from->type, state); + if (op) { + from = new(ctx) ir_expression(op, to, from, NULL); + return true; + } else { + return false; + } + } + + + static const struct glsl_type * + arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + bool multiply, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + /* From GLSL 1.50 spec, page 56: + * + * "The arithmetic binary operators add (+), subtract (-), + * multiply (*), and divide (/) operate on integer and + * floating-point scalars, vectors, and matrices." + */ + if (!type_a->is_numeric() || !type_b->is_numeric()) { + _mesa_glsl_error(loc, state, + "operands to arithmetic operators must be numeric"); + return glsl_type::error_type; + } + + + /* "If one operand is floating-point based and the other is + * not, then the conversions from Section 4.1.10 "Implicit + * Conversions" are applied to the non-floating-point-based operand." + */ + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "arithmetic operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + /* "If the operands are integer types, they must both be signed or + * both be unsigned." + * + * From this rule and the preceeding conversion it can be inferred that + * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT. + * The is_numeric check above already filtered out the case where either + * type is not one of these, so now the base types need only be tested for + * equality. + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, + "base type mismatch for arithmetic operator"); + return glsl_type::error_type; + } + + /* "All arithmetic binary operators result in the same fundamental type + * (signed integer, unsigned integer, or floating-point) as the + * operands they operate on, after operand type conversion. After + * conversion, the following cases are valid + * + * * The two operands are scalars. In this case the operation is + * applied, resulting in a scalar." + */ + if (type_a->is_scalar() && type_b->is_scalar()) + return type_a; + + /* "* One operand is a scalar, and the other is a vector or matrix. + * In this case, the scalar operation is applied independently to each + * component of the vector or matrix, resulting in the same size + * vector or matrix." + */ + if (type_a->is_scalar()) { + if (!type_b->is_scalar()) + return type_b; + } else if (type_b->is_scalar()) { + return type_a; + } + + /* All of the combinations of , , + * , , and have been + * handled. + */ + assert(!type_a->is_scalar()); + assert(!type_b->is_scalar()); + + /* "* The two operands are vectors of the same size. In this case, the + * operation is done component-wise resulting in the same size + * vector." 
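 *
 *    (For instance, vec3 + vec3 yields vec3 here, while vec3 + vec4
 *    falls through to the "vector size mismatch" error just below.)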
+ */ + if (type_a->is_vector() && type_b->is_vector()) { + if (type_a == type_b) { + return type_a; + } else { + _mesa_glsl_error(loc, state, + "vector size mismatch for arithmetic operator"); + return glsl_type::error_type; + } + } + + /* All of the combinations of , , + * , , , and + * have been handled. At least one of the operands must + * be matrix. Further, since there are no integer matrix types, the base + * type of both operands must be float. + */ + assert(type_a->is_matrix() || type_b->is_matrix()); + assert(type_a->base_type == GLSL_TYPE_FLOAT || + type_a->base_type == GLSL_TYPE_DOUBLE); + assert(type_b->base_type == GLSL_TYPE_FLOAT || + type_b->base_type == GLSL_TYPE_DOUBLE); + + /* "* The operator is add (+), subtract (-), or divide (/), and the + * operands are matrices with the same number of rows and the same + * number of columns. In this case, the operation is done component- + * wise resulting in the same size matrix." + * * The operator is multiply (*), where both operands are matrices or + * one operand is a vector and the other a matrix. A right vector + * operand is treated as a column vector and a left vector operand as a + * row vector. In all these cases, it is required that the number of + * columns of the left operand is equal to the number of rows of the + * right operand. Then, the multiply (*) operation does a linear + * algebraic multiply, yielding an object that has the same number of + * rows as the left operand and the same number of columns as the right + * operand. Section 5.10 "Vector and Matrix Operations" explains in + * more detail how vectors and matrices are operated on." + */ + if (! multiply) { + if (type_a == type_b) + return type_a; + } else { + const glsl_type *type = glsl_type::get_mul_type(type_a, type_b); + + if (type == glsl_type::error_type) { + _mesa_glsl_error(loc, state, + "size mismatch for matrix multiplication"); + } + + return type; + } + + + /* "All other cases are illegal." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; + } + + + static const struct glsl_type * + unary_arithmetic_result_type(const struct glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + /* From GLSL 1.50 spec, page 57: + * + * "The arithmetic unary operators negate (-), post- and pre-increment + * and decrement (-- and ++) operate on integer or floating-point + * values (including vectors and matrices). All unary operators work + * component-wise on their operands. These result with the same type + * they operated on." + */ + if (!type->is_numeric()) { + _mesa_glsl_error(loc, state, + "operands to arithmetic operators must be numeric"); + return glsl_type::error_type; + } + + return type; + } + + /** + * \brief Return the result type of a bit-logic operation. + * + * If the given types to the bit-logic operator are invalid, return + * glsl_type::error_type. + * + * \param value_a LHS of bit-logic op + * \param value_b RHS of bit-logic op + */ + static const struct glsl_type * + bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: + * + * "The bitwise operators and (&), exclusive-or (^), and inclusive-or + * (|). 
The operands must be of type signed or unsigned integers or + * integer vectors." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't + * make sense for bitwise operations, as they don't operate on floats. + * + * GLSL 4.0 added implicit int -> uint conversions, which are relevant + * here. It wasn't clear whether or not we should apply them to bitwise + * operations. However, Khronos has decided that they should in future + * language revisions. Applications also rely on this behavior. We opt + * to apply them in general, but issue a portability warning. + * + * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405 + */ + if (type_a->base_type != type_b->base_type) { + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "`%s` operator", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } else { + _mesa_glsl_warning(loc, state, + "some implementations may not support implicit " + "int -> uint conversions for `%s' operators; " + "consider casting explicitly for portability", + ast_expression::operator_string(op)); + } + type_a = value_a->type; + type_b = value_b->type; + } + + /* "The fundamental types of the operands (signed or unsigned) must + * match," + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "operands of `%s' must have the same " + "base type", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "The operands cannot be vectors of differing size." */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " + "different sizes", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If one operand is a scalar and the other a vector, the scalar is + * applied component-wise to the vector, resulting in the same type as + * the vector. The fundamental types of the operands [...] will be the + * resulting fundamental type." + */ + if (type_a->is_scalar()) + return type_b; + else + return type_a; + } + + static const struct glsl_type * + modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) { + return glsl_type::error_type; + } + + /* Section 5.9 (Expressions) of the GLSL 4.00 specification says: + * + * "The operator modulus (%) operates on signed or unsigned integers or + * integer vectors." 
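 *
 *    (For instance, ivec2 % 3 is accepted and yields ivec2, while
 *    vec2 % vec2 is rejected by the is_integer() checks below, since
 *    operator % is not defined for floating-point types.)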
+ */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer"); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer"); + return glsl_type::error_type; + } + + /* "If the fundamental types in the operands do not match, then the + * conversions from section 4.1.10 "Implicit Conversions" are applied + * to create matching types." + * + * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit + * int -> uint conversion rules. Prior to that, there were no implicit + * conversions. So it's harmless to apply them universally - no implicit + * conversions will exist. If the types don't match, we'll receive false, + * and raise an error, satisfying the GLSL 1.50 spec, page 56: + * + * "The operand types must both be signed or unsigned." + */ + if (!apply_implicit_conversion(type_a, value_b, state) && + !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "modulus (%%) operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + /* "The operands cannot be vectors of differing size. If one operand is + * a scalar and the other vector, then the scalar is applied component- + * wise to the vector, resulting in the same type as the vector. If both + * are vectors of the same size, the result is computed component-wise." + */ + if (type_a->is_vector()) { + if (!type_b->is_vector() + || (type_a->vector_elements == type_b->vector_elements)) + return type_a; + } else + return type_b; + + /* "The operator modulus (%) is not defined for any other data types + * (non-integer types)." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; + } + + + static const struct glsl_type * + relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + /* From GLSL 1.50 spec, page 56: + * "The relational operators greater than (>), less than (<), greater + * than or equal (>=), and less than or equal (<=) operate only on + * scalar integer and scalar floating-point expressions." + */ + if (!type_a->is_numeric() + || !type_b->is_numeric() + || !type_a->is_scalar() + || !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, + "operands to relational operators must be scalar and " + "numeric"); + return glsl_type::error_type; + } + + /* "Either the operands' types must match, or the conversions from + * Section 4.1.10 "Implicit Conversions" will be applied to the integer + * operand, after which the types must match." + */ + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "relational operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "base type mismatch"); + return glsl_type::error_type; + } + + /* "The result is scalar Boolean." + */ + return glsl_type::bool_type; + } + + /** + * \brief Return the result type of a bit-shift operation. + * + * If the given types to the bit-shift operator are invalid, return + * glsl_type::error_type. 
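 *
 * For example, ivec2 << 3 is valid (the scalar shift count applies to each
 * component and the result is ivec2), while 3 << ivec2(1, 2) is rejected
 * because a scalar first operand requires a scalar second operand.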
+ * + * \param type_a Type of LHS of bit-shift op + * \param type_b Type of RHS of bit-shift op + */ + static const struct glsl_type * + shift_result_type(const struct glsl_type *type_a, + const struct glsl_type *type_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec: + * + * "The shift operators (<<) and (>>). For both operators, the operands + * must be signed or unsigned integers or integer vectors. One operand + * can be signed while the other is unsigned." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If the first operand is a scalar, the second operand has to be + * a scalar as well." + */ + if (type_a->is_scalar() && !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the " + "second must be scalar as well", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* If both operands are vectors, check that they have same number of + * elements. + */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "vector operands to operator %s must " + "have same number of elements", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "In all cases, the resulting type will be the same type as the left + * operand." + */ + return type_a; + } + + /** + * Returns the innermost array index expression in an rvalue tree. + * This is the largest indexing level -- if an array of blocks, then + * it is the block index rather than an indexing expression for an + * array-typed member of an array of blocks. + */ + static ir_rvalue * + find_innermost_array_index(ir_rvalue *rv) + { + ir_dereference_array *last = NULL; + while (rv) { + if (rv->as_dereference_array()) { + last = rv->as_dereference_array(); + rv = last->array; + } else if (rv->as_dereference_record()) + rv = rv->as_dereference_record()->record; + else if (rv->as_swizzle()) + rv = rv->as_swizzle()->val; + else + rv = NULL; + } + + if (last) + return last->array_index; + + return NULL; + } + + /** + * Validates that a value can be assigned to a location with a specified type + * + * Validates that \c rhs can be assigned to some location. If the types are + * not an exact match but an automatic conversion is possible, \c rhs will be + * converted. + * + * \return + * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type. + * Otherwise the actual RHS to be assigned will be returned. This may be + * \c rhs, or it may be \c rhs after some type conversion. + * + * \note + * In addition to being used for assignments, this function is used to + * type-check return values. + */ + static ir_rvalue * + validate_assignment(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_rvalue *lhs, + ir_rvalue *rhs, bool is_initializer) + { + /* If there is already some error in the RHS, just return it. Anything + * else will lead to an avalanche of error message back to the user. 
+ */ + if (rhs->type->is_error()) + return rhs; + + /* In the Tessellation Control Shader: + * If a per-vertex output variable is used as an l-value, it is an error + * if the expression indicating the vertex number is not the identifier + * `gl_InvocationID`. + */ + if (state->stage == MESA_SHADER_TESS_CTRL) { + ir_variable *var = lhs->variable_referenced(); + if (var->data.mode == ir_var_shader_out && !var->data.patch) { + ir_rvalue *index = find_innermost_array_index(lhs); + ir_variable *index_var = index ? index->variable_referenced() : NULL; + if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) { + _mesa_glsl_error(&loc, state, + "Tessellation control shader outputs can only " + "be indexed by gl_InvocationID"); + return NULL; + } + } + } + + /* If the types are identical, the assignment can trivially proceed. + */ + if (rhs->type == lhs->type) + return rhs; + + /* If the array element types are the same and the LHS is unsized, + * the assignment is okay for initializers embedded in variable + * declarations. + * + * Note: Whole-array assignments are not permitted in GLSL 1.10, but this + * is handled by ir_dereference::is_lvalue. + */ + const glsl_type *lhs_t = lhs->type; + const glsl_type *rhs_t = rhs->type; + bool unsized_array = false; + while(lhs_t->is_array()) { + if (rhs_t == lhs_t) + break; /* the rest of the inner arrays match so break out early */ + if (!rhs_t->is_array()) { + unsized_array = false; + break; /* number of dimensions mismatch */ + } + if (lhs_t->length == rhs_t->length) { + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + continue; + } else if (lhs_t->is_unsized_array()) { + unsized_array = true; + } else { + unsized_array = false; + break; /* sized array mismatch */ + } + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + } + if (unsized_array) { + if (is_initializer) { + return rhs; + } else { + _mesa_glsl_error(&loc, state, + "implicitly sized arrays cannot be assigned"); + return NULL; + } + } + + /* Check for implicit conversion in GLSL 1.20 */ + if (apply_implicit_conversion(lhs->type, rhs, state)) { + if (rhs->type == lhs->type) + return rhs; + } + + _mesa_glsl_error(&loc, state, + "%s of type %s cannot be assigned to " + "variable of type %s", + is_initializer ? 
"initializer" : "value", + rhs->type->name, lhs->type->name); + + return NULL; + } + + static void + mark_whole_array_access(ir_rvalue *access) + { + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref && deref->var) { + deref->var->data.max_array_access = deref->type->length - 1; + } + } + + static bool + do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, + const char *non_lvalue_description, + ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue **out_rvalue, bool needs_rvalue, + bool is_initializer, + YYLTYPE lhs_loc) + { + void *ctx = state; + bool error_emitted = (lhs->type->is_error() || rhs->type->is_error()); + + ir_variable *lhs_var = lhs->variable_referenced(); + if (lhs_var) + lhs_var->data.assigned = true; + + if (!error_emitted) { + if (non_lvalue_description != NULL) { + _mesa_glsl_error(&lhs_loc, state, + "assignment to %s", + non_lvalue_description); + error_emitted = true; + } else if (lhs_var != NULL && (lhs_var->data.read_only || + (lhs_var->data.mode == ir_var_shader_storage && + lhs_var->data.image_read_only))) { + /* We can have image_read_only set on both images and buffer variables, + * but in the former there is a distinction between assignments to + * the variable itself (read_only) and to the memory they point to + * (image_read_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. + */ + _mesa_glsl_error(&lhs_loc, state, + "assignment to read-only variable '%s'", + lhs_var->name); + error_emitted = true; + } else if (lhs->type->is_array() && + !state->check_version(120, 300, &lhs_loc, + "whole array assignment forbidden")) { + /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced + * arrays, function names, swizzles with repeated fields, + * and constants cannot be l-values." + * + * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00. + */ + error_emitted = true; + } else if (!lhs->is_lvalue()) { + _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment"); + error_emitted = true; + } + } + + ir_rvalue *new_rhs = + validate_assignment(state, lhs_loc, lhs, rhs, is_initializer); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* If the LHS array was not declared with a size, it takes it size from + * the RHS. If the LHS is an l-value and a whole array, it must be a + * dereference of a variable. Any other case would require that the LHS + * is either not an l-value or not a whole array. + */ + if (lhs->type->is_unsized_array()) { + ir_dereference *const d = lhs->as_dereference(); + + assert(d != NULL); + + ir_variable *const var = d->variable_referenced(); + + assert(var != NULL); + + if (var->data.max_array_access >= unsigned(rhs->type->array_size())) { + /* FINISHME: This should actually log the location of the RHS. 
*/ + _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to " + "previous access", + var->data.max_array_access); + } + + var->type = glsl_type::get_array_instance(lhs->type->fields.array, + rhs->type->array_size()); + d->type = var->type; + } + if (lhs->type->is_array()) { + mark_whole_array_access(rhs); + mark_whole_array_access(lhs); + } + } + + /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, + * but not post_inc) need the converted assigned value as an rvalue + * to handle things like: + * + * i = j += 1; + */ + if (needs_rvalue) { + ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp", + ir_var_temporary); + instructions->push_tail(var); + instructions->push_tail(assign(var, rhs)); + + if (!error_emitted) { + ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var); + instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var)); + } + ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var); + + *out_rvalue = rvalue; + } else { + if (!error_emitted) + instructions->push_tail(new(ctx) ir_assignment(lhs, rhs)); + *out_rvalue = NULL; + } + + return error_emitted; + } + + static ir_rvalue * + get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue) + { + void *ctx = ralloc_parent(lvalue); + ir_variable *var; + + var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp", + ir_var_temporary); + instructions->push_tail(var); + + instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), + lvalue)); + + return new(ctx) ir_dereference_variable(var); + } + + + ir_rvalue * + ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) + { + (void) instructions; + (void) state; + + return NULL; + } + + bool + ast_node::has_sequence_subexpression() const + { + return false; + } + + void + ast_function_expression::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + (void)hir(instructions, state); + } + + void + ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + (void)hir(instructions, state); + } + + static ir_rvalue * + do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) + { + int join_op; + ir_rvalue *cmp = NULL; + + if (operation == ir_binop_all_equal) + join_op = ir_binop_logic_and; + else + join_op = ir_binop_logic_or; + + switch (op0->type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: + return new(mem_ctx) ir_expression(operation, op0, op1); + + case GLSL_TYPE_ARRAY: { + for (unsigned int i = 0; i < op0->type->length; i++) { + ir_rvalue *e0, *e1, *result; + + e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + result = do_comparison(mem_ctx, operation, e0, e1); + + if (cmp) { + cmp = new(mem_ctx) ir_expression(join_op, cmp, result); + } else { + cmp = result; + } + } + + mark_whole_array_access(op0); + mark_whole_array_access(op1); + break; + } + + case GLSL_TYPE_STRUCT: { + for (unsigned int i = 0; i < op0->type->length; i++) { + ir_rvalue *e0, *e1, *result; + const char *field_name = op0->type->fields.structure[i].name; + + e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL), + field_name); + e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL), + field_name); + result = do_comparison(mem_ctx, operation, 
e0, e1); + + if (cmp) { + cmp = new(mem_ctx) ir_expression(join_op, cmp, result); + } else { + cmp = result; + } + } + break; + } + + case GLSL_TYPE_ERROR: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_INTERFACE: ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_SUBROUTINE: + /* I assume a comparison of a struct containing a sampler just + * ignores the sampler present in the type. + */ + break; + } + + if (cmp == NULL) + cmp = new(mem_ctx) ir_constant(true); + + return cmp; + } + + /* For logical operations, we want to ensure that the operands are + * scalar booleans. If it isn't, emit an error and return a constant + * boolean to avoid triggering cascading error messages. + */ + ir_rvalue * + get_scalar_boolean_operand(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + ast_expression *parent_expr, + int operand, + const char *operand_name, + bool *error_emitted) + { + ast_expression *expr = parent_expr->subexpressions[operand]; + void *ctx = state; + ir_rvalue *val = expr->hir(instructions, state); + + if (val->type->is_boolean() && val->type->is_scalar()) + return val; + + if (!*error_emitted) { + YYLTYPE loc = expr->get_location(); + _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean", + operand_name, + parent_expr->operator_string(parent_expr->oper)); + *error_emitted = true; + } + + return new(ctx) ir_constant(true); + } + + /** + * If name refers to a builtin array whose maximum allowed size is less than + * size, report an error and return true. Otherwise return false. + */ + void + check_builtin_array_max_size(const char *name, unsigned size, + YYLTYPE loc, struct _mesa_glsl_parse_state *state) + { + if ((strcmp("gl_TexCoord", name) == 0) + && (size > state->Const.MaxTextureCoords)) { + /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec: + * + * "The size [of gl_TexCoord] can be at most + * gl_MaxTextureCoords." + */ + _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot " + "be larger than gl_MaxTextureCoords (%u)", + state->Const.MaxTextureCoords); + } else if (strcmp("gl_ClipDistance", name) == 0 + && size > state->Const.MaxClipPlanes) { + /* From section 7.1 (Vertex Shader Special Variables) of the + * GLSL 1.30 spec: + * + * "The gl_ClipDistance array is predeclared as unsized and + * must be sized by the shader either redeclaring it with a + * size or indexing it only with integral constant + * expressions. ... The size can be at most + * gl_MaxClipDistances." + */ + _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot " + "be larger than gl_MaxClipDistances (%u)", + state->Const.MaxClipPlanes); + } + } + + /** + * Create the constant 1, of a which is appropriate for incrementing and + * decrementing values of the given GLSL type. For example, if type is vec4, + * this creates a constant value of 1.0 having type float. + * + * If the given type is invalid for increment and decrement operators, return + * a floating point 1--the error will be detected later. 
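The built-in array size checks above can be triggered from GLSL like this (hypothetical GLSL 1.30 vertex-shader sketch):

    #version 130
    out float gl_ClipDistance[4];        // ok, provided 4 <= gl_MaxClipDistances
    // out float gl_ClipDistance[1024];  // error: cannot be larger than gl_MaxClipDistances
    void main() {
       gl_Position = vec4(0.0);
       gl_ClipDistance[0] = 1.0;
    }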
+ */ + static ir_rvalue * + constant_one_for_inc_dec(void *ctx, const glsl_type *type) + { + switch (type->base_type) { + case GLSL_TYPE_UINT: + return new(ctx) ir_constant((unsigned) 1); + case GLSL_TYPE_INT: + return new(ctx) ir_constant(1); + default: + case GLSL_TYPE_FLOAT: + return new(ctx) ir_constant(1.0f); + } + } + + ir_rvalue * + ast_expression::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + return do_hir(instructions, state, true); + } + + void + ast_expression::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + do_hir(instructions, state, false); + } + + ir_rvalue * + ast_expression::do_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool needs_rvalue) + { + void *ctx = state; + static const int operations[AST_NUM_OPERATORS] = { + -1, /* ast_assign doesn't convert to ir_expression. */ + -1, /* ast_plus doesn't convert to ir_expression. */ + ir_unop_neg, + ir_binop_add, + ir_binop_sub, + ir_binop_mul, + ir_binop_div, + ir_binop_mod, + ir_binop_lshift, + ir_binop_rshift, + ir_binop_less, + ir_binop_greater, + ir_binop_lequal, + ir_binop_gequal, + ir_binop_all_equal, + ir_binop_any_nequal, + ir_binop_bit_and, + ir_binop_bit_xor, + ir_binop_bit_or, + ir_unop_bit_not, + ir_binop_logic_and, + ir_binop_logic_xor, + ir_binop_logic_or, + ir_unop_logic_not, + + /* Note: The following block of expression types actually convert + * to multiple IR instructions. + */ + ir_binop_mul, /* ast_mul_assign */ + ir_binop_div, /* ast_div_assign */ + ir_binop_mod, /* ast_mod_assign */ + ir_binop_add, /* ast_add_assign */ + ir_binop_sub, /* ast_sub_assign */ + ir_binop_lshift, /* ast_ls_assign */ + ir_binop_rshift, /* ast_rs_assign */ + ir_binop_bit_and, /* ast_and_assign */ + ir_binop_bit_xor, /* ast_xor_assign */ + ir_binop_bit_or, /* ast_or_assign */ + + -1, /* ast_conditional doesn't convert to ir_expression. */ + ir_binop_add, /* ast_pre_inc. */ + ir_binop_sub, /* ast_pre_dec. */ + ir_binop_add, /* ast_post_inc. */ + ir_binop_sub, /* ast_post_dec. */ + -1, /* ast_field_selection doesn't conv to ir_expression. */ + -1, /* ast_array_index doesn't convert to ir_expression. */ + -1, /* ast_function_call doesn't conv to ir_expression. */ + -1, /* ast_identifier doesn't convert to ir_expression. */ + -1, /* ast_int_constant doesn't convert to ir_expression. */ + -1, /* ast_uint_constant doesn't conv to ir_expression. */ + -1, /* ast_float_constant doesn't conv to ir_expression. */ + -1, /* ast_bool_constant doesn't conv to ir_expression. */ + -1, /* ast_sequence doesn't convert to ir_expression. 
*/ + }; + ir_rvalue *result = NULL; + ir_rvalue *op[3]; + const struct glsl_type *type; /* a temporary variable for switch cases */ + bool error_emitted = false; + YYLTYPE loc; + + loc = this->get_location(); + + switch (this->oper) { + case ast_aggregate: + assert(!"ast_aggregate: Should never get here."); + break; + + case ast_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0], op[1], &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_plus: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = op[0]; + break; + + case ast_neg: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], NULL); + break; + + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = arithmetic_result_type(op[0], op[1], + (this->oper == ast_mul), + state, & loc); + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + break; + + case ast_mod: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = modulus_result_type(op[0], op[1], state, &loc); + + assert(operations[this->oper] == ir_binop_mod); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_lshift: + case ast_rshift: + if (!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, + &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = relational_result_type(op[0], op[1], state, & loc); + + /* The relational operators must either generate an error or result + * in a scalar boolean. See page 57 of the GLSL 1.50 spec. + */ + assert(type->is_error() + || ((type->base_type == GLSL_TYPE_BOOL) + && type->is_scalar())); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_nequal: + case ast_equal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec: + * + * "The equality operators equal (==), and not equal (!=) + * operate on all types. They result in a scalar Boolean. 
If + * the operand types do not match, then there must be a + * conversion from Section 4.1.10 "Implicit Conversions" + * applied to one operand that can make them match, in which + * case this conversion is done." + */ + + if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) { + _mesa_glsl_error(& loc, state, "`%s': wrong operand types: " + "no operation `%1$s' exists that takes a left-hand " + "operand of type 'void' or a right operand of type " + "'void'", (this->oper == ast_equal) ? "==" : "!="); + error_emitted = true; + } else if ((!apply_implicit_conversion(op[0]->type, op[1], state) + && !apply_implicit_conversion(op[1]->type, op[0], state)) + || (op[0]->type != op[1]->type)) { + _mesa_glsl_error(& loc, state, "operands of `%s' must have the same " + "type", (this->oper == ast_equal) ? "==" : "!="); + error_emitted = true; + } else if ((op[0]->type->is_array() || op[1]->type->is_array()) && + !state->check_version(120, 300, &loc, + "array comparisons forbidden")) { + error_emitted = true; + } else if ((op[0]->type->contains_opaque() || + op[1]->type->contains_opaque())) { + _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden"); + error_emitted = true; + } + + if (error_emitted) { + result = new(ctx) ir_constant(false); + } else { + result = do_comparison(ctx, operations[this->oper], op[0], op[1]); + assert(result->type == glsl_type::bool_type); + } + break; + + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_bit_not: + op[0] = this->subexpressions[0]->hir(instructions, state); + + if (!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + if (!op[0]->type->is_integer()) { + _mesa_glsl_error(&loc, state, "operand of `~' must be an integer"); + error_emitted = true; + } + + type = error_emitted ? 
glsl_type::error_type : op[0]->type; + result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL); + break; + + case ast_logic_and: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "and_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + stmt->then_instructions.append_list(&rhs_instructions); + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false)); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_or: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "or_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true)); + stmt->then_instructions.push_tail(then_assign); + + stmt->else_instructions.append_list(&rhs_instructions); + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[1]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_xor: + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The logical binary operators and (&&), or ( | | ), and + * exclusive or (^^). They operate only on two Boolean + * expressions and result in a Boolean expression." 
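The ast_logic_and / ast_logic_or cases above implement GLSL's short-circuit evaluation; roughly, when the right-hand side needs instructions of its own, "lhs && rhs" is rewritten into the following hand-written GLSL equivalent (hypothetical names, assuming GLSL 1.30):

    uniform int   count;
    uniform float data[8];
    bool in_range_and_positive(int i) {
       // i < count && data[i] > 0.0   becomes:
       bool and_tmp;
       if (i < count) {
          and_tmp = data[i] > 0.0;   // RHS only evaluated when the LHS is true
       } else {
          and_tmp = false;
       }
       return and_tmp;
    }

The "||" case is the mirror image: the then-branch stores true and the right-hand side is evaluated in the else-branch.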
+ */ + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS", + &error_emitted); + op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS", + &error_emitted); + + result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type, + op[0], op[1]); + break; + + case ast_logic_not: + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "operand", &error_emitted); + + result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type, + op[0], NULL); + break; + + case ast_mul_assign: + case ast_div_assign: + case ast_add_assign: + case ast_sub_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = arithmetic_result_type(op[0], op[1], + (this->oper == ast_mul_assign), + state, & loc); + + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + + /* GLSL 1.10 does not allow array assignment. However, we don't have to + * explicitly test for this because none of the binary expression + * operators allow array operands either. + */ + + break; + } + + case ast_mod_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = modulus_result_type(op[0], op[1], state, &loc); + + assert(operations[this->oper] == ir_binop_mod); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_ls_assign: + case ast_rs_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, + &loc); + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], + type, op[0], op[1]); + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], + type, op[0], op[1]); + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_conditional: { + /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec: + * + * "The ternary selection operator (?:). It operates on three + * expressions (exp1 ? exp2 : exp3). This operator evaluates the + * first expression, which must result in a scalar Boolean." + */ + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "condition", &error_emitted); + + /* The :? 
operator is implemented by generating an anonymous temporary + * followed by an if-statement. The last instruction in each branch of + * the if-statement assigns a value to the anonymous temporary. This + * temporary is the r-value of the expression. + */ + exec_list then_instructions; + exec_list else_instructions; + + op[1] = this->subexpressions[1]->hir(&then_instructions, state); + op[2] = this->subexpressions[2]->hir(&else_instructions, state); + + /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec: + * + * "The second and third expressions can be any type, as + * long their types match, or there is a conversion in + * Section 4.1.10 "Implicit Conversions" that can be applied + * to one of the expressions to make their types match. This + * resulting matching type is the type of the entire + * expression." + */ + if ((!apply_implicit_conversion(op[1]->type, op[2], state) + && !apply_implicit_conversion(op[2]->type, op[1], state)) + || (op[1]->type != op[2]->type)) { + YYLTYPE loc = this->subexpressions[1]->get_location(); + + _mesa_glsl_error(& loc, state, "second and third operands of ?: " + "operator must have matching types"); + error_emitted = true; + type = glsl_type::error_type; + } else { + type = op[1]->type; + } + + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The second and third expressions must be the same type, but can + * be of any type other than an array." + */ + if (type->is_array() && + !state->check_version(120, 300, &loc, + "second and third operands of ?: operator " + "cannot be arrays")) { + error_emitted = true; + } + + /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): + * + * "Except for array indexing, structure member selection, and + * parentheses, opaque variables are not allowed to be operands in + * expressions; such use results in a compile-time error." + */ + if (type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " + "of the ?: operator"); + error_emitted = true; + } + + ir_constant *cond_val = op[0]->constant_expression_value(); + + if (then_instructions.is_empty() + && else_instructions.is_empty() + && cond_val != NULL) { + result = cond_val->value.b[0] ? op[1] : op[2]; + } else { + /* The copy to conditional_tmp reads the whole array. */ + if (type->is_array()) { + mark_whole_array_access(op[1]); + mark_whole_array_access(op[2]); + } + + ir_variable *const tmp = + new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + then_instructions.move_nodes_to(& stmt->then_instructions); + ir_dereference *const then_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + else_instructions.move_nodes_to(& stmt->else_instructions); + ir_dereference *const else_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[2]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + } + break; + } + + case ast_pre_inc: + case ast_pre_dec: { + this->non_lvalue_description = (this->oper == ast_pre_inc) + ? 
"pre-increment operation" : "pre-decrement operation"; + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_post_inc: + case ast_post_dec: { + this->non_lvalue_description = (this->oper == ast_post_inc) + ? "post-increment operation" : "post-decrement operation"; + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + /* Get a temporary of a copy of the lvalue before it's modified. + * This may get thrown away later. + */ + result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL)); + + ir_rvalue *junk_rvalue; + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &junk_rvalue, false, false, + this->subexpressions[0]->get_location()); + + break; + } + + case ast_field_selection: + result = _mesa_ast_field_selection_to_hir(this, instructions, state); + break; + + case ast_array_index: { + YYLTYPE index_loc = subexpressions[1]->get_location(); + + op[0] = subexpressions[0]->hir(instructions, state); + op[1] = subexpressions[1]->hir(instructions, state); + + result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1], + loc, index_loc); + + if (result->type->is_error()) + error_emitted = true; + + break; + } + + case ast_unsized_array_dim: + assert(!"ast_unsized_array_dim: Should never get here."); + break; + + case ast_function_call: + /* Should *NEVER* get here. ast_function_call should always be handled + * by ast_function_expression::hir. + */ + assert(0); + break; + + case ast_identifier: { + /* ast_identifier can appear several places in a full abstract syntax + * tree. This particular use must be at location specified in the grammar + * as 'variable_identifier'. 
+ */ + ir_variable *var = + state->symbols->get_variable(this->primary_expression.identifier); + + if (var != NULL) { + var->data.used = true; + result = new(ctx) ir_dereference_variable(var); + } else { + _mesa_glsl_error(& loc, state, "`%s' undeclared", + this->primary_expression.identifier); + + result = ir_rvalue::error_value(ctx); + error_emitted = true; + } + break; + } + + case ast_int_constant: + result = new(ctx) ir_constant(this->primary_expression.int_constant); + break; + + case ast_uint_constant: + result = new(ctx) ir_constant(this->primary_expression.uint_constant); + break; + + case ast_float_constant: + result = new(ctx) ir_constant(this->primary_expression.float_constant); + break; + + case ast_bool_constant: + result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant)); + break; + + case ast_double_constant: + result = new(ctx) ir_constant(this->primary_expression.double_constant); + break; + + case ast_sequence: { + /* It should not be possible to generate a sequence in the AST without + * any expressions in it. + */ + assert(!this->expressions.is_empty()); + + /* The r-value of a sequence is the last expression in the sequence. If + * the other expressions in the sequence do not have side-effects (and + * therefore add instructions to the instruction list), they get dropped + * on the floor. + */ + exec_node *previous_tail_pred = NULL; + YYLTYPE previous_operand_loc = loc; + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + /* If one of the operands of comma operator does not generate any + * code, we want to emit a warning. At each pass through the loop + * previous_tail_pred will point to the last instruction in the + * stream *before* processing the previous operand. Naturally, + * instructions->tail_pred will point to the last instruction in the + * stream *after* processing the previous operand. If the two + * pointers match, then the previous operand had no effect. + * + * The warning behavior here differs slightly from GCC. GCC will + * only emit a warning if none of the left-hand operands have an + * effect. However, it will emit a warning for each. I believe that + * there are some cases in C (especially with GCC extensions) where + * it is useful to have an intermediate step in a sequence have no + * effect, but I don't think these cases exist in GLSL. Either way, + * it would be a giant hassle to replicate that behavior. + */ + if (previous_tail_pred == instructions->tail_pred) { + _mesa_glsl_warning(&previous_operand_loc, state, + "left-hand operand of comma expression has " + "no effect"); + } + + /* tail_pred is directly accessed instead of using the get_tail() + * method for performance reasons. get_tail() has extra code to + * return NULL when the list is empty. We don't care about that + * here, so using tail_pred directly is fine. + */ + previous_tail_pred = instructions->tail_pred; + previous_operand_loc = ast->get_location(); + + result = ast->hir(instructions, state); + } + + /* Any errors should have already been emitted in the loop above. + */ + error_emitted = true; + break; + } + } + type = NULL; /* use result->type, not type. 
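The ast_sequence case above evaluates comma-separated operands left to right and warns when a left-hand operand generates no instructions; a hypothetical GLSL sketch:

    void g() {
       int i, j;
       for (i = 0, j = 8; i < j; i++, j--) {
          // both comma operands have side effects: no warning
       }
       int k = (i, j);   // warning: left-hand operand of comma expression
                         // has no effect; k takes the value of j
    }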
*/ + assert(result != NULL || !needs_rvalue); + + if (result && result->type->is_error() && !error_emitted) + _mesa_glsl_error(& loc, state, "type mismatch"); + + return result; + } + + bool + ast_expression::has_sequence_subexpression() const + { + switch (this->oper) { + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + case ast_post_inc: + case ast_post_dec: + return this->subexpressions[0]->has_sequence_subexpression(); + + case ast_assign: + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + case ast_mod: + case ast_lshift: + case ast_rshift: + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + case ast_nequal: + case ast_equal: + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + case ast_logic_and: + case ast_logic_or: + case ast_logic_xor: + case ast_array_index: + case ast_mul_assign: + case ast_div_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_mod_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression(); + + case ast_conditional: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression() || + this->subexpressions[2]->has_sequence_subexpression(); + + case ast_sequence: + return true; + + case ast_field_selection: + case ast_identifier: + case ast_int_constant: + case ast_uint_constant: + case ast_float_constant: + case ast_bool_constant: + case ast_double_constant: + return false; + + case ast_aggregate: + unreachable("ast_aggregate: Should never get here."); + + case ast_function_call: + unreachable("should be handled by ast_function_expression::hir"); + + case ast_unsized_array_dim: + unreachable("ast_unsized_array_dim: Should never get here."); + } + + return false; + } + + ir_rvalue * + ast_expression_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + /* It is possible to have expression statements that don't have an + * expression. This is the solitary semicolon: + * + * for (i = 0; i < 5; i++) + * ; + * + * In this case the expression will be NULL. Test for NULL and don't do + * anything in that case. + */ + if (expression != NULL) + expression->hir_no_rvalue(instructions, state); + + /* Statements do not have r-values. + */ + return NULL; + } + + + ir_rvalue * + ast_compound_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + if (new_scope) + state->symbols->push_scope(); + + foreach_list_typed (ast_node, ast, link, &this->statements) + ast->hir(instructions, state); + + if (new_scope) + state->symbols->pop_scope(); + + /* Compound statements do not have r-values. + */ + return NULL; + } + + /** + * Evaluate the given exec_node (which should be an ast_node representing + * a single array dimension) and return its integer value. + */ + static unsigned + process_array_size(exec_node *node, + struct _mesa_glsl_parse_state *state) + { + exec_list dummy_instructions; + + ast_node *array_size = exec_node_data(ast_node, node, link); + + /** + * Dimensions other than the outermost dimension can by unsized if they + * are immediately sized by a constructor or initializer. 
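process_array_size, whose checks follow below, requires each dimension to be a positive, scalar, constant integral expression; a hypothetical GLSL sketch:

    const int N = 4;
    float a[N];         // ok: constant integral expression, N > 0
    float b[2 * N];     // ok: folded to 8 at compile time
    // float c[0];      // error: array size must be > 0
    // float d[2.0];    // error: array size must be integer type
    // uniform int n;
    // float e[n];      // error: array size must be a constant valued expression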
+ */ + if (((ast_expression*)array_size)->oper == ast_unsized_array_dim) + return 0; + + ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); + YYLTYPE loc = array_size->get_location(); + + if (ir == NULL) { + _mesa_glsl_error(& loc, state, + "array size could not be resolved"); + return 0; + } + + if (!ir->type->is_integer()) { + _mesa_glsl_error(& loc, state, + "array size must be integer type"); + return 0; + } + + if (!ir->type->is_scalar()) { + _mesa_glsl_error(& loc, state, + "array size must be scalar type"); + return 0; + } + + ir_constant *const size = ir->constant_expression_value(); + if (size == NULL || array_size->has_sequence_subexpression()) { + _mesa_glsl_error(& loc, state, "array size must be a " + "constant valued expression"); + return 0; + } + + if (size->value.i[0] <= 0) { + _mesa_glsl_error(& loc, state, "array size must be > 0"); + return 0; + } + + assert(size->type == ir->type); + + /* If the array size is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the array size isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + return size->value.u[0]; + } + + static const glsl_type * + process_array_type(YYLTYPE *loc, const glsl_type *base, + ast_array_specifier *array_specifier, + struct _mesa_glsl_parse_state *state) + { + const glsl_type *array_type = base; + + if (array_specifier != NULL) { + if (base->is_array()) { + + /* From page 19 (page 25) of the GLSL 1.20 spec: + * + * "Only one-dimensional arrays may be declared." + */ + if (!state->check_arrays_of_arrays_allowed(loc)) { + return glsl_type::error_type; + } + } + + for (exec_node *node = array_specifier->array_dimensions.tail_pred; + !node->is_head_sentinel(); node = node->prev) { + unsigned array_size = process_array_size(node, state); + array_type = glsl_type::get_array_instance(array_type, array_size); + } + } + + return array_type; + } + + static bool + precision_qualifier_allowed(const glsl_type *type) + { + /* Precision qualifiers apply to floating point, integer and opaque + * types. + * + * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says: + * "Any floating point or any integer declaration can have the type + * preceded by one of these precision qualifiers [...] Literal + * constants do not have precision qualifiers. Neither do Boolean + * variables. + * + * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30 + * spec also says: + * + * "Precision qualifiers are added for code portability with OpenGL + * ES, not for functionality. They have the same syntax as in OpenGL + * ES." + * + * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says: + * + * "uniform lowp sampler2D sampler; + * highp vec2 coord; + * ... + * lowp vec4 col = texture2D (sampler, coord); + * // texture2D returns lowp" + * + * From this, we infer that GLSL 1.30 (and later) should allow precision + * qualifiers on sampler types just like float and integer types. 
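As precision_qualifier_allowed above concludes, precision qualifiers apply to float, integer and opaque types but not to booleans or structs; a hypothetical GLSL ES sketch:

    #version 300 es
    precision mediump float;        // default precision for float-based types
    uniform lowp sampler2D tex;     // samplers may carry precision qualifiers
    highp vec2 coord;
    // lowp bool flag;              // error: Boolean variables take no precision qualifier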
+ */ + return (type->is_float() + || type->is_integer() + || type->contains_opaque()) + && !type->without_array()->is_record(); + } + + const glsl_type * + ast_type_specifier::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const + { + const struct glsl_type *type; + + type = state->symbols->get_type(this->type_name); + *name = this->type_name; + + YYLTYPE loc = this->get_location(); + type = process_array_type(&loc, type, this->array_specifier, state); + + return type; + } + + /** + * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers: + * + * "The precision statement + * + * precision precision-qualifier type; + * + * can be used to establish a default precision qualifier. The type field can + * be either int or float or any of the sampler types, (...) If type is float, + * the directive applies to non-precision-qualified floating point type + * (scalar, vector, and matrix) declarations. If type is int, the directive + * applies to all non-precision-qualified integer type (scalar, vector, signed, + * and unsigned) declarations." + * + * We use the symbol table to keep the values of the default precisions for + * each 'type' in each scope and we use the 'type' string from the precision + * statement as key in the symbol table. When we want to retrieve the default + * precision associated with a given glsl_type we need to know the type string + * associated with it. This is what this function returns. + */ + static const char * + get_type_name_for_precision_qualifier(const glsl_type *type) + { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + return "float"; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + return "int"; + case GLSL_TYPE_ATOMIC_UINT: + return "atomic_uint"; + case GLSL_TYPE_IMAGE: + /* fallthrough */ + case GLSL_TYPE_SAMPLER: { + const unsigned type_idx = + type->sampler_array + 2 * type->sampler_shadow; + const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 
0 : 4; + assert(type_idx < 4); + switch (type->sampler_type) { + case GLSL_TYPE_FLOAT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler1D", "sampler1DArray", + "sampler1DShadow", "sampler1DArrayShadow" + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "sampler2D", "sampler2DArray", + "sampler2DShadow", "sampler2DArrayShadow", + "image2D", "image2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "sampler3D", NULL, NULL, NULL, + "image3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "samplerCube", "samplerCubeArray", + "samplerCubeShadow", "samplerCubeArrayShadow", + "imageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler2DMS", "sampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerRect", NULL, "samplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_EXTERNAL: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerExternalOES", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported sampler/image dimensionality"); + } /* sampler/image float dimensionality */ + break; + case GLSL_TYPE_INT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler1D", "isampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "isampler2D", "isampler2DArray", NULL, NULL, + "iimage2D", "iimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "isampler3D", NULL, NULL, NULL, + "iimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "isamplerCube", "isamplerCubeArray", NULL, NULL, + "iimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler2DMS", "isampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerRect", NULL, "isamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported isampler/iimage dimensionality"); + } /* sampler/image int dimensionality */ + break; + case GLSL_TYPE_UINT: + switch (type->sampler_dimensionality) { 
+ case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler1D", "usampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "usampler2D", "usampler2DArray", NULL, NULL, + "uimage2D", "uimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "usampler3D", NULL, NULL, NULL, + "uimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "usamplerCube", "usamplerCubeArray", NULL, NULL, + "uimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler2DMS", "usampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerRect", NULL, "usamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported usampler/uimage dimensionality"); + } /* sampler/image uint dimensionality */ + break; + default: + unreachable("Unsupported sampler/image type"); + } /* sampler/image type */ + break; + } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */ + break; + default: + unreachable("Unsupported type"); + } /* base type */ + } + + static unsigned + select_gles_precision(unsigned qual_precision, + const glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) + { + /* Precision qualifiers do not have any meaning in Desktop GLSL. + * In GLES we take the precision from the type qualifier if present, + * otherwise, if the type of the variable allows precision qualifiers at + * all, we look for the default precision qualifier for that type in the + * current scope. + */ + assert(state->es_shader); + + unsigned precision = GLSL_PRECISION_NONE; + if (qual_precision) { + precision = qual_precision; + } else if (precision_qualifier_allowed(type)) { + const char *type_name = + get_type_name_for_precision_qualifier(type->without_array()); + assert(type_name != NULL); + + precision = + state->symbols->get_default_precision_qualifier(type_name); + if (precision == ast_precision_none) { + _mesa_glsl_error(loc, state, + "No precision specified in this scope for type `%s'", + type->name); + } + } + return precision; + } + + const glsl_type * + ast_fully_specified_type::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const + { + return this->specifier->glsl_type(name, state); + } + + /** + * Determine whether a toplevel variable declaration declares a varying. This + * function operates by examining the variable's mode and the shader target, + * so it correctly identifies linkage variables regardless of whether they are + * declared using the deprecated "varying" syntax or the new "in/out" syntax. + * + * Passing a non-toplevel variable declaration (e.g. a function parameter) to + * this function will produce undefined results. 
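select_gles_precision above falls back to the default precision recorded in the symbol table under the type's name; a hypothetical GLSL ES 3.00 fragment-shader sketch:

    #version 300 es
    precision highp float;         // fragment shaders have no built-in float default
    precision mediump sampler3D;   // establish a default for a type that has none
    uniform sampler3D volume;      // picks up mediump from the default above
    // uniform isampler2D bad;     // error: no precision specified in this scope
    //                             // for type isampler2D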
+ */ + static bool + is_varying_var(ir_variable *var, gl_shader_stage target) + { + switch (target) { + case MESA_SHADER_VERTEX: + return var->data.mode == ir_var_shader_out; + case MESA_SHADER_FRAGMENT: + return var->data.mode == ir_var_shader_in; + default: + return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in; + } + } + + + /** + * Matrix layout qualifiers are only allowed on certain types + */ + static void + validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + const glsl_type *type, + ir_variable *var) + { + if (var && !var->is_in_buffer_block()) { + /* Layout qualifiers may only apply to interface blocks and fields in + * them. + */ + _mesa_glsl_error(loc, state, + "uniform block layout qualifiers row_major and " + "column_major may not be applied to variables " + "outside of uniform blocks"); + } else if (!type->without_array()->is_matrix()) { + /* The OpenGL ES 3.0 conformance tests did not originally allow + * matrix layout qualifiers on non-matrices. However, the OpenGL + * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were + * amended to specifically allow these layouts on all types. Emit + * a warning so that people know their code may not be portable. + */ + _mesa_glsl_warning(loc, state, + "uniform block layout qualifiers row_major and " + "column_major applied to non-matrix types may " + "be rejected by older compilers"); + } + } + + static bool + process_qualifier_constant(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + const char *qual_indentifier, + ast_expression *const_expression, + unsigned *value) + { + exec_list dummy_instructions; + + if (const_expression == NULL) { + *value = 0; + return true; + } + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + _mesa_glsl_error(loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < 0) { + _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)", + qual_indentifier, const_int->value.u[0]); + return false; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
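validate_matrix_layout_for_type above restricts row_major / column_major to members of uniform or buffer blocks; a hypothetical GLSL 1.40 sketch:

    #version 140
    layout(row_major) uniform Transforms {
       mat4 model;                       // stored row-major (block default)
       layout(column_major) mat4 view;   // per-member override
    };
    // layout(row_major) uniform mat4 loose;  // error: row_major/column_major may not be
    //                                        // applied to variables outside uniform blocks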
+ */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; + } + + static bool + validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) + { + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; + } + + static void + apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) + { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniforms and " + "shader storage buffer objects"); + return; + } + + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; + } + + const struct gl_context *const ctx = state->ctx; + unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; + unsigned max_index = qual_binding + elements - 1; + const glsl_type *base_type = type->without_array(); + + if (base_type->is_interface()) { + /* UBOs. From page 60 of the GLSL 4.20 specification: + * "If the binding point for any uniform block instance is less than zero, + * or greater than or equal to the implementation-dependent maximum + * number of uniform buffer bindings, a compilation error will occur. + * When the binding identifier is used with a uniform block instanced as + * an array of size N, all elements of the array from binding through + * binding + N – 1 must be within this range." + * + * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS. + */ + if (qual->flags.q.uniform && + max_index >= ctx->Const.MaxUniformBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " + "the maximum number of UBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxUniformBufferBindings); + return; + } + + /* SSBOs. From page 67 of the GLSL 4.30 specification: + * "If the binding point for any uniform or shader storage block instance + * is less than zero, or greater than or equal to the + * implementation-dependent maximum number of uniform buffer bindings, a + * compile-time error will occur. When the binding identifier is used + * with a uniform or shader storage block instanced as an array of size + * N, all elements of the array from binding through binding + N – 1 must + * be within this range." + */ + if (qual->flags.q.buffer && + max_index >= ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " + "the maximum number of SSBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxShaderStorageBufferBindings); + return; + } + } else if (base_type->is_sampler()) { + /* Samplers. From page 63 of the GLSL 4.20 specification: + * "If the binding is less than zero, or greater than or equal to the + * implementation-dependent maximum supported number of units, a + * compilation error will occur. When the binding identifier is used + * with an array of size N, all elements of the array from binding + * through binding + N - 1 must be within this range." 
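apply_explicit_binding above checks each binding point, and for arrays the whole range binding .. binding + N - 1, against the corresponding implementation limit; a hypothetical GLSL 4.20 sketch:

    #version 420
    layout(binding = 2) uniform sampler2D albedo;     // occupies texture unit 2
    layout(binding = 3) uniform sampler2D shadow[4];  // occupies units 3..6
    layout(binding = 0, std140) uniform PerFrame {    // UBO binding point 0
       mat4 viewProj;
    };
    // any binding (or binding + N - 1) at or above the relevant limit,
    // e.g. MAX_UNIFORM_BUFFER_BINDINGS, is a compile-time error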
+ */ + unsigned limit = ctx->Const.MaxCombinedTextureImageUnits; + + if (max_index >= limit) { + _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " + "exceeds the maximum number of texture image units " + "(%u)", qual_binding, elements, limit); + + return; + } + } else if (base_type->contains_atomic()) { + assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " + " maximum number of atomic counter buffer bindings" + "(%u)", qual_binding, + ctx->Const.MaxAtomicBufferBindings); + + return; + } + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { + assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); + if (max_index >= ctx->Const.MaxImageUnits) { + _mesa_glsl_error(loc, state, "Image binding %d exceeds the " + " maximum number of image units (%d)", max_index, + ctx->Const.MaxImageUnits); + return; + } + + } else { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniform " + "blocks, opaque variables, or arrays thereof"); + return; + } + + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; + } + + + static glsl_interp_qualifier + interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, + ir_variable_mode mode, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) + { + glsl_interp_qualifier interpolation; + if (qual->flags.q.flat) + interpolation = INTERP_QUALIFIER_FLAT; + else if (qual->flags.q.noperspective) + interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + else if (qual->flags.q.smooth) + interpolation = INTERP_QUALIFIER_SMOOTH; + else + interpolation = INTERP_QUALIFIER_NONE; + + if (interpolation != INTERP_QUALIFIER_NONE) { + if (mode != ir_var_shader_in && mode != ir_var_shader_out) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' can only be applied to " + "shader inputs or outputs.", + interpolation_string(interpolation)); + + } + + if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) || + (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' cannot be applied to " + "vertex shader inputs or fragment shader outputs", + interpolation_string(interpolation)); + } + } + + return interpolation; + } + + + static void + apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) + { + bool fail = false; + + unsigned qual_location; + if (!process_qualifier_constant(state, loc, "location", qual->location, + &qual_location)) { + return; + } + + /* Checks for GL_ARB_explicit_uniform_location. 
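The interpolation-qualifier and explicit-location handling above corresponds to GLSL along these lines (hypothetical vertex-shader sketch; explicit uniform locations need GL_ARB_explicit_uniform_location or GLSL 4.30):

    #version 330
    #extension GL_ARB_explicit_uniform_location : require
    layout(location = 0) in vec4 position;    // explicit vertex input location
    layout(location = 3) uniform vec4 tint;   // explicit uniform location
    flat out int material_id;                 // interpolation qualifiers go on in/out
    // flat uniform int bad;  // error: interpolation qualifiers can only be applied
    //                        // to shader inputs or outputs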
*/ + if (qual->flags.q.uniform) { + if (!state->check_explicit_uniform_location_allowed(loc, var)) + return; + + const struct gl_context *const ctx = state->ctx; + unsigned max_loc = qual_location + var->type->uniform_locations() - 1; + + if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { + _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " + ">= MAX_UNIFORM_LOCATIONS (%u)", var->name, + ctx->Const.MaxUserAssignableUniformLocations); + return; + } + + var->data.explicit_location = true; + var->data.location = qual_location; + return; + } + + /* Between GL_ARB_explicit_attrib_location an + * GL_ARB_separate_shader_objects, the inputs and outputs of any shader + * stage can be assigned explicit locations. The checking here associates + * the correct extension with the correct stage's input / output: + * + * input output + * ----- ------ + * vertex explicit_loc sso + * tess control sso sso + * tess eval sso sso + * geometry sso sso + * fragment sso explicit_loc + */ + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_in) { + if (!state->check_explicit_attrib_location_allowed(loc, var)) + return; + + break; + } + + if (var->data.mode == ir_var_shader_out) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_in) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + if (var->data.mode == ir_var_shader_out) { + if (!state->check_explicit_attrib_location_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_COMPUTE: + _mesa_glsl_error(loc, state, + "compute shader variables cannot be given " + "explicit locations"); + return; + }; + + if (fail) { + _mesa_glsl_error(loc, state, + "%s cannot be given an explicit location in %s shader", + mode_string(var), + _mesa_shader_stage_to_string(state->stage)); + } else { + var->data.explicit_location = true; + + switch (state->stage) { + case MESA_SHADER_VERTEX: + var->data.location = (var->data.mode == ir_var_shader_in) + ? (qual_location + VERT_ATTRIB_GENERIC0) + : (qual_location + VARYING_SLOT_VAR0); + break; + + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.patch) + var->data.location = qual_location + VARYING_SLOT_PATCH0; + else + var->data.location = qual_location + VARYING_SLOT_VAR0; + break; + + case MESA_SHADER_FRAGMENT: + var->data.location = (var->data.mode == ir_var_shader_out) + ? 
(qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); + break; + case MESA_SHADER_COMPUTE: + assert(!"Unexpected shader type"); + break; + } + + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { + /* From the GLSL 4.30 specification, section 4.4.2 (Output + * Layout Qualifiers): + * + * "It is also a compile-time error if a fragment shader + * sets a layout index to less than 0 or greater than 1." + * + * Older specifications don't mandate a behavior; we take + * this as a clarification and always generate the error. + */ + if (qual_index > 1) { + _mesa_glsl_error(loc, state, + "explicit index may only be 0 or 1"); + } else { + var->data.explicit_index = true; + var->data.index = qual_index; + } + } + } + } + + static void + apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) + { + const glsl_type *base_type = var->type->without_array(); + + if (base_type->is_image()) { + if (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "image variables may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + + var->data.image_read_only |= qual->flags.q.read_only; + var->data.image_write_only |= qual->flags.q.write_only; + var->data.image_coherent |= qual->flags.q.coherent; + var->data.image_volatile |= qual->flags.q._volatile; + var->data.image_restrict |= qual->flags.q.restrict_flag; + var->data.read_only = true; + + if (qual->flags.q.explicit_image_format) { + if (var->data.mode == ir_var_function_in) { + _mesa_glsl_error(loc, state, "format qualifiers cannot be " + "used on image function parameters"); + } + + if (qual->image_base_type != base_type->sampler_type) { + _mesa_glsl_error(loc, state, "format qualifier doesn't match the " + "base data type of the image"); + } + + var->data.image_format = qual->image_format; + } else { + if (var->data.mode == ir_var_uniform) { + if (state->es_shader) { + _mesa_glsl_error(loc, state, "all image uniforms " + "must have a format layout qualifier"); + + } else if (!qual->flags.q.write_only) { + _mesa_glsl_error(loc, state, "image uniforms not qualified with " + "`writeonly' must have a format layout " + "qualifier"); + } + } + + var->data.image_format = GL_NONE; + } + + /* From page 70 of the GLSL ES 3.1 specification: + * + * "Except for image variables qualified with the format qualifiers + * r32f, r32i, and r32ui, image variables must specify either memory + * qualifier readonly or the memory qualifier writeonly." 
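+ *
+ * For example (illustrative GLSL ES 3.1 declarations):
+ *
+ *     layout(rgba8) writeonly uniform highp image2D dst;  // ok
+ *     layout(r32f) uniform highp image2D accum;           // ok, r32f is exempt
+ *     layout(rgba8) uniform highp image2D bad;            // rejected below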
+ */ + if (state->es_shader && + var->data.image_format != GL_R32F && + var->data.image_format != GL_R32I && + var->data.image_format != GL_R32UI && + !var->data.image_read_only && + !var->data.image_write_only) { + _mesa_glsl_error(loc, state, "image variables of format other than " + "r32f, r32i or r32ui must be qualified `readonly' or " + "`writeonly'"); + } + + } else if (qual->flags.q.read_only || + qual->flags.q.write_only || + qual->flags.q.coherent || + qual->flags.q._volatile || + qual->flags.q.restrict_flag || + qual->flags.q.explicit_image_format) { + _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to " + "images"); + } + } + + static inline const char* + get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer) + { + if (origin_upper_left && pixel_center_integer) + return "origin_upper_left, pixel_center_integer"; + else if (origin_upper_left) + return "origin_upper_left"; + else if (pixel_center_integer) + return "pixel_center_integer"; + else + return " "; + } + + static inline bool + is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state, + const struct ast_type_qualifier *qual) + { + /* If gl_FragCoord was previously declared, and the qualifiers were + * different in any way, return true. + */ + if (state->fs_redeclares_gl_fragcoord) { + return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer + || state->fs_origin_upper_left != qual->flags.q.origin_upper_left); + } + + return false; + } + + static inline void + validate_array_dimensions(const glsl_type *t, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { + if (t->is_array()) { + t = t->fields.array; + while (t->is_array()) { + if (t->is_unsized_array()) { + _mesa_glsl_error(loc, state, + "only the outermost array dimension can " + "be unsized", + t->name); + break; + } + t = t->fields.array; + } + } + } + + static void + apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) + { + if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { + + /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: + * + * "Within any shader, the first redeclarations of gl_FragCoord + * must appear before any use of gl_FragCoord." + * + * Generate a compiler error if above condition is not met by the + * fragment shader. + */ + ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); + if (earlier != NULL && + earlier->data.used && + !state->fs_redeclares_gl_fragcoord) { + _mesa_glsl_error(loc, state, + "gl_FragCoord used before its first redeclaration " + "in fragment shader"); + } + + /* Make sure all gl_FragCoord redeclarations specify the same layout + * qualifiers. 
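+ *
+ * For example, a fragment shader containing both of these illustrative
+ * redeclarations
+ *
+ *     layout(origin_upper_left) in vec4 gl_FragCoord;
+ *     layout(origin_upper_left, pixel_center_integer) in vec4 gl_FragCoord;
+ *
+ * is rejected below, because the second redeclaration adds a qualifier
+ * that the first one did not specify.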
+ */ + if (is_conflicting_fragcoord_redeclaration(state, qual)) { + const char *const qual_string = + get_layout_qualifier_string(qual->flags.q.origin_upper_left, + qual->flags.q.pixel_center_integer); + + const char *const state_string = + get_layout_qualifier_string(state->fs_origin_upper_left, + state->fs_pixel_center_integer); + + _mesa_glsl_error(loc, state, + "gl_FragCoord redeclared with different layout " + "qualifiers (%s) and (%s) ", + state_string, + qual_string); + } + state->fs_origin_upper_left = qual->flags.q.origin_upper_left; + state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = + !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord = + state->fs_origin_upper_left || + state->fs_pixel_center_integer || + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + } + + var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; + var->data.origin_upper_left = qual->flags.q.origin_upper_left; + if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) + && (strcmp(var->name, "gl_FragCoord") != 0)) { + const char *const qual_string = (qual->flags.q.origin_upper_left) + ? "origin_upper_left" : "pixel_center_integer"; + + _mesa_glsl_error(loc, state, + "layout qualifier `%s' can only be applied to " + "fragment shader input `gl_FragCoord'", + qual_string); + } + + if (qual->flags.q.explicit_location) { + apply_explicit_location(qual, var, state, loc); + } else if (qual->flags.q.explicit_index) { + if (!qual->flags.q.subroutine_def) + _mesa_glsl_error(loc, state, + "explicit index requires explicit location"); + } + + if (qual->flags.q.explicit_binding) { + apply_explicit_binding(state, loc, var, var->type, qual); + } + + if (state->stage == MESA_SHADER_GEOMETRY && + qual->flags.q.out && qual->flags.q.stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, loc, "stream", qual->stream, + &qual_stream) && + validate_stream_qualifier(loc, state, qual_stream)) { + var->data.stream = qual_stream; + } + } + + if (var->type->contains_atomic()) { + if (var->data.mode == ir_var_uniform) { + if (var->data.explicit_binding) { + unsigned *offset = + &state->atomic_counter_offsets[var->data.binding]; + + if (*offset % ATOMIC_COUNTER_SIZE) + _mesa_glsl_error(loc, state, + "misaligned atomic counter offset"); + + var->data.offset = *offset; + *offset += var->type->atomic_size(); + + } else { + _mesa_glsl_error(loc, state, + "atomic counters require explicit binding point"); + } + } else if (var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "atomic counters may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. + * Many implementations of GL_ARB_fragment_coord_conventions_enable and some + * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable + * allowed the layout qualifier to be used with 'varying' and 'attribute'. + * These extensions and all following extensions that add the 'layout' + * keyword have been modified to require the use of 'in' or 'out'. 
+ * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. + */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } + } + + static void + apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + bool is_parameter) + { + STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i)); + + if (qual->flags.q.invariant) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be 
redeclared " + "`invariant' after being used", + var->name); + } else { + var->data.invariant = 1; + } + } + + if (qual->flags.q.precise) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be redeclared " + "`precise' after being used", + var->name); + } else { + var->data.precise = 1; + } + } + + if (qual->flags.q.subroutine && !qual->flags.q.uniform) { + _mesa_glsl_error(loc, state, + "`subroutine' may only be applied to uniforms, " + "subroutine type declarations, or function definitions"); + } + + if (qual->flags.q.constant || qual->flags.q.attribute + || qual->flags.q.uniform + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.read_only = 1; + + if (qual->flags.q.centroid) + var->data.centroid = 1; + + if (qual->flags.q.sample) + var->data.sample = 1; + + /* Precision qualifiers do not hold any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(qual->precision, var->type, state, loc); + } + + if (qual->flags.q.patch) + var->data.patch = 1; + + if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { + var->type = glsl_type::error_type; + _mesa_glsl_error(loc, state, + "`attribute' variables may not be declared in the " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + + /* Disallow layout qualifiers which may only appear on layout declarations. */ + if (qual->flags.q.prim_type) { + _mesa_glsl_error(loc, state, + "Primitive type may only be specified on GS input or output " + "layout declaration, not on variables."); + } + + /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says: + * + * "However, the const qualifier cannot be used with out or inout." + * + * The same section of the GLSL 4.40 spec further clarifies this saying: + * + * "The const qualifier cannot be used with out or inout, or a + * compile-time error results." + */ + if (is_parameter && qual->flags.q.constant && qual->flags.q.out) { + _mesa_glsl_error(loc, state, + "`const' may not be applied to `out' or `inout' " + "function parameters"); + } + + /* If there is no qualifier that changes the mode of the variable, leave + * the setting alone. + */ + assert(var->data.mode != ir_var_temporary); + if (qual->flags.q.in && qual->flags.q.out) + var->data.mode = ir_var_function_inout; + else if (qual->flags.q.in) + var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in; + else if (qual->flags.q.attribute + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.mode = ir_var_shader_in; + else if (qual->flags.q.out) + var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out; + else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX)) + var->data.mode = ir_var_shader_out; + else if (qual->flags.q.uniform) + var->data.mode = ir_var_uniform; + else if (qual->flags.q.buffer) + var->data.mode = ir_var_shader_storage; + else if (qual->flags.q.shared_storage) + var->data.mode = ir_var_shader_shared; + + if (!is_parameter && is_varying_var(var, state->stage)) { + /* User-defined ins/outs are not permitted in compute shaders. */ + if (state->stage == MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "user-defined input and output variables are not " + "permitted in compute shaders"); + } + + /* This variable is being used to link data between shader stages (in + * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type + * that is allowed for such purposes. 
+ * + * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec: + * + * "The varying qualifier can be used only with the data types + * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of + * these." + * + * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From + * page 31 (page 37 of the PDF) of the GLSL 1.30 spec: + * + * "Fragment inputs can only be signed and unsigned integers and + * integer vectors, float, floating-point vectors, matrices, or + * arrays of these. Structures cannot be input. + * + * Similar text exists in the section on vertex shader outputs. + * + * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES + * 3.00 spec allows structs as well. Varying structs are also allowed + * in GLSL 1.50. + */ + switch (var->type->get_scalar_type()->base_type) { + case GLSL_TYPE_FLOAT: + /* Ok in all GLSL versions */ + break; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + if (state->is_version(130, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables must be of base type float in %s", + state->get_version_string()); + break; + case GLSL_TYPE_STRUCT: + if (state->is_version(150, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables may not be of type struct"); + break; + case GLSL_TYPE_DOUBLE: + break; + default: + _mesa_glsl_error(loc, state, "illegal type for a varying variable"); + break; + } + } + + if (state->all_invariant && (state->current_function == NULL)) { + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_out) + var->data.invariant = true; + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if ((var->data.mode == ir_var_shader_in) + || (var->data.mode == ir_var_shader_out)) + var->data.invariant = true; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_in) + var->data.invariant = true; + break; + case MESA_SHADER_COMPUTE: + /* Invariance isn't meaningful in compute shaders. */ + break; + } + } + + var->data.interpolation = + interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, + state, loc); + + /* Does the declaration use the deprecated 'attribute' or 'varying' + * keywords? + */ + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + + + /* Validate auxiliary storage qualifiers */ + + /* From section 4.3.4 of the GLSL 1.30 spec: + * "It is an error to use centroid in in a vertex shader." + * + * From section 4.3.4 of the GLSL ES 3.00 spec: + * "It is an error to use centroid in or interpolation qualifiers in + * a vertex shader input." + */ + + /* Section 4.3.6 of the GLSL 1.30 specification states: + * "It is an error to use centroid out in a fragment shader." + * + * The GL_ARB_shading_language_420pack extension specification states: + * "It is an error to use auxiliary storage qualifiers or interpolation + * qualifiers on an output in a fragment shader." 
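+ *
+ * For example, both of these illustrative declarations are rejected by
+ * the checks below:
+ *
+ *     centroid in vec4 position;   // in a vertex shader
+ *     sample out vec4 color;       // in a fragment shader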
+ */ + if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) { + _mesa_glsl_error(loc, state, + "sample qualifier may only be used on `in` or `out` " + "variables between shader stages"); + } + if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) { + _mesa_glsl_error(loc, state, + "centroid qualifier may only be used with `in', " + "`out' or `varying' variables between shader stages"); + } + + if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "the shared storage qualifiers can only be used with " + "compute shaders"); + } + + apply_image_qualifier_to_variable(qual, var, state, loc); + } + + /** + * Get the variable that is being redeclared by this declaration + * + * Semantic checks to verify the validity of the redeclaration are also + * performed. If semantic checks fail, compilation error will be emitted via + * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned. + * + * \returns + * A pointer to an existing variable in the current scope if the declaration + * is a redeclaration, \c NULL otherwise. + */ + static ir_variable * + get_variable_being_redeclared(ir_variable *var, YYLTYPE loc, + struct _mesa_glsl_parse_state *state, + bool allow_all_redeclarations) + { + /* Check if this declaration is actually a re-declaration, either to + * resize an array or add qualifiers to an existing variable. + * + * This is allowed for variables in the current scope, or when at + * global scope (for built-ins in the implicit outer scope). + */ + ir_variable *earlier = state->symbols->get_variable(var->name); + if (earlier == NULL || + (state->current_function != NULL && + !state->symbols->name_declared_this_scope(var->name))) { + return NULL; + } + + + /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec, + * + * "It is legal to declare an array without a size and then + * later re-declare the same name as an array of the same + * type and specify a size." + */ + if (earlier->type->is_unsized_array() && var->type->is_array() + && (var->type->fields.array == earlier->type->fields.array)) { + /* FINISHME: This doesn't match the qualifiers on the two + * FINISHME: declarations. It's not 100% clear whether this is + * FINISHME: required or not. + */ + + const unsigned size = unsigned(var->type->array_size()); + check_builtin_array_max_size(var->name, size, loc, state); + if ((size > 0) && (size <= earlier->data.max_array_access)) { + _mesa_glsl_error(& loc, state, "array size must be > %u due to " + "previous access", + earlier->data.max_array_access); + } + + earlier->type = var->type; + delete var; + var = NULL; + } else if ((state->ARB_fragment_coord_conventions_enable || + state->is_version(150, 0)) + && strcmp(var->name, "gl_FragCoord") == 0 + && earlier->type == var->type + && var->data.mode == ir_var_shader_in) { + /* Allow redeclaration of gl_FragCoord for ARB_fcc layout + * qualifiers. 
+ */
+ earlier->data.origin_upper_left = var->data.origin_upper_left;
+ earlier->data.pixel_center_integer = var->data.pixel_center_integer;
+
+ /* According to section 4.3.7 of the GLSL 1.30 spec,
+ * the following built-in variables can be redeclared with an
+ * interpolation qualifier:
+ * * gl_FrontColor
+ * * gl_BackColor
+ * * gl_FrontSecondaryColor
+ * * gl_BackSecondaryColor
+ * * gl_Color
+ * * gl_SecondaryColor
+ */
+ } else if (state->is_version(130, 0)
+ && (strcmp(var->name, "gl_FrontColor") == 0
+ || strcmp(var->name, "gl_BackColor") == 0
+ || strcmp(var->name, "gl_FrontSecondaryColor") == 0
+ || strcmp(var->name, "gl_BackSecondaryColor") == 0
+ || strcmp(var->name, "gl_Color") == 0
+ || strcmp(var->name, "gl_SecondaryColor") == 0)
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+ earlier->data.interpolation = var->data.interpolation;
+
+ /* Layout qualifiers for gl_FragDepth. */
+ } else if ((state->AMD_conservative_depth_enable ||
+ state->ARB_conservative_depth_enable)
+ && strcmp(var->name, "gl_FragDepth") == 0
+ && earlier->type == var->type
+ && earlier->data.mode == var->data.mode) {
+
+ /** From the AMD_conservative_depth spec:
+ * Within any shader, the first redeclarations of gl_FragDepth
+ * must appear before any use of gl_FragDepth.
+ */
+ if (earlier->data.used) {
+ _mesa_glsl_error(&loc, state,
+ "the first redeclaration of gl_FragDepth "
+ "must appear before any use of gl_FragDepth");
+ }
+
+ /* Prevent inconsistent redeclaration of depth layout qualifier. */
+ if (earlier->data.depth_layout != ir_depth_layout_none
+ && earlier->data.depth_layout != var->data.depth_layout) {
+ _mesa_glsl_error(&loc, state,
+ "gl_FragDepth: depth layout is declared here "
+ "as '%s', but it was previously declared as "
+ "'%s'",
+ depth_layout_string(var->data.depth_layout),
+ depth_layout_string(earlier->data.depth_layout));
+ }
+
+ earlier->data.depth_layout = var->data.depth_layout;
+
+ } else if (allow_all_redeclarations) {
+ if (earlier->data.mode != var->data.mode) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' with incorrect qualifiers",
+ var->name);
+ } else if (earlier->type != var->type) {
+ _mesa_glsl_error(&loc, state,
+ "redeclaration of `%s' has incorrect type",
+ var->name);
+ }
+ } else {
+ _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+ }
+
+ return earlier;
+ }
+
+ /**
+ * Generate the IR for an initializer in a variable declaration
+ */
+ ir_rvalue *
+ process_initializer(ir_variable *var, ast_declaration *decl,
+ ast_fully_specified_type *type,
+ exec_list *initializer_instructions,
+ struct _mesa_glsl_parse_state *state)
+ {
+ ir_rvalue *result = NULL;
+
+ YYLTYPE initializer_loc = decl->initializer->get_location();
+
+ /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "All uniform variables are read-only and are initialized either
+ * directly by an application via API commands, or indirectly by
+ * OpenGL."
+ */
+ if (var->data.mode == ir_var_uniform) {
+ state->check_version(120, 0, &initializer_loc,
+ "cannot initialize uniform %s",
+ var->name);
+ }
+
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ *
+ * "Buffer variables cannot have initializers."
+ */
+ if (var->data.mode == ir_var_shader_storage) {
+ _mesa_glsl_error(&initializer_loc, state,
+ "cannot initialize buffer variable %s",
+ var->name);
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "Opaque variables [...]
are initialized only through the + * OpenGL API; they cannot be declared with an initializer in a + * shader." + */ + if (var->type->contains_opaque()) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize opaque variable %s", + var->name); + } + + if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader input / %s %s", + _mesa_shader_stage_to_string(state->stage), + (state->stage == MESA_SHADER_VERTEX) + ? "attribute" : "varying", + var->name); + } + + if (var->data.mode == ir_var_shader_out && state->current_function == NULL) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader output %s", + _mesa_shader_stage_to_string(state->stage), + var->name); + } + + /* If the initializer is an ast_aggregate_initializer, recursively store + * type information from the LHS into it, so that its hir() function can do + * type checking. + */ + if (decl->initializer->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(var->type, decl->initializer); + + ir_dereference *const lhs = new(state) ir_dereference_variable(var); + ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state); + + /* Calculate the constant value if this is a const or uniform + * declaration. + * + * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says: + * + * "Declarations of globals without a storage qualifier, or with + * just the const qualifier, may include initializers, in which case + * they will be initialized before the first line of main() is + * executed. Such initializers must be a constant expression." + * + * The same section of the GLSL ES 3.00.4 spec has similar language. + */ + if (type->qualifier.flags.q.constant + || type->qualifier.flags.q.uniform + || (state->es_shader && state->current_function == NULL)) { + ir_rvalue *new_rhs = validate_assignment(state, initializer_loc, + lhs, rhs, true); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec + * says: + * + * "A constant expression is one of + * + * ... + * + * - an expression formed by an operator on operands that are + * all constant expressions, including getting an element of + * a constant array, or a field of a constant structure, or + * components of a constant vector. However, the sequence + * operator ( , ) and the assignment operators ( =, +=, ...) + * are not included in the operators that can create a + * constant expression." + * + * Section 12.43 (Sequence operator and constant expressions) says: + * + * "Should the following construct be allowed? + * + * float a[2,3]; + * + * The expression within the brackets uses the sequence operator + * (',') and returns the integer 3 so the construct is declaring + * a single-dimensional array of size 3. In some languages, the + * construct declares a two-dimensional array. It would be + * preferable to make this construct illegal to avoid confusion. + * + * One possibility is to change the definition of the sequence + * operator so that it does not return a constant-expression and + * hence cannot be used to declare an array size. + * + * RESOLUTION: The result of a sequence operator is not a + * constant-expression." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec + * contains language almost identical to the section 4.3.3 in the + * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL + * versions. 
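+ *
+ * For example, in GLSL ES 3.00 the illustrative declaration
+ *
+ *     const int i = (2, 3);
+ *
+ * is rejected below because the initializer contains the sequence
+ * operator and is therefore not a constant expression.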
+ */ + ir_constant *constant_value = rhs->constant_expression_value(); + if (!constant_value || + (state->is_version(430, 300) && + decl->initializer->has_sequence_subexpression())) { + const char *const variable_mode = + (type->qualifier.flags.q.constant) + ? "const" + : ((type->qualifier.flags.q.uniform) ? "uniform" : "global"); + + /* If ARB_shading_language_420pack is enabled, initializers of + * const-qualified local variables do not have to be constant + * expressions. Const-qualified global variables must still be + * initialized with constant expressions. + */ + if (!state->has_420pack() + || state->current_function == NULL) { + _mesa_glsl_error(& initializer_loc, state, + "initializer of %s variable `%s' must be a " + "constant expression", + variable_mode, + decl->identifier); + if (var->type->is_numeric()) { + /* Reduce cascading errors. */ + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; + } + } + } else { + rhs = constant_value; + var->constant_value = type->qualifier.flags.q.constant + ? constant_value : NULL; + } + } else { + if (var->type->is_numeric()) { + /* Reduce cascading errors. */ + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; + } + } + } + + if (rhs && !rhs->type->is_error()) { + bool temp = var->data.read_only; + if (type->qualifier.flags.q.constant) + var->data.read_only = false; + + /* Never emit code to initialize a uniform. + */ + const glsl_type *initializer_type; + if (!type->qualifier.flags.q.uniform) { + do_assignment(initializer_instructions, state, + NULL, + lhs, rhs, + &result, true, + true, + type->get_location()); + initializer_type = result->type; + } else + initializer_type = rhs->type; + + var->constant_initializer = rhs->constant_expression_value(); + var->data.has_initializer = true; + + /* If the declared variable is an unsized array, it must inherrit + * its full type from the initializer. A declaration such as + * + * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0); + * + * becomes + * + * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0); + * + * The assignment generated in the if-statement (below) will also + * automatically handle this case for non-uniforms. + * + * If the declared variable is not an array, the types must + * already match exactly. As a result, the type assignment + * here can be done unconditionally. For non-uniforms the call + * to do_assignment can change the type of the initializer (via + * the implicit conversion rules). For uniforms the initializer + * must be a constant expression, and the type of that expression + * was validated above. + */ + var->type = initializer_type; + + var->data.read_only = temp; + } + + return result; + } + + static void + validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var, + unsigned num_vertices, + unsigned *size, + const char *var_category) + { + if (var->type->is_unsized_array()) { + /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says: + * + * All geometry shader input unsized array declarations will be + * sized by an earlier input layout qualifier, when present, as per + * the following table. + * + * Followed by a table mapping each allowed input layout qualifier to + * the corresponding input length. + * + * Similarly for tessellation control shader outputs. 
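+ *
+ * For example, with the illustrative geometry shader declarations
+ *
+ *     layout(triangles) in;
+ *     in vec4 color[];
+ *
+ * the unsized input array is implicitly sized here to vec4 color[3].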
+ */ + if (num_vertices != 0) + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } else { + /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec + * includes the following examples of compile-time errors: + * + * // code sequence within one shader... + * in vec4 Color1[]; // size unknown + * ...Color1.length()...// illegal, length() unknown + * in vec4 Color2[2]; // size is 2 + * ...Color1.length()...// illegal, Color1 still has no size + * in vec4 Color3[3]; // illegal, input sizes are inconsistent + * layout(lines) in; // legal, input size is 2, matching + * in vec4 Color4[3]; // illegal, contradicts layout + * ... + * + * To detect the case illustrated by Color3, we verify that the size of + * an explicitly-sized array matches the size of any previously declared + * explicitly-sized array. To detect the case illustrated by Color4, we + * verify that the size of an explicitly-sized array is consistent with + * any previously declared input layout. + */ + if (num_vertices != 0 && var->type->length != num_vertices) { + _mesa_glsl_error(&loc, state, + "%s size contradicts previously declared layout " + "(size is %u, but layout requires a size of %u)", + var_category, var->type->length, num_vertices); + } else if (*size != 0 && var->type->length != *size) { + _mesa_glsl_error(&loc, state, + "%s sizes are inconsistent (size is %u, but a " + "previous declaration has size %u)", + var_category, var->type->length, *size); + } else { + *size = var->type->length; + } + } + } + + static void + handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) + { + unsigned num_vertices = 0; + + if (state->tcs_output_vertices_specified) { + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } + } + + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "tessellation control shader outputs must be arrays"); + + /* To avoid cascading failures, short circuit the checks below. */ + return; + } + + if (var->data.patch) + return; + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->tcs_output_size, + "tessellation control shader output"); + } + + /** + * Do additional processing necessary for tessellation control/evaluation shader + * input declarations. This covers both interface block arrays and bare input + * variables. + */ + static void + handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) + { + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "per-vertex tessellation shader inputs must be arrays"); + /* Avoid cascading failures. */ + return; + } + + if (var->data.patch) + return; + + /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */ + if (var->type->is_unsized_array()) { + var->type = glsl_type::get_array_instance(var->type->fields.array, + state->Const.MaxPatchVertices); + } + } + + + /** + * Do additional processing necessary for geometry shader input declarations + * (this covers both interface blocks arrays and bare input variables). 
+ */ + static void + handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) + { + unsigned num_vertices = 0; + + if (state->gs_input_prim_type_specified) { + num_vertices = vertices_per_prim(state->in_qualifier->prim_type); + } + + /* Geometry shader input variables must be arrays. Caller should have + * reported an error for this. + */ + if (!var->type->is_array()) { + assert(state->error); + + /* To avoid cascading failures, short circuit the checks below. */ + return; + } + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->gs_input_size, + "geometry shader input"); + } + + void + validate_identifier(const char *identifier, YYLTYPE loc, + struct _mesa_glsl_parse_state *state) + { + /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec, + * + * "Identifiers starting with "gl_" are reserved for use by + * OpenGL, and may not be declared in a shader as either a + * variable or a function." + */ + if (is_gl_identifier(identifier)) { + _mesa_glsl_error(&loc, state, + "identifier `%s' uses reserved `gl_' prefix", + identifier); + } else if (strstr(identifier, "__")) { + /* From page 14 (page 20 of the PDF) of the GLSL 1.10 + * spec: + * + * "In addition, all identifiers containing two + * consecutive underscores (__) are reserved as + * possible future keywords." + * + * The intention is that names containing __ are reserved for internal + * use by the implementation, and names prefixed with GL_ are reserved + * for use by Khronos. Names simply containing __ are dangerous to use, + * but should be allowed. + * + * A future version of the GLSL specification will clarify this. + */ + _mesa_glsl_warning(&loc, state, + "identifier `%s' uses reserved `__' string", + identifier); + } + } + + ir_rvalue * + ast_declarator_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + const struct glsl_type *decl_type; + const char *type_name = NULL; + ir_rvalue *result = NULL; + YYLTYPE loc = this->get_location(); + + /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec: + * + * "To ensure that a particular output variable is invariant, it is + * necessary to use the invariant qualifier. It can either be used to + * qualify a previously declared variable as being invariant + * + * invariant gl_Position; // make existing gl_Position be invariant" + * + * In these cases the parser will set the 'invariant' flag in the declarator + * list, and the type will be NULL. 
+ */ + if (this->invariant) { + assert(this->type == NULL); + + if (state->current_function != NULL) { + _mesa_glsl_error(& loc, state, + "all uses of `invariant' keyword must be at global " + "scope"); + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + assert(decl->array_specifier == NULL); + assert(decl->initializer == NULL); + + ir_variable *const earlier = + state->symbols->get_variable(decl->identifier); + if (earlier == NULL) { + _mesa_glsl_error(& loc, state, + "undeclared variable `%s' cannot be marked " + "invariant", decl->identifier); + } else if (!is_varying_var(earlier, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only.", decl->identifier); + } else if (earlier->data.used) { + _mesa_glsl_error(& loc, state, + "variable `%s' may not be redeclared " + "`invariant' after being used", + earlier->name); + } else { + earlier->data.invariant = true; + } + } + + /* Invariant redeclarations do not have r-values. + */ + return NULL; + } + + if (this->precise) { + assert(this->type == NULL); + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + assert(decl->array_specifier == NULL); + assert(decl->initializer == NULL); + + ir_variable *const earlier = + state->symbols->get_variable(decl->identifier); + if (earlier == NULL) { + _mesa_glsl_error(& loc, state, + "undeclared variable `%s' cannot be marked " + "precise", decl->identifier); + } else if (state->current_function != NULL && + !state->symbols->name_declared_this_scope(decl->identifier)) { + /* Note: we have to check if we're in a function, since + * builtins are treated as having come from another scope. + */ + _mesa_glsl_error(& loc, state, + "variable `%s' from an outer scope may not be " + "redeclared `precise' in this scope", + earlier->name); + } else if (earlier->data.used) { + _mesa_glsl_error(& loc, state, + "variable `%s' may not be redeclared " + "`precise' after being used", + earlier->name); + } else { + earlier->data.precise = true; + } + } + + /* Precise redeclarations do not have r-values either. */ + return NULL; + } + + assert(this->type != NULL); + assert(!this->invariant); + assert(!this->precise); + + /* The type specifier may contain a structure definition. Process that + * before any of the variable declarations. + */ + (void) this->type->specifier->hir(instructions, state); + + decl_type = this->type->glsl_type(& type_name, state); + + /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec: + * "Buffer variables may only be declared inside interface blocks + * (section 4.3.9 “Interface Blocks”), which are then referred to as + * shader storage blocks. It is a compile-time error to declare buffer + * variables at global scope (outside a block)." + */ + if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) { + _mesa_glsl_error(&loc, state, + "buffer variables cannot be declared outside " + "interface blocks"); + } + + /* An offset-qualified atomic counter declaration sets the default + * offset for the next declaration within the same atomic counter + * buffer. 
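+ *
+ * For example (illustrative declarations):
+ *
+ *     layout(binding = 0, offset = 4) uniform atomic_uint a;
+ *     layout(binding = 0) uniform atomic_uint b;
+ *
+ * sets the default offset for binding 0 to 4, so `a' lands at offset 4
+ * and `b' at the next offset, 8.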
+ */ + if (decl_type && decl_type->contains_atomic()) { + if (type->qualifier.flags.q.explicit_binding && + type->qualifier.flags.q.explicit_offset) { + unsigned qual_binding; + unsigned qual_offset; + if (process_qualifier_constant(state, &loc, "binding", + type->qualifier.binding, + &qual_binding) + && process_qualifier_constant(state, &loc, "offset", + type->qualifier.offset, + &qual_offset)) { + state->atomic_counter_offsets[qual_binding] = qual_offset; + } + } + } + + if (this->declarations.is_empty()) { + /* If there is no structure involved in the program text, there are two + * possible scenarios: + * + * - The program text contained something like 'vec4;'. This is an + * empty declaration. It is valid but weird. Emit a warning. + * + * - The program text contained something like 'S;' and 'S' is not the + * name of a known structure type. This is both invalid and weird. + * Emit an error. + * + * - The program text contained something like 'mediump float;' + * when the programmer probably meant 'precision mediump + * float;' Emit a warning with a description of what they + * probably meant to do. + * + * Note that if decl_type is NULL and there is a structure involved, + * there must have been some sort of error with the structure. In this + * case we assume that an error was already generated on this line of + * code for the structure. There is no need to generate an additional, + * confusing error. + */ + assert(this->type->specifier->structure == NULL || decl_type != NULL + || state->error); + + if (decl_type == NULL) { + _mesa_glsl_error(&loc, state, + "invalid type `%s' in empty declaration", + type_name); + } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { + /* Empty atomic counter declarations are allowed and useful + * to set the default offset qualifier. + */ + return NULL; + } else if (this->type->qualifier.precision != ast_precision_none) { + if (this->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers can't be applied " + "to structures"); + } else { + static const char *const precision_names[] = { + "highp", + "highp", + "mediump", + "lowp" + }; + + _mesa_glsl_warning(&loc, state, + "empty declaration with precision qualifier, " + "to set the default precision, use " + "`precision %s %s;'", + precision_names[this->type->qualifier.precision], + type_name); + } + } else if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); + } + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + const struct glsl_type *var_type; + ir_variable *var; + const char *identifier = decl->identifier; + /* FINISHME: Emit a warning if a variable declaration shadows a + * FINISHME: declaration at a higher scope. 
+ */ + + if ((decl_type == NULL) || decl_type->is_void()) { + if (type_name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + type_name, decl->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + } + continue; + } + + if (this->type->qualifier.flags.q.subroutine) { + const glsl_type *t; + const char *name; + + t = state->symbols->get_type(this->type->specifier->type_name); + if (!t) + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier); + + identifier = name; + + } + var_type = process_array_type(&loc, decl_type, decl->array_specifier, + state); + + var = new(ctx) ir_variable(var_type, identifier, ir_var_auto); + + /* The 'varying in' and 'varying out' qualifiers can only be used with + * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support + * yet. + */ + if (this->type->qualifier.flags.q.varying) { + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`varying in' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } else if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`varying out' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } + } + + /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification; + * + * "Global variables can only use the qualifiers const, + * attribute, uniform, or varying. Only one may be + * specified. + * + * Local variables can only use the qualifier const." + * + * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by + * any extension that adds the 'layout' keyword. + */ + if (!state->is_version(130, 300) + && !state->has_explicit_attrib_location() + && !state->has_separate_shader_objects() + && !state->ARB_fragment_coord_conventions_enable) { + if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`out' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`in' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + /* FINISHME: Test for other invalid qualifiers. */ + } + + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, + & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); + + if (this->type->qualifier.flags.q.invariant) { + if (!is_varying_var(var, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only", var->name); + } + } + + if (state->current_function != NULL) { + const char *mode = NULL; + const char *extra = ""; + + /* There is no need to check for 'inout' here because the parser will + * only allow that in function parameter lists. 
+ */ + if (this->type->qualifier.flags.q.attribute) { + mode = "attribute"; + } else if (this->type->qualifier.flags.q.subroutine) { + mode = "subroutine uniform"; + } else if (this->type->qualifier.flags.q.uniform) { + mode = "uniform"; + } else if (this->type->qualifier.flags.q.varying) { + mode = "varying"; + } else if (this->type->qualifier.flags.q.in) { + mode = "in"; + extra = " or in function parameter list"; + } else if (this->type->qualifier.flags.q.out) { + mode = "out"; + extra = " or in function parameter list"; + } + + if (mode) { + _mesa_glsl_error(& loc, state, + "%s variable `%s' must be declared at " + "global scope%s", + mode, var->name, extra); + } + } else if (var->data.mode == ir_var_shader_in) { + var->data.read_only = true; + + if (state->stage == MESA_SHADER_VERTEX) { + bool error_emitted = false; + + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs can also form arrays of these + * types, but not structures." + * + * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. They cannot be arrays or structures." + * + * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec: + * + * "The attribute qualifier can be used only with float, + * floating-point vectors, and matrices. Attribute variables + * cannot be declared as arrays or structures." + * + * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs cannot be arrays or + * structures." + */ + const glsl_type *check_type = var->type->without_array(); + + switch (check_type->base_type) { + case GLSL_TYPE_FLOAT: + break; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + if (state->is_version(120, 300)) + break; + case GLSL_TYPE_DOUBLE: + if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable)) + break; + /* FALLTHROUGH */ + default: + _mesa_glsl_error(& loc, state, + "vertex shader input / attribute cannot have " + "type %s`%s'", + var->type->is_array() ? "array of " : "", + check_type->name); + error_emitted = true; + } + + if (!error_emitted && var->type->is_array() && + !state->check_version(150, 0, &loc, + "vertex shader input / attribute " + "cannot have array type")) { + error_emitted = true; + } + } else if (state->stage == MESA_SHADER_GEOMETRY) { + /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * Geometry shader input variables get the per-vertex values + * written out by vertex shader output variables of the same + * names. Since a geometry shader operates on a set of + * vertices, each input varying variable (or input block, see + * interface blocks below) needs to be declared as an array. 
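+ *
+ * For example (illustrative geometry shader declarations):
+ *
+ *     in vec4 color[];   // valid, one element per input vertex
+ *     in vec4 normal;    // rejected below: not an array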
+ */
+ if (!var->type->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "geometry shader inputs must be arrays");
+ }
+
+ handle_geometry_shader_input_decl(state, loc, var);
+ } else if (state->stage == MESA_SHADER_FRAGMENT) {
+ /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
+ *
+ * It is a compile-time error to declare a fragment shader
+ * input with, or that contains, any of the following types:
+ *
+ * * A boolean type
+ * * An opaque type
+ * * An array of arrays
+ * * An array of structures
+ * * A structure containing an array
+ * * A structure containing a structure
+ */
+ if (state->es_shader) {
+ const glsl_type *check_type = var->type->without_array();
+ if (check_type->is_boolean() ||
+ check_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input cannot have type %s",
+ check_type->name);
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_array()) {
+ _mesa_glsl_error(&loc, state,
+ "%s shader input "
+ "cannot have an array of arrays",
+ _mesa_shader_stage_to_string(state->stage));
+ }
+ if (var->type->is_array() &&
+ var->type->fields.array->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input "
+ "cannot have an array of structs");
+ }
+ if (var->type->is_record()) {
+ for (unsigned i = 0; i < var->type->length; i++) {
+ if (var->type->fields.structure[i].type->is_array() ||
+ var->type->fields.structure[i].type->is_record())
+ _mesa_glsl_error(&loc, state,
+ "fragment shader input cannot have "
+ "a struct that contains an "
+ "array or struct");
+ }
+ }
+ }
+ } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) {
+ handle_tess_shader_input_decl(state, loc, var);
+ }
+ } else if (var->data.mode == ir_var_shader_out) {
+ const glsl_type *check_type = var->type->without_array();
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a vertex, tessellation
+ * evaluation, tessellation control, or geometry shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * An opaque type
+ */
+ if (check_type->is_boolean() || check_type->contains_opaque())
+ _mesa_glsl_error(&loc, state,
+ "%s shader output cannot have type %s",
+ _mesa_shader_stage_to_string(state->stage),
+ check_type->name);
+
+ /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+ *
+ * It is a compile-time error to declare a fragment shader output
+ * that contains any of the following:
+ *
+ * * A Boolean type (bool, bvec2 ...)
+ * * A double-precision scalar or vector (double, dvec2 ...)
+ * * An opaque type + * * Any matrix type + * * A structure + */ + if (state->stage == MESA_SHADER_FRAGMENT) { + if (check_type->is_record() || check_type->is_matrix()) + _mesa_glsl_error(&loc, state, + "fragment shader output " + "cannot have struct or matrix type"); + switch (check_type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + break; + default: + _mesa_glsl_error(&loc, state, + "fragment shader output cannot have " + "type %s", check_type->name); + } + } + + /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a vertex shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + * + * It is a compile-time error to declare a fragment shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * A matrix + * * A structure + * * An array of array + */ + if (state->es_shader) { + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array of arrays", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_VERTEX) { + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "vertex shader output " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "vertex shader output cannot have a " + "struct that contains an " + "array or struct"); + } + } + } + } + + if (state->stage == MESA_SHADER_TESS_CTRL) { + handle_tess_ctrl_shader_output_decl(state, loc, var); + } + } else if (var->type->contains_subroutine()) { + /* declare subroutine uniforms as hidden */ + var->data.how_declared = ir_var_hidden; + } + + /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES, + * so must integer vertex outputs. + * + * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec: + * "Fragment shader inputs that are signed or unsigned integers or + * integer vectors must be qualified with the interpolation qualifier + * flat." + * + * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec: + * "Fragment shader inputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." + * + * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec: + * "Vertex shader outputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." + * + * Note that prior to GLSL 1.50, this requirement applied to vertex + * outputs rather than fragment inputs. That creates problems in the + * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all + * desktop GL shaders. For GLSL ES shaders, we follow the spec and + * apply the restriction to both vertex outputs and fragment inputs. + * + * Note also that the desktop GLSL specs are missing the text "or + * contain"; this is presumably an oversight, since there is no + * reasonable way to interpolate a fragment shader input that contains + * an integer. 
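+ *
+ * For example (illustrative fragment shader inputs):
+ *
+ *     flat in ivec2 index;   // valid
+ *     in ivec2 index;        // rejected below: must be qualified 'flat'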
+ */
+ if (state->is_version(130, 300) &&
+ var->type->contains_integer() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in)
+ || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out
+ && state->es_shader))) {
+ const char *var_type = (state->stage == MESA_SHADER_VERTEX) ?
+ "vertex output" : "fragment input";
+ _mesa_glsl_error(&loc, state, "if a %s is (or contains) "
+ "an integer, then it must be qualified with 'flat'",
+ var_type);
+ }
+
+ /* Double fragment inputs must be qualified with 'flat'. */
+ if (var->type->contains_double() &&
+ var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+ state->stage == MESA_SHADER_FRAGMENT &&
+ var->data.mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+ "a double, then it must be qualified with 'flat'");
+ }
+
+ /* Interpolation qualifiers cannot be applied to 'varying' and
+ * 'centroid varying'.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "interpolation qualifiers may only precede the qualifiers in,
+ * centroid in, out, or centroid out in a declaration. They do not apply
+ * to the deprecated storage qualifiers varying or centroid varying."
+ *
+ * These deprecated storage qualifiers do not exist in GLSL ES 3.00.
+ */
+ if (state->is_version(130, 0)
+ && this->type->qualifier.has_interpolation()
+ && this->type->qualifier.flags.q.varying) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+ const char *s;
+ if (this->type->qualifier.flags.q.centroid)
+ s = "centroid varying";
+ else
+ s = "varying";
+
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to the "
+ "deprecated storage qualifier '%s'", i, s);
+ }
+
+
+ /* Interpolation qualifiers can only apply to vertex shader outputs and
+ * fragment shader inputs.
+ *
+ * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+ * "Outputs from a vertex shader (out) and inputs to a fragment
+ * shader (in) can be further qualified with one or more of these
+ * interpolation qualifiers"
+ *
+ * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec:
+ * "These interpolation qualifiers may only precede the qualifiers
+ * in, centroid in, out, or centroid out in a declaration. They do
+ * not apply to inputs into a vertex shader or outputs from a
+ * fragment shader."
+ */
+ if (state->is_version(130, 300)
+ && this->type->qualifier.has_interpolation()) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to vertex "
+ "shader inputs", i);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to fragment "
+ "shader outputs", i);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ /* From section 4.3.4 of the GLSL 4.00 spec:
+ * "Input variables may not be declared using the patch in qualifier
+ * in tessellation control or geometry shaders."
+ *
+ * From section 4.3.6 of the GLSL 4.00 spec:
+ * "It is an error to use patch out in a vertex, tessellation
+ * evaluation, or geometry shader."
+ *
+ * This doesn't explicitly forbid using them in a fragment shader, but
+ * that's probably just an oversight.
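+ *
+ * (Editor's illustration, not from the spec:) per-patch varyings are
+ * declared as
+ *
+ *     patch out vec4 corner_color;   // tessellation control shader
+ *     patch in vec4 corner_color;    // tessellation evaluation shader
+ *
+ * and any other combination of stage and direction is rejected below.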
+ */ + if (state->stage != MESA_SHADER_TESS_EVAL + && this->type->qualifier.flags.q.patch + && this->type->qualifier.flags.q.in) { + + _mesa_glsl_error(&loc, state, "'patch in' can only be used in a " + "tessellation evaluation shader"); + } + + if (state->stage != MESA_SHADER_TESS_CTRL + && this->type->qualifier.flags.q.patch + && this->type->qualifier.flags.q.out) { + + _mesa_glsl_error(&loc, state, "'patch out' can only be used in a " + "tessellation control shader"); + } + + /* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30. + */ + if (this->type->qualifier.precision != ast_precision_none) { + state->check_precision_qualifiers_allowed(&loc); + } + + + /* If a precision qualifier is allowed on a type, it is allowed on + * an array of that type. + */ + if (!(this->type->qualifier.precision == ast_precision_none + || precision_qualifier_allowed(var->type->without_array()))) { + + _mesa_glsl_error(&loc, state, + "precision qualifiers apply only to floating point" + ", integer and opaque types"); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "[Opaque types] can only be declared as function + * parameters or uniform-qualified variables." + */ + if (var_type->contains_opaque() && + !this->type->qualifier.flags.q.uniform) { + _mesa_glsl_error(&loc, state, + "opaque variables must be declared uniform"); + } + + /* Process the initializer and add its instructions to a temporary + * list. This list will be added to the instruction stream (below) after + * the declaration is added. This is done because in some cases (such as + * redeclarations) the declaration may not actually be added to the + * instruction stream. + */ + exec_list initializer_instructions; + + /* Examine var name here since var may get deleted in the next call */ + bool var_is_gl_id = is_gl_identifier(var->name); + + ir_variable *earlier = + get_variable_being_redeclared(var, decl->get_location(), state, + false /* allow_all_redeclarations */); + if (earlier != NULL) { + if (var_is_gl_id && + earlier->data.how_declared == ir_var_declared_in_block) { + _mesa_glsl_error(&loc, state, + "`%s' has already been redeclared using " + "gl_PerVertex", earlier->name); + } + earlier->data.how_declared = ir_var_declared_normally; + } + + if (decl->initializer != NULL) { + result = process_initializer((earlier == NULL) ? var : earlier, + decl, this->type, + &initializer_instructions, state); + } else { + validate_array_dimensions(var_type, state, &loc); + } + + /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec: + * + * "It is an error to write to a const variable outside of + * its declaration, so they must be initialized when + * declared." + */ + if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) { + _mesa_glsl_error(& loc, state, + "const declaration of `%s' must be initialized", + decl->identifier); + } + + if (state->es_shader) { + const glsl_type *const t = (earlier == NULL) + ? var->type : earlier->type; + + if (t->is_unsized_array()) + /* Section 10.17 of the GLSL ES 1.00 specification states that + * unsized array declarations have been removed from the language. + * Arrays that are sized using an initializer are still explicitly + * sized. However, GLSL ES 1.00 does not allow array + * initializers. That is only allowed in GLSL ES 3.00. 
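+ *
+ * (Editor's illustration:) so a declaration like 'uniform float w[];'
+ * is rejected here for any GLSL ES version, while in GLSL ES 3.00
+ * 'float w[] = float[](0.25, 0.75);' takes its size from the
+ * initializer and does not trigger this error.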
+ * + * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says: + * + * "An array type can also be formed without specifying a size + * if the definition includes an initializer: + * + * float x[] = float[2] (1.0, 2.0); // declares an array of size 2 + * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3 + * + * float a[5]; + * float b[] = a;" + */ + _mesa_glsl_error(& loc, state, + "unsized array declarations are not allowed in " + "GLSL ES"); + } + + /* If the declaration is not a redeclaration, there are a few additional + * semantic checks that must be applied. In addition, variable that was + * created for the declaration should be added to the IR stream. + */ + if (earlier == NULL) { + validate_identifier(decl->identifier, loc, state); + + /* Add the variable to the symbol table. Note that the initializer's + * IR was already processed earlier (though it hasn't been emitted + * yet), without the variable in scope. + * + * This differs from most C-like languages, but it follows the GLSL + * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50 + * spec: + * + * "Within a declaration, the scope of a name starts immediately + * after the initializer if present or immediately after the name + * being declared if not." + */ + if (!state->symbols->add_variable(var)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "name `%s' already taken in the " + "current scope", decl->identifier); + continue; + } + + /* Push the variable declaration to the top. It means that all the + * variable declarations will appear in a funny last-to-first order, + * but otherwise we run into trouble if a function is prototyped, a + * global var is decled, then the function is defined with usage of + * the global var. See glslparsertest's CorrectModule.frag. + */ + instructions->push_head(var); + } + + instructions->append_list(&initializer_instructions); + } + + + /* Generally, variable declarations do not have r-values. However, + * one is used for the declaration in + * + * while (bool b = some_condition()) { + * ... + * } + * + * so we return the rvalue from the last seen declaration here. + */ + return result; + } + + + ir_rvalue * + ast_parameter_declarator::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + const struct glsl_type *type; + const char *name = NULL; + YYLTYPE loc = this->get_location(); + + type = this->type->glsl_type(& name, state); + + if (type == NULL) { + if (name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + name, this->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + this->identifier); + } + + type = glsl_type::error_type; + } + + /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec: + * + * "Functions that accept no input arguments need not use void in the + * argument list because prototypes (or definitions) are required and + * therefore there is no ambiguity when an empty argument list "( )" is + * declared. The idiom "(void)" as a parameter list is provided for + * convenience." + * + * Placing this check here prevents a void parameter being set up + * for a function, which avoids tripping up checks for main taking + * parameters and lookups of an unnamed symbol. 
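+ *
+ * (Editor's illustration:) both 'float get();' and 'float get(void);'
+ * are accepted, while 'float get(void x);' is rejected below as a
+ * named parameter of type void.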
+ */ + if (type->is_void()) { + if (this->identifier != NULL) + _mesa_glsl_error(& loc, state, + "named parameter cannot have type `void'"); + + is_void = true; + return NULL; + } + + if (formal_parameter && (this->identifier == NULL)) { + _mesa_glsl_error(& loc, state, "formal parameter lacks a name"); + return NULL; + } + + /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...) + * call already handled the "vec4[..] foo" case. + */ + type = process_array_type(&loc, type, this->array_specifier, state); + + if (!type->is_error() && type->is_unsized_array()) { + _mesa_glsl_error(&loc, state, "arrays passed as parameters must have " + "a declared size"); + type = glsl_type::error_type; + } + + is_void = false; + ir_variable *var = new(ctx) + ir_variable(type, this->identifier, ir_var_function_in); + + /* Apply any specified qualifiers to the parameter declaration. Note that + * for function parameters the default mode is 'in'. + */ + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, + true); + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "out and inout parameters cannot " + "contain opaque variables"); + type = glsl_type::error_type; + } + + /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "When calling a function, expressions that do not evaluate to + * l-values cannot be passed to parameters declared as out or inout." + * + * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced arrays, + * function names, swizzles with repeated fields, and constants + * cannot be l-values." + * + * So for GLSL 1.10, passing an array as an out or inout parameter is not + * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES. + */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->is_array() + && !state->check_version(120, 100, &loc, + "arrays cannot be out or inout parameters")) { + type = glsl_type::error_type; + } + + instructions->push_tail(var); + + /* Parameter declarations do not have r-values. + */ + return NULL; + } + + + void + ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, + bool formal, + exec_list *ir_parameters, + _mesa_glsl_parse_state *state) + { + ast_parameter_declarator *void_param = NULL; + unsigned count = 0; + + foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) { + param->formal_parameter = formal; + param->hir(ir_parameters, state); + + if (param->is_void) + void_param = param; + + count++; + } + + if ((void_param != NULL) && (count > 1)) { + YYLTYPE loc = void_param->get_location(); + + _mesa_glsl_error(& loc, state, + "`void' parameter must be only parameter"); + } + } + + + void + emit_function(_mesa_glsl_parse_state *state, ir_function *f) + { + /* IR invariants disallow function declarations or definitions + * nested within other function definitions. But there is no + * requirement about the relative order of function declarations + * and definitions with respect to one another. So simply insert + * the new ir_function block at the end of the toplevel instruction + * list. 
+ */ + state->toplevel_ir->push_tail(f); + } + + + ir_rvalue * + ast_function::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + ir_function *f = NULL; + ir_function_signature *sig = NULL; + exec_list hir_parameters; + YYLTYPE loc = this->get_location(); + + const char *const name = identifier; + + /* New functions are always added to the top-level IR instruction stream, + * so this instruction list pointer is ignored. See also emit_function + * (called below). + */ + (void) instructions; + + /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec, + * + * "Function declarations (prototypes) cannot occur inside of functions; + * they must be at global scope, or for the built-in functions, outside + * the global scope." + * + * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec, + * + * "User defined functions may only be defined within the global scope." + * + * Note that this language does not appear in GLSL 1.10. + */ + if ((state->current_function != NULL) && + state->is_version(120, 100)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "declaration of function `%s' not allowed within " + "function body", name); + } + + validate_identifier(name, this->get_location(), state); + + /* Convert the list of function parameters to HIR now so that they can be + * used below to compare this function's signature with previously seen + * signatures for functions with the same name. + */ + ast_parameter_declarator::parameters_to_hir(& this->parameters, + is_definition, + & hir_parameters, state); + + const char *return_type_name; + const glsl_type *return_type = + this->return_type->glsl_type(& return_type_name, state); + + if (!return_type) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' has undeclared return type `%s'", + name, return_type_name); + return_type = glsl_type::error_type; + } + + /* ARB_shader_subroutine states: + * "Subroutine declarations cannot be prototyped. It is an error to prepend + * subroutine(...) to a function declaration." + */ + if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function declaration `%s' cannot have subroutine prepended", + name); + } + + /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: + * "No qualifier is allowed on the return type of a function." + */ + if (this->return_type->has_qualifiers(state)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type has qualifiers", name); + } + + /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says: + * + * "Arrays are allowed as arguments and as the return type. In both + * cases, the array must be explicitly sized." + */ + if (return_type->is_unsized_array()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type array must be explicitly " + "sized", name); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "[Opaque types] can only be declared as function parameters + * or uniform-qualified variables." + */ + if (return_type->contains_opaque()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' return type can't contain an opaque type", + name); + } + + /* Create an ir_function if one doesn't already exist. 
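+ *
+ * (Editor's note, for illustration:) overloads such as
+ *
+ *     float avg(float a, float b);
+ *     vec3 avg(vec3 a, vec3 b);
+ *
+ * share a single ir_function named "avg" that carries one
+ * ir_function_signature per parameter list.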
*/ + f = state->symbols->get_function(name); + if (f == NULL) { + f = new(ctx) ir_function(name); + if (!this->return_type->qualifier.flags.q.subroutine) { + if (!state->symbols->add_function(f)) { + /* This function name shadows a non-function use of the same name. */ + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "function name `%s' conflicts with " + "non-function", name); + return NULL; + } + } + emit_function(state, f); + } + + /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71: + * + * "A shader cannot redefine or overload built-in functions." + * + * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions": + * + * "User code can overload the built-in functions but cannot redefine + * them." + */ + if (state->es_shader && state->language_version >= 300) { + /* Local shader has no exact candidates; check the built-ins. */ + _mesa_glsl_initialize_builtin_functions(); + if (_mesa_glsl_find_builtin_function_by_name(name)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "A shader cannot redefine or overload built-in " + "function `%s' in GLSL ES 3.00", name); + return NULL; + } + } + + /* Verify that this function's signature either doesn't match a previously + * seen signature for a function with the same name, or, if a match is found, + * that the previously seen signature does not have an associated definition. + */ + if (state->es_shader || f->has_user_signature()) { + sig = f->exact_matching_signature(state, &hir_parameters); + if (sig != NULL) { + const char *badvar = sig->qualifiers_match(&hir_parameters); + if (badvar != NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' " + "qualifiers don't match prototype", name, badvar); + } + + if (sig->return_type != return_type) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(&loc, state, "function `%s' return type doesn't " + "match prototype", name); + } + + if (sig->is_defined) { + if (is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' redefined", name); + } else { + /* We just encountered a prototype that exactly matches a + * function that's already been defined. This is redundant, + * and we should ignore it. + */ + return NULL; + } + } + } + } + + /* Verify the return type of main() */ + if (strcmp(name, "main") == 0) { + if (! return_type->is_void()) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "main() must return void"); + } + + if (!hir_parameters.is_empty()) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "main() must not take any parameters"); + } + } + + /* Finish storing the information about this new function in its signature. 
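+ *
+ * (Editor's illustration, assuming ARB_shader_subroutine syntax:) a
+ * definition such as
+ *
+ *     subroutine vec4 colorize(vec4 c);   // subroutine type
+ *     subroutine(colorize) vec4 invert(vec4 c) { return vec4(1.0) - c; }
+ *
+ * reaches the subroutine handling below; the named subroutine type must
+ * already be declared or an "unknown type" error is emitted.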
+ */ + if (sig == NULL) { + sig = new(ctx) ir_function_signature(return_type); + f->add_signature(sig); + } + + sig->replace_parameters(&hir_parameters); + signature = sig; + + if (this->return_type->qualifier.flags.q.subroutine_def) { + int idx; + + if (this->return_type->qualifier.flags.q.explicit_index) { + unsigned qual_index; + if (process_qualifier_constant(state, &loc, "index", + this->return_type->qualifier.index, + &qual_index)) { + if (!state->has_explicit_uniform_location()) { + _mesa_glsl_error(&loc, state, "subroutine index requires " + "GL_ARB_explicit_uniform_location or " + "GLSL 4.30"); + } else if (qual_index >= MAX_SUBROUTINES) { + _mesa_glsl_error(&loc, state, + "invalid subroutine index (%d) index must " + "be a number between 0 and " + "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, + MAX_SUBROUTINES - 1); + } else { + f->subroutine_index = qual_index; + } + } + } + + f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); + f->subroutine_types = ralloc_array(state, const struct glsl_type *, + f->num_subroutine_types); + idx = 0; + foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) { + const struct glsl_type *type; + /* the subroutine type must be already declared */ + type = state->symbols->get_type(decl->identifier); + if (!type) { + _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier); + } + f->subroutine_types[idx++] = type; + } + state->subroutines = (ir_function **)reralloc(state, state->subroutines, + ir_function *, + state->num_subroutines + 1); + state->subroutines[state->num_subroutines] = f; + state->num_subroutines++; + + } + + if (this->return_type->qualifier.flags.q.subroutine) { + if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) { + _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier); + return NULL; + } + state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types, + ir_function *, + state->num_subroutine_types + 1); + state->subroutine_types[state->num_subroutine_types] = f; + state->num_subroutine_types++; + + f->is_subroutine = true; + } + + /* Function declarations (prototypes) do not have r-values. + */ + return NULL; + } + + + ir_rvalue * + ast_function_definition::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + prototype->is_definition = true; + prototype->hir(instructions, state); + + ir_function_signature *signature = prototype->signature; + if (signature == NULL) + return NULL; + + assert(state->current_function == NULL); + state->current_function = signature; + state->found_return = false; + + /* Duplicate parameters declared in the prototype as concrete variables. + * Add these to the symbol table. + */ + state->symbols->push_scope(); + foreach_in_list(ir_variable, var, &signature->parameters) { + assert(var->as_variable() != NULL); + + /* The only way a parameter would "exist" is if two parameters have + * the same name. + */ + if (state->symbols->name_declared_this_scope(var->name)) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name); + } else { + state->symbols->add_variable(var); + } + } + + /* Convert the body of the function to HIR. 
*/ + this->body->hir(&signature->body, state); + signature->is_defined = true; + + state->symbols->pop_scope(); + + assert(state->current_function == signature); + state->current_function = NULL; + + if (!signature->return_type->is_void() && !state->found_return) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' has non-void return type " + "%s, but no return statement", + signature->function_name(), + signature->return_type->name); + } + + /* Function definitions do not have r-values. + */ + return NULL; + } + + + ir_rvalue * + ast_jump_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + switch (mode) { + case ast_return: { + ir_return *inst; + assert(state->current_function); + + if (opt_return_value) { + ir_rvalue *ret = opt_return_value->hir(instructions, state); + + /* The value of the return type can be NULL if the shader says + * 'return foo();' and foo() is a function that returns void. + * + * NOTE: The GLSL spec doesn't say that this is an error. The type + * of the return value is void. If the return type of the function is + * also void, then this should compile without error. Seriously. + */ + const glsl_type *const ret_type = + (ret == NULL) ? glsl_type::void_type : ret->type; + + /* Implicit conversions are not allowed for return values prior to + * ARB_shading_language_420pack. + */ + if (state->current_function->return_type != ret_type) { + YYLTYPE loc = this->get_location(); + + if (state->has_420pack()) { + if (!apply_implicit_conversion(state->current_function->return_type, + ret, state)) { + _mesa_glsl_error(& loc, state, + "could not implicitly convert return value " + "to %s, in function `%s'", + state->current_function->return_type->name, + state->current_function->function_name()); + } + } else { + _mesa_glsl_error(& loc, state, + "`return' with wrong type %s, in function `%s' " + "returning %s", + ret_type->name, + state->current_function->function_name(), + state->current_function->return_type->name); + } + } else if (state->current_function->return_type->base_type == + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20 + * specs add a clarification: + * + * "A void function can only use return without a return argument, even if + * the return argument has void type. 
Return statements only accept values: + * + * void func1() { } + * void func2() { return func1(); } // illegal return statement" + */ + _mesa_glsl_error(& loc, state, + "void functions can only use `return' without a " + "return argument"); + } + + inst = new(ctx) ir_return(ret); + } else { + if (state->current_function->return_type->base_type != + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`return' with no value, in function %s returning " + "non-void", + state->current_function->function_name()); + } + inst = new(ctx) ir_return; + } + + state->found_return = true; + instructions->push_tail(inst); + break; + } + + case ast_discard: + if (state->stage != MESA_SHADER_FRAGMENT) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`discard' may only appear in a fragment shader"); + } + instructions->push_tail(new(ctx) ir_discard); + break; + + case ast_break: + case ast_continue: + if (mode == ast_continue && + state->loop_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "continue may only appear in a loop"); + } else if (mode == ast_break && + state->loop_nesting_ast == NULL && + state->switch_state.switch_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "break may only appear in a loop or a switch"); + } else { + /* For a loop, inline the for loop expression again, since we don't + * know where near the end of the loop body the normal copy of it is + * going to be placed. Same goes for the condition for a do-while + * loop. + */ + if (state->loop_nesting_ast != NULL && + mode == ast_continue && !state->switch_state.is_switch_innermost) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(instructions, state); + } + } + + if (state->switch_state.is_switch_innermost && + mode == ast_continue) { + /* Set 'continue_inside' to true. */ + ir_rvalue *const true_val = new (ctx) ir_constant(true); + ir_dereference_variable *deref_continue_inside_var = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, + true_val)); + + /* Break out from the switch, continue for the loop will + * be called right after switch. */ + ir_loop_jump *const jump = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + instructions->push_tail(jump); + + } else if (state->switch_state.is_switch_innermost && + mode == ast_break) { + /* Force break out of switch by inserting a break. */ + ir_loop_jump *const jump = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + instructions->push_tail(jump); + } else { + ir_loop_jump *const jump = + new(ctx) ir_loop_jump((mode == ast_break) + ? ir_loop_jump::jump_break + : ir_loop_jump::jump_continue); + instructions->push_tail(jump); + } + } + + break; + } + + /* Jump instructions do not have r-values. + */ + return NULL; + } + + + ir_rvalue * + ast_selection_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + ir_rvalue *const condition = this->condition->hir(instructions, state); + + /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec: + * + * "Any expression whose type evaluates to a Boolean can be used as the + * conditional expression bool-expression. 
Vector types are not accepted + * as the expression to if." + * + * The checks are separated so that higher quality diagnostics can be + * generated for cases where both rules are violated. + */ + if (!condition->type->is_boolean() || !condition->type->is_scalar()) { + YYLTYPE loc = this->condition->get_location(); + + _mesa_glsl_error(& loc, state, "if-statement condition must be scalar " + "boolean"); + } + + ir_if *const stmt = new(ctx) ir_if(condition); + + if (then_statement != NULL) { + state->symbols->push_scope(); + then_statement->hir(& stmt->then_instructions, state); + state->symbols->pop_scope(); + } + + if (else_statement != NULL) { + state->symbols->push_scope(); + else_statement->hir(& stmt->else_instructions, state); + state->symbols->pop_scope(); + } + + instructions->push_tail(stmt); + + /* if-statements do not have r-values. + */ + return NULL; + } + + + ir_rvalue * + ast_switch_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + ir_rvalue *const test_expression = + this->test_expression->hir(instructions, state); + + /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec: + * + * "The type of init-expression in a switch statement must be a + * scalar integer." + */ + if (!test_expression->type->is_scalar() || + !test_expression->type->is_integer()) { + YYLTYPE loc = this->test_expression->get_location(); + + _mesa_glsl_error(& loc, + state, + "switch-statement expression must be scalar " + "integer"); + } + + /* Track the switch-statement nesting in a stack-like manner. + */ + struct glsl_switch_state saved = state->switch_state; + + state->switch_state.is_switch_innermost = true; + state->switch_state.switch_nesting_ast = this; + state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + state->switch_state.previous_default = NULL; + + /* Initalize is_fallthru state to false. + */ + ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false); + state->switch_state.is_fallthru_var = + new(ctx) ir_variable(glsl_type::bool_type, + "switch_is_fallthru_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.is_fallthru_var); + + ir_dereference_variable *deref_is_fallthru_var = + new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var, + is_fallthru_val)); + + /* Initialize continue_inside state to false. + */ + state->switch_state.continue_inside = + new(ctx) ir_variable(glsl_type::bool_type, + "continue_inside_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.continue_inside); + + ir_rvalue *const false_val = new (ctx) ir_constant(false); + ir_dereference_variable *deref_continue_inside_var = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, + false_val)); + + state->switch_state.run_default = + new(ctx) ir_variable(glsl_type::bool_type, + "run_default_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.run_default); + + /* Loop around the switch is used for flow control. */ + ir_loop * loop = new(ctx) ir_loop(); + instructions->push_tail(loop); + + /* Cache test expression. + */ + test_to_hir(&loop->body_instructions, state); + + /* Emit code for body of switch stmt. + */ + body->hir(&loop->body_instructions, state); + + /* Insert a break at the end to exit loop. 
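+ *
+ * (Editor's sketch of the overall lowering, for illustration only:)
+ *
+ *     switch (x) { case 1: a(); case 2: b(); break; default: c(); }
+ *
+ * becomes, roughly,
+ *
+ *     loop {
+ *        if (x == 1) fallthru = true;
+ *        if (fallthru) a();
+ *        if (x == 2) fallthru = true;
+ *        if (fallthru) { b(); break; }
+ *        run_default = true;
+ *        if (run_default) fallthru = true;
+ *        if (fallthru) c();
+ *        break;
+ *     }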
*/ + ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + loop->body_instructions.push_tail(jump); + + /* If we are inside loop, check if continue got called inside switch. */ + if (state->loop_nesting_ast != NULL) { + ir_dereference_variable *deref_continue_inside = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + ir_if *irif = new(ctx) ir_if(deref_continue_inside); + ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue); + + if (state->loop_nesting_ast != NULL) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state); + } + } + irif->then_instructions.push_tail(jump); + instructions->push_tail(irif); + } + + hash_table_dtor(state->switch_state.labels_ht); + + state->switch_state = saved; + + /* Switch statements do not have r-values. */ + return NULL; + } + + + void + ast_switch_statement::test_to_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + /* Cache value of test expression. */ + ir_rvalue *const test_val = + test_expression->hir(instructions, + state); + + state->switch_state.test_var = new(ctx) ir_variable(test_val->type, + "switch_test_tmp", + ir_var_temporary); + ir_dereference_variable *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); + + instructions->push_tail(state->switch_state.test_var); + instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val)); + } + + + ir_rvalue * + ast_switch_body::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + if (stmts != NULL) + stmts->hir(instructions, state); + + /* Switch bodies do not have r-values. */ + return NULL; + } + + ir_rvalue * + ast_case_statement_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + exec_list default_case, after_default, tmp; + + foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) { + case_stmt->hir(&tmp, state); + + /* Default case. */ + if (state->switch_state.previous_default && default_case.is_empty()) { + default_case.append_list(&tmp); + continue; + } + + /* If default case found, append 'after_default' list. */ + if (!default_case.is_empty()) + after_default.append_list(&tmp); + else + instructions->append_list(&tmp); + } + + /* Handle the default case. This is done here because default might not be + * the last case. We need to add checks against following cases first to see + * if default should be chosen or not. + */ + if (!default_case.is_empty()) { + + ir_rvalue *const true_val = new (state) ir_constant(true); + ir_dereference_variable *deref_run_default_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + + /* Choose to run default case initially, following conditional + * assignments might change this. + */ + ir_assignment *const init_var = + new(state) ir_assignment(deref_run_default_var, true_val); + instructions->push_tail(init_var); + + /* Default case was the last one, no checks required. */ + if (after_default.is_empty()) { + instructions->append_list(&default_case); + return NULL; + } + + foreach_in_list(ir_instruction, ir, &after_default) { + ir_assignment *assign = ir->as_assignment(); + + if (!assign) + continue; + + /* Clone the check between case label and init expression. 
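+ *
+ * (Editor's note, for illustration:) if 'case 3:' appears after the
+ * default label, the guarded assignment generated for it carries the
+ * condition (3 == switch_test_tmp); the clone reuses that condition to
+ * clear run_default, so the default body is skipped whenever a later
+ * case matches.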
*/ + ir_expression *exp = (ir_expression*) assign->condition; + ir_expression *clone = exp->clone(state, NULL); + + ir_dereference_variable *deref_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + ir_rvalue *const false_val = new (state) ir_constant(false); + + ir_assignment *const set_false = + new(state) ir_assignment(deref_var, false_val, clone); + + instructions->push_tail(set_false); + } + + /* Append default case and all cases after it. */ + instructions->append_list(&default_case); + instructions->append_list(&after_default); + } + + /* Case statements do not have r-values. */ + return NULL; + } + + ir_rvalue * + ast_case_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + labels->hir(instructions, state); + + /* Guard case statements depending on fallthru state. */ + ir_dereference_variable *const deref_fallthru_guard = + new(state) ir_dereference_variable(state->switch_state.is_fallthru_var); + ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard); + + foreach_list_typed (ast_node, stmt, link, & this->stmts) + stmt->hir(& test_fallthru->then_instructions, state); + + instructions->push_tail(test_fallthru); + + /* Case statements do not have r-values. */ + return NULL; + } + + + ir_rvalue * + ast_case_label_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + foreach_list_typed (ast_case_label, label, link, & this->labels) + label->hir(instructions, state); + + /* Case labels do not have r-values. */ + return NULL; + } + + ir_rvalue * + ast_case_label::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + ir_dereference_variable *deref_fallthru_var = + new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + + ir_rvalue *const true_val = new(ctx) ir_constant(true); + + /* If not default case, ... */ + if (this->test_value != NULL) { + /* Conditionally set fallthru state based on + * comparison of cached test expression value to case label. + */ + ir_rvalue *const label_rval = this->test_value->hir(instructions, state); + ir_constant *label_const = label_rval->constant_expression_value(); + + if (!label_const) { + YYLTYPE loc = this->test_value->get_location(); + + _mesa_glsl_error(& loc, state, + "switch statement case label must be a " + "constant expression"); + + /* Stuff a dummy value in to allow processing to continue. */ + label_const = new(ctx) ir_constant(0); + } else { + ast_expression *previous_label = (ast_expression *) + hash_table_find(state->switch_state.labels_ht, + (void *)(uintptr_t)label_const->value.u[0]); + + if (previous_label) { + YYLTYPE loc = this->test_value->get_location(); + _mesa_glsl_error(& loc, state, "duplicate case value"); + + loc = previous_label->get_location(); + _mesa_glsl_error(& loc, state, "this is the previous case label"); + } else { + hash_table_insert(state->switch_state.labels_ht, + this->test_value, + (void *)(uintptr_t)label_const->value.u[0]); + } + } + + ir_dereference_variable *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); + + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + label_const, + deref_test_var); + + /* + * From GLSL 4.40 specification section 6.2 ("Selection"): + * + * "The type of the init-expression value in a switch statement must + * be a scalar int or uint. The type of the constant-expression value + * in a case label also must be a scalar int or uint. 
When any pair + * of these values is tested for "equal value" and the types do not + * match, an implicit conversion will be done to convert the int to a + * uint (see section 4.1.10 “Implicit Conversions”) before the compare + * is done." + */ + if (label_const->type != state->switch_state.test_var->type) { + YYLTYPE loc = this->test_value->get_location(); + + const glsl_type *type_a = label_const->type; + const glsl_type *type_b = state->switch_state.test_var->type; + + /* Check if int->uint implicit conversion is supported. */ + bool integer_conversion_supported = + glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type, + state); + + if ((!type_a->is_integer() || !type_b->is_integer()) || + !integer_conversion_supported) { + _mesa_glsl_error(&loc, state, "type mismatch with switch " + "init-expression and case label (%s != %s)", + type_a->name, type_b->name); + } else { + /* Conversion of the case label. */ + if (type_a->base_type == GLSL_TYPE_INT) { + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[0], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } else { + /* Conversion of the init-expression value. */ + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[1], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } + } + } + + ir_assignment *set_fallthru_on_test = + new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + + instructions->push_tail(set_fallthru_on_test); + } else { /* default case */ + if (state->switch_state.previous_default) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "multiple default labels in one switch"); + + loc = state->switch_state.previous_default->get_location(); + _mesa_glsl_error(& loc, state, "this is the first default label"); + } + state->switch_state.previous_default = this; + + /* Set fallthru condition on 'run_default' bool. */ + ir_dereference_variable *deref_run_default = + new(ctx) ir_dereference_variable(state->switch_state.run_default); + ir_rvalue *const cond_true = new(ctx) ir_constant(true); + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + cond_true, + deref_run_default); + + /* Set falltrhu state. */ + ir_assignment *set_fallthru = + new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + + instructions->push_tail(set_fallthru); + } + + /* Case statements do not have r-values. */ + return NULL; + } + + void + ast_iteration_statement::condition_to_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + if (condition != NULL) { + ir_rvalue *const cond = + condition->hir(instructions, state); + + if ((cond == NULL) + || !cond->type->is_boolean() || !cond->type->is_scalar()) { + YYLTYPE loc = condition->get_location(); + + _mesa_glsl_error(& loc, state, + "loop condition must be scalar boolean"); + } else { + /* As the first code in the loop body, generate a block that looks + * like 'if (!condition) break;' as the loop termination condition. 
+ */ + ir_rvalue *const not_cond = + new(ctx) ir_expression(ir_unop_logic_not, cond); + + ir_if *const if_stmt = new(ctx) ir_if(not_cond); + + ir_jump *const break_stmt = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(break_stmt); + instructions->push_tail(if_stmt); + } + } + } + + + ir_rvalue * + ast_iteration_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + void *ctx = state; + + /* For-loops and while-loops start a new scope, but do-while loops do not. + */ + if (mode != ast_do_while) + state->symbols->push_scope(); + + if (init_statement != NULL) + init_statement->hir(instructions, state); + + ir_loop *const stmt = new(ctx) ir_loop(); + instructions->push_tail(stmt); + + /* Track the current loop nesting. */ + ast_iteration_statement *nesting_ast = state->loop_nesting_ast; + + state->loop_nesting_ast = this; + + /* Likewise, indicate that following code is closest to a loop, + * NOT closest to a switch. + */ + bool saved_is_switch_innermost = state->switch_state.is_switch_innermost; + state->switch_state.is_switch_innermost = false; + + if (mode != ast_do_while) + condition_to_hir(&stmt->body_instructions, state); + + if (body != NULL) + body->hir(& stmt->body_instructions, state); + + if (rest_expression != NULL) + rest_expression->hir(& stmt->body_instructions, state); + + if (mode == ast_do_while) + condition_to_hir(&stmt->body_instructions, state); + + if (mode != ast_do_while) + state->symbols->pop_scope(); + + /* Restore previous nesting before returning. */ + state->loop_nesting_ast = nesting_ast; + state->switch_state.is_switch_innermost = saved_is_switch_innermost; + + /* Loops do not have r-values. + */ + return NULL; + } + + + /** + * Determine if the given type is valid for establishing a default precision + * qualifier. + * + * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"): + * + * "The precision statement + * + * precision precision-qualifier type; + * + * can be used to establish a default precision qualifier. The type field + * can be either int or float or any of the sampler types, and the + * precision-qualifier can be lowp, mediump, or highp." + * + * GLSL ES 1.00 has similar language. GLSL 1.30 doesn't allow precision + * qualifiers on sampler types, but this seems like an oversight (since the + * intention of including these in GLSL 1.30 is to allow compatibility with ES + * shaders). So we allow int, float, and all sampler types regardless of GLSL + * version. + */ + static bool + is_valid_default_precision_type(const struct glsl_type *const type) + { + if (type == NULL) + return false; + + switch (type->base_type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + /* "int" and "float" are valid, but vectors and matrices are not. */ + return type->vector_elements == 1 && type->matrix_columns == 1; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + default: + return false; + } + } + + + ir_rvalue * + ast_type_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + if (this->default_precision == ast_precision_none && this->structure == NULL) + return NULL; + + YYLTYPE loc = this->get_location(); + + /* If this is a precision statement, check that the type to which it is + * applied is either float or int. + * + * From section 4.5.3 of the GLSL 1.30 spec: + * "The precision statement + * precision precision-qualifier type; + * can be used to establish a default precision qualifier. 
The type + * field can be either int or float [...]. Any other types or + * qualifiers will result in an error. + */ + if (this->default_precision != ast_precision_none) { + if (!state->check_precision_qualifiers_allowed(&loc)) + return NULL; + + if (this->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers do not apply to structures"); + return NULL; + } + + if (this->array_specifier != NULL) { + _mesa_glsl_error(&loc, state, + "default precision statements do not apply to " + "arrays"); + return NULL; + } + + const struct glsl_type *const type = + state->symbols->get_type(this->type_name); + if (!is_valid_default_precision_type(type)) { + _mesa_glsl_error(&loc, state, + "default precision statements apply only to " + "float, int, and opaque types"); + return NULL; + } + + if (state->es_shader) { + /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00 + * spec says: + * + * "Non-precision qualified declarations will use the precision + * qualifier specified in the most recent precision statement + * that is still in scope. The precision statement has the same + * scoping rules as variable declarations. If it is declared + * inside a compound statement, its effect stops at the end of + * the innermost statement it was declared in. Precision + * statements in nested scopes override precision statements in + * outer scopes. Multiple precision statements for the same basic + * type can appear inside the same scope, with later statements + * overriding earlier statements within that scope." + * + * Default precision specifications follow the same scope rules as + * variables. So, we can track the state of the default precision + * qualifiers in the symbol table, and the rules will just work. This + * is a slight abuse of the symbol table, but it has the semantics + * that we want. + */ + state->symbols->add_default_precision_qualifier(this->type_name, + this->default_precision); + } + + /* FINISHME: Translate precision statements into IR. */ + return NULL; + } + + /* _mesa_ast_set_aggregate_type() sets the field so that + * process_record_constructor() can do type-checking on C-style initializer + * expressions of structs, but ast_struct_specifier should only be translated + * to HIR if it is declaring the type of a structure. + * + * The ->is_declaration field is false for initializers of variables + * declared separately from the struct's type definition. + * + * struct S { ... }; (is_declaration = true) + * struct T { ... } t = { ... }; (is_declaration = true) + * S s = { ... }; (is_declaration = false) + */ + if (this->structure != NULL && this->structure->is_declaration) + return this->structure->hir(instructions, state); + + return NULL; + } + + + /** + * Process a structure or interface block tree into an array of structure fields + * + * After parsing, where there are some syntax differnces, structures and + * interface blocks are almost identical. They are similar enough that the + * AST for each can be processed the same way into a set of + * \c glsl_struct_field to describe the members. + * + * If we're processing an interface block, var_mode should be the type of the + * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or + * ir_var_shader_storage). If we're processing a structure, var_mode should be + * ir_var_auto. + * + * \return + * The number of fields processed. A pointer to the array structure fields is + * stored in \c *fields_ret. 
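+ *
+ * (Editor's illustration:) for a block such as
+ *
+ *     uniform Light { vec3 color; float attenuation[2]; };
+ *
+ * this returns 2, and *fields_ret describes the members 'color' and
+ * 'attenuation'.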
+ */ + static unsigned + ast_process_struct_or_iface_block_members(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + glsl_struct_field **fields_ret, + bool is_interface, + enum glsl_matrix_layout matrix_layout, + bool allow_reserved_names, + ir_variable_mode var_mode, + ast_type_qualifier *layout, + unsigned block_stream, + unsigned expl_location) + { + unsigned decl_count = 0; + + /* Make an initial pass over the list of fields to determine how + * many there are. Each element in this list is an ast_declarator_list. + * This means that we actually need to count the number of elements in the + * 'declarations' list in each of the elements. + */ + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { + decl_count += decl_list->declarations.length(); + } + + /* Allocate storage for the fields and process the field + * declarations. As the declarations are processed, try to also convert + * the types to HIR. This ensures that structure definitions embedded in + * other structure definitions or in interface blocks are processed. + */ + glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field, + decl_count); + + bool first_member = true; + bool first_member_has_explicit_location; + + unsigned i = 0; + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { + const char *type_name; + YYLTYPE loc = decl_list->get_location(); + + decl_list->type->specifier->hir(instructions, state); + + /* Section 10.9 of the GLSL ES 1.00 specification states that + * embedded structure definitions have been removed from the language. + */ + if (state->es_shader && decl_list->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, "embedded structure definitions are " + "not allowed in GLSL ES 1.00"); + } + + const glsl_type *decl_type = + decl_list->type->glsl_type(& type_name, state); + + const struct ast_type_qualifier *const qual = + &decl_list->type->qualifier; + + /* From section 4.3.9 of the GLSL 4.40 spec: + * + * "[In interface blocks] opaque types are not allowed." + * + * It should be impossible for decl_type to be NULL here. Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: + * + * "Members of structures cannot be declared as atomic counter + * types." + */ + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } + + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. 
+ */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } + + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } + + if (is_interface) { + if (!first_member) { + if (!layout->flags.q.explicit_location && + ((first_member_has_explicit_location && + !qual->flags.q.explicit_location) || + (!first_member_has_explicit_location && + qual->flags.q.explicit_location))) { + _mesa_glsl_error(&loc, state, + "when block-level location layout qualifier " + "is not supplied either all members must " + "have a location layout qualifier or all " + "members must not have a location layout " + "qualifier"); + } + } else { + first_member = false; + first_member_has_explicit_location = + qual->flags.q.explicit_location; + } + } + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." + */ + if (qual->flags.q.explicit_stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, &loc, "stream", + qual->stream, &qual_stream) && + qual_stream != block_stream) { + _mesa_glsl_error(&loc, state, "stream layout qualifier on " + "interface block member does not match " + "the interface block (%u vs %u)", qual_stream, + block_stream); + } + } + + if (qual->flags.q.uniform && qual->has_interpolation()) { + _mesa_glsl_error(&loc, state, + "interpolation qualifiers cannot be used " + "with uniform interface blocks"); + } + + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { + _mesa_glsl_error(&loc, state, + "row_major and column_major can only be " + "applied to interface blocks"); + } else + validate_matrix_layout_for_type(state, &loc, decl_type, NULL); + } + + if (qual->flags.q.read_only && qual->flags.q.write_only) { + _mesa_glsl_error(&loc, state, "buffer variable can't be both " + "readonly and writeonly."); + } + + foreach_list_typed (ast_declaration, decl, link, + &decl_list->declarations) { + YYLTYPE loc = decl->get_location(); + + if (!allow_reserved_names) + validate_identifier(decl->identifier, loc, state); + + const struct glsl_type *field_type = + process_array_type(&loc, decl_type, decl->array_specifier, state); + validate_array_dimensions(field_type, state, &loc); + fields[i].type = field_type; + fields[i].name = decl->identifier; + fields[i].interpolation = + interpret_interpolation_qualifier(qual, var_mode, state, &loc); + fields[i].centroid = qual->flags.q.centroid ? 1 : 0; + fields[i].sample = qual->flags.q.sample ? 1 : 0; + fields[i].patch = qual->flags.q.patch ? 
1 : 0; + fields[i].precision = qual->precision; + + if (qual->flags.q.explicit_location) { + unsigned qual_location; + if (process_qualifier_constant(state, &loc, "location", + qual->location, &qual_location)) { + fields[i].location = VARYING_SLOT_VAR0 + qual_location; + expl_location = fields[i].location + + fields[i].type->count_attribute_slots(false); + } + } else { + if (layout && layout->flags.q.explicit_location) { + fields[i].location = expl_location; + expl_location += fields[i].type->count_attribute_slots(false); + } else { + fields[i].location = -1; + } + } + + /* Propogate row- / column-major information down the fields of the + * structure or interface block. Structures need this data because + * the structure may contain a structure that contains ... a matrix + * that need the proper layout. + */ + if (field_type->without_array()->is_matrix() + || field_type->without_array()->is_record()) { + /* If no layout is specified for the field, inherit the layout + * from the block. + */ + fields[i].matrix_layout = matrix_layout; + + if (qual->flags.q.row_major) + fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR; + else if (qual->flags.q.column_major) + fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR; + + /* If we're processing an interface block, the matrix layout must + * be decided by this point. + */ + assert(!is_interface + || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR + || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR); + } + + /* Image qualifiers are allowed on buffer variables, which can only + * be defined inside shader storage buffer objects + */ + if (layout && var_mode == ir_var_shader_storage) { + /* For readonly and writeonly qualifiers the field definition, + * if set, overwrites the layout qualifier. + */ + if (qual->flags.q.read_only) { + fields[i].image_read_only = true; + fields[i].image_write_only = false; + } else if (qual->flags.q.write_only) { + fields[i].image_read_only = false; + fields[i].image_write_only = true; + } else { + fields[i].image_read_only = layout->flags.q.read_only; + fields[i].image_write_only = layout->flags.q.write_only; + } + + /* For other qualifiers, we set the flag if either the layout + * qualifier or the field qualifier are set + */ + fields[i].image_coherent = qual->flags.q.coherent || + layout->flags.q.coherent; + fields[i].image_volatile = qual->flags.q._volatile || + layout->flags.q._volatile; + fields[i].image_restrict = qual->flags.q.restrict_flag || + layout->flags.q.restrict_flag; + } + + i++; + } + } + + assert(i == decl_count); + + *fields_ret = fields; + return decl_count; + } + + + ir_rvalue * + ast_struct_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + YYLTYPE loc = this->get_location(); + + /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says: + * + * "Anonymous structures are not supported; so embedded structures must + * have a declarator. A name given to an embedded struct is scoped at + * the same level as the struct it is embedded in." + * + * The same section of the GLSL 1.20 spec says: + * + * "Anonymous structures are not supported. Embedded structures are not + * supported. + * + * struct S { float f; }; + * struct T { + * S; // Error: anonymous structures disallowed + * struct { ... }; // Error: embedded structures disallowed + * S s; // Okay: nested structures with name are allowed + * };" + * + * The GLSL ES 1.00 and 3.00 specs have similar langauge and examples. So, + * we allow embedded structures in 1.10 only. 
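+ *
+ * (Editor's illustration:) e.g.
+ *
+ *     struct Outer { struct Inner { float f; } inner; };
+ *
+ * is accepted here only when the shading language version is exactly 1.10.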
+ */ + if (state->language_version != 110 && state->struct_specifier_depth != 0) + _mesa_glsl_error(&loc, state, + "embedded structure declarations are not allowed"); + + state->struct_specifier_depth++; + + unsigned expl_location = 0; + if (layout && layout->flags.q.explicit_location) { + if (!process_qualifier_constant(state, &loc, "location", + layout->location, &expl_location)) { + return NULL; + } else { + expl_location = VARYING_SLOT_VAR0 + expl_location; + } + } + + glsl_struct_field *fields; + unsigned decl_count = + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + layout, + 0, /* for interface only */ + expl_location); + + validate_identifier(this->name, loc, state); + + const glsl_type *t = + glsl_type::get_record_instance(fields, decl_count, this->name); + + if (!state->symbols->add_type(name, t)) { + _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); + } else { + const glsl_type **s = reralloc(state, state->user_structures, + const glsl_type *, + state->num_user_structures + 1); + if (s != NULL) { + s[state->num_user_structures] = t; + state->user_structures = s; + state->num_user_structures++; + } + } + + state->struct_specifier_depth--; + + /* Structure type definitions do not have r-values. + */ + return NULL; + } + + + /** + * Visitor class which detects whether a given interface block has been used. + */ + class interface_block_usage_visitor : public ir_hierarchical_visitor + { + public: + interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block) + : mode(mode), block(block), found(false) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) { + found = true; + return visit_stop; + } + return visit_continue; + } + + bool usage_found() const + { + return this->found; + } + + private: + ir_variable_mode mode; + const glsl_type *block; + bool found; + }; + + static bool + is_unsized_array_last_element(ir_variable *v) + { + const glsl_type *interface_type = v->get_interface_type(); + int length = interface_type->length; + + assert(v->type->is_unsized_array()); + + /* Check if it is the last element of the interface */ + if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0) + return true; + return false; + } + + ir_rvalue * + ast_interface_block::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + YYLTYPE loc = this->get_location(); + + /* Interface blocks must be declared at global scope */ + if (state->current_function != NULL) { + _mesa_glsl_error(&loc, state, + "Interface block `%s' must be declared " + "at global scope", + this->block_name); + } + + if (!this->layout.flags.q.buffer && + this->layout.flags.q.std430) { + _mesa_glsl_error(&loc, state, + "std430 storage block layout qualifier is supported " + "only for shader storage blocks"); + } + + /* The ast_interface_block has a list of ast_declarator_lists. We + * need to turn those into ir_variables with an association + * with this uniform block. + */ + enum glsl_interface_packing packing; + if (this->layout.flags.q.shared) { + packing = GLSL_INTERFACE_PACKING_SHARED; + } else if (this->layout.flags.q.packed) { + packing = GLSL_INTERFACE_PACKING_PACKED; + } else if (this->layout.flags.q.std430) { + packing = GLSL_INTERFACE_PACKING_STD430; + } else { + /* The default layout is std140. 
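+ * Illustrative consequence of the checks above: a hypothetical
+ * "layout(std430) uniform U { vec4 v; };" is rejected, whereas
+ * "layout(std430) buffer B { vec4 v; };" is accepted; when none of
+ * shared, packed or std430 remains set we fall back to std140 here.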
+ */ + packing = GLSL_INTERFACE_PACKING_STD140; + } + + ir_variable_mode var_mode; + const char *iface_type_name; + if (this->layout.flags.q.in) { + var_mode = ir_var_shader_in; + iface_type_name = "in"; + } else if (this->layout.flags.q.out) { + var_mode = ir_var_shader_out; + iface_type_name = "out"; + } else if (this->layout.flags.q.uniform) { + var_mode = ir_var_uniform; + iface_type_name = "uniform"; + } else if (this->layout.flags.q.buffer) { + var_mode = ir_var_shader_storage; + iface_type_name = "buffer"; + } else { + var_mode = ir_var_auto; + iface_type_name = "UNKNOWN"; + assert(!"interface block layout qualifier not found!"); + } + + enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED; + if (this->layout.flags.q.row_major) + matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR; + else if (this->layout.flags.q.column_major) + matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR; + + bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0; + exec_list declared_variables; + glsl_struct_field *fields; + + /* Treat an interface block as one level of nesting, so that embedded struct + * specifiers will be disallowed. + */ + state->struct_specifier_depth++; + + /* For blocks that accept memory qualifiers (i.e. shader storage), verify + * that we don't have incompatible qualifiers + */ + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { + _mesa_glsl_error(&loc, state, + "Interface block sets both readonly and writeonly"); + } + + unsigned qual_stream; + if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream, + &qual_stream) || + !validate_stream_qualifier(&loc, state, qual_stream)) { + /* If the stream qualifier is invalid it doesn't make sense to continue + * on and try to compare stream layouts on member variables against it + * so just return early. + */ + return NULL; + } + + unsigned expl_location = 0; + if (layout.flags.q.explicit_location) { + if (!process_qualifier_constant(state, &loc, "location", + layout.location, &expl_location)) { + return NULL; + } else { + expl_location = VARYING_SLOT_VAR0 + expl_location; + } + } + + unsigned int num_variables = + ast_process_struct_or_iface_block_members(&declared_variables, + state, + &this->declarations, + &fields, + true, + matrix_layout, + redeclaring_per_vertex, + var_mode, + &this->layout, + qual_stream, + expl_location); + + state->struct_specifier_depth--; + + if (!redeclaring_per_vertex) { + validate_identifier(this->block_name, loc, state); + + /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec: + * + * "Block names have no other use within a shader beyond interface + * matching; it is a compile-time error to use a block name at global + * scope for anything other than as a block name." + */ + ir_variable *var = state->symbols->get_variable(this->block_name); + if (var && !var->type->is_interface()) { + _mesa_glsl_error(&loc, state, "Block name `%s' is " + "already used in the scope.", + this->block_name); + } + } + + const glsl_type *earlier_per_vertex = NULL; + if (redeclaring_per_vertex) { + /* Find the previous declaration of gl_PerVertex. If we're redeclaring + * the named interface block gl_in, we can find it by looking at the + * previous declaration of gl_in. Otherwise we can find it by looking + * at the previous decalartion of any of the built-in outputs, + * e.g. gl_Position. + * + * Also check that the instance name and array-ness of the redeclaration + * are correct. 
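+ * An illustrative valid redeclaration in a geometry shader (this is
+ * the form the instance-name and array checks below expect):
+ *
+ *     in gl_PerVertex {
+ *        vec4 gl_Position;
+ *     } gl_in[];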
+ */ + switch (var_mode) { + case ir_var_shader_in: + if (ir_variable *earlier_gl_in = + state->symbols->get_variable("gl_in")) { + earlier_per_vertex = earlier_gl_in->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex input not allowed " + "in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL || + !this->array_specifier->is_single_dimension()) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex input must be redeclared as " + "gl_in[]"); + } + break; + case ir_var_shader_out: + if (ir_variable *earlier_gl_Position = + state->symbols->get_variable("gl_Position")) { + earlier_per_vertex = earlier_gl_Position->get_interface_type(); + } else if (ir_variable *earlier_gl_out = + state->symbols->get_variable("gl_out")) { + earlier_per_vertex = earlier_gl_out->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex output not " + "allowed in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_TESS_CTRL) { + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output must be redeclared as " + "gl_out[]"); + } + } else { + if (this->instance_name != NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output may not be redeclared with " + "an instance name"); + } + } + break; + default: + _mesa_glsl_error(&loc, state, + "gl_PerVertex must be declared as an input or an " + "output"); + break; + } + + if (earlier_per_vertex == NULL) { + /* An error has already been reported. Bail out to avoid null + * dereferences later in this function. + */ + return NULL; + } + + /* Copy locations from the old gl_PerVertex interface block. */ + for (unsigned i = 0; i < num_variables; i++) { + int j = earlier_per_vertex->field_index(fields[i].name); + if (j == -1) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex must be a subset " + "of the built-in members of gl_PerVertex"); + } else { + fields[i].location = + earlier_per_vertex->fields.structure[j].location; + fields[i].interpolation = + earlier_per_vertex->fields.structure[j].interpolation; + fields[i].centroid = + earlier_per_vertex->fields.structure[j].centroid; + fields[i].sample = + earlier_per_vertex->fields.structure[j].sample; + fields[i].patch = + earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; + } + } + + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 + * spec: + * + * If a built-in interface block is redeclared, it must appear in + * the shader before any use of any member included in the built-in + * declaration, or a compilation error will result. + * + * This appears to be a clarification to the behaviour established for + * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour + * regardless of GLSL version. 
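+ * Illustrative error case: a shader that first assigns gl_Position and
+ * only afterwards redeclares
+ *
+ *     out gl_PerVertex { vec4 gl_Position; };
+ *
+ * must be rejected, which is what the usage visitor run below detects.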
+ */ + interface_block_usage_visitor v(var_mode, earlier_per_vertex); + v.run(instructions); + if (v.usage_found()) { + _mesa_glsl_error(&loc, state, + "redeclaration of a built-in interface block must " + "appear before any use of any member of the " + "interface block"); + } + } + + const glsl_type *block_type = + glsl_type::get_interface_instance(fields, + num_variables, + packing, + this->block_name); + + if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' " + "already taken in the current scope", + this->block_name, iface_type_name); + } + + /* Since interface blocks cannot contain statements, it should be + * impossible for the block to generate any instructions. + */ + assert(declared_variables.is_empty()); + + /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * Geometry shader input variables get the per-vertex values written + * out by vertex shader output variables of the same names. Since a + * geometry shader operates on a set of vertices, each input varying + * variable (or input block, see interface blocks below) needs to be + * declared as an array. + */ + if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays"); + } else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && + this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays"); + } else if (state->stage == MESA_SHADER_TESS_CTRL && + this->array_specifier == NULL && + var_mode == ir_var_shader_out) { + _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays"); + } + + + /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec + * says: + * + * "If an instance name (instance-name) is used, then it puts all the + * members inside a scope within its own name space, accessed with the + * field selector ( . ) operator (analogously to structures)." + */ + if (this->instance_name) { + if (redeclaring_per_vertex) { + /* When a built-in in an unnamed interface block is redeclared, + * get_variable_being_redeclared() calls + * check_builtin_array_max_size() to make sure that built-in array + * variables aren't redeclared to illegal sizes. But we're looking + * at a redeclaration of a named built-in interface block. So we + * have to manually call check_builtin_array_max_size() for all parts + * of the interface that are arrays. + */ + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_array()) { + const unsigned size = fields[i].type->array_size(); + check_builtin_array_max_size(fields[i].name, size, loc, state); + } + } + } else { + validate_identifier(this->instance_name, loc, state); + } + + ir_variable *var; + + if (this->array_specifier != NULL) { + const glsl_type *block_array_type = + process_array_type(&loc, block_type, this->array_specifier, state); + + /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says: + * + * For uniform blocks declared an array, each individual array + * element corresponds to a separate buffer object backing one + * instance of the block. As the array size indicates the number + * of buffer objects needed, uniform block array declarations + * must specify an array size. 
+ * + * And a few paragraphs later: + * + * Geometry shader input blocks must be declared as arrays and + * follow the array declaration and linking rules for all + * geometry shader inputs. All other input and output block + * arrays must specify an array size. + * + * The same applies to tessellation shaders. + * + * The upshot of this is that the only circumstance where an + * interface array size *doesn't* need to be specified is on a + * geometry shader input, tessellation control shader input, + * tessellation control shader output, and tessellation evaluation + * shader input. + */ + if (block_array_type->is_unsized_array()) { + bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY || + state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL; + bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL; + + if (this->layout.flags.q.in) { + if (!allow_inputs) + _mesa_glsl_error(&loc, state, + "unsized input block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else if (this->layout.flags.q.out) { + if (!allow_outputs) + _mesa_glsl_error(&loc, state, + "unsized output block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else { + /* by elimination, this is a uniform block array */ + _mesa_glsl_error(&loc, state, + "unsized uniform block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + } + + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: + * + * * Arrays of arrays of blocks are not allowed + */ + if (state->es_shader && block_array_type->is_array() && + block_array_type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "arrays of arrays interface blocks are " + "not allowed"); + } + + var = new(state) ir_variable(block_array_type, + this->instance_name, + var_mode); + } else { + var = new(state) ir_variable(block_type, + this->instance_name, + var_mode); + } + + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in) + handle_geometry_shader_input_decl(state, loc, var); + else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in) + handle_tess_shader_input_decl(state, loc, var); + else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out) + handle_tess_ctrl_shader_output_decl(state, loc, var); + + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_unsized_array()) { + if (var_mode == ir_var_shader_storage) { + if (i != (num_variables - 1)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." 
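+ * Illustrative (hypothetical names): the quoted rule permits
+ *
+ *     buffer Particles { uint count; vec4 position[]; };
+ *
+ * with the unsized array as the last member of a shader storage block,
+ * whereas in this non-SSBO branch an unsized member is an error for ES
+ * shaders, as checked below.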
+ */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } + } + } + + if (ir_variable *earlier = + state->symbols->get_variable(this->instance_name)) { + if (!redeclaring_per_vertex) { + _mesa_glsl_error(&loc, state, "`%s' redeclared", + this->instance_name); + } + earlier->data.how_declared = ir_var_declared_normally; + earlier->type = var->type; + earlier->reinit_interface_type(block_type); + delete var; + } else { + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } + + var->data.stream = qual_stream; + if (layout.flags.q.explicit_location) { + var->data.location = expl_location; + var->data.explicit_location = true; + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + } else { + /* In order to have an array size, the block must also be declared with + * an instance name. + */ + assert(this->array_specifier == NULL); + + for (unsigned i = 0; i < num_variables; i++) { + ir_variable *var = + new(state) ir_variable(fields[i].type, + ralloc_strdup(state, fields[i].name), + var_mode); + var->data.interpolation = fields[i].interpolation; + var->data.centroid = fields[i].centroid; + var->data.sample = fields[i].sample; + var->data.patch = fields[i].patch; + var->data.stream = qual_stream; + var->data.location = fields[i].location; + if (fields[i].location != -1) + var->data.explicit_location = true; + var->init_interface_type(block_type); + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + /* Precision qualifiers do not have any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(fields[i].precision, fields[i].type, + state, &loc); + } + + if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) { + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + } else { + var->data.matrix_layout = fields[i].matrix_layout; + } + + if (var->data.mode == ir_var_shader_storage) { + var->data.image_read_only = fields[i].image_read_only; + var->data.image_write_only = fields[i].image_write_only; + var->data.image_coherent = fields[i].image_coherent; + var->data.image_volatile = fields[i].image_volatile; + var->data.image_restrict = fields[i].image_restrict; + } + + /* Examine var name here since var may get deleted in the next call */ + bool var_is_gl_id = is_gl_identifier(var->name); + + if (redeclaring_per_vertex) { + ir_variable *earlier = + get_variable_being_redeclared(var, loc, state, + true /* allow_all_redeclarations */); + if (!var_is_gl_id || earlier == NULL) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex can only " + "include built-in variables"); + } else if (earlier->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "`%s' has already been redeclared", + earlier->name); + } else { + earlier->data.how_declared = ir_var_declared_in_block; + earlier->reinit_interface_type(block_type); + } + continue; + } + + if (state->symbols->get_variable(var->name) != NULL) + _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name); + + /* Propagate the "binding" keyword into this UBO/SSBO's fields. + * The UBO declaration itself doesn't get an ir_variable unless it + * has an instance name. This is ugly. 
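+ * Illustrative (hypothetical names): for a block with no instance name,
+ * such as
+ *
+ *     layout(binding = 2) uniform Lights { vec4 color; vec4 dir; };
+ *
+ * each member gets its own ir_variable, so the explicit binding is
+ * applied to every one of them below.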
+ */ + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } + + if (var->type->is_unsized_array()) { + if (var->is_in_shader_storage_block()) { + if (!is_unsized_array_last_element(var)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + var->data.from_ssbo_unsized_array = true; + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." + */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + } + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + + if (redeclaring_per_vertex && block_type != earlier_per_vertex) { + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec: + * + * It is also a compilation error ... to redeclare a built-in + * block and then use a member from that built-in block that was + * not included in the redeclaration. + * + * This appears to be a clarification to the behaviour established + * for gl_PerVertex by GLSL 1.50, therefore we implement this + * behaviour regardless of GLSL version. + * + * To prevent the shader from using a member that was not included in + * the redeclaration, we disable any ir_variables that are still + * associated with the old declaration of gl_PerVertex (since we've + * already updated all of the variables contained in the new + * gl_PerVertex to point to it). + * + * As a side effect this will prevent + * validate_intrastage_interface_blocks() from getting confused and + * thinking there are conflicting definitions of gl_PerVertex in the + * shader. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + if (var != NULL && + var->get_interface_type() == earlier_per_vertex && + var->data.mode == var_mode) { + if (var->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex cannot " + "follow a redeclaration of `%s'", + var->name); + } + state->symbols->disable_variable(var->name); + var->remove(); + } + } + } + } + + return NULL; + } + + + ir_rvalue * + ast_tcs_output_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + YYLTYPE loc = this->get_location(); + + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; + } + + /* If any shader outputs occurred before this declaration and specified an + * array size, make sure the size they specified is consistent with the + * primitive type. 
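+ * Illustrative mismatch (hypothetical output name): an earlier
+ * "out vec4 color[4];" followed by "layout(vertices = 3) out;" should
+ * hit the error reported below.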
+ */ + if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { + _mesa_glsl_error(&loc, state, + "this tessellation control shader output layout " + "specifies %u vertices, but a previous output " + "is declared with size %u", + num_vertices, state->tcs_output_size); + return NULL; + } + + state->tcs_output_vertices_specified = true; + + /* If any shader outputs occurred before this declaration and did not + * specify an array size, their size is determined now. + */ + foreach_in_list (ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var == NULL || var->data.mode != ir_var_shader_out) + continue; + + /* Note: Not all tessellation control shader output are arrays. */ + if (!var->type->is_unsized_array() || var->data.patch) + continue; + + if (var->data.max_array_access >= num_vertices) { + _mesa_glsl_error(&loc, state, + "this tessellation control shader output layout " + "specifies %u vertices, but an access to element " + "%u of output `%s' already exists", num_vertices, + var->data.max_array_access, var->name); + } else { + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } + } + + return NULL; + } + + + ir_rvalue * + ast_gs_input_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + YYLTYPE loc = this->get_location(); + + /* If any geometry input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->gs_input_prim_type_specified && + state->in_qualifier->prim_type != this->prim_type) { + _mesa_glsl_error(&loc, state, + "geometry shader input layout does not match" + " previous declaration"); + return NULL; + } + + /* If any shader inputs occurred before this declaration and specified an + * array size, make sure the size they specified is consistent with the + * primitive type. + */ + unsigned num_vertices = vertices_per_prim(this->prim_type); + if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) { + _mesa_glsl_error(&loc, state, + "this geometry shader input layout implies %u vertices" + " per primitive, but a previous input is declared" + " with size %u", num_vertices, state->gs_input_size); + return NULL; + } + + state->gs_input_prim_type_specified = true; + + /* If any shader inputs occurred before this declaration and did not + * specify an array size, their size is determined now. + */ + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var == NULL || var->data.mode != ir_var_shader_in) + continue; + + /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an + * array; skip it. + */ + + if (var->type->is_unsized_array()) { + if (var->data.max_array_access >= num_vertices) { + _mesa_glsl_error(&loc, state, + "this geometry shader input layout implies %u" + " vertices, but an access to element %u of input" + " `%s' already exists", num_vertices, + var->data.max_array_access, var->name); + } else { + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } + } + } + + return NULL; + } + + + ir_rvalue * + ast_cs_input_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) + { + YYLTYPE loc = this->get_location(); + + /* From the ARB_compute_shader specification: + * + * If the local size of the shader in any dimension is greater + * than the maximum size supported by the implementation for that + * dimension, a compile-time error results. 
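+ *
+ * (Illustrative: "layout(local_size_x = 64, local_size_y = 2) in;"
+ * requests a 64x2x1 work group; dimensions left unspecified default
+ * to 1, as handled below.)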
+ * + * It is not clear from the spec how the error should be reported if + * the total size of the work group exceeds + * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to + * report it at compile time as well. + */ + GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; + for (int i = 0; i < 3; i++) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + _mesa_glsl_error(&loc, state, + "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" + " (%d)", 'x' + i, + state->ctx->Const.MaxComputeWorkGroupSize[i]); + break; + } + total_invocations *= qual_local_size[i]; + if (total_invocations > + state->ctx->Const.MaxComputeWorkGroupInvocations) { + _mesa_glsl_error(&loc, state, + "product of local_sizes exceeds " + "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)", + state->ctx->Const.MaxComputeWorkGroupInvocations); + break; + } + } + + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + + state->cs_input_local_size_specified = true; + for (int i = 0; i < 3; i++) + state->cs_input_local_size[i] = qual_local_size[i]; + + /* We may now declare the built-in constant gl_WorkGroupSize (see + * builtin_variable_generator::generate_constants() for why we didn't + * declare it earlier). + */ + ir_variable *var = new(state->symbols) + ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto); + var->data.how_declared = ir_var_declared_implicitly; + var->data.read_only = true; + instructions->push_tail(var); + state->symbols->add_variable(var); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + for (int i = 0; i < 3; i++) + data.u[i] = qual_local_size[i]; + var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::uvec3_type, &data); + var->data.has_initializer = true; + + return NULL; + } + + + static void + detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions) + { + bool gl_FragColor_assigned = false; + bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; + bool user_defined_fs_output_assigned = false; + ir_variable *user_defined_fs_output = NULL; + + /* It would be nice to have proper location information. 
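+ * (The checks below flag, for example, a fragment shader that writes
+ * both gl_FragColor and gl_FragData[0], or gl_FragColor plus a
+ * user-declared out variable.)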
*/ + YYLTYPE loc; + memset(&loc, 0, sizeof(loc)); + + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + + if (!var || !var->data.assigned) + continue; + + if (strcmp(var->name, "gl_FragColor") == 0) + gl_FragColor_assigned = true; + else if (strcmp(var->name, "gl_FragData") == 0) + gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; + else if (!is_gl_identifier(var->name)) { + if (state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_out) { + user_defined_fs_output_assigned = true; + user_defined_fs_output = var; + } + } + } + + /* From the GLSL 1.30 spec: + * + * "If a shader statically assigns a value to gl_FragColor, it + * may not assign a value to any element of gl_FragData. If a + * shader statically writes a value to any element of + * gl_FragData, it may not assign a value to + * gl_FragColor. That is, a shader may assign values to either + * gl_FragColor or gl_FragData, but not both. Multiple shaders + * linked together must also consistently write just one of + * these variables. Similarly, if user declared output + * variables are in use (statically assigned to), then the + * built-in variables gl_FragColor and gl_FragData may not be + * assigned to. These incorrect usages all generate compile + * time errors." + */ + if (gl_FragColor_assigned && gl_FragData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'"); + } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `%s'", + user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); + } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and `%s'", + user_defined_fs_output->name); + } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } + } + + + static void + remove_per_vertex_blocks(exec_list *instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode) + { + /* Find the gl_PerVertex interface block of the appropriate (in/out) mode, + * if it exists in this shader type. 
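+ * (E.g. for outputs this is the interface type behind gl_Position, and
+ * for inputs the type behind gl_in; the switch below does that lookup.)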
+ */ + const glsl_type *per_vertex = NULL; + switch (mode) { + case ir_var_shader_in: + if (ir_variable *gl_in = state->symbols->get_variable("gl_in")) + per_vertex = gl_in->get_interface_type(); + break; + case ir_var_shader_out: + if (ir_variable *gl_Position = + state->symbols->get_variable("gl_Position")) { + per_vertex = gl_Position->get_interface_type(); + } + break; + default: + assert(!"Unexpected mode"); + break; + } + + /* If we didn't find a built-in gl_PerVertex interface block, then we don't + * need to do anything. + */ + if (per_vertex == NULL) + return; + + /* If the interface block is used by the shader, then we don't need to do + * anything. + */ + interface_block_usage_visitor v(mode, per_vertex); + v.run(instructions); + if (v.usage_found()) + return; + + /* Remove any ir_variable declarations that refer to the interface block + * we're removing. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + if (var != NULL && var->get_interface_type() == per_vertex && + var->data.mode == mode) { + state->symbols->disable_variable(var->name); + var->remove(); + } + } + } diff --cc src/compiler/glsl/glsl_parser_extras.cpp index 00000000000,603895497d1..ecf0d7f76e5 mode 000000,100644..100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@@ -1,0 -1,1952 +1,1954 @@@ + /* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #include + #include + #include + #include + + #include "main/core.h" /* for struct gl_context */ + #include "main/context.h" + #include "main/shaderobj.h" + #include "util/u_atomic.h" /* for p_atomic_cmpxchg */ + #include "util/ralloc.h" + #include "ast.h" + #include "glsl_parser_extras.h" + #include "glsl_parser.h" + #include "ir_optimization.h" + #include "loop_analysis.h" + + /** + * Format a short human-readable description of the given GLSL version. + */ + const char * + glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version) + { + return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? 
" ES" : "", + version / 100, version % 100); + } + + + static const unsigned known_desktop_glsl_versions[] = + { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 }; + + + _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, + gl_shader_stage stage, + void *mem_ctx) + : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(), + switch_state() + { + assert(stage < MESA_SHADER_STAGES); + this->stage = stage; + + this->scanner = NULL; + this->translation_unit.make_empty(); + this->symbols = new(mem_ctx) glsl_symbol_table; + + this->info_log = ralloc_strdup(mem_ctx, ""); + this->error = false; + this->loop_nesting_ast = NULL; + + this->struct_specifier_depth = 0; + + this->uses_builtin_functions = false; + + /* Set default language version and extensions */ + this->language_version = 110; + this->forced_language_version = ctx->Const.ForceGLSLVersion; + this->es_shader = false; + this->ARB_texture_rectangle_enable = true; + + /* OpenGL ES 2.0 has different defaults from desktop GL. */ + if (ctx->API == API_OPENGLES2) { + this->language_version = 100; + this->es_shader = true; + this->ARB_texture_rectangle_enable = false; + } + + this->extensions = &ctx->Extensions; + ++ this->ARB_compute_shader_enable = true; ++ + this->Const.MaxLights = ctx->Const.MaxLights; + this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; + this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; + this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits; + this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs; + this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents; + this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits; + this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; + this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents; + this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset; + this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset; + + this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + + /* 1.50 constants */ + this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; + this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; + this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents; + this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits; + this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices; + this->Const.MaxGeometryTotalOutputComponents = ctx->Const.MaxGeometryTotalOutputComponents; + this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents; + + this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters; + this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters; + this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters; + 
this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters; + this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; + this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; + this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + this->Const.MaxVertexAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; + this->Const.MaxTessControlAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers; + this->Const.MaxTessEvaluationAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers; + this->Const.MaxGeometryAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; + this->Const.MaxFragmentAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxCombinedAtomicCounterBuffers = + ctx->Const.MaxCombinedAtomicBuffers; + this->Const.MaxAtomicCounterBufferSize = + ctx->Const.MaxAtomicBufferSize; + + /* Compute shader constants */ + for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) + this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i]; + for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++) + this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; + + this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; + this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; + this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; + this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms; + this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms; + this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms; + this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms; + this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms; + this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms; + + /* ARB_viewport_array */ + this->Const.MaxViewports = ctx->Const.MaxViewports; + + /* tessellation shader constants */ + this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices; + this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel; + this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents; + this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents; + this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits; + this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents; + this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents; + this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits; + this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents; + this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents; + this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents; + this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents; + + 
this->current_function = NULL; + this->toplevel_ir = NULL; + this->found_return = false; + this->all_invariant = false; + this->user_structures = NULL; + this->num_user_structures = 0; + this->num_subroutines = 0; + this->subroutines = NULL; + this->num_subroutine_types = 0; + this->subroutine_types = NULL; + + /* supported_versions should be large enough to support the known desktop + * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) + */ + STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == + ARRAY_SIZE(this->supported_versions)); + + /* Populate the list of supported GLSL versions */ + /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or + * the OpenGL 3.2 Core context is supported, this logic will need + * change. Older versions of GLSL are no longer supported + * outside the compatibility contexts of 3.x. + */ + this->num_supported_versions = 0; + if (_mesa_is_desktop_gl(ctx)) { + for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) { + if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) { + this->supported_versions[this->num_supported_versions].ver + = known_desktop_glsl_versions[i]; + this->supported_versions[this->num_supported_versions].es = false; + this->num_supported_versions++; + } + } + } + if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 100; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 300; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles31(ctx)) { + this->supported_versions[this->num_supported_versions].ver = 310; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + + /* Create a string for use in error messages to tell the user which GLSL + * versions are supported. + */ + char *supported = ralloc_strdup(this, ""); + for (unsigned i = 0; i < this->num_supported_versions; i++) { + unsigned ver = this->supported_versions[i].ver; + const char *const prefix = (i == 0) + ? "" + : ((i == this->num_supported_versions - 1) ? ", and " : ", "); + const char *const suffix = (this->supported_versions[i].es) ? 
" ES" : ""; + + ralloc_asprintf_append(& supported, "%s%u.%02u%s", + prefix, + ver / 100, ver % 100, + suffix); + } + + this->supported_version_string = supported; + + if (ctx->Const.ForceGLSLExtensionsWarn) + _mesa_glsl_process_extension("all", NULL, "warn", NULL, this); + + this->default_uniform_qualifier = new(this) ast_type_qualifier(); + this->default_uniform_qualifier->flags.q.shared = 1; + this->default_uniform_qualifier->flags.q.column_major = 1; + this->default_uniform_qualifier->is_default_qualifier = true; + + this->default_shader_storage_qualifier = new(this) ast_type_qualifier(); + this->default_shader_storage_qualifier->flags.q.shared = 1; + this->default_shader_storage_qualifier->flags.q.column_major = 1; + this->default_shader_storage_qualifier->is_default_qualifier = true; + + this->fs_uses_gl_fragcoord = false; + this->fs_redeclares_gl_fragcoord = false; + this->fs_origin_upper_left = false; + this->fs_pixel_center_integer = false; + this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false; + + this->gs_input_prim_type_specified = false; + this->tcs_output_vertices_specified = false; + this->gs_input_size = 0; + this->in_qualifier = new(this) ast_type_qualifier(); + this->out_qualifier = new(this) ast_type_qualifier(); + this->fs_early_fragment_tests = false; + memset(this->atomic_counter_offsets, 0, + sizeof(this->atomic_counter_offsets)); + this->allow_extension_directive_midshader = + ctx->Const.AllowGLSLExtensionDirectiveMidShader; + } + + /** + * Determine whether the current GLSL version is sufficiently high to support + * a certain feature, and generate an error message if it isn't. + * + * \param required_glsl_version and \c required_glsl_es_version are + * interpreted as they are in _mesa_glsl_parse_state::is_version(). + * + * \param locp is the parser location where the error should be reported. + * + * \param fmt (and additional arguments) constitute a printf-style error + * message to report if the version check fails. Information about the + * current and required GLSL versions will be appended. So, for example, if + * the GLSL version being compiled is 1.20, and check_version(130, 300, locp, + * "foo unsupported") is called, the error message will be "foo unsupported in + * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)". + */ + bool + _mesa_glsl_parse_state::check_version(unsigned required_glsl_version, + unsigned required_glsl_es_version, + YYLTYPE *locp, const char *fmt, ...) + { + if (this->is_version(required_glsl_version, required_glsl_es_version)) + return true; + + va_list args; + va_start(args, fmt); + char *problem = ralloc_vasprintf(this, fmt, args); + va_end(args); + const char *glsl_version_string + = glsl_compute_version_string(this, false, required_glsl_version); + const char *glsl_es_version_string + = glsl_compute_version_string(this, true, required_glsl_es_version); + const char *requirement_string = ""; + if (required_glsl_version && required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s or %s required)", + glsl_version_string, + glsl_es_version_string); + } else if (required_glsl_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_version_string); + } else if (required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_es_version_string); + } + _mesa_glsl_error(locp, this, "%s in %s%s", + problem, this->get_version_string(), + requirement_string); + + return false; + } + + /** + * Process a GLSL #version directive. 
+ * + * \param version is the integer that follows the #version token. + * + * \param ident is a string identifier that follows the integer, if any is + * present. Otherwise NULL. + */ + void + _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, + const char *ident) + { + bool es_token_present = false; + if (ident) { + if (strcmp(ident, "es") == 0) { + es_token_present = true; + } else if (version >= 150) { + if (strcmp(ident, "core") == 0) { + /* Accept the token. There's no need to record that this is + * a core profile shader since that's the only profile we support. + */ + } else if (strcmp(ident, "compatibility") == 0) { + _mesa_glsl_error(locp, this, + "the compatibility profile is not supported"); + } else { + _mesa_glsl_error(locp, this, + "\"%s\" is not a valid shading language profile; " + "if present, it must be \"core\"", ident); + } + } else { + _mesa_glsl_error(locp, this, + "illegal text following version number"); + } + } + + this->es_shader = es_token_present; + if (version == 100) { + if (es_token_present) { + _mesa_glsl_error(locp, this, + "GLSL 1.00 ES should be selected using " + "`#version 100'"); + } else { + this->es_shader = true; + } + } + + if (this->es_shader) { + this->ARB_texture_rectangle_enable = false; + } + + if (this->forced_language_version) + this->language_version = this->forced_language_version; + else + this->language_version = version; + + bool supported = false; + for (unsigned i = 0; i < this->num_supported_versions; i++) { + if (this->supported_versions[i].ver == this->language_version + && this->supported_versions[i].es == this->es_shader) { + supported = true; + break; + } + } + + if (!supported) { + _mesa_glsl_error(locp, this, "%s is not supported. " + "Supported versions are: %s", + this->get_version_string(), + this->supported_version_string); + + /* On exit, the language_version must be set to a valid value. + * Later calls to _mesa_glsl_initialize_types will misbehave if + * the version is invalid. + */ + switch (this->ctx->API) { + case API_OPENGL_COMPAT: + case API_OPENGL_CORE: + this->language_version = this->ctx->Const.GLSLVersion; + break; + + case API_OPENGLES: + assert(!"Should not get here."); + /* FALLTHROUGH */ + + case API_OPENGLES2: + this->language_version = 100; + break; + } + } + } + + + /* This helper function will append the given message to the shader's + info log and report it via GL_ARB_debug_output. Per that extension, + 'type' is one of the enum values classifying the message, and + 'id' is the implementation-defined ID of the given message. */ + static void + _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + GLenum type, const char *fmt, va_list ap) + { + bool error = (type == MESA_DEBUG_TYPE_ERROR); + GLuint msg_id = 0; + + assert(state->info_log != NULL); + + /* Get the offset that the new message will be written to. */ + int msg_offset = strlen(state->info_log); + + ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ", + locp->source, + locp->first_line, + locp->first_column, + error ? "error" : "warning"); + ralloc_vasprintf_append(&state->info_log, fmt, ap); + + const char *const msg = &state->info_log[msg_offset]; + struct gl_context *ctx = state->ctx; + + /* Report the error via GL_ARB_debug_output. */ + _mesa_shader_debug(ctx, type, &msg_id, msg); + + ralloc_strcat(&state->info_log, "\n"); + } + + void + _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) 
+ { + va_list ap; + + state->error = true; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap); + va_end(ap); + } + + + void + _mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) + { + va_list ap; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap); + va_end(ap); + } + + + /** + * Enum representing the possible behaviors that can be specified in + * an #extension directive. + */ + enum ext_behavior { + extension_disable, + extension_enable, + extension_require, + extension_warn + }; + + /** + * Element type for _mesa_glsl_supported_extensions + */ + struct _mesa_glsl_extension { + /** + * Name of the extension when referred to in a GLSL extension + * statement + */ + const char *name; + + /** True if this extension is available to desktop GL shaders */ + bool avail_in_GL; + + /** True if this extension is available to GLES shaders */ + bool avail_in_ES; + + /** + * Flag in the gl_extensions struct indicating whether this + * extension is supported by the driver, or + * &gl_extensions::dummy_true if supported by all drivers. + * + * Note: the type (GLboolean gl_extensions::*) is a "pointer to + * member" type, the type-safe alternative to the "offsetof" macro. + * In a nutshell: + * + * - foo bar::* p declares p to be an "offset" to a field of type + * foo that exists within struct bar + * - &bar::baz computes the "offset" of field baz within struct bar + * - x.*p accesses the field of x that exists at "offset" p + * - x->*p is equivalent to (*x).*p + */ + const GLboolean gl_extensions::* supported_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when this extension is enabled. + * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. + */ + bool _mesa_glsl_parse_state::* enable_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when the shader requests "warn" behavior for this extension. + * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. + */ + bool _mesa_glsl_parse_state::* warn_flag; + + + bool compatible_with_state(const _mesa_glsl_parse_state *state) const; + void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const; + }; + + #define EXT(NAME, GL, ES, SUPPORTED_FLAG) \ + { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \ + &_mesa_glsl_parse_state::NAME##_enable, \ + &_mesa_glsl_parse_state::NAME##_warn } + + /** + * Table of extensions that can be enabled/disabled within a shader, + * and the conditions under which they are supported. + */ + static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { + /* API availability */ + /* name GL ES supported flag */ + + /* ARB extensions go here, sorted alphabetically. 
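+ *
+ * (Illustrative expansion: EXT(ARB_draw_buffers, true, false, dummy_true)
+ * becomes { "GL_ARB_draw_buffers", true, false,
+ * &gl_extensions::dummy_true,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_enable,
+ * &_mesa_glsl_parse_state::ARB_draw_buffers_warn }.)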
+ */ + EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays), + EXT(ARB_compute_shader, true, false, ARB_compute_shader), + EXT(ARB_conservative_depth, true, false, ARB_conservative_depth), + EXT(ARB_derivative_control, true, false, ARB_derivative_control), + EXT(ARB_draw_buffers, true, false, dummy_true), + EXT(ARB_draw_instanced, true, false, ARB_draw_instanced), + EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts), + EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location), + EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location), + EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), + EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), + EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), + EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64), + EXT(ARB_sample_shading, true, false, ARB_sample_shading), + EXT(ARB_separate_shader_objects, true, false, dummy_true), + EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), + EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), + EXT(ARB_shader_clock, true, false, ARB_shader_clock), + EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters), + EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), + EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), + EXT(ARB_shader_precision, true, false, ARB_shader_precision), + EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object), + EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine), + EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples), + EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod), + EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack), + EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing), + EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader), + EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array), + EXT(ARB_texture_gather, true, false, ARB_texture_gather), + EXT(ARB_texture_multisample, true, false, ARB_texture_multisample), + EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels), + EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), + EXT(ARB_texture_rectangle, true, false, dummy_true), + EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), + EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), + EXT(ARB_viewport_array, true, false, ARB_viewport_array), + + /* KHR extensions go here, sorted alphabetically. + */ + + /* OES extensions go here, sorted alphabetically. + */ + EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), + EXT(OES_geometry_shader, false, true, OES_geometry_shader), + EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), + EXT(OES_texture_3D, false, true, dummy_true), + EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), + + /* All other extensions go here, sorted alphabetically. 
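+ *
+ * (The "GL_..." names in this table are what a shader's
+ * "#extension GL_EXT_shader_integer_mix : enable" directive is matched
+ * against in _mesa_glsl_process_extension() below.)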
+ */ + EXT(AMD_conservative_depth, true, false, ARB_conservative_depth), + EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(AMD_shader_trinary_minmax, true, false, dummy_true), + EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), + EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), + EXT(EXT_draw_buffers, false, true, dummy_true), + EXT(EXT_separate_shader_objects, false, true, dummy_true), + EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), + EXT(EXT_texture_array, true, false, EXT_texture_array), + }; + + #undef EXT + + + /** + * Determine whether a given extension is compatible with the target, + * API, and extension information in the current parser state. + */ + bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state * + state) const + { + /* Check that this extension matches whether we are compiling + * for desktop GL or GLES. + */ + if (state->es_shader) { + if (!this->avail_in_ES) return false; + } else { + if (!this->avail_in_GL) return false; + } + + /* Check that this extension is supported by the OpenGL + * implementation. + * + * Note: the ->* operator indexes into state->extensions by the + * offset this->supported_flag. See + * _mesa_glsl_extension::supported_flag for more info. + */ + return state->extensions->*(this->supported_flag); + } + + /** + * Set the appropriate flags in the parser state to establish the + * given behavior for this extension. + */ + void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state, + ext_behavior behavior) const + { + /* Note: the ->* operator indexes into state by the + * offsets this->enable_flag and this->warn_flag. See + * _mesa_glsl_extension::supported_flag for more info. + */ + state->*(this->enable_flag) = (behavior != extension_disable); + state->*(this->warn_flag) = (behavior == extension_warn); + } + + /** + * Find an extension by name in _mesa_glsl_supported_extensions. If + * the name is not found, return NULL. + */ + static const _mesa_glsl_extension *find_extension(const char *name) + { + for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { + if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) { + return &_mesa_glsl_supported_extensions[i]; + } + } + return NULL; + } + + + bool + _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, + const char *behavior_string, YYLTYPE *behavior_locp, + _mesa_glsl_parse_state *state) + { + ext_behavior behavior; + if (strcmp(behavior_string, "warn") == 0) { + behavior = extension_warn; + } else if (strcmp(behavior_string, "require") == 0) { + behavior = extension_require; + } else if (strcmp(behavior_string, "enable") == 0) { + behavior = extension_enable; + } else if (strcmp(behavior_string, "disable") == 0) { + behavior = extension_disable; + } else { + _mesa_glsl_error(behavior_locp, state, + "unknown extension behavior `%s'", + behavior_string); + return false; + } + + if (strcmp(name, "all") == 0) { + if ((behavior == extension_enable) || (behavior == extension_require)) { + _mesa_glsl_error(name_locp, state, "cannot %s all extensions", + (behavior == extension_enable) + ? 
"enable" : "require"); + return false; + } else { + for (unsigned i = 0; + i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { + const _mesa_glsl_extension *extension + = &_mesa_glsl_supported_extensions[i]; + if (extension->compatible_with_state(state)) { + _mesa_glsl_supported_extensions[i].set_flags(state, behavior); + } + } + } + } else { + const _mesa_glsl_extension *extension = find_extension(name); + if (extension && extension->compatible_with_state(state)) { + extension->set_flags(state, behavior); + } else { + static const char fmt[] = "extension `%s' unsupported in %s shader"; + + if (behavior == extension_require) { + _mesa_glsl_error(name_locp, state, fmt, + name, _mesa_shader_stage_to_string(state->stage)); + return false; + } else { + _mesa_glsl_warning(name_locp, state, fmt, + name, _mesa_shader_stage_to_string(state->stage)); + } + } + } + + return true; + } + + + /** + * Recurses through and if is an aggregate initializer + * and sets 's field to . Gives later functions + * (process_array_constructor, et al) sufficient information to do type + * checking. + * + * Operates on assignments involving an aggregate initializer. E.g., + * + * vec4 pos = {1.0, -1.0, 0.0, 1.0}; + * + * or more ridiculously, + * + * struct S { + * vec4 v[2]; + * }; + * + * struct { + * S a[2], b; + * int c; + * } aggregate = { + * { + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // a[0].v[0] + * {5.0, 6.0, 7.0, 8.0} // a[0].v[1] + * } // a[0].v + * }, // a[0] + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // a[1].v[0] + * {5.0, 6.0, 7.0, 8.0} // a[1].v[1] + * } // a[1].v + * } // a[1] + * }, // a + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // b.v[0] + * {5.0, 6.0, 7.0, 8.0} // b.v[1] + * } // b.v + * }, // b + * 4 // c + * }; + * + * This pass is necessary because the right-hand side of e = { ... } + * doesn't contain sufficient information to determine if the types match. + */ + void + _mesa_ast_set_aggregate_type(const glsl_type *type, + ast_expression *expr) + { + ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr; + ai->constructor_type = type; + + /* If the aggregate is an array, recursively set its elements' types. */ + if (type->is_array()) { + /* Each array element has the type type->fields.array. + * + * E.g., if if struct S[2] we want to set each element's type to + * struct S. + */ + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->fields.array, expr); + } + + /* If the aggregate is a struct, recursively set its fields' types. */ + } else if (type->is_record()) { + exec_node *expr_node = ai->expressions.head; + + /* Iterate through the struct's fields. */ + for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length; + i++, expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) { + _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr); + } + } + /* If the aggregate is a matrix, set its columns' types. 
*/ + } else if (type->is_matrix()) { + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->column_type(), expr); + } + } + } + + void + _mesa_ast_process_interface_block(YYLTYPE *locp, + _mesa_glsl_parse_state *state, + ast_interface_block *const block, + const struct ast_type_qualifier &q) + { + if (q.flags.q.buffer) { + if (!state->has_shader_storage_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } else if (state->ARB_shader_storage_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } + } else if (q.flags.q.uniform) { + if (!state->has_uniform_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } else if (state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } + } else { + if (state->es_shader || state->language_version < 150) { + _mesa_glsl_error(locp, state, + "#version 150 required for using " + "interface blocks"); + } + } + + /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"): + * "It is illegal to have an input block in a vertex shader + * or an output block in a fragment shader" + */ + if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) { + _mesa_glsl_error(locp, state, + "`in' interface block is not allowed for " + "a vertex shader"); + } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) { + _mesa_glsl_error(locp, state, + "`out' interface block is not allowed for " + "a fragment shader"); + } + + /* Since block arrays require names, and both features are added in + * the same language versions, we don't have to explicitly + * version-check both things. + */ + if (block->instance_name != NULL) { + state->check_version(150, 300, locp, "interface blocks with " + "an instance name are not allowed"); + } + + uint64_t interface_type_mask; + struct ast_type_qualifier temp_type_qualifier; + + /* Get a bitmask containing only the in/out/uniform/buffer + * flags, allowing us to ignore other irrelevant flags like + * interpolation qualifiers. + */ + temp_type_qualifier.flags.i = 0; + temp_type_qualifier.flags.q.uniform = true; + temp_type_qualifier.flags.q.in = true; + temp_type_qualifier.flags.q.out = true; + temp_type_qualifier.flags.q.buffer = true; + interface_type_mask = temp_type_qualifier.flags.i; + + /* Get the block's interface qualifier. The interface_qualifier + * production rule guarantees that only one bit will be set (and + * it will be in/out/uniform). + */ + uint64_t block_interface_qualifier = q.flags.i; + + block->layout.flags.i |= block_interface_qualifier; + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + /* Assign global layout's stream value. 
*/ + block->layout.flags.q.stream = 1; + block->layout.flags.q.explicit_stream = 0; + block->layout.stream = state->out_qualifier->stream; + } + + foreach_list_typed (ast_declarator_list, member, link, &block->declarations) { + ast_type_qualifier& qualifier = member->type->qualifier; + if ((qualifier.flags.i & interface_type_mask) == 0) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If no optional qualifier is used in a member declaration, the + * qualifier of the variable is just in, out, or uniform as declared + * by interface-qualifier." + */ + qualifier.flags.i |= block_interface_qualifier; + } else if ((qualifier.flags.i & interface_type_mask) != + block_interface_qualifier) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If optional qualifiers are used, they can include interpolation + * and storage qualifiers and they must declare an input, output, + * or uniform variable consistent with the interface qualifier of + * the block." + */ + _mesa_glsl_error(locp, state, + "uniform/in/out qualifier on " + "interface block member does not match " + "the interface block"); + } + + /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": + * + * "GLSL ES 3.0 does not support interface blocks for shader inputs or + * outputs." + * + * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. + * + * "Only variables output from a shader can be candidates for + * invariance." + * + * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": + * + * "If optional qualifiers are used, they can include interpolation + * qualifiers, auxiliary storage qualifiers, and storage qualifiers + * and they must declare an input, output, or uniform member + * consistent with the interface qualifier of the block" + */ + if (qualifier.flags.q.invariant) + _mesa_glsl_error(locp, state, + "invariant qualifiers cannot be used " + "with interface blocks members"); + } + } + + void + _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q) + { + if (q->flags.q.subroutine) + printf("subroutine "); + + if (q->flags.q.subroutine_def) { + printf("subroutine ("); + q->subroutine_list->print(); + printf(")"); + } + + if (q->flags.q.constant) + printf("const "); + + if (q->flags.q.invariant) + printf("invariant "); + + if (q->flags.q.attribute) + printf("attribute "); + + if (q->flags.q.varying) + printf("varying "); + + if (q->flags.q.in && q->flags.q.out) + printf("inout "); + else { + if (q->flags.q.in) + printf("in "); + + if (q->flags.q.out) + printf("out "); + } + + if (q->flags.q.centroid) + printf("centroid "); + if (q->flags.q.sample) + printf("sample "); + if (q->flags.q.patch) + printf("patch "); + if (q->flags.q.uniform) + printf("uniform "); + if (q->flags.q.buffer) + printf("buffer "); + if (q->flags.q.smooth) + printf("smooth "); + if (q->flags.q.flat) + printf("flat "); + if (q->flags.q.noperspective) + printf("noperspective "); + } + + + void + ast_node::print(void) const + { + printf("unhandled node "); + } + + + ast_node::ast_node(void) + { + this->location.source = 0; + this->location.first_line = 0; + this->location.first_column = 0; + this->location.last_line = 0; + this->location.last_column = 0; + } + + + static void + ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier) + { + if (array_specifier) + array_specifier->print(); + } + + + void + ast_compound_statement::print(void) const + { + printf("{\n"); + + foreach_list_typed(ast_node, ast, link, &this->statements) { + ast->print(); + } + + printf("}\n"); + } + + + 
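The interface_type_mask logic above leans on the layout of ast_type_qualifier: the named one-bit qualifier flags and a single wide integer overlay each other in a union, so a mask of just the in/out/uniform/buffer bits can be built by setting those flags on a zeroed temporary and reading the integer back. Below is a minimal standalone sketch of that idiom under a simplified, hypothetical flags layout (qualifier_flags is an invented stand-in, not the real ast_type_qualifier); it relies on the same union punning the real code uses, which mainstream compilers accept even though strict C++ leaves it implementation-defined.

// Sketch only: a simplified stand-in for the ast_type_qualifier flag union.
#include <cassert>
#include <cstdint>

struct qualifier_flags {
   union {
      struct {
         unsigned uniform:1;
         unsigned in:1;
         unsigned out:1;
         unsigned buffer:1;
         unsigned flat:1;      /* unrelated interpolation bit, ignored by the mask */
      } q;
      std::uint64_t i;         /* all flags viewed as one integer */
   } flags;
};

int main()
{
   /* Build the mask the same way the interface-block code does: zero
    * everything, set only the interface bits, read back the integer.
    */
   qualifier_flags tmp;
   tmp.flags.i = 0;
   tmp.flags.q.uniform = 1;
   tmp.flags.q.in = 1;
   tmp.flags.q.out = 1;
   tmp.flags.q.buffer = 1;
   const std::uint64_t interface_mask = tmp.flags.i;

   /* A member with only an interpolation qualifier has no interface bit set,
    * so it would inherit the block's interface qualifier.
    */
   qualifier_flags member;
   member.flags.i = 0;
   member.flags.q.flat = 1;
   assert((member.flags.i & interface_mask) == 0);

   /* An explicit storage qualifier shows up under the mask and can then be
    * compared against the block's own qualifier bit.
    */
   member.flags.q.in = 1;
   assert((member.flags.i & interface_mask) != 0);
   return 0;
}

Because both the mask and the member flags are built through the same union, the comparison stays consistent regardless of how the compiler happens to lay the bitfields out.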
ast_compound_statement::ast_compound_statement(int new_scope, + ast_node *statements) + { + this->new_scope = new_scope; + + if (statements != NULL) { + this->statements.push_degenerate_list_at_head(&statements->link); + } + } + + + void + ast_expression::print(void) const + { + switch (oper) { + case ast_assign: + case ast_mul_assign: + case ast_div_assign: + case ast_mod_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + subexpressions[1]->print(); + break; + + case ast_field_selection: + subexpressions[0]->print(); + printf(". %s ", primary_expression.identifier); + break; + + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + printf("%s ", operator_string(oper)); + subexpressions[0]->print(); + break; + + case ast_post_inc: + case ast_post_dec: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + break; + + case ast_conditional: + subexpressions[0]->print(); + printf("? "); + subexpressions[1]->print(); + printf(": "); + subexpressions[2]->print(); + break; + + case ast_array_index: + subexpressions[0]->print(); + printf("[ "); + subexpressions[1]->print(); + printf("] "); + break; + + case ast_function_call: { + subexpressions[0]->print(); + printf("( "); + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + + printf(") "); + break; + } + + case ast_identifier: + printf("%s ", primary_expression.identifier); + break; + + case ast_int_constant: + printf("%d ", primary_expression.int_constant); + break; + + case ast_uint_constant: + printf("%u ", primary_expression.uint_constant); + break; + + case ast_float_constant: + printf("%f ", primary_expression.float_constant); + break; + + case ast_double_constant: + printf("%f ", primary_expression.double_constant); + break; + + case ast_bool_constant: + printf("%s ", + primary_expression.bool_constant + ? 
"true" : "false"); + break; + + case ast_sequence: { + printf("( "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf(") "); + break; + } + + case ast_aggregate: { + printf("{ "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf("} "); + break; + } + + default: + assert(0); + break; + } + } + + ast_expression::ast_expression(int oper, + ast_expression *ex0, + ast_expression *ex1, + ast_expression *ex2) : + primary_expression() + { + this->oper = ast_operators(oper); + this->subexpressions[0] = ex0; + this->subexpressions[1] = ex1; + this->subexpressions[2] = ex2; + this->non_lvalue_description = NULL; + } + + + void + ast_expression_statement::print(void) const + { + if (expression) + expression->print(); + + printf("; "); + } + + + ast_expression_statement::ast_expression_statement(ast_expression *ex) : + expression(ex) + { + /* empty */ + } + + + void + ast_function::print(void) const + { + return_type->print(); + printf(" %s (", identifier); + + foreach_list_typed(ast_node, ast, link, & this->parameters) { + ast->print(); + } + + printf(")"); + } + + + ast_function::ast_function(void) + : return_type(NULL), identifier(NULL), is_definition(false), + signature(NULL) + { + /* empty */ + } + + + void + ast_fully_specified_type::print(void) const + { + _mesa_ast_type_qualifier_print(& qualifier); + specifier->print(); + } + + + void + ast_parameter_declarator::print(void) const + { + type->print(); + if (identifier) + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); + } + + + void + ast_function_definition::print(void) const + { + prototype->print(); + body->print(); + } + + + void + ast_declaration::print(void) const + { + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); + + if (initializer) { + printf("= "); + initializer->print(); + } + } + + + ast_declaration::ast_declaration(const char *identifier, + ast_array_specifier *array_specifier, + ast_expression *initializer) + { + this->identifier = identifier; + this->array_specifier = array_specifier; + this->initializer = initializer; + } + + + void + ast_declarator_list::print(void) const + { + assert(type || invariant); + + if (type) + type->print(); + else if (invariant) + printf("invariant "); + else + printf("precise "); + + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != this->declarations.get_head()) + printf(", "); + + ast->print(); + } + + printf("; "); + } + + + ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type) + { + this->type = type; + this->invariant = false; + this->precise = false; + } + + void + ast_jump_statement::print(void) const + { + switch (mode) { + case ast_continue: + printf("continue; "); + break; + case ast_break: + printf("break; "); + break; + case ast_return: + printf("return "); + if (opt_return_value) + opt_return_value->print(); + + printf("; "); + break; + case ast_discard: + printf("discard; "); + break; + } + } + + + ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value) + : opt_return_value(NULL) + { + this->mode = ast_jump_modes(mode); + + if (mode == ast_return) + opt_return_value = return_value; + } + + + void + ast_selection_statement::print(void) const + { + printf("if ( "); + condition->print(); + printf(") "); + + 
then_statement->print(); + + if (else_statement) { + printf("else "); + else_statement->print(); + } + } + + + ast_selection_statement::ast_selection_statement(ast_expression *condition, + ast_node *then_statement, + ast_node *else_statement) + { + this->condition = condition; + this->then_statement = then_statement; + this->else_statement = else_statement; + } + + + void + ast_switch_statement::print(void) const + { + printf("switch ( "); + test_expression->print(); + printf(") "); + + body->print(); + } + + + ast_switch_statement::ast_switch_statement(ast_expression *test_expression, + ast_node *body) + { + this->test_expression = test_expression; + this->body = body; + } + + + void + ast_switch_body::print(void) const + { + printf("{\n"); + if (stmts != NULL) { + stmts->print(); + } + printf("}\n"); + } + + + ast_switch_body::ast_switch_body(ast_case_statement_list *stmts) + { + this->stmts = stmts; + } + + + void ast_case_label::print(void) const + { + if (test_value != NULL) { + printf("case "); + test_value->print(); + printf(": "); + } else { + printf("default: "); + } + } + + + ast_case_label::ast_case_label(ast_expression *test_value) + { + this->test_value = test_value; + } + + + void ast_case_label_list::print(void) const + { + foreach_list_typed(ast_node, ast, link, & this->labels) { + ast->print(); + } + printf("\n"); + } + + + ast_case_label_list::ast_case_label_list(void) + { + } + + + void ast_case_statement::print(void) const + { + labels->print(); + foreach_list_typed(ast_node, ast, link, & this->stmts) { + ast->print(); + printf("\n"); + } + } + + + ast_case_statement::ast_case_statement(ast_case_label_list *labels) + { + this->labels = labels; + } + + + void ast_case_statement_list::print(void) const + { + foreach_list_typed(ast_node, ast, link, & this->cases) { + ast->print(); + } + } + + + ast_case_statement_list::ast_case_statement_list(void) + { + } + + + void + ast_iteration_statement::print(void) const + { + switch (mode) { + case ast_for: + printf("for( "); + if (init_statement) + init_statement->print(); + printf("; "); + + if (condition) + condition->print(); + printf("; "); + + if (rest_expression) + rest_expression->print(); + printf(") "); + + body->print(); + break; + + case ast_while: + printf("while ( "); + if (condition) + condition->print(); + printf(") "); + body->print(); + break; + + case ast_do_while: + printf("do "); + body->print(); + printf("while ( "); + if (condition) + condition->print(); + printf("); "); + break; + } + } + + + ast_iteration_statement::ast_iteration_statement(int mode, + ast_node *init, + ast_node *condition, + ast_expression *rest_expression, + ast_node *body) + { + this->mode = ast_iteration_modes(mode); + this->init_statement = init; + this->condition = condition; + this->rest_expression = rest_expression; + this->body = body; + } + + + void + ast_struct_specifier::print(void) const + { + printf("struct %s { ", name); + foreach_list_typed(ast_node, ast, link, &this->declarations) { + ast->print(); + } + printf("} "); + } + + + ast_struct_specifier::ast_struct_specifier(const char *identifier, + ast_declarator_list *declarator_list) + { + if (identifier == NULL) { + static mtx_t mutex = _MTX_INITIALIZER_NP; + static unsigned anon_count = 1; + unsigned count; + + mtx_lock(&mutex); + count = anon_count++; + mtx_unlock(&mutex); + + identifier = ralloc_asprintf(this, "#anon_struct_%04x", count); + } + name = identifier; + this->declarations.push_degenerate_list_at_head(&declarator_list->link); + is_declaration = true; + } + + 
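The ast_struct_specifier constructor above names anonymous structs from a process-wide counter guarded by a mutex, so two shaders being compiled on different threads can never receive the same generated name. A small self-contained sketch of that pattern follows, using the C++ standard library for brevity (anon_struct_name is an invented helper; the real code uses the mtx_t wrappers and allocates the string with ralloc_asprintf).

// Sketch only: unique name generation for anonymous structs.
#include <cstdio>
#include <mutex>
#include <string>

static std::string anon_struct_name()
{
   static std::mutex mtx;
   static unsigned anon_count = 1;

   unsigned count;
   {
      std::lock_guard<std::mutex> lock(mtx);   /* serialize the counter bump */
      count = anon_count++;
   }

   /* Same format string as the parser uses for anonymous structs. */
   char buf[32];
   std::snprintf(buf, sizeof(buf), "#anon_struct_%04x", count);
   return std::string(buf);
}

int main()
{
   std::printf("%s\n", anon_struct_name().c_str());   /* #anon_struct_0001 */
   std::printf("%s\n", anon_struct_name().c_str());   /* #anon_struct_0002 */
   return 0;
}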
void ast_subroutine_list::print(void) const + { + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != this->declarations.get_head()) + printf(", "); + ast->print(); + } + } + + static void + set_shader_inout_layout(struct gl_shader *shader, + struct _mesa_glsl_parse_state *state) + { + /* Should have been prevented by the parser. */ + if (shader->Stage == MESA_SHADER_TESS_CTRL) { + assert(!state->in_qualifier->flags.i); + } else if (shader->Stage == MESA_SHADER_TESS_EVAL) { + assert(!state->out_qualifier->flags.i); + } else if (shader->Stage != MESA_SHADER_GEOMETRY) { + assert(!state->in_qualifier->flags.i); + assert(!state->out_qualifier->flags.i); + } + + if (shader->Stage != MESA_SHADER_COMPUTE) { + /* Should have been prevented by the parser. */ + assert(!state->cs_input_local_size_specified); + } + + if (shader->Stage != MESA_SHADER_FRAGMENT) { + /* Should have been prevented by the parser. */ + assert(!state->fs_uses_gl_fragcoord); + assert(!state->fs_redeclares_gl_fragcoord); + assert(!state->fs_pixel_center_integer); + assert(!state->fs_origin_upper_left); + assert(!state->fs_early_fragment_tests); + } + + switch (shader->Stage) { + case MESA_SHADER_TESS_CTRL: + shader->TessCtrl.VerticesOut = 0; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } + break; + case MESA_SHADER_TESS_EVAL: + shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; + if (state->in_qualifier->flags.q.prim_type) + shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type; + + shader->TessEval.Spacing = 0; + if (state->in_qualifier->flags.q.vertex_spacing) + shader->TessEval.Spacing = state->in_qualifier->vertex_spacing; + + shader->TessEval.VertexOrder = 0; + if (state->in_qualifier->flags.q.ordering) + shader->TessEval.VertexOrder = state->in_qualifier->ordering; + + shader->TessEval.PointMode = -1; + if (state->in_qualifier->flags.q.point_mode) + shader->TessEval.PointMode = state->in_qualifier->point_mode; + break; + case MESA_SHADER_GEOMETRY: + shader->Geom.VerticesOut = 0; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } + + if (state->gs_input_prim_type_specified) { + shader->Geom.InputType = state->in_qualifier->prim_type; + } else { + shader->Geom.InputType = PRIM_UNKNOWN; + } + + if (state->out_qualifier->flags.q.prim_type) { + shader->Geom.OutputType = state->out_qualifier->prim_type; + } else { + shader->Geom.OutputType = PRIM_UNKNOWN; + } + + shader->Geom.Invocations = 0; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } + break; + + case 
MESA_SHADER_COMPUTE: + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = state->cs_input_local_size[i]; + } else { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = 0; + } + break; + + case MESA_SHADER_FRAGMENT: + shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord; + shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord; + shader->pixel_center_integer = state->fs_pixel_center_integer; + shader->origin_upper_left = state->fs_origin_upper_left; + shader->ARB_fragment_coord_conventions_enable = + state->ARB_fragment_coord_conventions_enable; + shader->EarlyFragmentTests = state->fs_early_fragment_tests; + break; + + default: + /* Nothing to do. */ + break; + } + } + + extern "C" { + + void + _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, + bool dump_ast, bool dump_hir) + { + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + const char *source = shader->Source; + + if (ctx->Const.GenerateTemporaryNames) + (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names, + false, true); + + state->error = glcpp_preprocess(state, &source, &state->info_log, + &ctx->Extensions, ctx); + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + if (dump_ast) { + foreach_list_typed(ast_node, ast, link, &state->translation_unit) { + ast->print(); + } + printf("\n\n"); + } + + ralloc_free(shader->ir); + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + + if (!state->error) { + validate_ir_tree(shader->ir); + + /* Print out the unoptimized IR. */ + if (dump_hir) { + _mesa_print_ir(stdout, shader->ir, state); + } + } + + + if (!state->error && !shader->ir->is_empty()) { + struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + lower_subroutine(shader->ir, state); + /* Do some optimization at compile time to reduce shader IR size + * and reduce later work if the same shader is linked multiple times + */ + while (do_common_optimization(shader->ir, false, false, options, + ctx->Const.NativeIntegers)) + ; + + validate_ir_tree(shader->ir); + + enum ir_variable_mode other; + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + other = ir_var_shader_in; + break; + case MESA_SHADER_FRAGMENT: + other = ir_var_shader_out; + break; + default: + /* Something invalid to ensure optimize_dead_builtin_uniforms + * doesn't remove anything other than uniforms or constants. + */ + other = ir_var_mode_count; + break; + } + + optimize_dead_builtin_variables(shader->ir, other); + + validate_ir_tree(shader->ir); + } + + if (shader->InfoLog) + ralloc_free(shader->InfoLog); + + if (!state->error) + set_shader_inout_layout(shader, state); + + shader->symbols = new(shader->ir) glsl_symbol_table; + shader->CompileStatus = !state->error; + shader->InfoLog = state->info_log; + shader->Version = state->language_version; + shader->IsES = state->es_shader; + shader->uses_builtin_functions = state->uses_builtin_functions; + + /* Retain any live IR, but trash the rest. */ + reparent_ir(shader->ir, shader->ir); + + /* Destroy the symbol table. Create a new symbol table that contains only + * the variables and functions that still exist in the IR. The symbol + * table will be used later during linking. 
+ * + * There must NOT be any freed objects still referenced by the symbol + * table. That could cause the linker to dereference freed memory. + * + * We don't have to worry about types or interface-types here because those + * are fly-weights that are looked up by glsl_type. + */ + foreach_in_list (ir_instruction, ir, shader->ir) { + switch (ir->ir_type) { + case ir_type_function: + shader->symbols->add_function((ir_function *) ir); + break; + case ir_type_variable: { + ir_variable *const var = (ir_variable *) ir; + + if (var->data.mode != ir_var_temporary) + shader->symbols->add_variable(var); + break; + } + default: + break; + } + } + + _mesa_glsl_initialize_derived_variables(shader); + + delete state->symbols; + ralloc_free(state); + } + + } /* extern "C" */ + /** + * Do the set of common optimizations passes + * + * \param ir List of instructions to be optimized + * \param linked Is the shader linked? This enables + * optimizations passes that remove code at + * global scope and could cause linking to + * fail. + * \param uniform_locations_assigned Have locations already been assigned for + * uniforms? This prevents the declarations + * of unused uniforms from being removed. + * The setting of this flag only matters if + * \c linked is \c true. + * \param max_unroll_iterations Maximum number of loop iterations to be + * unrolled. Setting to 0 disables loop + * unrolling. + * \param options The driver's preferred shader options. + */ + bool + do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers) + { + GLboolean progress = GL_FALSE; + + progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; + + if (linked) { + progress = do_function_inlining(ir) || progress; + progress = do_dead_functions(ir) || progress; + progress = do_structure_splitting(ir) || progress; + } + progress = do_if_simplification(ir) || progress; + progress = opt_flatten_nested_if_blocks(ir) || progress; + progress = opt_conditional_discard(ir) || progress; + progress = do_copy_propagation(ir) || progress; + progress = do_copy_propagation_elements(ir) || progress; + + if (options->OptimizeForAOS && !linked) + progress = opt_flip_matrices(ir) || progress; + + if (linked && options->OptimizeForAOS) { + progress = do_vectorize(ir) || progress; + } + + if (linked) + progress = do_dead_code(ir, uniform_locations_assigned) || progress; + else + progress = do_dead_code_unlinked(ir) || progress; + progress = do_dead_code_local(ir) || progress; + progress = do_tree_grafting(ir) || progress; + progress = do_constant_propagation(ir) || progress; + if (linked) + progress = do_constant_variable(ir) || progress; + else + progress = do_constant_variable_unlinked(ir) || progress; + progress = do_constant_folding(ir) || progress; + progress = do_minmax_prune(ir) || progress; + progress = do_rebalance_tree(ir) || progress; + progress = do_algebraic(ir, native_integers, options) || progress; + progress = do_lower_jumps(ir) || progress; + progress = do_vec_index_to_swizzle(ir) || progress; + progress = lower_vector_insert(ir, false) || progress; + progress = do_swizzle_swizzle(ir) || progress; + progress = do_noop_swizzle(ir) || progress; + + progress = optimize_split_arrays(ir, linked) || progress; + progress = optimize_redundant_jumps(ir) || progress; + + loop_state *ls = analyze_loop_variables(ir); + if (ls->loop_found) { + progress = set_loop_controls(ir, ls) || progress; + progress = unroll_loops(ir, ls, options) || 
progress; + } + delete ls; + + return progress; + } + + extern "C" { + + /** + * To be called at GL teardown time, this frees compiler datastructures. + * + * After calling this, any previously compiled shaders and shader + * programs would be invalid. So this should happen at approximately + * program exit. + */ + void + _mesa_destroy_shader_compiler(void) + { + _mesa_destroy_shader_compiler_caches(); + + _mesa_glsl_release_types(); + } + + /** + * Releases compiler caches to trade off performance for memory. + * + * Intended to be used with glReleaseShaderCompiler(). + */ + void + _mesa_destroy_shader_compiler_caches(void) + { + _mesa_glsl_release_builtin_functions(); + } + + } diff --cc src/compiler/glsl/ir.cpp index 00000000000,de9d314bae4..5debca32411 mode 000000,100644..100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@@ -1,0 -1,2039 +1,2030 @@@ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + #include <string.h> + #include "main/core.h" /* for MAX2 */ + #include "ir.h" + #include "compiler/glsl_types.h" + + ir_rvalue::ir_rvalue(enum ir_node_type t) + : ir_instruction(t) + { + this->type = glsl_type::error_type; + } + + bool ir_rvalue::is_zero() const + { + return false; + } + + bool ir_rvalue::is_one() const + { + return false; + } + + bool ir_rvalue::is_negative_one() const + { + return false; + } + + /** + * Modify the swizzle mask to move one component to another + * + * \param m IR swizzle to be modified + * \param from Component in the RHS that is to be swizzled + * \param to Desired swizzle location of \c from + */ + static void + update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to) + { + switch (to) { + case 0: m.x = from; break; + case 1: m.y = from; break; + case 2: m.z = from; break; + case 3: m.w = from; break; + default: assert(!"Should not get here."); + } + } + + void + ir_assignment::set_lhs(ir_rvalue *lhs) + { + void *mem_ctx = this; + bool swizzled = false; + + while (lhs != NULL) { + ir_swizzle *swiz = lhs->as_swizzle(); + + if (swiz == NULL) + break; + + unsigned write_mask = 0; + ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 }; + + for (unsigned i = 0; i < swiz->mask.num_components; i++) { + unsigned c = 0; + + switch (i) { + case 0: c = swiz->mask.x; break; + case 1: c = swiz->mask.y; break; + case 2: c = swiz->mask.z; break; + case 3: c = swiz->mask.w; break; + default: assert(!"Should not get here."); + } + + write_mask |= (((this->write_mask >> i) & 1) << c); + update_rhs_swizzle(rhs_swiz, i, c); + rhs_swiz.num_components = swiz->val->type->vector_elements; + } + + this->write_mask = write_mask; + lhs = swiz->val; + + this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz); + swizzled = true; + } + + if (swizzled) { + /* Now, RHS channels line up with the LHS writemask. Collapse it + * to just the channels that will be written. + */ + ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 }; + int rhs_chan = 0; + for (int i = 0; i < 4; i++) { + if (write_mask & (1 << i)) + update_rhs_swizzle(rhs_swiz, i, rhs_chan++); + } + rhs_swiz.num_components = rhs_chan; + this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz); + } + + assert((lhs == NULL) || lhs->as_dereference()); + + this->lhs = (ir_dereference *) lhs; + } + + ir_variable * + ir_assignment::whole_variable_written() + { + ir_variable *v = this->lhs->whole_variable_referenced(); + + if (v == NULL) + return NULL; + + if (v->type->is_scalar()) + return v; + + if (v->type->is_vector()) { + const unsigned mask = (1U << v->type->vector_elements) - 1; + + if (mask != this->write_mask) + return NULL; + } + + /* Either all the vector components are assigned or the variable is some + * composite type (and the whole thing is assigned). 
+ */ + return v; + } + + ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, + ir_rvalue *condition, unsigned write_mask) + : ir_instruction(ir_type_assignment) + { + this->condition = condition; + this->rhs = rhs; + this->lhs = lhs; + this->write_mask = write_mask; + + if (lhs->type->is_scalar() || lhs->type->is_vector()) { + int lhs_components = 0; + for (int i = 0; i < 4; i++) { + if (write_mask & (1 << i)) + lhs_components++; + } + + assert(lhs_components == this->rhs->type->vector_elements); + } + } + + ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue *condition) + : ir_instruction(ir_type_assignment) + { + this->condition = condition; + this->rhs = rhs; + + /* If the RHS is a vector type, assume that all components of the vector + * type are being written to the LHS. The write mask comes from the RHS + * because we can have a case where the LHS is a vec4 and the RHS is a + * vec3. In that case, the assignment is: + * + * (assign (...) (xyz) (var_ref lhs) (var_ref rhs)) + */ + if (rhs->type->is_vector()) + this->write_mask = (1U << rhs->type->vector_elements) - 1; + else if (rhs->type->is_scalar()) + this->write_mask = 1; + else + this->write_mask = 0; + + this->set_lhs(lhs); + } + + ir_expression::ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2, ir_rvalue *op3) + : ir_rvalue(ir_type_expression) + { + this->type = type; + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = op3; + #ifndef NDEBUG + int num_operands = get_num_operands(this->operation); + for (int i = num_operands; i < 4; i++) { + assert(this->operands[i] == NULL); + } + #endif + } + + ir_expression::ir_expression(int op, ir_rvalue *op0) + : ir_rvalue(ir_type_expression) + { + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = NULL; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op <= ir_last_unop); + + switch (this->operation) { + case ir_unop_bit_not: + case ir_unop_logic_not: + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_trunc: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + case ir_unop_round_even: + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + case ir_unop_bitfield_reverse: + case ir_unop_interpolate_at_centroid: + case ir_unop_saturate: + this->type = op0->type; + break; + + case ir_unop_f2i: + case ir_unop_b2i: + case ir_unop_u2i: + case ir_unop_d2i: + case ir_unop_bitcast_f2i: + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + case ir_unop_subroutine_to_int: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_b2f: + case ir_unop_i2f: + case ir_unop_u2f: + case ir_unop_d2f: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_u2f: + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2b: + case ir_unop_i2b: + case ir_unop_d2b: + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2d: + case ir_unop_i2d: + case ir_unop_u2d: + 
this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, + op0->type->vector_elements, 1); + break; + + case ir_unop_i2u: + case ir_unop_f2u: + case ir_unop_d2u: + case ir_unop_bitcast_f2u: + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, + op0->type->vector_elements, 1); + break; + + case ir_unop_noise: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: + this->type = glsl_type::float_type; + break; + + case ir_unop_unpack_double_2x32: + this->type = glsl_type::uvec2_type; + break; + + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_pack_half_2x16: + this->type = glsl_type::uint_type; + break; + + case ir_unop_pack_double_2x32: + this->type = glsl_type::double_type; + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + this->type = glsl_type::vec2_type; + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + this->type = glsl_type::vec4_type; + break; + + case ir_unop_frexp_sig: + this->type = op0->type; + break; + case ir_unop_frexp_exp: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_get_buffer_size: + case ir_unop_ssbo_unsized_array_length: + this->type = glsl_type::int_type; + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = op0->type; + break; + } + } + + ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) + : ir_rvalue(ir_type_expression) + { + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op > ir_last_unop); + + switch (this->operation) { + case ir_binop_all_equal: + case ir_binop_any_nequal: + this->type = glsl_type::bool_type; + break; + + case ir_binop_add: + case ir_binop_sub: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + if (this->operation == ir_binop_mul) { + this->type = glsl_type::get_mul_type(op0->type, op1->type); + } else { + assert(op0->type == op1->type); + this->type = op0->type; + } + } + break; + + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(!op0->type->is_matrix()); + assert(!op1->type->is_matrix()); + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + assert(op0->type->vector_elements == op1->type->vector_elements); + this->type = op0->type; + } + break; + + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_greater: + assert(op0->type == op1->type); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_binop_dot: + this->type = op0->type->get_base_type(); + break; + - case ir_binop_pack_half_2x16_split: - this->type = glsl_type::uint_type; - break; - + case ir_binop_imul_high: + case ir_binop_carry: + case ir_binop_borrow: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_ldexp: + case ir_binop_interpolate_at_offset: + case 
ir_binop_interpolate_at_sample: + this->type = op0->type; + break; + + case ir_binop_vector_extract: + this->type = op0->type->get_scalar_type(); + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } + } + + ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2) + : ir_rvalue(ir_type_expression) + { + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = NULL; + + assert(op > ir_last_binop && op <= ir_last_triop); + + switch (this->operation) { + case ir_triop_fma: + case ir_triop_lrp: + case ir_triop_bitfield_extract: + case ir_triop_vector_insert: + this->type = op0->type; + break; + + case ir_triop_csel: + this->type = op1->type; + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } + } + + unsigned int + ir_expression::get_num_operands(ir_expression_operation op) + { + assert(op <= ir_last_opcode); + + if (op <= ir_last_unop) + return 1; + + if (op <= ir_last_binop) + return 2; + + if (op <= ir_last_triop) + return 3; + + if (op <= ir_last_quadop) + return 4; + + assert(false); + return 0; + } + + static const char *const operator_strs[] = { + "~", + "!", + "neg", + "abs", + "sign", + "rcp", + "rsq", + "sqrt", + "exp", + "log", + "exp2", + "log2", + "f2i", + "f2u", + "i2f", + "f2b", + "b2f", + "i2b", + "b2i", + "u2f", + "i2u", + "u2i", + "d2f", + "f2d", + "d2i", + "i2d", + "d2u", + "u2d", + "d2b", + "bitcast_i2f", + "bitcast_f2i", + "bitcast_u2f", + "bitcast_f2u", + "trunc", + "ceil", + "floor", + "fract", + "round_even", + "sin", + "cos", + "dFdx", + "dFdxCoarse", + "dFdxFine", + "dFdy", + "dFdyCoarse", + "dFdyFine", + "packSnorm2x16", + "packSnorm4x8", + "packUnorm2x16", + "packUnorm4x8", + "packHalf2x16", + "unpackSnorm2x16", + "unpackSnorm4x8", + "unpackUnorm2x16", + "unpackUnorm4x8", + "unpackHalf2x16", - "unpackHalf2x16_split_x", - "unpackHalf2x16_split_y", + "bitfield_reverse", + "bit_count", + "find_msb", + "find_lsb", + "sat", + "packDouble2x32", + "unpackDouble2x32", + "frexp_sig", + "frexp_exp", + "noise", + "subroutine_to_int", + "interpolate_at_centroid", + "get_buffer_size", + "ssbo_unsized_array_length", + "+", + "-", + "*", + "imul_high", + "/", + "carry", + "borrow", + "%", + "<", + ">", + "<=", + ">=", + "==", + "!=", + "all_equal", + "any_nequal", + "<<", + ">>", + "&", + "^", + "|", + "&&", + "^^", + "||", + "dot", + "min", + "max", + "pow", - "packHalf2x16_split", + "ubo_load", + "ldexp", + "vector_extract", + "interpolate_at_offset", + "interpolate_at_sample", + "fma", + "lrp", + "csel", + "bitfield_extract", + "vector_insert", + "bitfield_insert", + "vector", + }; + + const char *ir_expression::operator_string(ir_expression_operation op) + { + assert((unsigned int) op < ARRAY_SIZE(operator_strs)); + assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1)); + return operator_strs[op]; + } + + const char *ir_expression::operator_string() + { + return operator_string(this->operation); + } + + const char* + depth_layout_string(ir_depth_layout layout) + { + switch(layout) { + case ir_depth_layout_none: return ""; + case ir_depth_layout_any: return "depth_any"; + case ir_depth_layout_greater: return "depth_greater"; + case ir_depth_layout_less: return "depth_less"; + case ir_depth_layout_unchanged: return "depth_unchanged"; + + default: + assert(0); + return ""; + } + } + + 
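The operator_strs table above is kept parallel to ir_expression_operation: operator_string() indexes it directly with the enum value, the ARRAY_SIZE assertions guard against the enum and the table drifting apart, and get_operator() just below does the reverse mapping with a linear scan that returns -1 for unknown names. A reduced standalone sketch of that scheme (demo_op, demo_strs and the helpers are invented names, not Mesa API):

// Sketch only: a string table kept parallel to an operation enum.
#include <cstring>

enum demo_op : int { demo_neg, demo_add, demo_mul, demo_op_count };

static const char *const demo_strs[] = { "neg", "+", "*" };
static_assert(sizeof(demo_strs) / sizeof(demo_strs[0]) == demo_op_count,
              "string table out of sync with the enum");

/* Forward mapping: the enum value is the index. */
static const char *demo_to_string(demo_op op)
{
   return demo_strs[op];
}

/* Reverse mapping: linear scan, -1 when the name is unknown, mirroring
 * the convention used by ir_expression::get_operator.
 */
static demo_op demo_from_string(const char *str)
{
   for (int op = 0; op < demo_op_count; op++) {
      if (std::strcmp(str, demo_strs[op]) == 0)
         return (demo_op) op;
   }
   return (demo_op) -1;
}

int main()
{
   return (demo_from_string("+") == demo_add &&
           std::strcmp(demo_to_string(demo_mul), "*") == 0) ? 0 : 1;
}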
ir_expression_operation + ir_expression::get_operator(const char *str) + { + const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]); + for (int op = 0; op < operator_count; op++) { + if (strcmp(str, operator_strs[op]) == 0) + return (ir_expression_operation) op; + } + return (ir_expression_operation) -1; + } + + ir_variable * + ir_expression::variable_referenced() const + { + switch (operation) { + case ir_binop_vector_extract: + case ir_triop_vector_insert: + /* We get these for things like a[0] where a is a vector type. In these + * cases we want variable_referenced() to return the actual vector + * variable this is wrapping. + */ + return operands[0]->variable_referenced(); + default: + return ir_rvalue::variable_referenced(); + } + } + + ir_constant::ir_constant() + : ir_rvalue(ir_type_constant) + { + } + + ir_constant::ir_constant(const struct glsl_type *type, + const ir_constant_data *data) + : ir_rvalue(ir_type_constant) + { + assert((type->base_type >= GLSL_TYPE_UINT) + && (type->base_type <= GLSL_TYPE_BOOL)); + + this->type = type; + memcpy(& this->value, data, sizeof(this->value)); + } + + ir_constant::ir_constant(float f, unsigned vector_elements) + : ir_rvalue(ir_type_constant) + { + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.f[i] = f; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.f[i] = 0; + } + } + + ir_constant::ir_constant(double d, unsigned vector_elements) + : ir_rvalue(ir_type_constant) + { + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.d[i] = d; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.d[i] = 0.0; + } + } + + ir_constant::ir_constant(unsigned int u, unsigned vector_elements) + : ir_rvalue(ir_type_constant) + { + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.u[i] = u; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.u[i] = 0; + } + } + + ir_constant::ir_constant(int integer, unsigned vector_elements) + : ir_rvalue(ir_type_constant) + { + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.i[i] = integer; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.i[i] = 0; + } + } + + ir_constant::ir_constant(bool b, unsigned vector_elements) + : ir_rvalue(ir_type_constant) + { + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.b[i] = b; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.b[i] = false; + } + } + + ir_constant::ir_constant(const ir_constant *c, unsigned i) + : ir_rvalue(ir_type_constant) + { + this->type = c->type->get_base_type(); + + switch (this->type->base_type) { + case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break; + case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break; + case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break; + case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break; + case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break; + default: assert(!"Should not get 
here."); break; + } + } + + ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list) + : ir_rvalue(ir_type_constant) + { + this->type = type; + + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + if (type->is_array()) { + this->array_elements = ralloc_array(this, ir_constant *, type->length); + unsigned i = 0; + foreach_in_list(ir_constant, value, value_list) { + assert(value->as_constant() != NULL); + + this->array_elements[i++] = value; + } + return; + } + + /* If the constant is a record, the types of each of the entries in + * value_list must be a 1-for-1 match with the structure components. Each + * entry must also be a constant. Just move the nodes from the value_list + * to the list in the ir_constant. + */ + /* FINISHME: Should there be some type checking and / or assertions here? */ + /* FINISHME: Should the new constant take ownership of the nodes from + * FINISHME: value_list, or should it make copies? + */ + if (type->is_record()) { + value_list->move_nodes_to(& this->components); + return; + } + + for (unsigned i = 0; i < 16; i++) { + this->value.u[i] = 0; + } + + ir_constant *value = (ir_constant *) (value_list->head); + + /* Constructors with exactly one scalar argument are special for vectors + * and matrices. For vectors, the scalar value is replicated to fill all + * the components. For matrices, the scalar fills the components of the + * diagonal while the rest is filled with 0. + */ + if (value->type->is_scalar() && value->next->is_tail_sentinel()) { + if (type->is_matrix()) { + /* Matrix - fill diagonal (rest is already set to 0) */ + assert(type->base_type == GLSL_TYPE_FLOAT || + type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned i = 0; i < type->matrix_columns; i++) { + if (type->base_type == GLSL_TYPE_FLOAT) + this->value.f[i * type->vector_elements + i] = + value->value.f[0]; + else + this->value.d[i * type->vector_elements + i] = + value->value.d[0]; + } + } else { + /* Vector or scalar - fill all components */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + for (unsigned i = 0; i < type->components(); i++) + this->value.u[i] = value->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + for (unsigned i = 0; i < type->components(); i++) + this->value.f[i] = value->value.f[0]; + break; + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < type->components(); i++) + this->value.d[i] = value->value.d[0]; + break; + case GLSL_TYPE_BOOL: + for (unsigned i = 0; i < type->components(); i++) + this->value.b[i] = value->value.b[0]; + break; + default: + assert(!"Should not get here."); + break; + } + } + return; + } + + if (type->is_matrix() && value->type->is_matrix()) { + assert(value->next->is_tail_sentinel()); + + /* From section 5.4.2 of the GLSL 1.20 spec: + * "If a matrix is constructed from a matrix, then each component + * (column i, row j) in the result that has a corresponding component + * (column i, row j) in the argument will be initialized from there." + */ + unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns); + unsigned rows = MIN2(type->vector_elements, value->type->vector_elements); + for (unsigned i = 0; i < cols; i++) { + for (unsigned j = 0; j < rows; j++) { + const unsigned src = i * value->type->vector_elements + j; + const unsigned dst = i * type->vector_elements + j; + this->value.f[dst] = value->value.f[src]; + } + } + + /* "All other components will be initialized to the identity matrix." 
*/ + for (unsigned i = cols; i < type->matrix_columns; i++) + this->value.f[i * type->vector_elements + i] = 1.0; + + return; + } + + /* Use each component from each entry in the value_list to initialize one + * component of the constant being constructed. + */ + for (unsigned i = 0; i < type->components(); /* empty */) { + assert(value->as_constant() != NULL); + assert(!value->is_tail_sentinel()); + + for (unsigned j = 0; j < value->type->components(); j++) { + switch (type->base_type) { + case GLSL_TYPE_UINT: + this->value.u[i] = value->get_uint_component(j); + break; + case GLSL_TYPE_INT: + this->value.i[i] = value->get_int_component(j); + break; + case GLSL_TYPE_FLOAT: + this->value.f[i] = value->get_float_component(j); + break; + case GLSL_TYPE_BOOL: + this->value.b[i] = value->get_bool_component(j); + break; + case GLSL_TYPE_DOUBLE: + this->value.d[i] = value->get_double_component(j); + break; + default: + /* FINISHME: What to do? Exceptions are not the answer. + */ + break; + } + + i++; + if (i >= type->components()) + break; + } + + value = (ir_constant *) value->next; + } + } + + ir_constant * + ir_constant::zero(void *mem_ctx, const glsl_type *type) + { + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + ir_constant *c = new(mem_ctx) ir_constant; + c->type = type; + memset(&c->value, 0, sizeof(c->value)); + + if (type->is_array()) { + c->array_elements = ralloc_array(c, ir_constant *, type->length); + + for (unsigned i = 0; i < type->length; i++) + c->array_elements[i] = ir_constant::zero(c, type->fields.array); + } + + if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type); + c->components.push_tail(comp); + } + } + + return c; + } + + bool + ir_constant::get_bool_component(unsigned i) const + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i] != 0; + case GLSL_TYPE_INT: return this->value.i[i] != 0; + case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0; + case GLSL_TYPE_BOOL: return this->value.b[i]; + case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return false; + } + + float + ir_constant::get_float_component(unsigned i) const + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (float) this->value.u[i]; + case GLSL_TYPE_INT: return (float) this->value.i[i]; + case GLSL_TYPE_FLOAT: return this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f; + case GLSL_TYPE_DOUBLE: return (float) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0.0; + } + + double + ir_constant::get_double_component(unsigned i) const + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (double) this->value.u[i]; + case GLSL_TYPE_INT: return (double) this->value.i[i]; + case GLSL_TYPE_FLOAT: return (double) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0; + case GLSL_TYPE_DOUBLE: return this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. 
+ */ + return 0.0; + } + + int + ir_constant::get_int_component(unsigned i) const + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (int) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (int) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; + } + + unsigned + ir_constant::get_uint_component(unsigned i) const + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; + } + + ir_constant * + ir_constant::get_array_element(unsigned i) const + { + assert(this->type->is_array()); + + /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec: + * + * "Behavior is undefined if a shader subscripts an array with an index + * less than 0 or greater than or equal to the size the array was + * declared with." + * + * Most out-of-bounds accesses are removed before things could get this far. + * There are cases where non-constant array index values can get constant + * folded. + */ + if (int(i) < 0) + i = 0; + else if (i >= this->type->length) + i = this->type->length - 1; + + return array_elements[i]; + } + + ir_constant * + ir_constant::get_record_field(const char *name) + { + int idx = this->type->field_index(name); + + if (idx < 0) + return NULL; + + if (this->components.is_empty()) + return NULL; + + exec_node *node = this->components.head; + for (int i = 0; i < idx; i++) { + node = node->next; + + /* If the end of the list is encountered before the element matching the + * requested field is found, return NULL. 
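+ * + * For illustration (hypothetical types, not from the original comment): for + * an ir_constant of type struct { float a; vec3 b; }, get_record_field("b") + * skips the "a" component and returns the ir_constant holding the vec3 + * value, while an unknown field name makes field_index() negative and NULL + * is returned.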
+ */ + if (node->is_tail_sentinel()) + return NULL; + } + + return (ir_constant *) node; + } + + void + ir_constant::copy_offset(ir_constant *src, int offset) + { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: { + unsigned int size = src->type->components(); + assert (size <= this->type->components() - offset); + for (unsigned int i=0; i<size; i++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + value.u[i+offset] = src->get_uint_component(i); + break; + case GLSL_TYPE_INT: + value.i[i+offset] = src->get_int_component(i); + break; + case GLSL_TYPE_FLOAT: + value.f[i+offset] = src->get_float_component(i); + break; + case GLSL_TYPE_BOOL: + value.b[i+offset] = src->get_bool_component(i); + break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(i); + break; + default: // Shut up the compiler + break; + } + } + break; + } + + case GLSL_TYPE_STRUCT: { + assert (src->type == this->type); + this->components.make_empty(); + foreach_in_list(ir_constant, orig, &src->components) { + this->components.push_tail(orig->clone(this, NULL)); + } + break; + } + + case GLSL_TYPE_ARRAY: { + assert (src->type == this->type); + for (unsigned i = 0; i < this->type->length; i++) { + this->array_elements[i] = src->array_elements[i]->clone(this, NULL); + } + break; + } + + default: + assert(!"Should not get here."); + break; + } + } + + void + ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask) + { + assert (!type->is_array() && !type->is_record()); + + if (!type->is_vector() && !type->is_matrix()) { + offset = 0; + mask = 1; + } + + int id = 0; + for (int i=0; i<4; i++) { + if (mask & (1 << i)) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + value.u[i+offset] = src->get_uint_component(id++); + break; + case GLSL_TYPE_INT: + value.i[i+offset] = src->get_int_component(id++); + break; + case GLSL_TYPE_FLOAT: + value.f[i+offset] = src->get_float_component(id++); + break; + case GLSL_TYPE_BOOL: + value.b[i+offset] = src->get_bool_component(id++); + break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(id++); + break; + default: + assert(!"Should not get here."); + return; + } + } + } + } + + bool + ir_constant::has_value(const ir_constant *c) const + { + if (this->type != c->type) + return false; + + if (this->type->is_array()) { + for (unsigned i = 0; i < this->type->length; i++) { + if (!this->array_elements[i]->has_value(c->array_elements[i])) + return false; + } + return true; + } + + if (this->type->base_type == GLSL_TYPE_STRUCT) { + const exec_node *a_node = this->components.head; + const exec_node *b_node = c->components.head; + + while (!a_node->is_tail_sentinel()) { + assert(!b_node->is_tail_sentinel()); + + const ir_constant *const a_field = (ir_constant *) a_node; + const ir_constant *const b_field = (ir_constant *) b_node; + + if (!a_field->has_value(b_field)) + return false; + + a_node = a_node->next; + b_node = b_node->next; + } + + return true; + } + + for (unsigned i = 0; i < this->type->components(); i++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + if (this->value.u[i] != c->value.u[i]) + return false; + break; + case GLSL_TYPE_INT: + if (this->value.i[i] != c->value.i[i]) + return false; + break; + case GLSL_TYPE_FLOAT: + if (this->value.f[i] != c->value.f[i]) + return false; + break; + case GLSL_TYPE_BOOL: + if (this->value.b[i] != c->value.b[i]) + return false; + break; + case GLSL_TYPE_DOUBLE: + if
(this->value.d[i] != c->value.d[i]) + return false; + break; + default: + assert(!"Should not get here."); + return false; + } + } + + return true; + } + + bool + ir_constant::is_value(float f, int i) const + { + if (!this->type->is_scalar() && !this->type->is_vector()) + return false; + + /* Only accept boolean values for 0/1. */ + if (int(bool(i)) != i && this->type->is_boolean()) + return false; + + for (unsigned c = 0; c < this->type->vector_elements; c++) { + switch (this->type->base_type) { + case GLSL_TYPE_FLOAT: + if (this->value.f[c] != f) + return false; + break; + case GLSL_TYPE_INT: + if (this->value.i[c] != i) + return false; + break; + case GLSL_TYPE_UINT: + if (this->value.u[c] != unsigned(i)) + return false; + break; + case GLSL_TYPE_BOOL: + if (this->value.b[c] != bool(i)) + return false; + break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[c] != double(f)) + return false; + break; + default: + /* The only other base types are structures, arrays, and samplers. + * Samplers cannot be constants, and the others should have been + * filtered out above. + */ + assert(!"Should not get here."); + return false; + } + } + + return true; + } + + bool + ir_constant::is_zero() const + { + return is_value(0.0, 0); + } + + bool + ir_constant::is_one() const + { + return is_value(1.0, 1); + } + + bool + ir_constant::is_negative_one() const + { + return is_value(-1.0, -1); + } + + bool + ir_constant::is_uint16_constant() const + { + if (!type->is_integer()) + return false; + + return value.u[0] < (1 << 16); + } + + ir_loop::ir_loop() + : ir_instruction(ir_type_loop) + { + } + + + ir_dereference_variable::ir_dereference_variable(ir_variable *var) + : ir_dereference(ir_type_dereference_variable) + { + assert(var != NULL); + + this->var = var; + this->type = var->type; + } + + + ir_dereference_array::ir_dereference_array(ir_rvalue *value, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) + { + this->array_index = array_index; + this->set_array(value); + } + + + ir_dereference_array::ir_dereference_array(ir_variable *var, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) + { + void *ctx = ralloc_parent(var); + + this->array_index = array_index; + this->set_array(new(ctx) ir_dereference_variable(var)); + } + + + void + ir_dereference_array::set_array(ir_rvalue *value) + { + assert(value != NULL); + + this->array = value; + + const glsl_type *const vt = this->array->type; + + if (vt->is_array()) { + type = vt->fields.array; + } else if (vt->is_matrix()) { + type = vt->column_type(); + } else if (vt->is_vector()) { + type = vt->get_base_type(); + } + } + + + ir_dereference_record::ir_dereference_record(ir_rvalue *value, + const char *field) + : ir_dereference(ir_type_dereference_record) + { + assert(value != NULL); + + this->record = value; + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); + } + + + ir_dereference_record::ir_dereference_record(ir_variable *var, + const char *field) + : ir_dereference(ir_type_dereference_record) + { + void *ctx = ralloc_parent(var); + + this->record = new(ctx) ir_dereference_variable(var); + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); + } + + bool + ir_dereference::is_lvalue() const + { + ir_variable *var = this->variable_referenced(); + + /* Every l-value derference chain eventually ends in a variable. 
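+ * + * For example (illustrative addition): in a GLSL assignment such as + * "a[i].f = x;" the left-hand dereference chain ends at the variable "a"; + * if that variable is read-only (e.g. a uniform), the dereference is not an + * l-value.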
+ */ + if ((var == NULL) || var->data.read_only) + return false; + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if (this->type->contains_opaque()) + return false; + + return true; + } + + + static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; + + const char *ir_texture::opcode_string() + { + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); + return tex_opcode_strs[op]; + } + + ir_texture_opcode + ir_texture::get_opcode(const char *str) + { + const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]); + for (int op = 0; op < count; op++) { + if (strcmp(str, tex_opcode_strs[op]) == 0) + return (ir_texture_opcode) op; + } + return (ir_texture_opcode) -1; + } + + + void + ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) + { + assert(sampler != NULL); + assert(type != NULL); + this->sampler = sampler; + this->type = type; + + if (this->op == ir_txs || this->op == ir_query_levels || + this->op == ir_texture_samples) { + assert(type->base_type == GLSL_TYPE_INT); + } else if (this->op == ir_lod) { + assert(type->vector_elements == 2); + assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); + } else { + assert(sampler->type->sampler_type == (int) type->base_type); + if (sampler->type->sampler_shadow) + assert(type->vector_elements == 4 || type->vector_elements == 1); + else + assert(type->vector_elements == 4); + } + } + + + void + ir_swizzle::init_mask(const unsigned *comp, unsigned count) + { + assert((count >= 1) && (count <= 4)); + + memset(&this->mask, 0, sizeof(this->mask)); + this->mask.num_components = count; + + unsigned dup_mask = 0; + switch (count) { + case 4: + assert(comp[3] <= 3); + dup_mask |= (1U << comp[3]) + & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2])); + this->mask.w = comp[3]; + + case 3: + assert(comp[2] <= 3); + dup_mask |= (1U << comp[2]) + & ((1U << comp[0]) | (1U << comp[1])); + this->mask.z = comp[2]; + + case 2: + assert(comp[1] <= 3); + dup_mask |= (1U << comp[1]) + & ((1U << comp[0])); + this->mask.y = comp[1]; + + case 1: + assert(comp[0] <= 3); + this->mask.x = comp[0]; + } + + this->mask.has_duplicates = dup_mask != 0; + + /* Based on the number of elements in the swizzle and the base type + * (i.e., float, int, unsigned, or bool) of the vector being swizzled, + * generate the type of the resulting value. 
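+ * + * For example (illustrative addition): a two-component swizzle such as .xy + * applied to an ivec4 yields an ivec2, and a single-component swizzle such + * as .z applied to a vec4 yields a plain float scalar.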
+ */ + type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1); + } + + ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z, + unsigned w, unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) + { + const unsigned components[4] = { x, y, z, w }; + this->init_mask(components, count); + } + + ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp, + unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) + { + this->init_mask(comp, count); + } + + ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask) + : ir_rvalue(ir_type_swizzle) + { + this->val = val; + this->mask = mask; + this->type = glsl_type::get_instance(val->type->base_type, + mask.num_components, 1); + } + + #define X 1 + #define R 5 + #define S 9 + #define I 13 + + ir_swizzle * + ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) + { + void *ctx = ralloc_parent(val); + + /* For each possible swizzle character, this table encodes the value in + * \c idx_map that represents the 0th element of the vector. For invalid + * swizzle characters (e.g., 'k'), a special value is used that will allow + * detection of errors. + */ + static const unsigned char base_idx[26] = { + /* a b c d e f g h i j k l m */ + R, R, I, I, I, I, R, I, I, I, I, I, I, + /* n o p q r s t u v w x y z */ + I, I, S, S, R, S, S, I, I, X, X, X, X + }; + + /* Each valid swizzle character has an entry in the previous table. This + * table encodes the base index encoded in the previous table plus the actual + * index of the swizzle character. When processing swizzles, the first + * character in the string is indexed in the previous table. Each character + * in the string is indexed in this table, and the value found there has the + * value form the first table subtracted. The result must be on the range + * [0,3]. + * + * For example, the string "wzyx" will get X from the first table. Each of + * the charcaters will get X+3, X+2, X+1, and X+0 from this table. After + * subtraction, the swizzle values are { 3, 2, 1, 0 }. + * + * The string "wzrg" will get X from the first table. Each of the characters + * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the + * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range + * [0,3], the error is detected. + */ + static const unsigned char idx_map[26] = { + /* a b c d e f g h i j k l m */ + R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0, + /* n o p q r s t u v w x y z */ + 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2 + }; + + int swiz_idx[4] = { 0, 0, 0, 0 }; + unsigned i; + + + /* Validate the first character in the swizzle string and look up the base + * index value as described above. + */ + if ((str[0] < 'a') || (str[0] > 'z')) + return NULL; + + const unsigned base = base_idx[str[0] - 'a']; + + + for (i = 0; (i < 4) && (str[i] != '\0'); i++) { + /* Validate the next character, and, as described above, convert it to a + * swizzle index. 
+ */ + if ((str[i] < 'a') || (str[i] > 'z')) + return NULL; + + swiz_idx[i] = idx_map[str[i] - 'a'] - base; + if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length)) + return NULL; + } + + if (str[i] != '\0') + return NULL; + + return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2], + swiz_idx[3], i); + } + + #undef X + #undef R + #undef S + #undef I + + ir_variable * + ir_swizzle::variable_referenced() const + { + return this->val->variable_referenced(); + } + + + bool ir_variable::temporaries_allocate_names = false; + + const char ir_variable::tmp_name[] = "compiler_temp"; + + ir_variable::ir_variable(const struct glsl_type *type, const char *name, + ir_variable_mode mode) + : ir_instruction(ir_type_variable) + { + this->type = type; + + if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names) + name = NULL; + + /* The ir_variable clone method may call this constructor with name set to + * tmp_name. + */ + assert(name != NULL + || mode == ir_var_temporary + || mode == ir_var_function_in + || mode == ir_var_function_out + || mode == ir_var_function_inout); + assert(name != ir_variable::tmp_name + || mode == ir_var_temporary); + if (mode == ir_var_temporary + && (name == NULL || name == ir_variable::tmp_name)) { + this->name = ir_variable::tmp_name; + } else { + this->name = ralloc_strdup(this, name); + } + + this->u.max_ifc_array_access = NULL; + + this->data.explicit_location = false; + this->data.has_initializer = false; + this->data.location = -1; + this->data.location_frac = 0; + this->data.binding = 0; + this->data.warn_extension_index = 0; + this->constant_value = NULL; + this->constant_initializer = NULL; + this->data.origin_upper_left = false; + this->data.pixel_center_integer = false; + this->data.depth_layout = ir_depth_layout_none; + this->data.used = false; + this->data.always_active_io = false; + this->data.read_only = false; + this->data.centroid = false; + this->data.sample = false; + this->data.patch = false; + this->data.invariant = false; + this->data.how_declared = ir_var_declared_normally; + this->data.mode = mode; + this->data.interpolation = INTERP_QUALIFIER_NONE; + this->data.max_array_access = 0; + this->data.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; + this->data.image_read_only = false; + this->data.image_write_only = false; + this->data.image_coherent = false; + this->data.image_volatile = false; + this->data.image_restrict = false; + this->data.from_ssbo_unsized_array = false; + + if (type != NULL) { + if (type->base_type == GLSL_TYPE_SAMPLER) + this->data.read_only = true; + + if (type->is_interface()) + this->init_interface_type(type); + else if (type->without_array()->is_interface()) + this->init_interface_type(type->without_array()); + } + } + + + const char * + interpolation_string(unsigned interpolation) + { + switch (interpolation) { + case INTERP_QUALIFIER_NONE: return "no"; + case INTERP_QUALIFIER_SMOOTH: return "smooth"; + case INTERP_QUALIFIER_FLAT: return "flat"; + case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective"; + } + + assert(!"Should not get here."); + return ""; + } + + + glsl_interp_qualifier + ir_variable::determine_interpolation_mode(bool flat_shade) + { + if (this->data.interpolation != INTERP_QUALIFIER_NONE) + return (glsl_interp_qualifier) this->data.interpolation; + int location = this->data.location; + bool is_gl_Color = + location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; + if (flat_shade && is_gl_Color) + return INTERP_QUALIFIER_FLAT; + else + return 
INTERP_QUALIFIER_SMOOTH; + } + + const char *const ir_variable::warn_extension_table[] = { + "", + "GL_ARB_shader_stencil_export", + "GL_AMD_shader_stencil_export", + }; + + void + ir_variable::enable_extension_warning(const char *extension) + { + for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) { + if (strcmp(warn_extension_table[i], extension) == 0) { + this->data.warn_extension_index = i; + return; + } + } + + assert(!"Should not get here."); + this->data.warn_extension_index = 0; + } + + const char * + ir_variable::get_extension_warning() const + { + return this->data.warn_extension_index == 0 + ? NULL : warn_extension_table[this->data.warn_extension_index]; + } + + ir_function_signature::ir_function_signature(const glsl_type *return_type, + builtin_available_predicate b) + : ir_instruction(ir_type_function_signature), + return_type(return_type), is_defined(false), is_intrinsic(false), + builtin_avail(b), _function(NULL) + { + this->origin = NULL; + } + + + bool + ir_function_signature::is_builtin() const + { + return builtin_avail != NULL; + } + + + bool + ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const + { + /* We can't call the predicate without a state pointer, so just say that + * the signature is available. At compile time, we need the filtering, + * but also receive a valid state pointer. At link time, we're resolving + * imported built-in prototypes to their definitions, which will always + * be an exact match. So we can skip the filtering. + */ + if (state == NULL) + return true; + + assert(builtin_avail != NULL); + return builtin_avail(state); + } + + + static bool + modes_match(unsigned a, unsigned b) + { + if (a == b) + return true; + + /* Accept "in" vs. "const in" */ + if ((a == ir_var_const_in && b == ir_var_function_in) || + (b == ir_var_const_in && a == ir_var_function_in)) + return true; + + return false; + } + + + const char * + ir_function_signature::qualifiers_match(exec_list *params) + { + /* check that the qualifiers match. */ + foreach_two_lists(a_node, &this->parameters, b_node, params) { + ir_variable *a = (ir_variable *) a_node; + ir_variable *b = (ir_variable *) b_node; + + if (a->data.read_only != b->data.read_only || + !modes_match(a->data.mode, b->data.mode) || + a->data.interpolation != b->data.interpolation || + a->data.centroid != b->data.centroid || + a->data.sample != b->data.sample || + a->data.patch != b->data.patch || + a->data.image_read_only != b->data.image_read_only || + a->data.image_write_only != b->data.image_write_only || + a->data.image_coherent != b->data.image_coherent || + a->data.image_volatile != b->data.image_volatile || + a->data.image_restrict != b->data.image_restrict) { + + /* parameter a's qualifiers don't match */ + return a->name; + } + } + return NULL; + } + + + void + ir_function_signature::replace_parameters(exec_list *new_params) + { + /* Destroy all of the previous parameter information. If the previous + * parameter information comes from the function prototype, it may either + * specify incorrect parameter names or not have names at all. 
+ */ + new_params->move_nodes_to(&parameters); + } + + + ir_function::ir_function(const char *name) + : ir_instruction(ir_type_function) + { + this->subroutine_index = -1; + this->name = ralloc_strdup(this, name); + } + + + bool + ir_function::has_user_signature() + { + foreach_in_list(ir_function_signature, sig, &this->signatures) { + if (!sig->is_builtin()) + return true; + } + return false; + } + + + ir_rvalue * + ir_rvalue::error_value(void *mem_ctx) + { + ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset); + + v->type = glsl_type::error_type; + return v; + } + + + void + visit_exec_list(exec_list *list, ir_visitor *visitor) + { + foreach_in_list_safe(ir_instruction, node, list) { + node->accept(visitor); + } + } + + + static void + steal_memory(ir_instruction *ir, void *new_ctx) + { + ir_variable *var = ir->as_variable(); + ir_function *fn = ir->as_function(); + ir_constant *constant = ir->as_constant(); + if (var != NULL && var->constant_value != NULL) + steal_memory(var->constant_value, ir); + + if (var != NULL && var->constant_initializer != NULL) + steal_memory(var->constant_initializer, ir); + + if (fn != NULL && fn->subroutine_types) + ralloc_steal(new_ctx, fn->subroutine_types); + + /* The components of aggregate constants are not visited by the normal + * visitor, so steal their values by hand. + */ + if (constant != NULL) { + if (constant->type->is_record()) { + foreach_in_list(ir_constant, field, &constant->components) { + steal_memory(field, ir); + } + } else if (constant->type->is_array()) { + for (unsigned int i = 0; i < constant->type->length; i++) { + steal_memory(constant->array_elements[i], ir); + } + } + } + + ralloc_steal(new_ctx, ir); + } + + + void + reparent_ir(exec_list *list, void *mem_ctx) + { + foreach_in_list(ir_instruction, node, list) { + visit_tree(node, steal_memory, mem_ctx); + } + } + + + static ir_rvalue * + try_min_one(ir_rvalue *ir) + { + ir_expression *expr = ir->as_expression(); + + if (!expr || expr->operation != ir_binop_min) + return NULL; + + if (expr->operands[0]->is_one()) + return expr->operands[1]; + + if (expr->operands[1]->is_one()) + return expr->operands[0]; + + return NULL; + } + + static ir_rvalue * + try_max_zero(ir_rvalue *ir) + { + ir_expression *expr = ir->as_expression(); + + if (!expr || expr->operation != ir_binop_max) + return NULL; + + if (expr->operands[0]->is_zero()) + return expr->operands[1]; + + if (expr->operands[1]->is_zero()) + return expr->operands[0]; + + return NULL; + } + + ir_rvalue * + ir_rvalue::as_rvalue_to_saturate() + { + ir_expression *expr = this->as_expression(); + + if (!expr) + return NULL; + + ir_rvalue *max_zero = try_max_zero(expr); + if (max_zero) { + return try_min_one(max_zero); + } else { + ir_rvalue *min_one = try_min_one(expr); + if (min_one) { + return try_max_zero(min_one); + } + } + + return NULL; + } + + + unsigned + vertices_per_prim(GLenum prim) + { + switch (prim) { + case GL_POINTS: + return 1; + case GL_LINES: + return 2; + case GL_TRIANGLES: + return 3; + case GL_LINES_ADJACENCY: + return 4; + case GL_TRIANGLES_ADJACENCY: + return 6; + default: + assert(!"Bad primitive"); + return 3; + } + } + + /** + * Generate a string describing the mode of a variable + */ + const char * + mode_string(const ir_variable *var) + { + switch (var->data.mode) { + case ir_var_auto: + return (var->data.read_only) ?
"global constant" : "global variable"; + + case ir_var_uniform: + return "uniform"; + + case ir_var_shader_storage: + return "buffer"; + + case ir_var_shader_in: + return "shader input"; + + case ir_var_shader_out: + return "shader output"; + + case ir_var_function_in: + case ir_var_const_in: + return "function input"; + + case ir_var_function_out: + return "function output"; + + case ir_var_function_inout: + return "function inout"; + + case ir_var_system_value: + return "shader input"; + + case ir_var_temporary: + return "compiler temporary"; + + case ir_var_mode_count: + break; + } + + assert(!"Should not get here."); + return "invalid variable"; + } diff --cc src/compiler/glsl/ir.h index 00000000000,bd7b5506343..09e21b22188 mode 000000,100644..100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@@ -1,0 -1,2632 +1,2613 @@@ + /* -*- c++ -*- */ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #pragma once + #ifndef IR_H + #define IR_H + + #include + #include + + #include "util/ralloc.h" + #include "compiler/glsl_types.h" + #include "list.h" + #include "ir_visitor.h" + #include "ir_hierarchical_visitor.h" + #include "main/mtypes.h" + + #ifdef __cplusplus + + /** + * \defgroup IR Intermediate representation nodes + * + * @{ + */ + + /** + * Class tags + * + * Each concrete class derived from \c ir_instruction has a value in this + * enumerant. The value for the type is stored in \c ir_instruction::ir_type + * by the constructor. While using type tags is not very C++, it is extremely + * convenient. For example, during debugging you can simply inspect + * \c ir_instruction::ir_type to find out the actual type of the object. + * + * In addition, it is possible to use a switch-statement based on \c + * \c ir_instruction::ir_type to select different behavior for different object + * types. For functions that have only slight differences for several object + * types, this allows writing very straightforward, readable code. 
+ */ + enum ir_node_type { + ir_type_dereference_array, + ir_type_dereference_record, + ir_type_dereference_variable, + ir_type_constant, + ir_type_expression, + ir_type_swizzle, + ir_type_texture, + ir_type_variable, + ir_type_assignment, + ir_type_call, + ir_type_function, + ir_type_function_signature, + ir_type_if, + ir_type_loop, + ir_type_loop_jump, + ir_type_return, + ir_type_discard, + ir_type_emit_vertex, + ir_type_end_primitive, + ir_type_barrier, + ir_type_max, /**< maximum ir_type enum number, for validation */ + ir_type_unset = ir_type_max + }; + + + /** + * Base class of all IR instructions + */ + class ir_instruction : public exec_node { + public: + enum ir_node_type ir_type; + + /** + * GCC 4.7+ and clang warn when deleting an ir_instruction unless + * there's a virtual destructor present. Because we almost + * universally use ralloc for our memory management of + * ir_instructions, the destructor doesn't need to do any work. + */ + virtual ~ir_instruction() + { + } + + /** ir_print_visitor helper for debugging. */ + void print(void) const; + void fprint(FILE *f) const; + + virtual void accept(ir_visitor *) = 0; + virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0; + virtual ir_instruction *clone(void *mem_ctx, + struct hash_table *ht) const = 0; + + bool is_rvalue() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable || + ir_type == ir_type_constant || + ir_type == ir_type_expression || + ir_type == ir_type_swizzle || + ir_type == ir_type_texture; + } + + bool is_dereference() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable; + } + + bool is_jump() const + { + return ir_type == ir_type_loop_jump || + ir_type == ir_type_return || + ir_type == ir_type_discard; + } + + /** + * \name IR instruction downcast functions + * + * These functions either cast the object to a derived class or return + * \c NULL if the object's type does not match the specified derived class. + * Additional downcast functions will be added as needed. + */ + /*@{*/ + #define AS_BASE(TYPE) \ + class ir_##TYPE *as_##TYPE() \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE *as_##TYPE() const \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } + + AS_BASE(rvalue) + AS_BASE(dereference) + AS_BASE(jump) + #undef AS_BASE + + #define AS_CHILD(TYPE) \ + class ir_##TYPE * as_##TYPE() \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE * as_##TYPE() const \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \ + } + AS_CHILD(variable) + AS_CHILD(function) + AS_CHILD(dereference_array) + AS_CHILD(dereference_variable) + AS_CHILD(dereference_record) + AS_CHILD(expression) + AS_CHILD(loop) + AS_CHILD(assignment) + AS_CHILD(call) + AS_CHILD(return) + AS_CHILD(if) + AS_CHILD(swizzle) + AS_CHILD(texture) + AS_CHILD(constant) + AS_CHILD(discard) + #undef AS_CHILD + /*@}*/ + + /** + * IR equality method: Return true if the referenced instruction would + * return the same value as this one. + * + * This intended to be used for CSE and algebraic optimizations, on rvalues + * in particular. No support for other instruction types (assignments, + * jumps, calls, etc.) is planned. 
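+ * + * For example (illustrative addition, using the printed IR notation that + * appears elsewhere in these comments): two separate (var_ref color) + * dereference nodes that refer to the same ir_variable compare equal, which + * is what lets CSE treat repeated reads of the same value as redundant.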
+ */ + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + protected: + ir_instruction(enum ir_node_type t) + : ir_type(t) + { + } + + private: + ir_instruction() + { + assert(!"Should not get here."); + } + }; + + + /** + * The base class for all "values"/expression trees. + */ + class ir_rvalue : public ir_instruction { + public: + const struct glsl_type *type; + + virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + ir_rvalue *as_rvalue_to_saturate(); + + virtual bool is_lvalue() const + { + return false; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return NULL; + } + + + /** + * If an r-value is a reference to a whole variable, get that variable + * + * \return + * Pointer to a variable that is completely dereferenced by the r-value. If + * the r-value is not a dereference or the dereference does not access the + * entire variable (i.e., it's just one array element, struct field), \c NULL + * is returned. + */ + virtual ir_variable *whole_variable_referenced() + { + return NULL; + } + + /** + * Determine if an r-value has the value zero + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * zero (or \c false for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one + */ + virtual bool is_zero() const; + + /** + * Determine if an r-value has the value one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * one (or \c true for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one + */ + virtual bool is_one() const; + + /** + * Determine if an r-value has the value negative one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * negative one. For boolean types, the result is always \c false. + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one + */ + virtual bool is_negative_one() const; + + /** + * Determine if an r-value is an unsigned integer constant which can be + * stored in 16 bits. + * + * \sa ir_constant::is_uint16_constant. + */ + virtual bool is_uint16_constant() const { return false; } + + /** + * Return a generic value of error_type. + * + * Allocation will be performed with 'mem_ctx' as ralloc owner. + */ + static ir_rvalue *error_value(void *mem_ctx); + + protected: + ir_rvalue(enum ir_node_type t); + }; + + + /** + * Variable storage classes + */ + enum ir_variable_mode { + ir_var_auto = 0, /**< Function local variables and globals. */ + ir_var_uniform, /**< Variable declared as a uniform. */ + ir_var_shader_storage, /**< Variable declared as an ssbo. */ + ir_var_shader_shared, /**< Variable declared as shared. 
*/ + ir_var_shader_in, + ir_var_shader_out, + ir_var_function_in, + ir_var_function_out, + ir_var_function_inout, + ir_var_const_in, /**< "in" param that must be a constant expression */ + ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ + ir_var_temporary, /**< Temporary variable generated during compilation. */ + ir_var_mode_count /**< Number of variable modes */ + }; + + /** + * Enum keeping track of how a variable was declared. For error checking of + * the gl_PerVertex redeclaration rules. + */ + enum ir_var_declaration_type { + /** + * Normal declaration (for most variables, this means an explicit + * declaration. Exception: temporaries are always implicitly declared, but + * they still use ir_var_declared_normally). + * + * Note: an ir_variable that represents a named interface block uses + * ir_var_declared_normally. + */ + ir_var_declared_normally = 0, + + /** + * Variable was explicitly declared (or re-declared) in an unnamed + * interface block. + */ + ir_var_declared_in_block, + + /** + * Variable is an implicitly declared built-in that has not been explicitly + * re-declared by the shader. + */ + ir_var_declared_implicitly, + + /** + * Variable is implicitly generated by the compiler and should not be + * visible via the API. + */ + ir_var_hidden, + }; + + /** + * \brief Layout qualifiers for gl_FragDepth. + * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ + enum ir_depth_layout { + ir_depth_layout_none, /**< No depth layout is specified. */ + ir_depth_layout_any, + ir_depth_layout_greater, + ir_depth_layout_less, + ir_depth_layout_unchanged + }; + + /** + * \brief Convert depth layout qualifier to string. + */ + const char* + depth_layout_string(ir_depth_layout layout); + + /** + * Description of built-in state associated with a uniform + * + * \sa ir_variable::state_slots + */ + struct ir_state_slot { + int tokens[5]; + int swizzle; + }; + + + /** + * Get the string value for an interpolation qualifier + * + * \return The string that would be used in a shader to specify \c + * mode will be returned. + * + * This function is used to generate error messages of the form "shader + * uses %s interpolation qualifier", so in the case where there is no + * interpolation qualifier, it returns "no". + * + * This function should only be used on a shader input or output variable. + */ + const char *interpolation_string(unsigned interpolation); + + + class ir_variable : public ir_instruction { + public: + ir_variable(const struct glsl_type *, const char *, ir_variable_mode); + + virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + + /** + * Determine how this variable should be interpolated based on its + * interpolation qualifier (if present), whether it is gl_Color or + * gl_SecondaryColor, and whether flatshading is enabled in the current GL + * state. + * + * The return value will always be either INTERP_QUALIFIER_SMOOTH, + * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT. + */ + glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); + + /** + * Determine whether or not a variable is part of a uniform or + * shader storage block. 
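+ * + * For example (illustrative addition): a member lowered out of + * "uniform Block { vec4 v; };" satisfies this test, while a plain + * "uniform vec4 v;" in the default uniform block does not, because its + * interface_type is NULL.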
+ */ + inline bool is_in_buffer_block() const + { + return (this->data.mode == ir_var_uniform || + this->data.mode == ir_var_shader_storage) && + this->interface_type != NULL; + } + + /** + * Determine whether or not a variable is part of a shader storage block. + */ + inline bool is_in_shader_storage_block() const + { + return this->data.mode == ir_var_shader_storage && + this->interface_type != NULL; + } + + /** + * Determine whether or not a variable is the declaration of an interface + * block + * + * For the first declaration below, there will be an \c ir_variable named + * "instance" whose type and whose instance_type will be the same + * \cglsl_type. For the second declaration, there will be an \c ir_variable + * named "f" whose type is float and whose instance_type is B2. + * + * "instance" is an interface instance variable, but "f" is not. + * + * uniform B1 { + * float f; + * } instance; + * + * uniform B2 { + * float f; + * }; + */ + inline bool is_interface_instance() const + { + return this->type->without_array() == this->interface_type; + } + + /** + * Set this->interface_type on a newly created variable. + */ + void init_interface_type(const struct glsl_type *type) + { + assert(this->interface_type == NULL); + this->interface_type = type; + if (this->is_interface_instance()) { + this->u.max_ifc_array_access = + rzalloc_array(this, unsigned, type->length); + } + } + + /** + * Change this->interface_type on a variable that previously had a + * different, but compatible, interface_type. This is used during linking + * to set the size of arrays in interface blocks. + */ + void change_interface_type(const struct glsl_type *type) + { + if (this->u.max_ifc_array_access != NULL) { + /* max_ifc_array_access has already been allocated, so make sure the + * new interface has the same number of fields as the old one. + */ + assert(this->interface_type->length == type->length); + } + this->interface_type = type; + } + + /** + * Change this->interface_type on a variable that previously had a + * different, and incompatible, interface_type. This is used during + * compilation to handle redeclaration of the built-in gl_PerVertex + * interface block. + */ + void reinit_interface_type(const struct glsl_type *type) + { + if (this->u.max_ifc_array_access != NULL) { + #ifndef NDEBUG + /* Redeclaring gl_PerVertex is only allowed if none of the built-ins + * it defines have been accessed yet; so it's safe to throw away the + * old max_ifc_array_access pointer, since all of its values are + * zero. + */ + for (unsigned i = 0; i < this->interface_type->length; i++) + assert(this->u.max_ifc_array_access[i] == 0); + #endif + ralloc_free(this->u.max_ifc_array_access); + this->u.max_ifc_array_access = NULL; + } + this->interface_type = NULL; + init_interface_type(type); + } + + const glsl_type *get_interface_type() const + { + return this->interface_type; + } + + /** + * Get the max_ifc_array_access pointer + * + * A "set" function is not needed because the array is dynmically allocated + * as necessary. 
+ */ + inline unsigned *get_max_ifc_array_access() + { + assert(this->data._num_state_slots == 0); + return this->u.max_ifc_array_access; + } + + inline unsigned get_num_state_slots() const + { + assert(!this->is_interface_instance() + || this->data._num_state_slots == 0); + return this->data._num_state_slots; + } + + inline void set_num_state_slots(unsigned n) + { + assert(!this->is_interface_instance() + || n == 0); + this->data._num_state_slots = n; + } + + inline ir_state_slot *get_state_slots() + { + return this->is_interface_instance() ? NULL : this->u.state_slots; + } + + inline const ir_state_slot *get_state_slots() const + { + return this->is_interface_instance() ? NULL : this->u.state_slots; + } + + inline ir_state_slot *allocate_state_slots(unsigned n) + { + assert(!this->is_interface_instance()); + + this->u.state_slots = ralloc_array(this, ir_state_slot, n); + this->data._num_state_slots = 0; + + if (this->u.state_slots != NULL) + this->data._num_state_slots = n; + + return this->u.state_slots; + } + + inline bool is_name_ralloced() const + { + return this->name != ir_variable::tmp_name; + } + + /** + * Enable emitting extension warnings for this variable + */ + void enable_extension_warning(const char *extension); + + /** + * Get the extension warning string for this variable + * + * If warnings are not enabled, \c NULL is returned. + */ + const char *get_extension_warning() const; + + /** + * Declared type of the variable + */ + const struct glsl_type *type; + + /** + * Declared name of the variable + */ + const char *name; + + struct ir_variable_data { + + /** + * Is the variable read-only? + * + * This is set for variables declared as \c const, shader inputs, + * and uniforms. + */ + unsigned read_only:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned invariant:1; + unsigned precise:1; + + /** + * Has this variable been used for reading or writing? + * + * Several GLSL semantic checks require knowledge of whether or not a + * variable has been used. For example, it is an error to redeclare a + * variable as invariant after it has been used. + * + * This is only maintained in the ast_to_hir.cpp path, not in + * Mesa's fixed function or ARB program paths. + */ + unsigned used:1; + + /** + * Has this variable been statically assigned? + * + * This answers whether the variable was assigned in any path of + * the shader during ast_to_hir. This doesn't answer whether it is + * still written after dead code removal, nor is it maintained in + * non-ast_to_hir.cpp (GLSL parsing) paths. + */ + unsigned assigned:1; + + /** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other input/outputs must remains active. + */ + unsigned always_active_io:1; + + /** + * Enum indicating how the variable was declared. See + * ir_var_declaration_type. + * + * This is used to detect certain kinds of illegal variable redeclarations. + */ + unsigned how_declared:2; + + /** + * Storage class of the variable. + * + * \sa ir_variable_mode + */ + unsigned mode:4; + + /** + * Interpolation mode for shader inputs / outputs + * + * \sa ir_variable_interpolation + */ + unsigned interpolation:2; + + /** + * \name ARB_fragment_coord_conventions + * @{ + */ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Was the location explicitly set in the shader? 
+ * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + unsigned explicit_index:1; + + /** + * Was an initial binding explicitly set in the shader? + * + * If so, constant_value contains an integer ir_constant representing the + * initial binding point. + */ + unsigned explicit_binding:1; + + /** + * Does this variable have an initializer? + * + * This is used by the linker to cross-validiate initializers of global + * variables. + */ + unsigned has_initializer:1; + + /** + * Is this variable a generic output or input that has not yet been matched + * up to a variable in another stage of the pipeline? + * + * This is used by the linker as scratch storage while assigning locations + * to generic inputs and outputs. + */ + unsigned is_unmatched_generic_inout:1; + + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + */ + unsigned location_frac:2; + + /** + * Layout of the matrix. Uses glsl_matrix_layout values. + */ + unsigned matrix_layout:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was not an array. + * + * Note that this variable and \c from_named_ifc_block_array will never + * both be non-zero. + */ + unsigned from_named_ifc_block_nonarray:1; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was an array. + * + * Note that this variable and \c from_named_ifc_block_nonarray will never + * both be non-zero. + */ + unsigned from_named_ifc_block_array:1; + + /** + * Non-zero if the variable must be a shader input. This is useful for + * constraints on function parameters. + */ + unsigned must_be_shader_input:1; + + /** + * Output index for dual source blending. + * + * \note + * The GLSL spec only allows the values 0 or 1 for the index in \b dual + * source blending. + */ + unsigned index:1; + + /** + * Precision qualifier. + * + * In desktop GLSL we do not care about precision qualifiers at all, in + * fact, the spec says that precision qualifiers are ignored. + * + * To make things easy, we make it so that this field is always + * GLSL_PRECISION_NONE on desktop shaders. This way all the variables + * have the same precision value and the checks we add in the compiler + * for this field will never break a desktop shader compile. + */ + unsigned precision:2; + + /** + * \brief Layout qualifier for gl_FragDepth. + * + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. + */ + ir_depth_layout depth_layout:3; + + /** + * ARB_shader_image_load_store qualifiers. + */ + unsigned image_read_only:1; /**< "readonly" qualifier. */ + unsigned image_write_only:1; /**< "writeonly" qualifier. */ + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + + /** + * ARB_shader_storage_buffer_object + */ + unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */ + + /** + * Emit a warning if this variable is accessed. + */ + private: + uint8_t warn_extension_index; + + public: + /** Image internal format if specified explicitly, otherwise GL_NONE. 
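+ * + * For example (illustrative addition, assuming the usual mapping of layout + * qualifiers to GL enums): "layout(rgba32f) uniform image2D img;" would + * record GL_RGBA32F here.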
*/ + uint16_t image_format; + + private: + /** + * Number of state slots used + * + * \note + * This could be stored in as few as 7-bits, if necessary. If it is made + * smaller, add an assertion to \c ir_variable::allocate_state_slots to + * be safe. + */ + uint16_t _num_state_slots; + + public: + /** + * Initial binding point for a sampler, atomic, or UBO. + * + * For array types, this represents the binding point for the first element. + */ + int16_t binding; + + /** + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. + * - Non-UBO Uniforms: explicit location until linking then reused to + * store uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. + */ + int location; + + /** + * Vertex stream output identifier. + */ + unsigned stream; + + /** + * Location an atomic counter is stored at. + */ + unsigned offset; + + /** + * Highest element accessed with a constant expression array index + * + * Not used for non-array variables. + */ + unsigned max_array_access; + + /** + * Allow (only) ir_variable direct access private members. + */ + friend class ir_variable; + } data; + + /** + * Value assigned in the initializer of a variable declared "const" + */ + ir_constant *constant_value; + + /** + * Constant expression assigned in the initializer of the variable + * + * \warning + * This field and \c ::constant_value are distinct. Even if the two fields + * refer to constants with the same value, they must point to separate + * objects. + */ + ir_constant *constant_initializer; + + private: + static const char *const warn_extension_table[]; + + union { + /** + * For variables which satisfy the is_interface_instance() predicate, + * this points to an array of integers such that if the ith member of + * the interface block is an array, max_ifc_array_access[i] is the + * maximum array element of that member that has been accessed. If the + * ith member of the interface block is not an array, + * max_ifc_array_access[i] is unused. + * + * For variables whose type is not an interface block, this pointer is + * NULL. + */ + unsigned *max_ifc_array_access; + + /** + * Built-in state that backs this uniform + * + * Once set at variable creation, \c state_slots must remain invariant. + * + * If the variable is not a uniform, \c _num_state_slots will be zero + * and \c state_slots will be \c NULL. + */ + ir_state_slot *state_slots; + } u; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 
+ * + * \sa ir_variable::location + */ + const glsl_type *interface_type; + + /** + * Name used for anonymous compiler temporaries + */ + static const char tmp_name[]; + + public: + /** + * Should the construct keep names for ir_var_temporary variables? + * + * When this global is false, names passed to the constructor for + * \c ir_var_temporary variables will be dropped. Instead, the variable will + * be named "compiler_temp". This name will be in static storage. + * + * \warning + * \b NEVER change the mode of an \c ir_var_temporary. + * + * \warning + * This variable is \b not thread-safe. It is global, \b not + * per-context. It begins life false. A context can, at some point, make + * it true. From that point on, it will be true forever. This should be + * okay since it will only be set true while debugging. + */ + static bool temporaries_allocate_names; + }; + + /** + * A function that returns whether a built-in function is available in the + * current shading language (based on version, ES or desktop, and extensions). + */ + typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *); + + /*@{*/ + /** + * The representation of a function instance; may be the full definition or + * simply a prototype. + */ + class ir_function_signature : public ir_instruction { + /* An ir_function_signature will be part of the list of signatures in + * an ir_function. + */ + public: + ir_function_signature(const glsl_type *return_type, + builtin_available_predicate builtin_avail = NULL); + + virtual ir_function_signature *clone(void *mem_ctx, + struct hash_table *ht) const; + ir_function_signature *clone_prototype(void *mem_ctx, + struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Attempt to evaluate this function as a constant expression, + * given a list of the actual parameters and the variable context. + * Returns NULL for non-built-ins. + */ + ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context); + + /** + * Get the name of the function for which this is a signature + */ + const char *function_name() const; + + /** + * Get a handle to the function for which this is a signature + * + * There is no setter function, this function returns a \c const pointer, + * and \c ir_function_signature::_function is private for a reason. The + * only way to make a connection between a function and function signature + * is via \c ir_function::add_signature. This helps ensure that certain + * invariants (i.e., a function signature is in the list of signatures for + * its \c _function) are met. + * + * \sa ir_function::add_signature + */ + inline const class ir_function *function() const + { + return this->_function; + } + + /** + * Check whether the qualifiers match between this signature's parameters + * and the supplied parameter list. If not, returns the name of the first + * parameter with mismatched qualifiers (for use in error messages). + */ + const char *qualifiers_match(exec_list *params); + + /** + * Replace the current parameter list with the given one. This is useful + * if the current information came from a prototype, and either has invalid + * or missing parameter names. + */ + void replace_parameters(exec_list *new_params); + + /** + * Function return type. + * + * \note This discards the optional precision qualifier. 
+ */ + const struct glsl_type *return_type; + + /** + * List of ir_variable of function parameters. + * + * This represents the storage. The paramaters passed in a particular + * call will be in ir_call::actual_paramaters. + */ + struct exec_list parameters; + + /** Whether or not this function has a body (which may be empty). */ + unsigned is_defined:1; + + /** Whether or not this function signature is a built-in. */ + bool is_builtin() const; + + /** + * Whether or not this function is an intrinsic to be implemented + * by the driver. + */ + bool is_intrinsic; + + /** Whether or not a built-in is available for this shader. */ + bool is_builtin_available(const _mesa_glsl_parse_state *state) const; + + /** Body of instructions in the function. */ + struct exec_list body; + + private: + /** + * A function pointer to a predicate that answers whether a built-in + * function is available in the current shader. NULL if not a built-in. + */ + builtin_available_predicate builtin_avail; + + /** Function of which this signature is one overload. */ + class ir_function *_function; + + /** Function signature of which this one is a prototype clone */ + const ir_function_signature *origin; + + friend class ir_function; + + /** + * Helper function to run a list of instructions for constant + * expression evaluation. + * + * The hash table represents the values of the visible variables. + * There are no scoping issues because the table is indexed on + * ir_variable pointers, not variable names. + * + * Returns false if the expression is not constant, true otherwise, + * and the value in *result if result is non-NULL. + */ + bool constant_expression_evaluate_expression_list(const struct exec_list &body, + struct hash_table *variable_context, + ir_constant **result); + }; + + + /** + * Header for tracking multiple overloaded functions with the same name. + * Contains a list of ir_function_signatures representing each of the + * actual functions. + */ + class ir_function : public ir_instruction { + public: + ir_function(const char *name); + + virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + void add_signature(ir_function_signature *sig) + { + sig->_function = this; + this->signatures.push_tail(sig); + } + + /** + * Find a signature that matches a set of actual parameters, taking implicit + * conversions into account. Also flags whether the match was exact. + */ + ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_param, + bool allow_builtins, + bool *match_is_exact); + + /** + * Find a signature that matches a set of actual parameters, taking implicit + * conversions into account. + */ + ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_param, + bool allow_builtins); + + /** + * Find a signature that exactly matches a set of actual parameters without + * any implicit type conversions. + */ + ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_ps); + + /** + * Name of the function. + */ + const char *name; + + /** Whether or not this function has a signature that isn't a built-in. */ + bool has_user_signature(); + + /** + * List of ir_function_signature for each overloaded function with this name. + */ + struct exec_list signatures; + + /** + * is this function a subroutine type declaration + * e.g. 
subroutine void type1(float arg1); + */ + bool is_subroutine; + + /** + * is this function associated to a subroutine type + * e.g. subroutine (type1, type2) function_name { function_body }; + * would have num_subroutine_types 2, + * and pointers to the type1 and type2 types. + */ + int num_subroutine_types; + const struct glsl_type **subroutine_types; + + int subroutine_index; + }; + + inline const char *ir_function_signature::function_name() const + { + return this->_function->name; + } + /*@}*/ + + + /** + * IR instruction representing high-level if-statements + */ + class ir_if : public ir_instruction { + public: + ir_if(ir_rvalue *condition) + : ir_instruction(ir_type_if), condition(condition) + { + } + + virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *condition; + /** List of ir_instruction for the body of the then branch */ + exec_list then_instructions; + /** List of ir_instruction for the body of the else branch */ + exec_list else_instructions; + }; + + + /** + * IR instruction representing a high-level loop structure. + */ + class ir_loop : public ir_instruction { + public: + ir_loop(); + + virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** List of ir_instruction that make up the body of the loop. */ + exec_list body_instructions; + }; + + + class ir_assignment : public ir_instruction { + public: + ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL); + + /** + * Construct an assignment with an explicit write mask + * + * \note + * Since a write mask is supplied, the LHS must already be a bare + * \c ir_dereference. The cannot be any swizzles in the LHS. + */ + ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition, + unsigned write_mask); + + virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Get a whole variable written by an assignment + * + * If the LHS of the assignment writes a whole variable, the variable is + * returned. Otherwise \c NULL is returned. Examples of whole-variable + * assignment are: + * + * - Assigning to a scalar + * - Assigning to all components of a vector + * - Whole array (or matrix) assignment + * - Whole structure assignment + */ + ir_variable *whole_variable_written(); + + /** + * Set the LHS of an assignment + */ + void set_lhs(ir_rvalue *lhs); + + /** + * Left-hand side of the assignment. + * + * This should be treated as read only. If you need to set the LHS of an + * assignment, use \c ir_assignment::set_lhs. + */ + ir_dereference *lhs; + + /** + * Value being assigned + */ + ir_rvalue *rhs; + + /** + * Optional condition for the assignment. + */ + ir_rvalue *condition; + + + /** + * Component mask written + * + * For non-vector types in the LHS, this field will be zero. For vector + * types, a bit will be set for each component that is written. Note that + * for \c vec2 and \c vec3 types only the lower bits will ever be set. 
+ * + * A partially-set write mask means that each enabled channel gets + * the value from a consecutive channel of the rhs. For example, + * to write just .xyw of gl_FrontColor with color: + * + * (assign (constant bool (1)) (xyw) + * (var_ref gl_FragColor) + * (swiz xyw (var_ref color))) + */ + unsigned write_mask:4; + }; + + /* Update ir_expression::get_num_operands() and operator_strs when + * updating this list. + */ + enum ir_expression_operation { + ir_unop_bit_not, + ir_unop_logic_not, + ir_unop_neg, + ir_unop_abs, + ir_unop_sign, + ir_unop_rcp, + ir_unop_rsq, + ir_unop_sqrt, + ir_unop_exp, /**< Log base e on gentype */ + ir_unop_log, /**< Natural log on gentype */ + ir_unop_exp2, + ir_unop_log2, + ir_unop_f2i, /**< Float-to-integer conversion. */ + ir_unop_f2u, /**< Float-to-unsigned conversion. */ + ir_unop_i2f, /**< Integer-to-float conversion. */ + ir_unop_f2b, /**< Float-to-boolean conversion */ + ir_unop_b2f, /**< Boolean-to-float conversion */ + ir_unop_i2b, /**< int-to-boolean conversion */ + ir_unop_b2i, /**< Boolean-to-int conversion */ + ir_unop_u2f, /**< Unsigned-to-float conversion. */ + ir_unop_i2u, /**< Integer-to-unsigned conversion. */ + ir_unop_u2i, /**< Unsigned-to-integer conversion. */ + ir_unop_d2f, /**< Double-to-float conversion. */ + ir_unop_f2d, /**< Float-to-double conversion. */ + ir_unop_d2i, /**< Double-to-integer conversion. */ + ir_unop_i2d, /**< Integer-to-double conversion. */ + ir_unop_d2u, /**< Double-to-unsigned conversion. */ + ir_unop_u2d, /**< Unsigned-to-double conversion. */ + ir_unop_d2b, /**< Double-to-boolean conversion. */ + ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */ + ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */ + ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */ + ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */ + + /** + * \name Unary floating-point rounding operations. + */ + /*@{*/ + ir_unop_trunc, + ir_unop_ceil, + ir_unop_floor, + ir_unop_fract, + ir_unop_round_even, + /*@}*/ + + /** + * \name Trigonometric operations. + */ + /*@{*/ + ir_unop_sin, + ir_unop_cos, + /*@}*/ + + /** + * \name Partial derivatives. + */ + /*@{*/ + ir_unop_dFdx, + ir_unop_dFdx_coarse, + ir_unop_dFdx_fine, + ir_unop_dFdy, + ir_unop_dFdy_coarse, + ir_unop_dFdy_fine, + /*@}*/ + + /** + * \name Floating point pack and unpack operations. + */ + /*@{*/ + ir_unop_pack_snorm_2x16, + ir_unop_pack_snorm_4x8, + ir_unop_pack_unorm_2x16, + ir_unop_pack_unorm_4x8, + ir_unop_pack_half_2x16, + ir_unop_unpack_snorm_2x16, + ir_unop_unpack_snorm_4x8, + ir_unop_unpack_unorm_2x16, + ir_unop_unpack_unorm_4x8, + ir_unop_unpack_half_2x16, + /*@}*/ + - /** - * \name Lowered floating point unpacking operations. - * - * \see lower_packing_builtins_visitor::split_unpack_half_2x16 - */ - /*@{*/ - ir_unop_unpack_half_2x16_split_x, - ir_unop_unpack_half_2x16_split_y, - /*@}*/ - + /** + * \name Bit operations, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_unop_bitfield_reverse, + ir_unop_bit_count, + ir_unop_find_msb, + ir_unop_find_lsb, + /*@}*/ + + ir_unop_saturate, + + /** + * \name Double packing, part of ARB_gpu_shader_fp64. + */ + /*@{*/ + ir_unop_pack_double_2x32, + ir_unop_unpack_double_2x32, + /*@}*/ + + ir_unop_frexp_sig, + ir_unop_frexp_exp, + + ir_unop_noise, + + ir_unop_subroutine_to_int, + /** + * Interpolate fs input at centroid + * + * operand0 is the fs input. + */ + ir_unop_interpolate_at_centroid, + + /** + * Ask the driver for the total size of a buffer block. 
+ * + * operand0 is the ir_constant buffer block index in the linked shader. + */ + ir_unop_get_buffer_size, + + /** + * Calculate length of an unsized array inside a buffer block. + * This opcode is going to be replaced in a lowering pass inside + * the linker. + * + * operand0 is the unsized array's ir_value for the calculation + * of its length. + */ + ir_unop_ssbo_unsized_array_length, + + /** + * A sentinel marking the last of the unary operations. + */ + ir_last_unop = ir_unop_ssbo_unsized_array_length, + + ir_binop_add, + ir_binop_sub, + ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */ + ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */ + ir_binop_div, + + /** + * Returns the carry resulting from the addition of the two arguments. + */ + /*@{*/ + ir_binop_carry, + /*@}*/ + + /** + * Returns the borrow resulting from the subtraction of the second argument + * from the first argument. + */ + /*@{*/ + ir_binop_borrow, + /*@}*/ + + /** + * Takes one of two combinations of arguments: + * + * - mod(vecN, vecN) + * - mod(vecN, float) + * + * Does not take integer types. + */ + ir_binop_mod, + + /** + * \name Binary comparison operators which return a boolean vector. + * The type of both operands must be equal. + */ + /*@{*/ + ir_binop_less, + ir_binop_greater, + ir_binop_lequal, + ir_binop_gequal, + ir_binop_equal, + ir_binop_nequal, + /** + * Returns single boolean for whether all components of operands[0] + * equal the components of operands[1]. + */ + ir_binop_all_equal, + /** + * Returns single boolean for whether any component of operands[0] + * is not equal to the corresponding component of operands[1]. + */ + ir_binop_any_nequal, + /*@}*/ + + /** + * \name Bit-wise binary operations. + */ + /*@{*/ + ir_binop_lshift, + ir_binop_rshift, + ir_binop_bit_and, + ir_binop_bit_xor, + ir_binop_bit_or, + /*@}*/ + + ir_binop_logic_and, + ir_binop_logic_xor, + ir_binop_logic_or, + + ir_binop_dot, + ir_binop_min, + ir_binop_max, + + ir_binop_pow, + - /** - * \name Lowered floating point packing operations. - * - * \see lower_packing_builtins_visitor::split_pack_half_2x16 - */ - /*@{*/ - ir_binop_pack_half_2x16_split, - /*@}*/ - + /** + * Load a value the size of a given GLSL type from a uniform block. + * + * operand0 is the ir_constant uniform block index in the linked shader. + * operand1 is a byte offset within the uniform block. + */ + ir_binop_ubo_load, + + /** + * \name Multiplies a number by two to a power, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_binop_ldexp, + /*@}*/ + + /** + * Extract a scalar from a vector + * + * operand0 is the vector + * operand1 is the index of the field to read from operand0 + */ + ir_binop_vector_extract, + + /** + * Interpolate fs input at offset + * + * operand0 is the fs input + * operand1 is the offset from the pixel center + */ + ir_binop_interpolate_at_offset, + + /** + * Interpolate fs input at sample position + * + * operand0 is the fs input + * operand1 is the sample ID + */ + ir_binop_interpolate_at_sample, + + /** + * A sentinel marking the last of the binary operations. + */ + ir_last_binop = ir_binop_interpolate_at_sample, + + /** + * \name Fused floating-point multiply-add, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_triop_fma, + /*@}*/ + + ir_triop_lrp, + + /** + * \name Conditional Select + * + * A vector conditional select instruction (like ?:, but operating per- + * component on vectors). 
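+ *
+ * Illustrative example: csel(bvec2(true, false), a, b) yields (a.x, b.y),
+ * i.e. each component behaves like cond ? a : b.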
+ * + * \see lower_instructions_visitor::ldexp_to_arith + */ + /*@{*/ + ir_triop_csel, + /*@}*/ + + ir_triop_bitfield_extract, + + /** + * Generate a value with one field of a vector changed + * + * operand0 is the vector + * operand1 is the value to write into the vector result + * operand2 is the index in operand0 to be modified + */ + ir_triop_vector_insert, + + /** + * A sentinel marking the last of the ternary operations. + */ + ir_last_triop = ir_triop_vector_insert, + + ir_quadop_bitfield_insert, + + ir_quadop_vector, + + /** + * A sentinel marking the last of the ternary operations. + */ + ir_last_quadop = ir_quadop_vector, + + /** + * A sentinel marking the last of all operations. + */ + ir_last_opcode = ir_quadop_vector + }; + + class ir_expression : public ir_rvalue { + public: + ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1 = NULL, + ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL); + + /** + * Constructor for unary operation expressions + */ + ir_expression(int op, ir_rvalue *); + + /** + * Constructor for binary operation expressions + */ + ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1); + + /** + * Constructor for ternary operation expressions + */ + ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const; + + /** + * Attempt to constant-fold the expression + * + * The "variable_context" hash table links ir_variable * to ir_constant * + * that represent the variables' values. \c NULL represents an empty + * context. + * + * If the expression cannot be constant folded, this method will return + * \c NULL. + */ + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Determine the number of operands used by an expression + */ + static unsigned int get_num_operands(ir_expression_operation); + + /** + * Determine the number of operands used by an expression + */ + unsigned int get_num_operands() const + { + return (this->operation == ir_quadop_vector) + ? this->type->vector_elements : get_num_operands(operation); + } + + /** + * Return whether the expression operates on vectors horizontally. + */ + bool is_horizontal() const + { + return operation == ir_binop_all_equal || + operation == ir_binop_any_nequal || + operation == ir_binop_dot || + operation == ir_binop_vector_extract || + operation == ir_triop_vector_insert || + operation == ir_quadop_vector; + } + + /** + * Return a string representing this expression's operator. + */ + const char *operator_string(); + + /** + * Return a string representing this expression's operator. + */ + static const char *operator_string(ir_expression_operation); + + + /** + * Do a reverse-lookup to translate the given string into an operator. + */ + static ir_expression_operation get_operator(const char *); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual ir_variable *variable_referenced() const; + + ir_expression_operation operation; + ir_rvalue *operands[4]; + }; + + + /** + * HIR instruction representing a high-level function call, containing a list + * of parameters and returning a value in the supplied temporary. 
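+ *
+ * For example (purely illustrative), the statement v = max(a, b); becomes an
+ * ir_call of the matching \c max signature: actual_parameters holds
+ * dereferences of \c a and \c b, and return_deref names the temporary that
+ * receives the result.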
+ */ + class ir_call : public ir_instruction { + public: + ir_call(ir_function_signature *callee, + ir_dereference_variable *return_deref, + exec_list *actual_parameters) + : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL) + { + assert(callee->return_type != NULL); + actual_parameters->move_nodes_to(& this->actual_parameters); + this->use_builtin = callee->is_builtin(); + } + + ir_call(ir_function_signature *callee, + ir_dereference_variable *return_deref, + exec_list *actual_parameters, + ir_variable *var, ir_rvalue *array_idx) + : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx) + { + assert(callee->return_type != NULL); + actual_parameters->move_nodes_to(& this->actual_parameters); + this->use_builtin = callee->is_builtin(); + } + + virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Get the name of the function being called. + */ + const char *callee_name() const + { + return callee->function_name(); + } + + /** + * Generates an inline version of the function before @ir, + * storing the return value in return_deref. + */ + void generate_inline(ir_instruction *ir); + + /** + * Storage for the function's return value. + * This must be NULL if the return type is void. + */ + ir_dereference_variable *return_deref; + + /** + * The specific function signature being called. + */ + ir_function_signature *callee; + + /* List of ir_rvalue of paramaters passed in this call. */ + exec_list actual_parameters; + + /** Should this call only bind to a built-in function? */ + bool use_builtin; + + /* + * ARB_shader_subroutine support - + * the subroutine uniform variable and array index + * rvalue to be used in the lowering pass later. + */ + ir_variable *sub_var; + ir_rvalue *array_idx; + }; + + + /** + * \name Jump-like IR instructions. + * + * These include \c break, \c continue, \c return, and \c discard. + */ + /*@{*/ + class ir_jump : public ir_instruction { + protected: + ir_jump(enum ir_node_type t) + : ir_instruction(t) + { + } + }; + + class ir_return : public ir_jump { + public: + ir_return() + : ir_jump(ir_type_return), value(NULL) + { + } + + ir_return(ir_rvalue *value) + : ir_jump(ir_type_return), value(value) + { + } + + virtual ir_return *clone(void *mem_ctx, struct hash_table *) const; + + ir_rvalue *get_value() const + { + return value; + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *value; + }; + + + /** + * Jump instructions used inside loops + * + * These include \c break and \c continue. The \c break within a loop is + * different from the \c break within a switch-statement. 
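+ *
+ * For example, the \c break in the (illustrative) loop
+ *
+ *    for (int i = 0; i < 4; i++) { if (done) break; }
+ *
+ * is represented as an ir_loop_jump with mode \c jump_break nested in the
+ * enclosing ir_loop's body_instructions.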
+ * + * \sa ir_switch_jump + */ + class ir_loop_jump : public ir_jump { + public: + enum jump_mode { + jump_break, + jump_continue + }; + + ir_loop_jump(jump_mode mode) + : ir_jump(ir_type_loop_jump) + { + this->mode = mode; + } + + virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + bool is_break() const + { + return mode == jump_break; + } + + bool is_continue() const + { + return mode == jump_continue; + } + + /** Mode selector for the jump instruction. */ + enum jump_mode mode; + }; + + /** + * IR instruction representing discard statements. + */ + class ir_discard : public ir_jump { + public: + ir_discard() + : ir_jump(ir_type_discard) + { + this->condition = NULL; + } + + ir_discard(ir_rvalue *cond) + : ir_jump(ir_type_discard) + { + this->condition = cond; + } + + virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *condition; + }; + /*@}*/ + + + /** + * Texture sampling opcodes used in ir_texture + */ + enum ir_texture_opcode { + ir_tex, /**< Regular texture look-up */ + ir_txb, /**< Texture look-up with LOD bias */ + ir_txl, /**< Texture look-up with explicit LOD */ + ir_txd, /**< Texture look-up with partial derivatvies */ + ir_txf, /**< Texel fetch with explicit LOD */ + ir_txf_ms, /**< Multisample texture fetch */ + ir_txs, /**< Texture size */ + ir_lod, /**< Texture lod query */ + ir_tg4, /**< Texture gather */ + ir_query_levels, /**< Texture levels query */ + ir_texture_samples, /**< Texture samples query */ + ir_samples_identical, /**< Query whether all samples are definitely identical. */ + }; + + + /** + * IR instruction to sample a texture + * + * The specific form of the IR instruction depends on the \c mode value + * selected from \c ir_texture_opcodes. In the printed IR, these will + * appear as: + * + * Texel offset (0 or an expression) + * | Projection divisor + * | | Shadow comparitor + * | | | + * v v v + * (tex 0 1 ( )) + * (txb 0 1 ( ) ) + * (txl 0 1 ( ) ) + * (txd 0 1 ( ) (dPdx dPdy)) + * (txf 0 ) + * (txf_ms + * ) + * (txs ) + * (lod ) + * (tg4 ) + * (query_levels ) + * (samples_identical ) + */ + class ir_texture : public ir_rvalue { + public: + ir_texture(enum ir_texture_opcode op) + : ir_rvalue(ir_type_texture), + op(op), sampler(NULL), coordinate(NULL), projector(NULL), + shadow_comparitor(NULL), offset(NULL) + { + memset(&lod_info, 0, sizeof(lod_info)); + } + + virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Return a string representing the ir_texture_opcode. + */ + const char *opcode_string(); + + /** Set the sampler and type. */ + void set_sampler(ir_dereference *sampler, const glsl_type *type); + + /** + * Do a reverse-lookup to translate a string into an ir_texture_opcode. + */ + static ir_texture_opcode get_opcode(const char *); + + enum ir_texture_opcode op; + + /** Sampler to use for the texture access. 
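+ *
+ * Typically an ir_dereference_variable of a sampler-typed uniform, or an
+ * ir_dereference_array when sampler arrays are in use.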
*/ + ir_dereference *sampler; + + /** Texture coordinate to sample */ + ir_rvalue *coordinate; + + /** + * Value used for projective divide. + * + * If there is no projective divide (the common case), this will be + * \c NULL. Optimization passes should check for this to point to a constant + * of 1.0 and replace that with \c NULL. + */ + ir_rvalue *projector; + + /** + * Coordinate used for comparison on shadow look-ups. + * + * If there is no shadow comparison, this will be \c NULL. For the + * \c ir_txf opcode, this *must* be \c NULL. + */ + ir_rvalue *shadow_comparitor; + + /** Texel offset. */ + ir_rvalue *offset; + + union { + ir_rvalue *lod; /**< Floating point LOD */ + ir_rvalue *bias; /**< Floating point LOD bias */ + ir_rvalue *sample_index; /**< MSAA sample index */ + ir_rvalue *component; /**< Gather component selector */ + struct { + ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */ + ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */ + } grad; + } lod_info; + }; + + + struct ir_swizzle_mask { + unsigned x:2; + unsigned y:2; + unsigned z:2; + unsigned w:2; + + /** + * Number of components in the swizzle. + */ + unsigned num_components:3; + + /** + * Does the swizzle contain duplicate components? + * + * L-value swizzles cannot contain duplicate components. + */ + unsigned has_duplicates:1; + }; + + + class ir_swizzle : public ir_rvalue { + public: + ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w, + unsigned count); + + ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count); + + ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask); + + virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Construct an ir_swizzle from the textual representation. Can fail. + */ + static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + bool is_lvalue() const + { + return val->is_lvalue() && !mask.has_duplicates; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const; + + ir_rvalue *val; + ir_swizzle_mask mask; + + private: + /** + * Initialize the mask component of a swizzle + * + * This is used by the \c ir_swizzle constructors. 
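+ *
+ * For a hypothetical swizzle such as .xzzw, \c components would be
+ * {0, 2, 2, 3} with \c count 4, and the resulting mask would have
+ * has_duplicates set because z is read twice.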
+ */ + void init_mask(const unsigned *components, unsigned count); + }; + + + class ir_dereference : public ir_rvalue { + public: + virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0; + + bool is_lvalue() const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const = 0; + + protected: + ir_dereference(enum ir_node_type t) + : ir_rvalue(t) + { + } + }; + + + class ir_dereference_variable : public ir_dereference { + public: + ir_dereference_variable(ir_variable *var); + + virtual ir_dereference_variable *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->var; + } + + virtual ir_variable *whole_variable_referenced() + { + /* ir_dereference_variable objects always dereference the entire + * variable. However, if this dereference is dereferenced by anything + * else, the complete deferefernce chain is not a whole-variable + * dereference. This method should only be called on the top most + * ir_rvalue in a dereference chain. + */ + return this->var; + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Object being dereferenced. + */ + ir_variable *var; + }; + + + class ir_dereference_array : public ir_dereference { + public: + ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index); + + ir_dereference_array(ir_variable *var, ir_rvalue *array_index); + + virtual ir_dereference_array *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->array->variable_referenced(); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *array; + ir_rvalue *array_index; + + private: + void set_array(ir_rvalue *value); + }; + + + class ir_dereference_record : public ir_dereference { + public: + ir_dereference_record(ir_rvalue *value, const char *field); + + ir_dereference_record(ir_variable *var, const char *field); + + virtual ir_dereference_record *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->record->variable_referenced(); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *record; + const char *field; + }; + + + /** + * Data stored in an ir_constant + */ + union ir_constant_data { + unsigned u[16]; + int i[16]; + float f[16]; + bool b[16]; + double d[16]; + }; + + + class ir_constant : public ir_rvalue { + public: + ir_constant(const struct glsl_type *type, const ir_constant_data *data); + ir_constant(bool b, 
unsigned vector_elements=1); + ir_constant(unsigned int u, unsigned vector_elements=1); + ir_constant(int i, unsigned vector_elements=1); + ir_constant(float f, unsigned vector_elements=1); + ir_constant(double d, unsigned vector_elements=1); + + /** + * Construct an ir_constant from a list of ir_constant values + */ + ir_constant(const struct glsl_type *type, exec_list *values); + + /** + * Construct an ir_constant from a scalar component of another ir_constant + * + * The new \c ir_constant inherits the type of the component from the + * source constant. + * + * \note + * In the case of a matrix constant, the new constant is a scalar, \b not + * a vector. + */ + ir_constant(const ir_constant *c, unsigned i); + + /** + * Return a new ir_constant of the specified type containing all zeros. + */ + static ir_constant *zero(void *mem_ctx, const glsl_type *type); + + virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get a particular component of a constant as a specific type + * + * This is useful, for example, to get a value from an integer constant + * as a float or bool. This appears frequently when constructors are + * called with all constant parameters. + */ + /*@{*/ + bool get_bool_component(unsigned i) const; + float get_float_component(unsigned i) const; + double get_double_component(unsigned i) const; + int get_int_component(unsigned i) const; + unsigned get_uint_component(unsigned i) const; + /*@}*/ + + ir_constant *get_array_element(unsigned i) const; + + ir_constant *get_record_field(const char *name); + + /** + * Copy the values on another constant at a given offset. + * + * The offset is ignored for array or struct copies, it's only for + * scalars or vectors into vectors or matrices. + * + * With identical types on both sides and zero offset it's clone() + * without creating a new object. + */ + + void copy_offset(ir_constant *src, int offset); + + /** + * Copy the values on another constant at a given offset and + * following an assign-like mask. + * + * The mask is ignored for scalars. + * + * Note that this function only handles what assign can handle, + * i.e. at most a vector as source and a column of a matrix as + * destination. + */ + + void copy_masked_offset(ir_constant *src, int offset, unsigned int mask); + + /** + * Determine whether a constant has the same value as another constant + * + * \sa ir_constant::is_zero, ir_constant::is_one, + * ir_constant::is_negative_one + */ + bool has_value(const ir_constant *) const; + + /** + * Return true if this ir_constant represents the given value. + * + * For vectors, this checks that each component is the given value. + */ + virtual bool is_value(float f, int i) const; + virtual bool is_zero() const; + virtual bool is_one() const; + virtual bool is_negative_one() const; + + /** + * Return true for constants that could be stored as 16-bit unsigned values. + * + * Note that this will return true even for signed integer ir_constants, as + * long as the value is non-negative and fits in 16-bits. + */ + virtual bool is_uint16_constant() const; + + /** + * Value of the constant. 
+ * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c ir_instruction. Constants may be + * scalars, vectors, or matrices. + */ + union ir_constant_data value; + + /* Array elements */ + ir_constant **array_elements; + + /* Structure fields */ + exec_list components; + + private: + /** + * Parameterless constructor only used by the clone method + */ + ir_constant(void); + }; + + /** + * IR instruction to emit a vertex in a geometry shader. + */ + class ir_emit_vertex : public ir_instruction { + public: + ir_emit_vertex(ir_rvalue *stream) + : ir_instruction(ir_type_emit_vertex), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; + }; + + /** + * IR instruction to complete the current primitive and start a new one in a + * geometry shader. + */ + class ir_end_primitive : public ir_instruction { + public: + ir_end_primitive(ir_rvalue *stream) + : ir_instruction(ir_type_end_primitive), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; + }; + + /** + * IR instruction for tessellation control and compute shader barrier. + */ + class ir_barrier : public ir_instruction { + public: + ir_barrier() + : ir_instruction(ir_type_barrier) + { + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const + { + return new(mem_ctx) ir_barrier(); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + }; + + /*@}*/ + + /** + * Apply a visitor to each IR node in a list + */ + void + visit_exec_list(exec_list *list, ir_visitor *visitor); + + /** + * Validate invariants on each IR node in a list + */ + void validate_ir_tree(exec_list *instructions); + + struct _mesa_glsl_parse_state; + struct gl_shader_program; + + /** + * Detect whether an unlinked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c _mesa_glsl_error will be called to emit error messages for each function + * that is in the recursion cycle. + */ + void + detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + + /** + * Detect whether a linked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c link_error_printf will be called to emit error messages for each function + * that is in the recursion cycle. In addition, + * \c gl_shader_program::LinkStatus will be set to false. 
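+ *
+ * GLSL forbids recursion even when it is only statically present, so a
+ * program containing, for example, the (illustrative) function
+ *
+ *    float f(float x) { return f(x - 1.0); }
+ *
+ * fails to link and has each function in the cycle reported.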
+ */ + void + detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions); + + /** + * Make a clone of each IR instruction in a list + * + * \param in List of IR instructions that are to be cloned + * \param out List to hold the cloned instructions + */ + void + clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in); + + extern void + _mesa_glsl_initialize_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + extern void + _mesa_glsl_initialize_derived_variables(gl_shader *shader); + + extern void + _mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state); + + extern void + _mesa_glsl_initialize_builtin_functions(); + + extern ir_function_signature * + _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters); + + extern ir_function * + _mesa_glsl_find_builtin_function_by_name(const char *name); + + extern gl_shader * + _mesa_glsl_get_builtin_function_shader(void); + + extern ir_function_signature * + _mesa_get_main_function_signature(gl_shader *sh); + + extern void + _mesa_glsl_release_functions(void); + + extern void + _mesa_glsl_release_builtin_functions(void); + + extern void + reparent_ir(exec_list *list, void *mem_ctx); + + struct glsl_symbol_table; + + extern void + import_prototypes(const exec_list *source, exec_list *dest, + struct glsl_symbol_table *symbols, void *mem_ctx); + + extern bool + ir_has_call(ir_instruction *ir); + + extern void + do_set_program_inouts(exec_list *instructions, struct gl_program *prog, + gl_shader_stage shader_stage); + + extern char * + prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters); + + const char * + mode_string(const ir_variable *var); + + /** + * Built-in / reserved GL variables names start with "gl_" + */ + static inline bool + is_gl_identifier(const char *s) + { + return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_'; + } + + extern "C" { + #endif /* __cplusplus */ + + extern void _mesa_print_ir(FILE *f, struct exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + extern void + fprint_ir(FILE *f, const void *instruction); + + #ifdef __cplusplus + } /* extern "C" */ + #endif + + unsigned + vertices_per_prim(GLenum prim); + + #endif /* IR_H */ diff --cc src/compiler/glsl/ir_clone.cpp index 00000000000,0965b0d3719..b32ec17f1af mode 000000,100644..100644 --- a/src/compiler/glsl/ir_clone.cpp +++ b/src/compiler/glsl/ir_clone.cpp @@@ -1,0 -1,440 +1,441 @@@ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include + #include "main/compiler.h" + #include "ir.h" + #include "compiler/glsl_types.h" + #include "program/hash_table.h" + + ir_rvalue * + ir_rvalue::clone(void *mem_ctx, struct hash_table *) const + { + /* The only possible instantiation is the generic error value. */ + return error_value(mem_ctx); + } + + /** + * Duplicate an IR variable + */ + ir_variable * + ir_variable::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name, + (ir_variable_mode) this->data.mode); + + var->data.max_array_access = this->data.max_array_access; + if (this->is_interface_instance()) { + var->u.max_ifc_array_access = + rzalloc_array(var, unsigned, this->interface_type->length); + memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access, + this->interface_type->length * sizeof(unsigned)); + } + + memcpy(&var->data, &this->data, sizeof(var->data)); + + if (this->get_state_slots()) { + ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots()); + memcpy(s, this->get_state_slots(), + sizeof(s[0]) * var->get_num_state_slots()); + } + + if (this->constant_value) + var->constant_value = this->constant_value->clone(mem_ctx, ht); + + if (this->constant_initializer) + var->constant_initializer = + this->constant_initializer->clone(mem_ctx, ht); + + var->interface_type = this->interface_type; + + if (ht) { + hash_table_insert(ht, var, (void *)const_cast(this)); + } + + return var; + } + + ir_swizzle * + ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask); + } + + ir_return * + ir_return::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_rvalue *new_value = NULL; + + if (this->value) + new_value = this->value->clone(mem_ctx, ht); + + return new(mem_ctx) ir_return(new_value); + } + + ir_discard * + ir_discard::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_rvalue *new_condition = NULL; + + if (this->condition != NULL) + new_condition = this->condition->clone(mem_ctx, ht); + + return new(mem_ctx) ir_discard(new_condition); + } + + ir_loop_jump * + ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const + { + (void)ht; + + return new(mem_ctx) ir_loop_jump(this->mode); + } + + ir_if * + ir_if::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht)); + + foreach_in_list(ir_instruction, ir, &this->then_instructions) { + new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + foreach_in_list(ir_instruction, ir, &this->else_instructions) { + new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + return new_if; + } + + ir_loop * + ir_loop::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_loop *new_loop = new(mem_ctx) ir_loop(); + + foreach_in_list(ir_instruction, ir, &this->body_instructions) { + new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + return new_loop; + } + + ir_call * + ir_call::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_dereference_variable *new_return_ref = NULL; + if (this->return_deref != NULL) + new_return_ref = this->return_deref->clone(mem_ctx, ht); + + exec_list new_parameters; 
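+ /* The cloned parameters are collected on this temporary list; the ir_call
+ * constructor invoked below moves them into the new call's
+ * actual_parameters.
+ */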
+ + foreach_in_list(ir_instruction, ir, &this->actual_parameters) { + new_parameters.push_tail(ir->clone(mem_ctx, ht)); + } + + return new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters); + } + + ir_expression * + ir_expression::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, }; + unsigned int i; + + for (i = 0; i < get_num_operands(); i++) { + op[i] = this->operands[i]->clone(mem_ctx, ht); + } + + return new(mem_ctx) ir_expression(this->operation, this->type, + op[0], op[1], op[2], op[3]); + } + + ir_dereference_variable * + ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_variable *new_var; + + if (ht) { + new_var = (ir_variable *)hash_table_find(ht, this->var); + if (!new_var) + new_var = this->var; + } else { + new_var = this->var; + } + + return new(mem_ctx) ir_dereference_variable(new_var); + } + + ir_dereference_array * + ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht), + this->array_index->clone(mem_ctx, + ht)); + } + + ir_dereference_record * + ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht), + this->field); + } + + ir_texture * + ir_texture::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_texture *new_tex = new(mem_ctx) ir_texture(this->op); + new_tex->type = this->type; + + new_tex->sampler = this->sampler->clone(mem_ctx, ht); + if (this->coordinate) + new_tex->coordinate = this->coordinate->clone(mem_ctx, ht); + if (this->projector) + new_tex->projector = this->projector->clone(mem_ctx, ht); + if (this->shadow_comparitor) { + new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht); + } + + if (this->offset != NULL) + new_tex->offset = this->offset->clone(mem_ctx, ht); + + switch (this->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht); + break; + case ir_txl: + case ir_txf: + case ir_txs: + new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht); + break; + case ir_txf_ms: + new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht); + break; + case ir_txd: + new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht); + new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht); + break; + case ir_tg4: + new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht); + break; + } + + return new_tex; + } + + ir_assignment * + ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_rvalue *new_condition = NULL; + + if (this->condition) + new_condition = this->condition->clone(mem_ctx, ht); + + ir_assignment *cloned = + new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht), + this->rhs->clone(mem_ctx, ht), + new_condition); + cloned->write_mask = this->write_mask; + return cloned; + } + + ir_function * + ir_function::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_function *copy = new(mem_ctx) ir_function(this->name); + + copy->is_subroutine = this->is_subroutine; + copy->subroutine_index = this->subroutine_index; + copy->num_subroutine_types = this->num_subroutine_types; + copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types); + for 
(int i = 0; i < copy->num_subroutine_types; i++) + copy->subroutine_types[i] = this->subroutine_types[i]; + + foreach_in_list(const ir_function_signature, sig, &this->signatures) { + ir_function_signature *sig_copy = sig->clone(mem_ctx, ht); + copy->add_signature(sig_copy); + + if (ht != NULL) + hash_table_insert(ht, sig_copy, + (void *)const_cast(sig)); + } + + return copy; + } + + ir_function_signature * + ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const + { + ir_function_signature *copy = this->clone_prototype(mem_ctx, ht); + + copy->is_defined = this->is_defined; + + /* Clone the instruction list. + */ + foreach_in_list(const ir_instruction, inst, &this->body) { + ir_instruction *const inst_copy = inst->clone(mem_ctx, ht); + copy->body.push_tail(inst_copy); + } + + return copy; + } + + ir_function_signature * + ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const + { + ir_function_signature *copy = + new(mem_ctx) ir_function_signature(this->return_type); + + copy->is_defined = false; + copy->builtin_avail = this->builtin_avail; + copy->origin = this; + + /* Clone the parameter list, but NOT the body. + */ + foreach_in_list(const ir_variable, param, &this->parameters) { + assert(const_cast(param)->as_variable() != NULL); + + ir_variable *const param_copy = param->clone(mem_ctx, ht); + copy->parameters.push_tail(param_copy); + } + + return copy; + } + + ir_constant * + ir_constant::clone(void *mem_ctx, struct hash_table *ht) const + { + (void)ht; + + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + return new(mem_ctx) ir_constant(this->type, &this->value); + + case GLSL_TYPE_STRUCT: { + ir_constant *c = new(mem_ctx) ir_constant; + + c->type = this->type; + for (exec_node *node = this->components.head + ; !node->is_tail_sentinel() + ; node = node->next) { + ir_constant *const orig = (ir_constant *) node; + + c->components.push_tail(orig->clone(mem_ctx, NULL)); + } + + return c; + } + + case GLSL_TYPE_ARRAY: { + ir_constant *c = new(mem_ctx) ir_constant; + + c->type = this->type; + c->array_elements = ralloc_array(c, ir_constant *, this->type->length); + for (unsigned i = 0; i < this->type->length; i++) { + c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL); + } + return c; + } + ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_INTERFACE: + assert(!"Should not get here."); + break; + } + + return NULL; + } + + + class fixup_ir_call_visitor : public ir_hierarchical_visitor { + public: + fixup_ir_call_visitor(struct hash_table *ht) + { + this->ht = ht; + } + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + /* Try to find the function signature referenced by the ir_call in the + * table. If it is found, replace it with the value from the table. + */ + ir_function_signature *sig = + (ir_function_signature *) hash_table_find(this->ht, ir->callee); + if (sig != NULL) + ir->callee = sig; + + /* Since this may be used before function call parameters are flattened, + * the children also need to be processed. 
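+ * Returning visit_continue here (rather than visit_continue_with_parent)
+ * lets the hierarchical visitor descend into the call's parameter r-values
+ * as well.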
+ */ + return visit_continue; + } + + private: + struct hash_table *ht; + }; + + + static void + fixup_function_calls(struct hash_table *ht, exec_list *instructions) + { + fixup_ir_call_visitor v(ht); + v.run(instructions); + } + + + void + clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in) + { + struct hash_table *ht = + hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); + + foreach_in_list(const ir_instruction, original, in) { + ir_instruction *copy = original->clone(mem_ctx, ht); + + out->push_tail(copy); + } + + /* Make a pass over the cloned tree to fix up ir_call nodes to point to the + * cloned ir_function_signature nodes. This cannot be done automatically + * during cloning because the ir_call might be a forward reference (i.e., + * the function signature that it references may not have been cloned yet). + */ + fixup_function_calls(ht, out); + + hash_table_dtor(ht); + } diff --cc src/compiler/glsl/ir_optimization.h index 00000000000,be86f547f77..b56413a1500 mode 000000,100644..100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@@ -1,0 -1,147 +1,144 @@@ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + + /** + * \file ir_optimization.h + * + * Prototypes for optimization passes to be called by the compiler and drivers. 
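+ *
+ * Most of these passes take an exec_list of instructions and return true
+ * when they changed something, so callers usually iterate to a fixed point.
+ * A minimal sketch (illustrative only, not code from this tree):
+ *
+ *    bool progress;
+ *    do {
+ *       progress = false;
+ *       progress = do_constant_folding(ir) || progress;
+ *       progress = do_dead_code_local(ir) || progress;
+ *    } while (progress);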
+ */ + + /* Operations for lower_instructions() */ + #define SUB_TO_ADD_NEG 0x01 + #define DIV_TO_MUL_RCP 0x02 + #define EXP_TO_EXP2 0x04 + #define POW_TO_EXP2 0x08 + #define LOG_TO_LOG2 0x10 + #define MOD_TO_FLOOR 0x20 + #define INT_DIV_TO_MUL_RCP 0x40 + #define LDEXP_TO_ARITH 0x80 + #define CARRY_TO_ARITH 0x100 + #define BORROW_TO_ARITH 0x200 + #define SAT_TO_CLAMP 0x400 + #define DOPS_TO_DFRAC 0x800 + #define DFREXP_DLDEXP_TO_ARITH 0x1000 + + /** + * \see class lower_packing_builtins_visitor + */ + enum lower_packing_builtins_op { + LOWER_PACK_UNPACK_NONE = 0x0000, + + LOWER_PACK_SNORM_2x16 = 0x0001, + LOWER_UNPACK_SNORM_2x16 = 0x0002, + + LOWER_PACK_UNORM_2x16 = 0x0004, + LOWER_UNPACK_UNORM_2x16 = 0x0008, + + LOWER_PACK_HALF_2x16 = 0x0010, + LOWER_UNPACK_HALF_2x16 = 0x0020, + - LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, - LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, ++ LOWER_PACK_SNORM_4x8 = 0x0040, ++ LOWER_UNPACK_SNORM_4x8 = 0x0080, + - LOWER_PACK_SNORM_4x8 = 0x0100, - LOWER_UNPACK_SNORM_4x8 = 0x0200, ++ LOWER_PACK_UNORM_4x8 = 0x0100, ++ LOWER_UNPACK_UNORM_4x8 = 0x0200, + - LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800, - - LOWER_PACK_USE_BFI = 0x1000, - LOWER_PACK_USE_BFE = 0x2000, ++ LOWER_PACK_USE_BFI = 0x0400, ++ LOWER_PACK_USE_BFE = 0x0800, + }; + + bool do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers); + + bool do_rebalance_tree(exec_list *instructions); + bool do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options); + bool opt_conditional_discard(exec_list *instructions); + bool do_constant_folding(exec_list *instructions); + bool do_constant_variable(exec_list *instructions); + bool do_constant_variable_unlinked(exec_list *instructions); + bool do_copy_propagation(exec_list *instructions); + bool do_copy_propagation_elements(exec_list *instructions); + bool do_constant_propagation(exec_list *instructions); + void do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + class tfeedback_decl *tfeedback_decls); + bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned); + bool do_dead_code_local(exec_list *instructions); + bool do_dead_code_unlinked(exec_list *instructions); + bool do_dead_functions(exec_list *instructions); + bool opt_flip_matrices(exec_list *instructions); + bool do_function_inlining(exec_list *instructions); + bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); + bool do_lower_texture_projection(exec_list *instructions); + bool do_if_simplification(exec_list *instructions); + bool opt_flatten_nested_if_blocks(exec_list *instructions); + bool do_discard_simplification(exec_list *instructions); + bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); + bool do_mat_op_to_vec(exec_list *instructions); + bool do_minmax_prune(exec_list *instructions); + bool do_noop_swizzle(exec_list *instructions); + bool do_structure_splitting(exec_list *instructions); + bool do_swizzle_swizzle(exec_list *instructions); + bool do_vectorize(exec_list *instructions); + bool do_tree_grafting(exec_list *instructions); + bool do_vec_index_to_cond_assign(exec_list *instructions); + bool do_vec_index_to_swizzle(exec_list *instructions); + bool 
lower_discard(exec_list *instructions); + void lower_discard_flow(exec_list *instructions); + bool lower_instructions(exec_list *instructions, unsigned what_to_lower); + bool lower_noise(exec_list *instructions); + bool lower_variable_index_to_cond_assign(gl_shader_stage stage, + exec_list *instructions, bool lower_input, bool lower_output, + bool lower_temp, bool lower_uniform); + bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); + bool lower_const_arrays_to_uniforms(exec_list *instructions); + bool lower_clip_distance(gl_shader *shader); + void lower_output_reads(unsigned stage, exec_list *instructions); + bool lower_packing_builtins(exec_list *instructions, int op_mask); + void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); + void lower_ubo_reference(struct gl_shader *shader); + void lower_packed_varyings(void *mem_ctx, + unsigned locations_used, ir_variable_mode mode, + unsigned gs_input_vertices, gl_shader *shader); + bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index); + bool lower_vector_derefs(gl_shader *shader); + void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); + bool optimize_redundant_jumps(exec_list *instructions); + bool optimize_split_arrays(exec_list *instructions, bool linked); + bool lower_offset_arrays(exec_list *instructions); + void optimize_dead_builtin_variables(exec_list *instructions, + enum ir_variable_mode other); + bool lower_tess_level(gl_shader *shader); + + bool lower_vertex_id(gl_shader *shader); + + bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); + + ir_rvalue * + compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx); diff --cc src/compiler/glsl/ir_validate.cpp index 00000000000,cad7069bf98..2ec5a3f73f7 mode 000000,100644..100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@@ -1,0 -1,930 +1,918 @@@ + /* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + /** + * \file ir_validate.cpp + * + * Attempts to verify that various invariants of the IR tree are true. + * + * In particular, at the moment it makes sure that no single + * ir_instruction node except for ir_variable appears multiple times + * in the ir tree. 
ir_variable does appear multiple times: Once as a + * declaration in an exec_list, and multiple times as the endpoint of + * a dereference chain. + */ + + #include "ir.h" + #include "ir_hierarchical_visitor.h" + #include "util/hash_table.h" + #include "util/set.h" + #include "compiler/glsl_types.h" + + namespace { + + class ir_validate : public ir_hierarchical_visitor { + public: + ir_validate() + { + this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + this->current_function = NULL; + + this->callback_enter = ir_validate::validate_ir; + this->data_enter = ir_set; + } + + ~ir_validate() + { + _mesa_set_destroy(this->ir_set, NULL); + } + + virtual ir_visitor_status visit(ir_variable *v); + virtual ir_visitor_status visit(ir_dereference_variable *ir); + + virtual ir_visitor_status visit_enter(ir_discard *ir); + virtual ir_visitor_status visit_enter(ir_if *ir); + + virtual ir_visitor_status visit_enter(ir_function *ir); + virtual ir_visitor_status visit_leave(ir_function *ir); + virtual ir_visitor_status visit_enter(ir_function_signature *ir); + + virtual ir_visitor_status visit_leave(ir_expression *ir); + virtual ir_visitor_status visit_leave(ir_swizzle *ir); + + virtual ir_visitor_status visit_enter(class ir_dereference_array *); + + virtual ir_visitor_status visit_enter(ir_assignment *ir); + virtual ir_visitor_status visit_enter(ir_call *ir); + + static void validate_ir(ir_instruction *ir, void *data); + + ir_function *current_function; + + struct set *ir_set; + }; + + } /* anonymous namespace */ + + ir_visitor_status + ir_validate::visit(ir_dereference_variable *ir) + { + if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) { + printf("ir_dereference_variable @ %p does not specify a variable %p\n", + (void *) ir, (void *) ir->var); + abort(); + } + + if (_mesa_set_search(ir_set, ir->var) == NULL) { + printf("ir_dereference_variable @ %p specifies undeclared variable " + "`%s' @ %p\n", + (void *) ir, ir->var->name, (void *) ir->var); + abort(); + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(class ir_dereference_array *ir) + { + if (!ir->array->type->is_array() && !ir->array->type->is_matrix() && + !ir->array->type->is_vector()) { + printf("ir_dereference_array @ %p does not specify an array, a vector " + "or a matrix\n", + (void *) ir); + ir->print(); + printf("\n"); + abort(); + } + + if (!ir->array_index->type->is_scalar()) { + printf("ir_dereference_array @ %p does not have scalar index: %s\n", + (void *) ir, ir->array_index->type->name); + abort(); + } + + if (!ir->array_index->type->is_integer()) { + printf("ir_dereference_array @ %p does not have integer index: %s\n", + (void *) ir, ir->array_index->type->name); + abort(); + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(ir_discard *ir) + { + if (ir->condition && ir->condition->type != glsl_type::bool_type) { + printf("ir_discard condition %s type instead of bool.\n", + ir->condition->type->name); + ir->print(); + printf("\n"); + abort(); + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(ir_if *ir) + { + if (ir->condition->type != glsl_type::bool_type) { + printf("ir_if condition %s type instead of bool.\n", + ir->condition->type->name); + ir->print(); + printf("\n"); + abort(); + } + + return visit_continue; + } + + + ir_visitor_status + ir_validate::visit_enter(ir_function *ir) + { + /* Function definitions cannot be nested. 
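+ * GLSL does not allow one function to be defined inside another, so
+ * encountering an ir_function while current_function is still set means the
+ * IR tree being validated is malformed.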
+ */ + if (this->current_function != NULL) { + printf("Function definition nested inside another function " + "definition:\n"); + printf("%s %p inside %s %p\n", + ir->name, (void *) ir, + this->current_function->name, (void *) this->current_function); + abort(); + } + + /* Store the current function hierarchy being traversed. This is used + * by the function signature visitor to ensure that the signatures are + * linked with the correct functions. + */ + this->current_function = ir; + + this->validate_ir(ir, this->data_enter); + + /* Verify that all of the things stored in the list of signatures are, + * in fact, function signatures. + */ + foreach_in_list(ir_instruction, sig, &ir->signatures) { + if (sig->ir_type != ir_type_function_signature) { + printf("Non-signature in signature list of function `%s'\n", + ir->name); + abort(); + } + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_leave(ir_function *ir) + { + assert(ralloc_parent(ir->name) == ir); + + this->current_function = NULL; + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(ir_function_signature *ir) + { + if (this->current_function != ir->function()) { + printf("Function signature nested inside wrong function " + "definition:\n"); + printf("%p inside %s %p instead of %s %p\n", + (void *) ir, + this->current_function->name, (void *) this->current_function, + ir->function_name(), (void *) ir->function()); + abort(); + } + + if (ir->return_type == NULL) { + printf("Function signature %p for function %s has NULL return type.\n", + (void *) ir, ir->function_name()); + abort(); + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_leave(ir_expression *ir) + { + switch (ir->operation) { + case ir_unop_bit_not: + assert(ir->operands[0]->type == ir->type); + break; + case ir_unop_logic_not: + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + assert(ir->type == ir->operands[0]->type); + break; + + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_saturate: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type == ir->operands[0]->type); + break; + + case ir_unop_f2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_f2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_i2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_f2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + case ir_unop_b2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_i2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + case ir_unop_b2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_u2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case 
ir_unop_i2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_bitcast_i2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_bitcast_f2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_bitcast_u2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_bitcast_f2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + + case ir_unop_trunc: + case ir_unop_round_even: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type == ir->type); + break; + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->operands[0]->type == ir->type); + break; + + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec2_type); + break; + + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec4_type); + break; + + case ir_unop_pack_double_2x32: + assert(ir->type == glsl_type::double_type); + assert(ir->operands[0]->type == glsl_type::uvec2_type); + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + assert(ir->type == glsl_type::vec2_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + assert(ir->type == glsl_type::vec4_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - assert(ir->type == glsl_type::float_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - + case ir_unop_unpack_double_2x32: + assert(ir->type == glsl_type::uvec2_type); + assert(ir->operands[0]->type == glsl_type::double_type); + break; + + case ir_unop_bitfield_reverse: + assert(ir->operands[0]->type == ir->type); + assert(ir->type->is_integer()); + break; + + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements); + assert(ir->operands[0]->type->is_integer()); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + + case ir_unop_noise: + /* XXX what can we assert here? 
*/ + break; + + case ir_unop_interpolate_at_centroid: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + break; + + case ir_unop_get_buffer_size: + assert(ir->type == glsl_type::int_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_ssbo_unsized_array_length: + assert(ir->type == glsl_type::int_type); + assert(ir->operands[0]->type->is_array()); + assert(ir->operands[0]->type->is_unsized_array()); + break; + + case ir_unop_d2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_f2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_i2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_unop_frexp_sig: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_frexp_exp: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_subroutine_to_int: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_binop_add: + case ir_binop_sub: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + assert(ir->operands[0]->type->base_type == + ir->operands[1]->type->base_type); + + if (ir->operands[0]->type->is_scalar()) + assert(ir->operands[1]->type == ir->type); + else if (ir->operands[1]->type->is_scalar()) + assert(ir->operands[0]->type == ir->type); + else if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[0]->type == ir->type); + } + break; + + case ir_binop_imul_high: + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type->is_integer()); + break; + + case ir_binop_carry: + case ir_binop_borrow: + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + /* The semantics of the IR operators differ from the GLSL <, >, <=, >=, + * ==, and != operators. The IR operators perform a component-wise + * comparison on scalar or vector types and return a boolean scalar or + * vector type of the same size. 
+ */ + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[0]->type->is_vector() + || ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->vector_elements + == ir->type->vector_elements); + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + /* GLSL == and != operate on scalars, vectors, matrices and arrays, and + * return a scalar boolean. The IR matches that. + */ + assert(ir->type == glsl_type::bool_type); + assert(ir->operands[0]->type == ir->operands[1]->type); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(ir->operands[0]->type->is_integer() && + ir->operands[1]->type->is_integer()); + if (ir->operands[0]->type->is_scalar()) { + assert(ir->operands[1]->type->is_scalar()); + } + if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type->components() == + ir->operands[1]->type->components()); + } + assert(ir->type == ir->operands[0]->type); + break; + + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(ir->operands[0]->type->base_type == + ir->operands[1]->type->base_type); + assert(ir->type->is_integer()); + if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type->vector_elements == + ir->operands[1]->type->vector_elements); + } + break; + + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_binop_dot: + assert(ir->type == glsl_type::float_type || + ir->type == glsl_type::double_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + break; + - case ir_binop_pack_half_2x16_split: - assert(ir->type == glsl_type::uint_type); - assert(ir->operands[0]->type == glsl_type::float_type); - assert(ir->operands[1]->type == glsl_type::float_type); - break; - + case ir_binop_ubo_load: + assert(ir->operands[0]->type == glsl_type::uint_type); + + assert(ir->operands[1]->type == glsl_type::uint_type); + break; + + case ir_binop_ldexp: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float() || + ir->operands[0]->type->is_double()); + assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT); + assert(ir->operands[0]->type->components() == + ir->operands[1]->type->components()); + break; + + case ir_binop_vector_extract: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[1]->type->is_scalar() + && ir->operands[1]->type->is_integer()); + break; + + case ir_binop_interpolate_at_offset: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + assert(ir->operands[1]->type->components() == 2); + assert(ir->operands[1]->type->is_float()); + break; + + case ir_binop_interpolate_at_sample: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + assert(ir->operands[1]->type == glsl_type::int_type); + break; + + case ir_triop_fma: + assert(ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type == 
ir->operands[2]->type); + break; + + case ir_triop_lrp: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[2]->type == ir->operands[0]->type || + ir->operands[2]->type == glsl_type::float_type || + ir->operands[2]->type == glsl_type::double_type); + break; + + case ir_triop_csel: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements); + assert(ir->type == ir->operands[1]->type); + assert(ir->type == ir->operands[2]->type); + break; + + case ir_triop_bitfield_extract: + assert(ir->type->is_integer()); + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[1]->type == ir->type); + assert(ir->operands[2]->type == ir->type); + break; + + case ir_triop_vector_insert: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type); + assert(ir->operands[2]->type->is_scalar() + && ir->operands[2]->type->is_integer()); + assert(ir->type == ir->operands[0]->type); + break; + + case ir_quadop_bitfield_insert: + assert(ir->type->is_integer()); + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[1]->type == ir->type); + assert(ir->operands[2]->type == ir->type); + assert(ir->operands[3]->type == ir->type); + break; + + case ir_quadop_vector: + /* The vector operator collects some number of scalars and generates a + * vector from them. + * + * - All of the operands must be scalar. + * - Number of operands must matche the size of the resulting vector. + * - Base type of the operands must match the base type of the result. + */ + assert(ir->type->is_vector()); + switch (ir->type->vector_elements) { + case 2: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2] == NULL); + assert(ir->operands[3] == NULL); + break; + case 3: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2]->type->is_scalar()); + assert(ir->operands[2]->type->base_type == ir->type->base_type); + assert(ir->operands[3] == NULL); + break; + case 4: + assert(ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->base_type == ir->type->base_type); + assert(ir->operands[1]->type->is_scalar()); + assert(ir->operands[1]->type->base_type == ir->type->base_type); + assert(ir->operands[2]->type->is_scalar()); + assert(ir->operands[2]->type->base_type == ir->type->base_type); + assert(ir->operands[3]->type->is_scalar()); + assert(ir->operands[3]->type->base_type == ir->type->base_type); + break; + default: + /* The is_vector assertion above should prevent execution from ever + * getting here. 
+ */ + assert(!"Should not get here."); + break; + } + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_leave(ir_swizzle *ir) + { + unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w}; + + for (unsigned int i = 0; i < ir->type->vector_elements; i++) { + if (chans[i] >= ir->val->type->vector_elements) { + printf("ir_swizzle @ %p specifies a channel not present " + "in the value.\n", (void *) ir); + ir->print(); + abort(); + } + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit(ir_variable *ir) + { + /* An ir_variable is the one thing that can (and will) appear multiple times + * in an IR tree. It is added to the hashtable so that it can be used + * in the ir_dereference_variable handler to ensure that a variable is + * declared before it is dereferenced. + */ + if (ir->name && ir->is_name_ralloced()) + assert(ralloc_parent(ir->name) == ir); + + _mesa_set_add(ir_set, ir); + + /* If a variable is an array, verify that the maximum array index is in + * bounds. There was once an error in AST-to-HIR conversion that set this + * to be out of bounds. + */ + if (ir->type->array_size() > 0) { + if (ir->data.max_array_access >= ir->type->length) { + printf("ir_variable has maximum access out of bounds (%d vs %d)\n", + ir->data.max_array_access, ir->type->length - 1); + ir->print(); + abort(); + } + } + + /* If a variable is an interface block (or an array of interface blocks), + * verify that the maximum array index for each interface member is in + * bounds. + */ + if (ir->is_interface_instance()) { + const glsl_struct_field *fields = + ir->get_interface_type()->fields.structure; + for (unsigned i = 0; i < ir->get_interface_type()->length; i++) { + if (fields[i].type->array_size() > 0) { + const unsigned *const max_ifc_array_access = + ir->get_max_ifc_array_access(); + + assert(max_ifc_array_access != NULL); + + if (max_ifc_array_access[i] >= fields[i].type->length) { + printf("ir_variable has maximum access out of bounds for " + "field %s (%d vs %d)\n", fields[i].name, + max_ifc_array_access[i], fields[i].type->length); + ir->print(); + abort(); + } + } + } + } + + if (ir->constant_initializer != NULL && !ir->data.has_initializer) { + printf("ir_variable didn't have an initializer, but has a constant " + "initializer value.\n"); + ir->print(); + abort(); + } + + if (ir->data.mode == ir_var_uniform + && is_gl_identifier(ir->name) + && ir->get_state_slots() == NULL) { + printf("built-in uniform has no state\n"); + ir->print(); + abort(); + } + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(ir_assignment *ir) + { + const ir_dereference *const lhs = ir->lhs; + if (lhs->type->is_scalar() || lhs->type->is_vector()) { + if (ir->write_mask == 0) { + printf("Assignment LHS is %s, but write mask is 0:\n", + lhs->type->is_scalar() ? 
"scalar" : "vector"); + ir->print(); + abort(); + } + + int lhs_components = 0; + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << i)) + lhs_components++; + } + + if (lhs_components != ir->rhs->type->vector_elements) { + printf("Assignment count of LHS write mask channels enabled not\n" + "matching RHS vector size (%d LHS, %d RHS).\n", + lhs_components, ir->rhs->type->vector_elements); + ir->print(); + abort(); + } + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; + } + + ir_visitor_status + ir_validate::visit_enter(ir_call *ir) + { + ir_function_signature *const callee = ir->callee; + + if (callee->ir_type != ir_type_function_signature) { + printf("IR called by ir_call is not ir_function_signature!\n"); + abort(); + } + + if (ir->return_deref) { + if (ir->return_deref->type != callee->return_type) { + printf("callee type %s does not match return storage type %s\n", + callee->return_type->name, ir->return_deref->type->name); + abort(); + } + } else if (callee->return_type != glsl_type::void_type) { + printf("ir_call has non-void callee but no return storage\n"); + abort(); + } + + const exec_node *formal_param_node = callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (true) { + if (formal_param_node->is_tail_sentinel() + != actual_param_node->is_tail_sentinel()) { + printf("ir_call has the wrong number of parameters:\n"); + goto dump_ir; + } + if (formal_param_node->is_tail_sentinel()) { + break; + } + const ir_variable *formal_param + = (const ir_variable *) formal_param_node; + const ir_rvalue *actual_param + = (const ir_rvalue *) actual_param_node; + if (formal_param->type != actual_param->type) { + printf("ir_call parameter type mismatch:\n"); + goto dump_ir; + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + if (!actual_param->is_lvalue()) { + printf("ir_call out/inout parameters must be lvalues:\n"); + goto dump_ir; + } + } + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + } + + return visit_continue; + + dump_ir: + ir->print(); + printf("callee:\n"); + callee->print(); + abort(); + return visit_stop; + } + + void + ir_validate::validate_ir(ir_instruction *ir, void *data) + { + struct set *ir_set = (struct set *) data; + + if (_mesa_set_search(ir_set, ir)) { + printf("Instruction node present twice in ir tree:\n"); + ir->print(); + printf("\n"); + abort(); + } + _mesa_set_add(ir_set, ir); + } + + void + check_node_type(ir_instruction *ir, void *data) + { + (void) data; + + if (ir->ir_type >= ir_type_max) { + printf("Instruction node with unset type\n"); + ir->print(); printf("\n"); + } + ir_rvalue *value = ir->as_rvalue(); + if (value != NULL) + assert(value->type != glsl_type::error_type); + } + + void + validate_ir_tree(exec_list *instructions) + { + /* We shouldn't have any reason to validate IR in a release build, + * and it's half composed of assert()s anyway which wouldn't do + * anything. 
+ */ + #ifdef DEBUG + ir_validate v; + + v.run(instructions); + + foreach_in_list(ir_instruction, ir, instructions) { + visit_tree(ir, check_node_type, NULL); + } + #endif + } diff --cc src/compiler/glsl/link_uniform_initializers.cpp index 00000000000,58d21e5125e..cdc1d3ac7be mode 000000,100644..100644 --- a/src/compiler/glsl/link_uniform_initializers.cpp +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@@ -1,0 -1,355 +1,356 @@@ + /* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include "main/core.h" + #include "ir.h" + #include "linker.h" + #include "ir_uniform.h" + + /* These functions are put in a "private" namespace instead of being marked + * static so that the unit tests can access them. See + * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code + */ + namespace linker { + + gl_uniform_storage * + get_storage(gl_uniform_storage *storage, unsigned num_storage, + const char *name) + { + for (unsigned int i = 0; i < num_storage; i++) { + if (strcmp(name, storage[i].name) == 0) + return &storage[i]; + } + + return NULL; + } + + static unsigned + get_uniform_block_index(const gl_shader_program *shProg, + const char *uniformBlockName) + { + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName)) + return i; + } + + return GL_INVALID_INDEX; + } + + void + copy_constant_to_storage(union gl_constant_value *storage, + const ir_constant *val, + const enum glsl_base_type base_type, + const unsigned int elements, + unsigned int boolean_true) + { + for (unsigned int i = 0; i < elements; i++) { + switch (base_type) { + case GLSL_TYPE_UINT: + storage[i].u = val->value.u[i]; + break; + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + storage[i].i = val->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + storage[i].f = val->value.f[i]; + break; + case GLSL_TYPE_DOUBLE: + /* XXX need to check on big-endian */ + storage[i * 2].u = *(uint32_t *)&val->value.d[i]; + storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); + break; + case GLSL_TYPE_BOOL: + storage[i].b = val->value.b[i] ? 
boolean_true : 0; + break; + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_INTERFACE: ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + /* All other types should have already been filtered by other + * paths in the caller. + */ + assert(!"Should not get here."); + break; + } + } + } + + /** + * Initialize an opaque uniform from the value of an explicit binding + * qualifier specified in the shader. Atomic counters are different because + * they have no storage and should be handled elsewhere. + */ + void + set_opaque_binding(void *mem_ctx, gl_shader_program *prog, + const glsl_type *type, const char *name, int *binding) + { + + if (type->is_array() && type->fields.array->is_array()) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_opaque_binding(mem_ctx, prog, element_type, + element_name, binding); + } + } else { + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, prog->NumUniformStorage, name); + + if (storage == NULL) { + assert(storage != NULL); + return; + } + + const unsigned elements = MAX2(storage->array_elements, 1); + + /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec + * says: + * + * "If the binding identifier is used with an array, the first element + * of the array takes the specified unit and each subsequent element + * takes the next consecutive unit." + */ + for (unsigned int i = 0; i < elements; i++) { + storage->storage[i].i = (*binding)++; + } + + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader) { + if (storage->type->base_type == GLSL_TYPE_SAMPLER && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->SamplerUnits[index] = storage->storage[i].i; + } + + } else if (storage->type->base_type == GLSL_TYPE_IMAGE && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->ImageUnits[index] = storage->storage[i].i; + } + } + } + } + + storage->initialized = true; + } + } + + void + set_block_binding(gl_shader_program *prog, const char *block_name, int binding) + { + const unsigned block_index = get_uniform_block_index(prog, block_name); + + if (block_index == GL_INVALID_INDEX) { + assert(block_index != GL_INVALID_INDEX); + return; + } + + /* This is a field of a UBO. val is the binding index. 
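A side note on the GLSL_TYPE_DOUBLE branch of copy_constant_to_storage above: each double constant is split across two consecutive gl_constant_value slots, which is also why the array path later steps through storage with dmul = 2 for doubles. A minimal standalone C sketch of that split, for illustration only (names invented here; the word order simply follows the host's in-memory byte order, hence the big-endian caveat in the source):

    #include <stdint.h>
    #include <string.h>

    /* Store one double into two consecutive 32-bit slots, in memory order,
     * mirroring what the pointer-cast in the code above produces. */
    static void store_double_as_two_words(uint32_t slots[2], double d)
    {
        uint32_t words[2];
        memcpy(words, &d, sizeof(d));   /* avoids strict-aliasing issues */
        slots[0] = words[0];
        slots[1] = words[1];
    }

On a little-endian host slots[0] receives the low-order word, matching *(uint32_t *)&val->value.d[i] in the code above.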
*/ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + int stage_index = prog->InterfaceBlockStageIndex[i][block_index]; + + if (stage_index != -1) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + sh->BufferInterfaceBlocks[stage_index].Binding = binding; + } + } + } + + void + set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, + const char *name, const glsl_type *type, + ir_constant *val, unsigned int boolean_true) + { + const glsl_type *t_without_array = type->without_array(); + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(mem_ctx, prog, field_name, + field_type, field_constant, boolean_true); + field_constant = (ir_constant *)field_constant->next; + } + return; + } else if (t_without_array->is_record() || + (type->is_array() && type->fields.array->is_array())) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_uniform_initializer(mem_ctx, prog, element_name, + element_type, val->array_elements[i], + boolean_true); + } + return; + } + + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, + prog->NumUniformStorage, + name); + if (storage == NULL) { + assert(storage != NULL); + return; + } + + if (val->type->is_array()) { + const enum glsl_base_type base_type = + val->array_elements[0]->type->base_type; + const unsigned int elements = val->array_elements[0]->type->components(); + unsigned int idx = 0; + unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 
2 : 1; + + assert(val->type->length >= storage->array_elements); + for (unsigned int i = 0; i < storage->array_elements; i++) { + copy_constant_to_storage(& storage->storage[idx], + val->array_elements[i], + base_type, + elements, + boolean_true); + + idx += elements * dmul; + } + } else { + copy_constant_to_storage(storage->storage, + val, + val->type->base_type, + val->type->components(), + boolean_true); + + if (storage->type->is_sampler()) { + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader && storage->opaque[sh].active) { + unsigned index = storage->opaque[sh].index; + + shader->SamplerUnits[index] = storage->storage[0].i; + } + } + } + } + + storage->initialized = true; + } + } + + void + link_set_uniform_initializers(struct gl_shader_program *prog, + unsigned int boolean_true) + { + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *shader = prog->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if (!var || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + if (var->data.explicit_binding) { + const glsl_type *const type = var->type; + + if (type->without_array()->is_sampler() || + type->without_array()->is_image()) { + int binding = var->data.binding; + linker::set_opaque_binding(mem_ctx, prog, var->type, + var->name, &binding); + } else if (var->is_in_buffer_block()) { + const glsl_type *const iface_type = var->get_interface_type(); + + /* If the variable is an array and it is an interface instance, + * we need to set the binding for each array element. Just + * checking that the variable is an array is not sufficient. + * The variable could be an array element of a uniform block + * that lacks an instance name. For example: + * + * uniform U { + * float f[4]; + * }; + * + * In this case "f" would pass is_in_buffer_block (above) and + * type->is_array(), but it will fail is_interface_instance(). + */ + if (var->is_interface_instance() && var->type->is_array()) { + for (unsigned i = 0; i < var->type->length; i++) { + const char *name = + ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i); + + /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the + * GLSL 4.20 spec says: + * + * "If the binding identifier is used with a uniform + * block instanced as an array then the first element + * of the array takes the specified block binding and + * each subsequent element takes the next consecutive + * uniform block binding point." + */ + linker::set_block_binding(prog, name, + var->data.binding + i); + } + } else { + linker::set_block_binding(prog, iface_type->name, + var->data.binding); + } + } else if (type->contains_atomic()) { + /* we don't actually need to do anything. 
*/ + } else { + assert(!"Explicit binding not on a sampler, UBO or atomic."); + } + } else if (var->constant_initializer) { + linker::set_uniform_initializer(mem_ctx, prog, var->name, + var->type, var->constant_initializer, + boolean_true); + } + } + } + + ralloc_free(mem_ctx); + } diff --cc src/compiler/glsl/lower_packing_builtins.cpp index 00000000000,7f18238bc6e..a41627bd561 mode 000000,100644..100644 --- a/src/compiler/glsl/lower_packing_builtins.cpp +++ b/src/compiler/glsl/lower_packing_builtins.cpp @@@ -1,0 -1,1412 +1,1311 @@@ + /* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include "ir.h" + #include "ir_builder.h" + #include "ir_optimization.h" + #include "ir_rvalue_visitor.h" + + namespace { + + using namespace ir_builder; + + /** + * A visitor that lowers built-in floating-point pack/unpack expressions + * such packSnorm2x16. + */ + class lower_packing_builtins_visitor : public ir_rvalue_visitor { + public: + /** + * \param op_mask is a bitmask of `enum lower_packing_builtins_op` + */ + explicit lower_packing_builtins_visitor(int op_mask) + : op_mask(op_mask), + progress(false) + { - /* Mutually exclusive options. 
*/ - assert(!((op_mask & LOWER_PACK_HALF_2x16) && - (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); - - assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && - (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); - + factory.instructions = &factory_instructions; + } + + virtual ~lower_packing_builtins_visitor() + { + assert(factory_instructions.is_empty()); + } + + bool get_progress() { return progress; } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + enum lower_packing_builtins_op lowering_op = + choose_lowering_op(expr->operation); + + if (lowering_op == LOWER_PACK_UNPACK_NONE) + return; + + setup_factory(ralloc_parent(expr)); + + ir_rvalue *op0 = expr->operands[0]; + ralloc_steal(factory.mem_ctx, op0); + + switch (lowering_op) { + case LOWER_PACK_SNORM_2x16: + *rvalue = lower_pack_snorm_2x16(op0); + break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; + case LOWER_PACK_UNORM_2x16: + *rvalue = lower_pack_unorm_2x16(op0); + break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; + case LOWER_PACK_HALF_2x16: + *rvalue = lower_pack_half_2x16(op0); + break; - case LOWER_PACK_HALF_2x16_TO_SPLIT: - *rvalue = split_pack_half_2x16(op0); - break; + case LOWER_UNPACK_SNORM_2x16: + *rvalue = lower_unpack_snorm_2x16(op0); + break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = lower_unpack_snorm_4x8(op0); + break; + case LOWER_UNPACK_UNORM_2x16: + *rvalue = lower_unpack_unorm_2x16(op0); + break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; + case LOWER_UNPACK_HALF_2x16: + *rvalue = lower_unpack_half_2x16(op0); + break; - case LOWER_UNPACK_HALF_2x16_TO_SPLIT: - *rvalue = split_unpack_half_2x16(op0); - break; + case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: + assert(!"not reached"); + break; + } + + teardown_factory(); + progress = true; + } + + private: + const int op_mask; + bool progress; + ir_factory factory; + exec_list factory_instructions; + + /** + * Determine the needed lowering operation by filtering \a expr_op + * through \ref op_mask. + */ + enum lower_packing_builtins_op + choose_lowering_op(ir_expression_operation expr_op) + { + /* C++ regards int and enum as fundamentally different types. + * So, we can't simply return from each case; we must cast the return + * value. 
+ */ + int result; + + switch (expr_op) { + case ir_unop_pack_snorm_2x16: + result = op_mask & LOWER_PACK_SNORM_2x16; + break; + case ir_unop_pack_snorm_4x8: + result = op_mask & LOWER_PACK_SNORM_4x8; + break; + case ir_unop_pack_unorm_2x16: + result = op_mask & LOWER_PACK_UNORM_2x16; + break; + case ir_unop_pack_unorm_4x8: + result = op_mask & LOWER_PACK_UNORM_4x8; + break; + case ir_unop_pack_half_2x16: - result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); ++ result = op_mask & LOWER_PACK_HALF_2x16; + break; + case ir_unop_unpack_snorm_2x16: + result = op_mask & LOWER_UNPACK_SNORM_2x16; + break; + case ir_unop_unpack_snorm_4x8: + result = op_mask & LOWER_UNPACK_SNORM_4x8; + break; + case ir_unop_unpack_unorm_2x16: + result = op_mask & LOWER_UNPACK_UNORM_2x16; + break; + case ir_unop_unpack_unorm_4x8: + result = op_mask & LOWER_UNPACK_UNORM_4x8; + break; + case ir_unop_unpack_half_2x16: - result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); ++ result = op_mask & LOWER_UNPACK_HALF_2x16; + break; + default: + result = LOWER_PACK_UNPACK_NONE; + break; + } + + return static_cast(result); + } + + void + setup_factory(void *mem_ctx) + { + assert(factory.mem_ctx == NULL); + assert(factory.instructions->is_empty()); + + factory.mem_ctx = mem_ctx; + } + + void + teardown_factory() + { + base_ir->insert_before(factory.instructions); + assert(factory.instructions->is_empty()); + factory.mem_ctx = NULL; + } + + template + ir_constant* + constant(T x) + { + return factory.constant(x); + } + + /** + * \brief Pack two uint16's into a single uint32. + * + * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 + * where the least significant bits specify the first element of the pair. + * Return the uint32. + */ + ir_rvalue* + pack_uvec2_to_uint(ir_rvalue *uvec2_rval) + { + assert(uvec2_rval->type == glsl_type::uvec2_type); + + /* uvec2 u = UVEC2_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_uvec2_to_uint"); + factory.emit(assign(u, uvec2_rval)); + + if (op_mask & LOWER_PACK_USE_BFI) { + return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), + swizzle_y(u), + constant(16u), + constant(16u)); + } + + /* return (u.y << 16) | (u.x & 0xffff); */ + return bit_or(lshift(swizzle_y(u), constant(16u)), + bit_and(swizzle_x(u), constant(0xffffu))); + } + + /** + * \brief Pack four uint8's into a single uint32. + * + * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a + * uint32 where the least significant bits specify the first element of the + * 4-tuple. Return the uint32. 
+ */ + ir_rvalue* + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) + { + assert(uvec4_rval->type == glsl_type::uvec4_type); + + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, + "tmp_pack_uvec4_to_uint"); + + if (op_mask & LOWER_PACK_USE_BFI) { + /* uvec4 u = UVEC4_RVAL; */ + factory.emit(assign(u, uvec4_rval)); + + return bitfield_insert(bitfield_insert( + bitfield_insert( + bit_and(swizzle_x(u), constant(0xffu)), + swizzle_y(u), constant(8u), constant(8u)), + swizzle_z(u), constant(16u), constant(8u)), + swizzle_w(u), constant(24u), constant(8u)); + } + + /* uvec4 u = UVEC4_RVAL & 0xff */ + factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); + + /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ + return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), + lshift(swizzle_z(u), constant(16u))), + bit_or(lshift(swizzle_y(u), constant(8u)), + swizzle_x(u))); + } + + /** + * \brief Unpack a uint32 into two uint16's. + * + * Interpret the given uint32 as a uint16 pair where the uint32's least + * significant bits specify the pair's first element. Return the uint16 + * pair as a uvec2. + */ + ir_rvalue* + unpack_uint_to_uvec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec2_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec2 u2; */ + ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_uint_to_uvec2_u2"); + + /* u2.x = u & 0xffffu; */ + factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); + + /* u2.y = u >> 16u; */ + factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); + + return deref(u2).val; + } + + /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** + * \brief Unpack a uint32 into four uint8's. + * + * Interpret the given uint32 as a uint8 4-tuple where the uint32's least + * significant bits specify the 4-tuple's first element. Return the uint8 + * 4-tuple as a uvec4. 
+ */ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec4_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec4 u4; */ + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, + "tmp_unpack_uint_to_uvec4_u4"); + + /* u4.x = u & 0xffu; */ + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } + + /* u4.w = (u >> 24u) */ + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + + return deref(u4).val; + } + + /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** + * \brief Lower a packSnorm2x16 expression. + * + * \param vec2_rval is packSnorm2x16's input + * \return packSnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packSnorm2x16(vec2 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint( + * uvec2(ivec2( + * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); + * + * It is necessary to first convert the vec2 to ivec2 rather than directly + * converting vec2 to uvec2 because the latter conversion is undefined. 
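The pack/unpack helpers above only build IR for ordinary shift-and-mask arithmetic. For reference, here is roughly the same arithmetic as standalone C (helper names are invented for illustration; types from <stdint.h>). As the doc comments state, the first element always lands in the least significant bits:

    #include <stdint.h>

    /* Pack two 16-bit values; the first element lands in the low bits,
     * i.e. (y << 16) | (x & 0xffff). */
    static uint32_t pack_u16_pair(uint16_t x, uint16_t y)
    {
        return ((uint32_t) y << 16) | x;
    }

    /* Inverse of the above. */
    static void unpack_u16_pair(uint32_t u, uint16_t *x, uint16_t *y)
    {
        *x = (uint16_t) (u & 0xffffu);
        *y = (uint16_t) (u >> 16);
    }

    /* Four 8-bit fields work the same way, 8 bits apart. */
    static uint32_t pack_u8_quad(uint8_t a, uint8_t b, uint8_t c, uint8_t d)
    {
        return ((uint32_t) d << 24) | ((uint32_t) c << 16) |
               ((uint32_t) b << 8)  | a;
    }

    /* Signed variant: shifting the field up to bit 31 and arithmetically
     * shifting back down replicates bit 15 into the high bits. This is the
     * sign-extension step the non-BFE path above performs with
     * (value << 16) >> 16. Relies on arithmetic right shift of negative
     * values, which GLSL guarantees and C compilers provide in practice. */
    static int32_t sign_extend_16(uint32_t low16)
    {
        return ((int32_t) (low16 << 16)) >> 16;
    }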
+ * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + i2u(f2i(round_even(mul(clamp(vec2_rval, + constant(-1.0f), + constant(1.0f)), + constant(32767.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packSnorm4x8 expression. + * + * \param vec4_rval is packSnorm4x8's input + * \return packSnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packSnorm4x8(vec4 v) + * ------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint( + * uvec4(ivec4( + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); + * + * It is necessary to first convert the vec4 to ivec4 rather than directly + * converting vec4 to uvec4 because the latter conversion is undefined. + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + i2u(f2i(round_even(mul(clamp(vec4_rval, + constant(-1.0f), + constant(1.0f)), + constant(127.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackSnorm2x16 expression. + * + * \param uint_rval is unpackSnorm2x16's input + * \return unpackSnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_snorm_2x16(ir_rvalue *uint_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackSnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec2 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec2(-1.0, 0.0). + * packSnorm2x16 encodes -1.0 as the int16 0xffff. 
During unpacking, we + * place that int16 into an int32, which results in the *positive* integer + * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather + * unimportant bit 16. We must now extend the int16's sign bit into bits + * 17-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), + constant(32767.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackSnorm4x8 expression. + * + * \param uint_rval is unpackSnorm4x8's input + * \return unpackSnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_snorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackSnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec4 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we + * place that int8 into an int32, which results in the *positive* integer + * 0x000000ff. The int8's sign bit becomes, in the int32, the rather + * unimportant bit 8. We must now extend the int8's sign bit into bits + * 9-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), + constant(127.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower a packUnorm2x16 expression. + * + * \param vec2_rval is packUnorm2x16's input + * \return packUnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packUnorm2x16 (vec2 v) + * --------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. 
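Taken one component at a time, the packSnorm2x16/unpackSnorm2x16 lowering above reduces to the fixed-point formulas quoted from the spec. A rough scalar sketch in plain C under those same formulas (function names are illustrative only; rintf gives round-to-nearest-even under the default rounding mode, matching the round_to_even used above):

    #include <math.h>
    #include <stdint.h>

    static uint16_t pack_snorm_1x16(float c)
    {
        /* round(clamp(c, -1, +1) * 32767.0), then keep the low 16 bits. */
        float clamped = fminf(fmaxf(c, -1.0f), 1.0f);
        int32_t fixed = (int32_t) rintf(clamped * 32767.0f);
        return (uint16_t) fixed;          /* two's-complement truncation */
    }

    static float unpack_snorm_1x16(uint16_t bits)
    {
        /* Sign-extend the 16-bit field first (the (x << 16) >> 16 trick
         * in the IR above), then clamp(f / 32767.0, -1, +1). */
        int32_t f = ((int32_t) ((uint32_t) bits << 16)) >> 16;
        float v = (float) f / 32767.0f;
        return fminf(fmaxf(v, -1.0f), 1.0f);
    }

The unorm variants differ only in clamping to [0, 1], scaling by 65535.0 (or 255.0 for the 4x8 forms), and needing no sign extension on unpack.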
+ * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint(uvec2( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); + * + * Here it is safe to directly convert the vec2 to uvec2 because the vec2 + * has been clamped to a non-negative range. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packUnorm4x8 expression. + * + * \param vec4_rval is packUnorm4x8's input + * \return packUnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packUnorm4x8 (vec4 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint(uvec4( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); + * + * Here it is safe to directly convert the vec4 to uvec4 because the vec4 + * has been clamped to a non-negative range. + */ + + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackUnorm2x16 expression. + * + * \param uint_rval is unpackUnorm2x16's input + * \return unpackUnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_unorm_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackUnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), + constant(65535.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackUnorm4x8 expression. 
+ *
+ * \param uint_rval is unpackUnorm4x8's input
+ * \return unpackUnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackUnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm4x8: f / 255.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
+ constant(255.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower the component-wise calculation of packHalf2x16.
+ *
+ * \param f_rval is one component of packHalf2x16's input
+ * \param e_rval is the unshifted exponent bits of f_rval
+ * \param m_rval is the unshifted mantissa bits of f_rval
+ *
+ * \return a uint rvalue that encodes a float16 in its lower 16 bits
+ */
+ ir_rvalue*
+ pack_half_1x16_nosign(ir_rvalue *f_rval,
+ ir_rvalue *e_rval,
+ ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u16; */
+ ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_u16");
+
+ /* float f = FLOAT_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::float_type,
+ "tmp_pack_half_1x16_f");
+ factory.emit(assign(f, f_rval));
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
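+ *
+ * As a concrete anchor for the two layouts (value chosen for
+ * illustration): 1.0 is the float32 0x3f800000 (s32 = 0, e32 = 127,
+ * m32 = 0, case (12)) and the float16 0x3c00 (s16 = 0, e16 = 15,
+ * m16 = 0, case (3)); both reduce to 2^0 * (1 + 0).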
+ * + * The minimum and maximum normal float16 values are + * + * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) + * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) + * + * The step at max_norm16 is + * + * max_step16 = 2^5 (22) + * + * Observe that the float16 boundary values in equations 20-21 lie in the + * range of normal float32 values. + * + * + * Rounding Behavior + * ----------------- + * Not all float32 values can be exactly represented as a float16. We + * round all such intermediate float32 values to the nearest float16; if + * the float32 is exactly between to float16 values, we round to the one + * with an even mantissa. This rounding behavior has several benefits: + * + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. + * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the + * GPU. + * + * Calculation + * ----------- + * Our task is to compute s16, e16, m16 given f32. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f32 is NaN + * + * The resultant f16 will also be NaN. + */ + + /* if (e32 == 255 && m32 != 0) { */ + if_tree(logic_and(equal(e, constant(0xffu << 23u)), + logic_not(equal(m, constant(0u)))), + + assign(u16, constant(0x7fffu)), + + /* Case 2) f32 lies in the range [0, min_norm16). + * + * The resultant float16 will be either zero, subnormal, or normal. + * + * Solving + * + * f32 = min_norm16 (30) + * + * gives + * + * e32 = 113 and m32 = 0 (31) + * + * Therefore this case occurs if and only if + * + * e32 < 113 (32) + */ + + /* } else if (e32 < 113) { */ + if_tree(less(e, constant(113u << 23u)), + + /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ + assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), + constant((float) (1 << 24)))))), + + /* Case 3) f32 lies in the range + * [min_norm16, max_norm16 + max_step16). + * + * The resultant float16 will be either normal or infinite. + * + * Solving + * + * f32 = max_norm16 + max_step16 (40) + * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) + * = 2^16 (42) + * gives + * + * e32 = 143 and m32 = 0 (43) + * + * We already solved the boundary condition f32 = min_norm16 above + * in equation 31. Therefore this case occurs if and only if + * + * 113 <= e32 and e32 < 143 + */ + + /* } else if (e32 < 143) { */ + if_tree(less(e, constant(143u << 23u)), + + /* The addition below handles the case where the mantissa rounds + * up to 1024 and bumps the exponent. + * + * u16 = ((e - (112u << 23u)) >> 13u) + * + round_to_even((float(m) / (1u << 13u)); + */ + assign(u16, add(rshift(sub(e, constant(112u << 23u)), + constant(13u)), + f2u(round_even( + div(u2f(m), constant((float) (1 << 13))))))), + + /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. + * + * The resultant float16 will be infinite. + * + * The cases above caught all float32 values in the range + * [0, max_norm16 + max_step16), so this is the fall-through case. + */ + + /* } else { */ + + assign(u16, constant(31u << 10u)))))); + + /* } */ + + return deref(u16).val; + } + + /** + * \brief Lower a packHalf2x16 expression. 
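+ *
+ * (Worked example, input chosen for illustration: packHalf2x16(
+ * vec2(65504.0, 65536.0)) yields 0x7c007bff. The first component is the
+ * largest finite float16, 0x7bff, produced by case 3 of
+ * pack_half_1x16_nosign above; the second lands in the fall-through
+ * case 4 and becomes +infinity, 0x7c00.)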
+ * + * \param vec2_rval is packHalf2x16's input + * \return packHalf2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_half_2x16(ir_rvalue *vec2_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packHalf2x16 (mediump vec2 v) + * ---------------------------------------- + * Returns an unsigned integer obtained by converting the components of + * a two-component floating-point vector to the 16-bit floating-point + * representation found in the OpenGL ES Specification, and then packing + * these two 16-bit integers into a 32-bit unsigned integer. + * + * The first vector component specifies the 16 least- significant bits + * of the result; the second component specifies the 16 most-significant + * bits. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + /* vec2 f = VEC2_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::vec2_type, + "tmp_pack_half_2x16_f"); + factory.emit(assign(f, vec2_rval)); + + /* uvec2 f32 = bitcast_f2u(f); */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f32"); + factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); + + /* uvec2 f16; */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f16"); + + /* Get f32's unshifted exponent bits. + * + * uvec2 e = f32 & 0x7f800000u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_e"); + factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); + + /* Get f32's unshifted mantissa bits. + * + * uvec2 m = f32 & 0x007fffffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_m"); + factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); + + /* Set f16's exponent and mantissa bits. + * + * f16.x = pack_half_1x16_nosign(e.x, m.x); + * f16.y = pack_half_1y16_nosign(e.y, m.y); + */ + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), + swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), + swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f16's sign bits. + * + * f16 |= (f32 & (1u << 31u) >> 16u; + */ + factory.emit( + assign(f16, bit_or(f16, + rshift(bit_and(f32, constant(1u << 31u)), + constant(16u))))); + + + /* return (f16.y << 16u) | f16.x; */ + ir_rvalue *result = bit_or(lshift(swizzle_y(f16), + constant(16u)), + swizzle_x(f16)); + + assert(result->type == glsl_type::uint_type); + return result; + } + - /** - * \brief Split packHalf2x16's vec2 operand into two floats. - * - * \param vec2_rval is packHalf2x16's input - * \return a uint rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, packHalf2x16 cannot be scalarized by the same mechanism as - * a true vector operation because its input and output have a differing - * number of vector components. - * - * This method scalarizes packHalf2x16 by transforming it from an unary - * operation having vector input to a binary operation having scalar input. 
- * That is, it transforms - * - * packHalf2x16(VEC2_RVAL); - * - * into - * - * vec2 v = VEC2_RVAL; - * return packHalf2x16_split(v.x, v.y); - */ - ir_rvalue* - split_pack_half_2x16(ir_rvalue *vec2_rval) - { - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_pack_half_2x16_v"); - factory.emit(assign(v, vec2_rval)); - - return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); - } - + /** + * \brief Lower the component-wise calculation of unpackHalf2x16. + * + * Given a uint that encodes a float16 in its lower 16 bits, this function + * returns a uint that encodes a float32 with the same value. The sign bit + * of the float16 is ignored. + * + * \param e_rval is the unshifted exponent bits of a float16 + * \param m_rval is the unshifted mantissa bits of a float16 + * \param a uint rvalue that encodes a float32 + */ + ir_rvalue* + unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u32; */ + ir_variable *u32 = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_u32"); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * Calculation + * ----------- + * Our task is to compute s32, e32, m32 given f16. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f16 is zero or subnormal. + * + * The simplest method of calcuating f32 in this case is + * + * f32 = f16 (20) + * = 2^(-14) * (m16 / 2^10) (21) + * = m16 / 2^(-24) (22) + */ + + /* if (e16 == 0) { */ + if_tree(equal(e, constant(0u)), + + /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ + assign(u32, expr(ir_unop_bitcast_f2u, + div(u2f(m), constant((float)(1 << 24))))), + + /* Case 2) f16 is normal. 
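+ *
+ * (Concrete check, value chosen for illustration: the normal float16 1.0
+ * is 0x3c00, so e = 0x3c00 and m = 0, and the formula derived below gives
+ * ((0x3c00 + (112 << 10)) | 0) << 13 = 0x3f800000, which is 1.0 as a
+ * float32.)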
+ * + * The equation + * + * f32 = f16 (30) + * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) + * 2^(e16 - 15) * (1 + m16 / 2^10) + * + * can be decomposed into two + * + * 2^(e32 - 127) = 2^(e16 - 15) (32) + * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) + * + * which solve to + * + * e32 = e16 + 112 (34) + * m32 = m16 * 2^13 (35) + */ + + /* } else if (e16 < 31)) { */ + if_tree(less(e, constant(31u << 10u)), + + /* u32 = ((e + (112 << 10)) | m) << 13; + */ + assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), + constant(13u))), + + + /* Case 3) f16 is infinite. */ + if_tree(equal(m, constant(0u)), + + assign(u32, constant(255u << 23u)), + + /* Case 4) f16 is NaN. */ + /* } else { */ + + assign(u32, constant(0x7fffffffu)))))); + + /* } */ + + return deref(u32).val; + } + + /** + * \brief Lower an unpackHalf2x16 expression. + * + * \param uint_rval is unpackHalf2x16's input + * \return unpackHalf2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_half_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * mediump vec2 unpackHalf2x16 (highp uint v) + * ------------------------------------------ + * Returns a two-component floating-point vector with components + * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit + * values, interpreting those values as 16-bit floating-point numbers + * according to the OpenGL ES Specification, and converting them to + * 32-bit floating-point values. + * + * The first component of the vector is obtained from the + * 16 least-significant bits of v; the second component is obtained + * from the 16 most-significant bits of v. + */ + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = RVALUE; + * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); + */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f16"); + factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); + + /* uvec2 f32; */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f32"); + + /* Get f16's unshifted exponent bits. + * + * uvec2 e = f16 & 0x7c00u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_e"); + factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); + + /* Get f16's unshifted mantissa bits. + * + * uvec2 m = f16 & 0x03ffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_m"); + factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); + + /* Set f32's exponent and mantissa bits. + * + * f32.x = unpack_half_1x16_nosign(e.x, m.x); + * f32.y = unpack_half_1x16_nosign(e.y, m.y); + */ + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f32's sign bit. + * + * f32 |= (f16 & 0x8000u) << 16u; + */ + factory.emit(assign(f32, bit_or(f32, + lshift(bit_and(f16, + constant(0x8000u)), + constant(16u))))); + + /* return bitcast_u2f(f32); */ + ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); + assert(result->type == glsl_type::vec2_type); + return result; + } - - /** - * \brief Split unpackHalf2x16 into two operations. - * - * \param uint_rval is unpackHalf2x16's input - * \return a vec2 rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. 
- * However, unpackHalf2x16 cannot be scalarized by the same method as - * a true vector operation because the number of components of its input - * and output differ. - * - * This method scalarizes unpackHalf2x16 by transforming it from a single - * operation having vec2 output to a pair of operations each having float - * output. That is, it transforms - * - * unpackHalf2x16(UINT_RVAL) - * - * into - * - * uint u = UINT_RVAL; - * vec2 v; - * - * v.x = unpackHalf2x16_split_x(u); - * v.y = unpackHalf2x16_split_y(u); - * - * return v; - */ - ir_rvalue* - split_unpack_half_2x16(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = uint_rval; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_split_unpack_half_2x16_u"); - factory.emit(assign(u, uint_rval)); - - /* vec2 v; */ - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_unpack_half_2x16_v"); - - /* v.x = unpack_half_2x16_split_x(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), - WRITEMASK_X)); - - /* v.y = unpack_half_2x16_split_y(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), - WRITEMASK_Y)); - - return deref(v).val; - } + }; + + } // namespace anonymous + + /** + * \brief Lower the builtin packing functions. + * + * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. + */ + bool + lower_packing_builtins(exec_list *instructions, int op_mask) + { + lower_packing_builtins_visitor v(op_mask); + visit_list_elements(&v, instructions, true); + return v.get_progress(); + } diff --cc src/compiler/glsl/standalone_scaffolding.cpp index 00000000000,d5d214b57cc..0f7a16a5e6f mode 000000,100644..100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@@ -1,0 -1,221 +1,227 @@@ + /* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + /* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). 
+ */ + + #include "standalone_scaffolding.h" + + #include + #include + #include + #include "util/ralloc.h" + #include "util/strtod.h" + ++extern "C" void ++_mesa_error_no_memory(const char *caller) ++{ ++ fprintf(stderr, "Mesa error: out of memory in %s", caller); ++} ++ + void + _mesa_warning(struct gl_context *ctx, const char *fmt, ...) + { + va_list vargs; + (void) ctx; + + va_start(vargs, fmt); + + /* This output is not thread-safe, but that's good enough for the + * standalone compiler. + */ + fprintf(stderr, "Mesa warning: "); + vfprintf(stderr, fmt, vargs); + fprintf(stderr, "\n"); + + va_end(vargs); + } + + void + _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh) + { + (void) ctx; + *ptr = sh; + } + + void + _mesa_shader_debug(struct gl_context *, GLenum, GLuint *, + const char *) + { + } + + struct gl_shader * + _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) + { + struct gl_shader *shader; + + (void) ctx; + + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Stage = _mesa_shader_enum_to_shader_stage(type); + shader->Name = name; + shader->RefCount = 1; + } + return shader; + } + + void + _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) + { + free((void *)sh->Source); + free(sh->Label); + ralloc_free(sh); + } + + void + _mesa_clear_shader_program_data(struct gl_shader_program *shProg) + { + unsigned i; + + shProg->NumUniformStorage = 0; + shProg->UniformStorage = NULL; + shProg->NumUniformRemapTable = 0; + shProg->UniformRemapTable = NULL; + shProg->UniformHash = NULL; + + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); + + ralloc_free(shProg->BufferInterfaceBlocks); + shProg->BufferInterfaceBlocks = NULL; + shProg->NumBufferInterfaceBlocks = 0; + + ralloc_free(shProg->UniformBlocks); + shProg->UniformBlocks = NULL; + shProg->NumUniformBlocks = 0; + + ralloc_free(shProg->ShaderStorageBlocks); + shProg->ShaderStorageBlocks = NULL; + shProg->NumShaderStorageBlocks = 0; + + for (i = 0; i < MESA_SHADER_STAGES; i++) { + ralloc_free(shProg->InterfaceBlockStageIndex[i]); + shProg->InterfaceBlockStageIndex[i] = NULL; + } + + ralloc_free(shProg->UboInterfaceBlockIndex); + shProg->UboInterfaceBlockIndex = NULL; + ralloc_free(shProg->SsboInterfaceBlockIndex); + shProg->SsboInterfaceBlockIndex = NULL; + + ralloc_free(shProg->AtomicBuffers); + shProg->AtomicBuffers = NULL; + shProg->NumAtomicBuffers = 0; + } + + void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) + { + memset(ctx, 0, sizeof(*ctx)); + + ctx->API = api; + + ctx->Extensions.dummy_false = false; + ctx->Extensions.dummy_true = true; + ctx->Extensions.ARB_compute_shader = true; + ctx->Extensions.ARB_conservative_depth = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_explicit_attrib_location = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; + ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_gpu_shader_fp64 = true; + ctx->Extensions.ARB_sample_shading = true; + ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shader_draw_parameters = true; + ctx->Extensions.ARB_shader_stencil_export = true; + ctx->Extensions.ARB_shader_subroutine = true; + ctx->Extensions.ARB_shader_texture_lod = 
true; + ctx->Extensions.ARB_shading_language_420pack = true; + ctx->Extensions.ARB_shading_language_packing = true; + ctx->Extensions.ARB_tessellation_shader = true; + ctx->Extensions.ARB_texture_cube_map_array = true; + ctx->Extensions.ARB_texture_gather = true; + ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_query_levels = true; + ctx->Extensions.ARB_texture_query_lod = true; + ctx->Extensions.ARB_uniform_buffer_object = true; + ctx->Extensions.ARB_viewport_array = true; + + ctx->Extensions.OES_EGL_image_external = true; + ctx->Extensions.OES_standard_derivatives = true; + + ctx->Extensions.EXT_shader_integer_mix = true; + ctx->Extensions.EXT_texture_array = true; + + ctx->Extensions.NV_texture_rectangle = true; + + ctx->Const.GLSLVersion = 120; + + /* 1.20 minimums. */ + ctx->Const.MaxLights = 8; + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32; + + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + + /* Set up default shader compiler options. */ + struct gl_shader_compiler_options options; + memset(&options, 0, sizeof(options)); + options.MaxUnrollIterations = 32; + options.MaxIfDepth = UINT_MAX; + + for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh) + memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); + + _mesa_locale_init(); + } diff --cc src/compiler/glsl_types.cpp index 00000000000,17ebf07acbc..5920c2e2611 mode 000000,100644..100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@@ -1,0 -1,1758 +1,1950 @@@ + /* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include + #include "main/macros.h" + #include "compiler/glsl/glsl_parser_extras.h" + #include "glsl_types.h" + #include "util/hash_table.h" + + + mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; + hash_table *glsl_type::array_types = NULL; + hash_table *glsl_type::record_types = NULL; + hash_table *glsl_type::interface_types = NULL; ++hash_table *glsl_type::function_types = NULL; + hash_table *glsl_type::subroutine_types = NULL; + void *glsl_type::mem_ctx = NULL; + + void + glsl_type::init_ralloc_type_ctx(void) + { + if (glsl_type::mem_ctx == NULL) { + glsl_type::mem_ctx = ralloc_autofree_context(); + assert(glsl_type::mem_ctx != NULL); + } + } + + glsl_type::glsl_type(GLenum gl_type, + glsl_base_type base_type, unsigned vector_elements, + unsigned matrix_columns, const char *name) : + gl_type(gl_type), + base_type(base_type), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(vector_elements), matrix_columns(matrix_columns), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + + mtx_unlock(&glsl_type::mutex); + + /* Neither dimension is zero or both dimensions are zero. + */ + assert((vector_elements == 0) == (matrix_columns == 0)); + memset(& fields, 0, sizeof(fields)); + } + + glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, + enum glsl_sampler_dim dim, bool shadow, bool array, + unsigned type, const char *name) : + gl_type(gl_type), + base_type(base_type), + sampler_dimensionality(dim), sampler_shadow(shadow), + sampler_array(array), sampler_type(type), interface_packing(0), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + + mtx_unlock(&glsl_type::mutex); + + memset(& fields, 0, sizeof(fields)); + + if (base_type == GLSL_TYPE_SAMPLER) { + /* Samplers take no storage whatsoever. 
*/ + matrix_columns = vector_elements = 0; + } else { + matrix_columns = vector_elements = 1; + } + } + + glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name) : + gl_type(0), + base_type(GLSL_TYPE_STRUCT), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_fields) + { + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].image_read_only = fields[i].image_read_only; + this->fields.structure[i].image_write_only = fields[i].image_write_only; + this->fields.structure[i].image_coherent = fields[i].image_coherent; + this->fields.structure[i].image_volatile = fields[i].image_volatile; + this->fields.structure[i].image_restrict = fields[i].image_restrict; + this->fields.structure[i].precision = fields[i].precision; + } + + mtx_unlock(&glsl_type::mutex); + } + + glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name) : + gl_type(0), + base_type(GLSL_TYPE_INTERFACE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing((unsigned) packing), + vector_elements(0), matrix_columns(0), + length(num_fields) + { + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].precision = fields[i].precision; + } + + mtx_unlock(&glsl_type::mutex); + } + ++glsl_type::glsl_type(const glsl_type *return_type, ++ const glsl_function_param *params, unsigned num_params) : ++ gl_type(0), ++ base_type(GLSL_TYPE_FUNCTION), ++ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), ++ sampler_type(0), interface_packing(0), ++ vector_elements(0), matrix_columns(0), ++ length(num_params) ++{ ++ unsigned int i; ++ ++ mtx_lock(&glsl_type::mutex); ++ ++ init_ralloc_type_ctx(); ++ ++ this->fields.parameters = rzalloc_array(this->mem_ctx, ++ glsl_function_param, num_params + 1); ++ ++ /* We store the return type as the first parameter */ ++ this->fields.parameters[0].type = 
return_type; ++ this->fields.parameters[0].in = false; ++ this->fields.parameters[0].out = true; ++ ++ /* We store the i'th parameter in slot i+1 */ ++ for (i = 0; i < length; i++) { ++ this->fields.parameters[i + 1].type = params[i].type; ++ this->fields.parameters[i + 1].in = params[i].in; ++ this->fields.parameters[i + 1].out = params[i].out; ++ } ++ ++ mtx_unlock(&glsl_type::mutex); ++} ++ + glsl_type::glsl_type(const char *subroutine_name) : + gl_type(0), + base_type(GLSL_TYPE_SUBROUTINE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(1), matrix_columns(1), + length(0) + { + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(subroutine_name != NULL); + this->name = ralloc_strdup(this->mem_ctx, subroutine_name); + mtx_unlock(&glsl_type::mutex); + } + + bool + glsl_type::contains_sampler() const + { + if (this->is_array()) { + return this->fields.array->contains_sampler(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_sampler()) + return true; + } + return false; + } else { + return this->is_sampler(); + } + } + + + bool + glsl_type::contains_integer() const + { + if (this->is_array()) { + return this->fields.array->contains_integer(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_integer()) + return true; + } + return false; + } else { + return this->is_integer(); + } + } + + bool + glsl_type::contains_double() const + { + if (this->is_array()) { + return this->fields.array->contains_double(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_double()) + return true; + } + return false; + } else { + return this->is_double(); + } + } + + bool + glsl_type::contains_opaque() const { + switch (base_type) { + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + case GLSL_TYPE_ARRAY: + return fields.array->contains_opaque(); + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < length; i++) { + if (fields.structure[i].type->contains_opaque()) + return true; + } + return false; + default: + return false; + } + } + + bool + glsl_type::contains_subroutine() const + { + if (this->is_array()) { + return this->fields.array->contains_subroutine(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_subroutine()) + return true; + } + return false; + } else { + return this->is_subroutine(); + } + } + + gl_texture_index + glsl_type::sampler_index() const + { + const glsl_type *const t = (this->is_array()) ? this->fields.array : this; + + assert(t->is_sampler()); + + switch (t->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + case GLSL_SAMPLER_DIM_2D: + return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + case GLSL_SAMPLER_DIM_3D: + return TEXTURE_3D_INDEX; + case GLSL_SAMPLER_DIM_CUBE: + return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + case GLSL_SAMPLER_DIM_RECT: + return TEXTURE_RECT_INDEX; + case GLSL_SAMPLER_DIM_BUF: + return TEXTURE_BUFFER_INDEX; + case GLSL_SAMPLER_DIM_EXTERNAL: + return TEXTURE_EXTERNAL_INDEX; + case GLSL_SAMPLER_DIM_MS: + return (t->sampler_array) ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + default: + assert(!"Should not get here."); + return TEXTURE_BUFFER_INDEX; + } + } + + bool + glsl_type::contains_image() const + { + if (this->is_array()) { + return this->fields.array->contains_image(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_image()) + return true; + } + return false; + } else { + return this->is_image(); + } + } + + const glsl_type *glsl_type::get_base_type() const + { + switch (base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + return error_type; + } + } + + + const glsl_type *glsl_type::get_scalar_type() const + { + const glsl_type *type = this; + + /* Handle arrays */ + while (type->base_type == GLSL_TYPE_ARRAY) + type = type->fields.array; + + /* Handle vectors and matrices */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + /* Handle everything else */ + return type; + } + } + + + void + _mesa_glsl_release_types(void) + { + /* Should only be called during atexit (either when unloading shared + * object, or if process terminates), so no mutex-locking should be + * necessary. + */ + if (glsl_type::array_types != NULL) { + _mesa_hash_table_destroy(glsl_type::array_types, NULL); + glsl_type::array_types = NULL; + } + + if (glsl_type::record_types != NULL) { + _mesa_hash_table_destroy(glsl_type::record_types, NULL); + glsl_type::record_types = NULL; + } + + if (glsl_type::interface_types != NULL) { + _mesa_hash_table_destroy(glsl_type::interface_types, NULL); + glsl_type::interface_types = NULL; + } + } + + + glsl_type::glsl_type(const glsl_type *array, unsigned length) : + base_type(GLSL_TYPE_ARRAY), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(length), name(NULL) + { + this->fields.array = array; + /* Inherit the gl type of the base. The GL type is used for + * uniform/statevar handling in Mesa and the arrayness of the type + * is represented by the size rather than the type. + */ + this->gl_type = array->gl_type; + + /* Allow a maximum of 10 characters for the array size. This is enough + * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating + * NUL. 
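+ *
+ * For example, an array of ~0u vec4 elements needs
+ * strlen("vec4") + 10 + 3 = 17 bytes, which exactly holds
+ * "vec4[4294967295]" plus the terminating NUL.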
+ */ + const unsigned name_length = strlen(array->name) + 10 + 3; + + mtx_lock(&glsl_type::mutex); + char *const n = (char *) ralloc_size(this->mem_ctx, name_length); + mtx_unlock(&glsl_type::mutex); + + if (length == 0) + snprintf(n, name_length, "%s[]", array->name); + else { + /* insert outermost dimensions in the correct spot + * otherwise the dimension order will be backwards + */ + const char *pos = strchr(array->name, '['); + if (pos) { + int idx = pos - array->name; + snprintf(n, idx+1, "%s", array->name); + snprintf(n + idx, name_length - idx, "[%u]%s", + length, array->name + idx); + } else { + snprintf(n, name_length, "%s[%u]", array->name, length); + } + } + + this->name = n; + } + + + const glsl_type * + glsl_type::vec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + float_type, vec2_type, vec3_type, vec4_type + }; + return ts[components - 1]; + } + + const glsl_type * + glsl_type::dvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + double_type, dvec2_type, dvec3_type, dvec4_type + }; + return ts[components - 1]; + } + + const glsl_type * + glsl_type::ivec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + int_type, ivec2_type, ivec3_type, ivec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::uvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + uint_type, uvec2_type, uvec3_type, uvec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::bvec(unsigned components) + { + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + bool_type, bvec2_type, bvec3_type, bvec4_type + }; + return ts[components - 1]; + } + + + const glsl_type * + glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) + { + if (base_type == GLSL_TYPE_VOID) + return void_type; + + if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4)) + return error_type; + + /* Treat GLSL vectors as Nx1 matrices. + */ + if (columns == 1) { + switch (base_type) { + case GLSL_TYPE_UINT: + return uvec(rows); + case GLSL_TYPE_INT: + return ivec(rows); + case GLSL_TYPE_FLOAT: + return vec(rows); + case GLSL_TYPE_DOUBLE: + return dvec(rows); + case GLSL_TYPE_BOOL: + return bvec(rows); + default: + return error_type; + } + } else { + if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1)) + return error_type; + + /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. 
Only the following + * combinations are valid: + * + * 1 2 3 4 + * 1 + * 2 x x x + * 3 x x x + * 4 x x x + */ + #define IDX(c,r) (((c-1)*3) + (r-1)) + + if (base_type == GLSL_TYPE_DOUBLE) { + switch (IDX(columns, rows)) { + case IDX(2,2): return dmat2_type; + case IDX(2,3): return dmat2x3_type; + case IDX(2,4): return dmat2x4_type; + case IDX(3,2): return dmat3x2_type; + case IDX(3,3): return dmat3_type; + case IDX(3,4): return dmat3x4_type; + case IDX(4,2): return dmat4x2_type; + case IDX(4,3): return dmat4x3_type; + case IDX(4,4): return dmat4_type; + default: return error_type; + } + } else { + switch (IDX(columns, rows)) { + case IDX(2,2): return mat2_type; + case IDX(2,3): return mat2x3_type; + case IDX(2,4): return mat2x4_type; + case IDX(3,2): return mat3x2_type; + case IDX(3,3): return mat3_type; + case IDX(3,4): return mat3x4_type; + case IDX(4,2): return mat4x2_type; + case IDX(4,3): return mat4x3_type; + case IDX(4,4): return mat4_type; + default: return error_type; + } + } + } + + assert(!"Should not get here."); + return error_type; + } + + const glsl_type * + glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type) + { + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + if (shadow) + return (array ? sampler1DArrayShadow_type : sampler1DShadow_type); + else + return (array ? sampler1DArray_type : sampler1D_type); + case GLSL_SAMPLER_DIM_2D: + if (shadow) + return (array ? sampler2DArrayShadow_type : sampler2DShadow_type); + else + return (array ? sampler2DArray_type : sampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (shadow || array) + return error_type; + else + return sampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + if (shadow) + return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type); + else + return (array ? samplerCubeArray_type : samplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + if (shadow) + return sampler2DRectShadow_type; + else + return sampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (shadow || array) + return error_type; + else + return samplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + if (shadow) + return error_type; + return (array ? sampler2DMSArray_type : sampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + if (shadow || array) + return error_type; + else + return samplerExternalOES_type; + } + case GLSL_TYPE_INT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? isampler1DArray_type : isampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? isampler2DArray_type : isampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return isampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? isamplerCubeArray_type : isamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return isampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return isamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? isampler2DMSArray_type : isampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? usampler1DArray_type : usampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? 
usampler2DArray_type : usampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return usampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? usamplerCubeArray_type : usamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return usampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return usamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? usampler2DMSArray_type : usampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); + } + ++const glsl_type * ++glsl_type::get_image_instance(enum glsl_sampler_dim dim, ++ bool array, glsl_base_type type) ++{ ++ switch (type) { ++ case GLSL_TYPE_FLOAT: ++ switch (dim) { ++ case GLSL_SAMPLER_DIM_1D: ++ return (array ? image1DArray_type : image1D_type); ++ case GLSL_SAMPLER_DIM_2D: ++ return (array ? image2DArray_type : image2D_type); ++ case GLSL_SAMPLER_DIM_3D: ++ return image3D_type; ++ case GLSL_SAMPLER_DIM_CUBE: ++ return (array ? imageCubeArray_type : imageCube_type); ++ case GLSL_SAMPLER_DIM_RECT: ++ if (array) ++ return error_type; ++ else ++ return image2DRect_type; ++ case GLSL_SAMPLER_DIM_BUF: ++ if (array) ++ return error_type; ++ else ++ return imageBuffer_type; ++ case GLSL_SAMPLER_DIM_MS: ++ return (array ? image2DMSArray_type : image2DMS_type); ++ case GLSL_SAMPLER_DIM_EXTERNAL: ++ return error_type; ++ } ++ case GLSL_TYPE_INT: ++ switch (dim) { ++ case GLSL_SAMPLER_DIM_1D: ++ return (array ? iimage1DArray_type : iimage1D_type); ++ case GLSL_SAMPLER_DIM_2D: ++ return (array ? iimage2DArray_type : iimage2D_type); ++ case GLSL_SAMPLER_DIM_3D: ++ if (array) ++ return error_type; ++ return iimage3D_type; ++ case GLSL_SAMPLER_DIM_CUBE: ++ return (array ? iimageCubeArray_type : iimageCube_type); ++ case GLSL_SAMPLER_DIM_RECT: ++ if (array) ++ return error_type; ++ return iimage2DRect_type; ++ case GLSL_SAMPLER_DIM_BUF: ++ if (array) ++ return error_type; ++ return iimageBuffer_type; ++ case GLSL_SAMPLER_DIM_MS: ++ return (array ? iimage2DMSArray_type : iimage2DMS_type); ++ case GLSL_SAMPLER_DIM_EXTERNAL: ++ return error_type; ++ } ++ case GLSL_TYPE_UINT: ++ switch (dim) { ++ case GLSL_SAMPLER_DIM_1D: ++ return (array ? uimage1DArray_type : uimage1D_type); ++ case GLSL_SAMPLER_DIM_2D: ++ return (array ? uimage2DArray_type : uimage2D_type); ++ case GLSL_SAMPLER_DIM_3D: ++ if (array) ++ return error_type; ++ return uimage3D_type; ++ case GLSL_SAMPLER_DIM_CUBE: ++ return (array ? uimageCubeArray_type : uimageCube_type); ++ case GLSL_SAMPLER_DIM_RECT: ++ if (array) ++ return error_type; ++ return uimage2DRect_type; ++ case GLSL_SAMPLER_DIM_BUF: ++ if (array) ++ return error_type; ++ return uimageBuffer_type; ++ case GLSL_SAMPLER_DIM_MS: ++ return (array ? uimage2DMSArray_type : uimage2DMS_type); ++ case GLSL_SAMPLER_DIM_EXTERNAL: ++ return error_type; ++ } ++ default: ++ return error_type; ++ } ++ ++ unreachable("switch statement above should be complete"); ++} ++ + const glsl_type * + glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) + { + /* Generate a name using the base type pointer in the key. This is + * done because the name of the base type may not be unique across + * shaders. For example, two shaders may have different record types + * named 'foo'. 
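+ *
+ * For example (the pointer values are of course arbitrary), two distinct
+ * record types both named "foo" might produce the keys
+ * "0x7f1a2c004a10[4]" and "0x7f1a2c004d90[4]" for foo[4], so each maps
+ * to its own array type in the cache below.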
+ */ + char key[128]; + snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size); + + mtx_lock(&glsl_type::mutex); + + if (array_types == NULL) { + array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + } + + const struct hash_entry *entry = _mesa_hash_table_search(array_types, key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(base, array_size); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(array_types, + ralloc_strdup(mem_ctx, key), + (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY); + assert(((glsl_type *) entry->data)->length == array_size); + assert(((glsl_type *) entry->data)->fields.array == base); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + + bool + glsl_type::record_compare(const glsl_type *b) const + { + if (this->length != b->length) + return false; + + if (this->interface_packing != b->interface_packing) + return false; + + /* From the GLSL 4.20 specification (Sec 4.2): + * + * "Structures must have the same name, sequence of type names, and + * type definitions, and field names to be considered the same type." + * + * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5). + * + * Note that we cannot force type name check when comparing unnamed + * structure types, these have a unique name assigned during parsing. + */ + if (!this->is_anonymous() && !b->is_anonymous()) + if (strcmp(this->name, b->name) != 0) + return false; + + for (unsigned i = 0; i < this->length; i++) { + if (this->fields.structure[i].type != b->fields.structure[i].type) + return false; + if (strcmp(this->fields.structure[i].name, + b->fields.structure[i].name) != 0) + return false; + if (this->fields.structure[i].matrix_layout + != b->fields.structure[i].matrix_layout) + return false; + if (this->fields.structure[i].location + != b->fields.structure[i].location) + return false; + if (this->fields.structure[i].interpolation + != b->fields.structure[i].interpolation) + return false; + if (this->fields.structure[i].centroid + != b->fields.structure[i].centroid) + return false; + if (this->fields.structure[i].sample + != b->fields.structure[i].sample) + return false; + if (this->fields.structure[i].patch + != b->fields.structure[i].patch) + return false; + if (this->fields.structure[i].image_read_only + != b->fields.structure[i].image_read_only) + return false; + if (this->fields.structure[i].image_write_only + != b->fields.structure[i].image_write_only) + return false; + if (this->fields.structure[i].image_coherent + != b->fields.structure[i].image_coherent) + return false; + if (this->fields.structure[i].image_volatile + != b->fields.structure[i].image_volatile) + return false; + if (this->fields.structure[i].image_restrict + != b->fields.structure[i].image_restrict) + return false; + if (this->fields.structure[i].precision + != b->fields.structure[i].precision) + return false; + } + + return true; + } + + + bool + glsl_type::record_key_compare(const void *a, const void *b) + { + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2); + } + + + /** + * Generate an integer hash value for a glsl_type structure type. 
+ */ + unsigned + glsl_type::record_key_hash(const void *a) + { + const glsl_type *const key = (glsl_type *) a; + uintptr_t hash = key->length; + unsigned retval; + + for (unsigned i = 0; i < key->length; i++) { + /* casting pointer to uintptr_t */ + hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type; + } + + if (sizeof(hash) == 8) + retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32); + else + retval = hash; + + return retval; + } + + + const glsl_type * + glsl_type::get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name) + { + const glsl_type key(fields, num_fields, name); + + mtx_lock(&glsl_type::mutex); + + if (record_types == NULL) { + record_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(record_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(record_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + + const glsl_type * + glsl_type::get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *block_name) + { + const glsl_type key(fields, num_fields, packing, block_name); + + mtx_lock(&glsl_type::mutex); + + if (interface_types == NULL) { + interface_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(interface_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, + packing, block_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(interface_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + const glsl_type * + glsl_type::get_subroutine_instance(const char *subroutine_name) + { + const glsl_type key(subroutine_name); + + mtx_lock(&glsl_type::mutex); + + if (subroutine_types == NULL) { + subroutine_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(subroutine_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE); + assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; + } + + ++static bool ++function_key_compare(const void *a, const void *b) ++{ ++ const glsl_type *const key1 = (glsl_type *) a; ++ const glsl_type *const key2 = (glsl_type *) b; ++ ++ if (key1->length != key2->length) ++ return 1; ++ ++ return memcmp(key1->fields.parameters, key2->fields.parameters, ++ (key1->length + 1) * 
sizeof(*key1->fields.parameters)) == 0; ++} ++ ++ ++static uint32_t ++function_key_hash(const void *a) ++{ ++ const glsl_type *const key = (glsl_type *) a; ++ char hash_key[128]; ++ unsigned size = 0; ++ ++ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); ++ ++ for (unsigned i = 0; i < key->length; i++) { ++ if (size >= sizeof(hash_key)) ++ break; ++ ++ size += snprintf(& hash_key[size], sizeof(hash_key) - size, ++ "%p", (void *) key->fields.structure[i].type); ++ } ++ ++ return _mesa_hash_string(hash_key); ++} ++ ++const glsl_type * ++glsl_type::get_function_instance(const glsl_type *return_type, ++ const glsl_function_param *params, ++ unsigned num_params) ++{ ++ const glsl_type key(return_type, params, num_params); ++ ++ mtx_lock(&glsl_type::mutex); ++ ++ if (function_types == NULL) { ++ function_types = _mesa_hash_table_create(NULL, function_key_hash, ++ function_key_compare); ++ } ++ ++ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); ++ if (entry == NULL) { ++ mtx_unlock(&glsl_type::mutex); ++ const glsl_type *t = new glsl_type(return_type, params, num_params); ++ mtx_lock(&glsl_type::mutex); ++ ++ entry = _mesa_hash_table_insert(function_types, t, (void *) t); ++ } ++ ++ const glsl_type *t = (const glsl_type *)entry->data; ++ ++ assert(t->base_type == GLSL_TYPE_FUNCTION); ++ assert(t->length == num_params); ++ ++ mtx_unlock(&glsl_type::mutex); ++ ++ return t; ++} ++ ++ + const glsl_type * + glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) + { + if (type_a == type_b) { + return type_a; + } else if (type_a->is_matrix() && type_b->is_matrix()) { + /* Matrix multiply. The columns of A must match the rows of B. Given + * the other previously tested constraints, this means the vector type + * of a row from A must be the same as the vector type of a column from + * B. + */ + if (type_a->row_type() == type_b->column_type()) { + /* The resulting matrix has the number of columns of matrix B and + * the number of rows of matrix A. We get the row count of A by + * looking at the size of a vector that makes up a column. The + * transpose (size of a row) is done for B. + */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + type_b->row_type()->vector_elements); + assert(type != error_type); + + return type; + } + } else if (type_a->is_matrix()) { + /* A is a matrix and B is a column vector. Columns of A must match + * rows of B. Given the other previously tested constraints, this + * means the vector type of a row from A must be the same as the + * vector the type of B. + */ + if (type_a->row_type() == type_b) { + /* The resulting vector has a number of elements equal to + * the number of rows of matrix A. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } else { + assert(type_b->is_matrix()); + + /* A is a row vector and B is a matrix. Columns of A must match rows + * of B. Given the other previously tested constraints, this means + * the type of A must be the same as the vector type of a column from + * B. + */ + if (type_a == type_b->column_type()) { + /* The resulting vector has a number of elements equal to + * the number of columns of matrix B. 
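+ *
+ * For example (GLSL, illustrative):
+ *
+ * vec3 v;
+ * mat2x3 m; // 2 columns, 3 rows: column_type() is vec3, row_type() vec2
+ * vec2 r = v * m; // row vector times matrix yields 2 components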
*/ + const glsl_type *const type = + get_instance(type_a->base_type, + type_b->row_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } + + return error_type; + } + + + const glsl_type * + glsl_type::field_type(const char *name) const + { + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return error_type; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return this->fields.structure[i].type; + } + + return error_type; + } + + + int + glsl_type::field_index(const char *name) const + { + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return -1; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return i; + } + + return -1; + } + + + unsigned + glsl_type::component_slots() const + { + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return this->components(); + + case GLSL_TYPE_DOUBLE: + return 2 * this->components(); + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->component_slots(); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->component_slots(); + + case GLSL_TYPE_IMAGE: + return 1; + case GLSL_TYPE_SUBROUTINE: + return 1; ++ ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + break; + } + + return 0; + } + + unsigned + glsl_type::record_location_offset(unsigned length) const + { + unsigned offset = 0; + const glsl_type *t = this->without_array(); + if (t->is_record()) { + assert(length <= t->length); + + for (unsigned i = 0; i < length; i++) { + const glsl_type *st = t->fields.structure[i].type; + const glsl_type *wa = st->without_array(); + if (wa->is_record()) { + unsigned r_offset = wa->record_location_offset(wa->length); + offset += st->is_array() ? + st->arrays_of_arrays_size() * r_offset : r_offset; + } else if (st->is_array() && st->fields.array->is_array()) { + unsigned outer_array_size = st->length; + const glsl_type *base_type = st->fields.array; + + /* For arrays of arrays the outer arrays take up a uniform + * slot for each element. The innermost array elements share a + * single slot so we ignore the innermost array when calculating + * the offset. + */ + while (base_type->fields.array->is_array()) { + outer_array_size = outer_array_size * base_type->length; + base_type = base_type->fields.array; + } + offset += outer_array_size; + } else { + /* We dont worry about arrays here because unless the array + * contains a structure or another array it only takes up a single + * uniform slot. 
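+ *
+ * For example, record_location_offset(3) on
+ * struct S { float f; vec4 v[3]; mat4 m; } returns 3: each member
+ * that is not a struct or an array of arrays contributes exactly
+ * one slot, regardless of its size.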
+ */ + offset += 1; + } + } + } + return offset; + } + + unsigned + glsl_type::uniform_locations() const + { + unsigned size = 0; + + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: + return 1; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->uniform_locations(); + return size; + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->uniform_locations(); + default: + return 0; + } + } + + bool + glsl_type::can_implicitly_convert_to(const glsl_type *desired, + _mesa_glsl_parse_state *state) const + { + if (this == desired) + return true; + + /* There is no conversion among matrix types. */ + if (this->matrix_columns > 1 || desired->matrix_columns > 1) + return false; + + /* Vector size must match. */ + if (this->vector_elements != desired->vector_elements) + return false; + + /* int and uint can be converted to float. */ + if (desired->is_float() && this->is_integer()) + return true; + + /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint. + * Note that state may be NULL here, when resolving function calls in the + * linker. By this time, all the state-dependent checks have already + * happened though, so allow anything that's allowed in any shader version. */ + if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) && + desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT) + return true; + + /* No implicit conversions from double. */ + if ((!state || state->has_double()) && this->is_double()) + return false; + + /* Conversions from different types to double. */ + if ((!state || state->has_double()) && desired->is_double()) { + if (this->is_float()) + return true; + if (this->is_integer()) + return true; + } + + return false; + } + + unsigned + glsl_type::std140_base_alignment(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + switch (this->vector_elements) { + case 1: + return N; + case 2: + return 2 * N; + case 3: + case 4: + return 4 * N; + } + } + + /* (4) If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single + * array element, according to rules (1), (2), and (3), and rounded up + * to the base alignment of a vec4. The array may have padding at the + * end; the base offset of the member following the array is rounded up + * to the next multiple of the base alignment. + * + * (6) If the member is an array of column-major matrices with + * columns and rows, the matrix is stored identically to a row of + * * column vectors with components each, according to rule + * (4). + * + * (8) If the member is an array of row-major matrices with columns + * and rows, the matrix is stored identically to a row of * + * row vectors with components each, according to rule (4). 
+ * + * (10) If the member is an array of structures, the elements of + * the array are laid out in order, according to rule (9). + */ + if (this->is_array()) { + if (this->fields.array->is_scalar() || + this->fields.array->is_vector() || + this->fields.array->is_matrix()) { + return MAX2(this->fields.array->std140_base_alignment(row_major), 16); + } else { + assert(this->fields.array->is_record() || + this->fields.array->is_array()); + return this->fields.array->std140_base_alignment(row_major); + } + } + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). + */ + if (this->is_matrix()) { + const struct glsl_type *vec_type, *array_type; + int c = this->matrix_columns; + int r = this->vector_elements; + + if (row_major) { + vec_type = get_instance(base_type, c, 1); + array_type = glsl_type::get_array_instance(vec_type, r); + } else { + vec_type = get_instance(base_type, r, 1); + array_type = glsl_type::get_array_instance(vec_type, c); + } + + return array_type->std140_base_alignment(false); + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. + */ + if (this->is_record()) { + unsigned base_alignment = 16; + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + base_alignment = MAX2(base_alignment, + field_type->std140_base_alignment(field_row_major)); + } + return base_alignment; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std140_size(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + return this->vector_elements * N; + } + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). 
+ * + * (6) If the member is an array of column-major matrices with + * columns and rows, the matrix is stored identically to a row of + * * column vectors with components each, according to rule + * (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). + * + * (8) If the member is an array of row-major matrices with columns + * and rows, the matrix is stored identically to a row of * + * row vectors with components each, according to rule (4). + */ + if (this->without_array()->is_matrix()) { + const struct glsl_type *element_type; + const struct glsl_type *vec_type; + unsigned int array_len; + + if (this->is_array()) { + element_type = this->without_array(); + array_len = this->arrays_of_arrays_size(); + } else { + element_type = this; + array_len = 1; + } + + if (row_major) { + vec_type = get_instance(element_type->base_type, + element_type->matrix_columns, 1); + + array_len *= element_type->vector_elements; + } else { + vec_type = get_instance(element_type->base_type, + element_type->vector_elements, 1); + array_len *= element_type->matrix_columns; + } + const glsl_type *array_type = glsl_type::get_array_instance(vec_type, + array_len); + + return array_type->std140_size(false); + } + + /* (4) If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single + * array element, according to rules (1), (2), and (3), and rounded up + * to the base alignment of a vec4. The array may have padding at the + * end; the base offset of the member following the array is rounded up + * to the next multiple of the base alignment. + * + * (10) If the member is an array of structures, the elements of + * the array are laid out in order, according to rule (9). + */ + if (this->is_array()) { + if (this->without_array()->is_record()) { + return this->arrays_of_arrays_size() * + this->without_array()->std140_size(row_major); + } else { + unsigned element_base_align = + this->without_array()->std140_base_alignment(row_major); + return this->arrays_of_arrays_size() * MAX2(element_base_align, 16); + } + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. 
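+ *
+ * As a concrete illustration: for struct { float f; vec3 v;
+ * float a[2]; }, f occupies bytes 0-3, v is aligned to 16 and
+ * occupies bytes 16-27, the array elements use a 16-byte stride
+ * (rule 4), and the total is rounded up to the struct's 16-byte
+ * alignment, giving 64 bytes under std140.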
+ */ + if (this->is_record() || this->is_interface()) { + unsigned size = 0; + unsigned max_align = 0; + + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + unsigned align = field_type->std140_base_alignment(field_row_major); + + /* Ignore unsized arrays when calculating size */ + if (field_type->is_unsized_array()) + continue; + + size = glsl_align(size, align); + size += field_type->std140_size(field_row_major); + + max_align = MAX2(align, max_align); + + if (field_type->is_record() && (i + 1 < this->length)) + size = glsl_align(size, 16); + } + size = glsl_align(size, MAX2(max_align, 16)); + return size; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std430_base_alignment(bool row_major) const + { + + unsigned N = is_double() ? 8 : 4; + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_scalar() || this->is_vector()) { + switch (this->vector_elements) { + case 1: + return N; + case 2: + return 2 * N; + case 3: + case 4: + return 4 * N; + } + } + + /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout": + * + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures + * in rule 9 are not rounded up a multiple of the base alignment of a vec4. + */ + + /* (1) If the member is a scalar consuming basic machine units, the + * base alignment is . + * + * (2) If the member is a two- or four-component vector with components + * consuming basic machine units, the base alignment is 2 or + * 4, respectively. + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_array()) + return this->fields.array->std430_base_alignment(row_major); + + /* (5) If the member is a column-major matrix with columns and + * rows, the matrix is stored identically to an array of + * column vectors with components each, according to + * rule (4). + * + * (7) If the member is a row-major matrix with columns and + * rows, the matrix is stored identically to an array of + * row vectors with components each, according to rule (4). 
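+ *
+ * For example, a float[4] member has a 16-byte base alignment
+ * under std140 but only 4 bytes here under std430, whereas a
+ * column-major mat3, treated as vec3[3], keeps its 16-byte
+ * alignment in both layouts.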
+ */ + if (this->is_matrix()) { + const struct glsl_type *vec_type, *array_type; + int c = this->matrix_columns; + int r = this->vector_elements; + + if (row_major) { + vec_type = get_instance(base_type, c, 1); + array_type = glsl_type::get_array_instance(vec_type, r); + } else { + vec_type = get_instance(base_type, r, 1); + array_type = glsl_type::get_array_instance(vec_type, c); + } + + return array_type->std430_base_alignment(false); + } + + /* (9) If the member is a structure, the base alignment of the + * structure is , where is the largest base alignment + * value of any of its members, and rounded up to the base + * alignment of a vec4. The individual members of this + * sub-structure are then assigned offsets by applying this set + * of rules recursively, where the base offset of the first + * member of the sub-structure is equal to the aligned offset + * of the structure. The structure may have padding at the end; + * the base offset of the member following the sub-structure is + * rounded up to the next multiple of the base alignment of the + * structure. + */ + if (this->is_record()) { + unsigned base_alignment = 0; + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + base_alignment = MAX2(base_alignment, + field_type->std430_base_alignment(field_row_major)); + } + assert(base_alignment > 0); + return base_alignment; + } + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::std430_array_stride(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* Notice that the array stride of a vec3 is not 3 * N but 4 * N. + * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout" + * + * (3) If the member is a three-component vector with components consuming + * basic machine units, the base alignment is 4. + */ + if (this->is_vector() && this->vector_elements == 3) + return 4 * N; + + /* By default use std430_size(row_major) */ + return this->std430_size(row_major); + } + + unsigned + glsl_type::std430_size(bool row_major) const + { + unsigned N = is_double() ? 8 : 4; + + /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout": + * + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures + * in rule 9 are not rounded up a multiple of the base alignment of a vec4. 
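+ *
+ * Taking the same struct { float f; vec3 v; float a[2]; } used in
+ * the std140 example above: the array stride drops to 4 under
+ * std430, so a occupies bytes 28-35 and the total rounds up to the
+ * struct's 16-byte alignment, giving 48 bytes instead of the 64
+ * required by std140.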
+ */ + if (this->is_scalar() || this->is_vector()) + return this->vector_elements * N; + + if (this->without_array()->is_matrix()) { + const struct glsl_type *element_type; + const struct glsl_type *vec_type; + unsigned int array_len; + + if (this->is_array()) { + element_type = this->without_array(); + array_len = this->arrays_of_arrays_size(); + } else { + element_type = this; + array_len = 1; + } + + if (row_major) { + vec_type = get_instance(element_type->base_type, + element_type->matrix_columns, 1); + + array_len *= element_type->vector_elements; + } else { + vec_type = get_instance(element_type->base_type, + element_type->vector_elements, 1); + array_len *= element_type->matrix_columns; + } + const glsl_type *array_type = glsl_type::get_array_instance(vec_type, + array_len); + + return array_type->std430_size(false); + } + + if (this->is_array()) { + if (this->without_array()->is_record()) + return this->arrays_of_arrays_size() * + this->without_array()->std430_size(row_major); + else + return this->arrays_of_arrays_size() * + this->without_array()->std430_base_alignment(row_major); + } + + if (this->is_record() || this->is_interface()) { + unsigned size = 0; + unsigned max_align = 0; + + for (unsigned i = 0; i < this->length; i++) { + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(this->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + const struct glsl_type *field_type = this->fields.structure[i].type; + unsigned align = field_type->std430_base_alignment(field_row_major); + size = glsl_align(size, align); + size += field_type->std430_size(field_row_major); + + max_align = MAX2(align, max_align); + } + size = glsl_align(size, max_align); + return size; + } + + assert(!"not reached"); + return -1; + } + + unsigned + glsl_type::count_attribute_slots(bool vertex_input_slots) const + { + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "A scalar input counts the same amount against this limit as a vec4, + * so applications may want to consider packing groups of four + * unrelated float inputs together into a vector to better utilize the + * capabilities of the underlying hardware. A matrix input will use up + * multiple locations. The number of locations used will equal the + * number of columns in the matrix." + * + * The spec does not explicitly say how arrays are counted. However, it + * should be safe to assume the total number of slots consumed by an array + * is the number of entries in the array multiplied by the number of slots + * consumed by a single element of the array. + * + * The spec says nothing about how structs are counted, because vertex + * attributes are not allowed to be (or contain) structs. However, Mesa + * allows varying structs, the number of varying slots taken up by a + * varying struct is simply equal to the sum of the number of slots taken + * up by each element. + * + * Doubles are counted different depending on whether they are vertex + * inputs or everything else. Vertex inputs from ARB_vertex_attrib_64bit + * take one location no matter what size they are, otherwise dvec3/4 + * take two locations. 
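+ *
+ * For example, mat4 counts as 4 slots, vec4[3] as 3 slots, and
+ * dvec4 as a single slot when used as a vertex input but as 2
+ * slots when used as a varying.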
+ */ + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return this->matrix_columns; + case GLSL_TYPE_DOUBLE: + if (this->vector_elements > 2 && !vertex_input_slots) + return this->matrix_columns * 2; + else + return this->matrix_columns; + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->count_attribute_slots(vertex_input_slots); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->count_attribute_slots(vertex_input_slots); + ++ case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + break; + } + + assert(!"Unexpected type in count_attribute_slots()"); + + return 0; + } + + int + glsl_type::coordinate_components() const + { + int size; + + switch (sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + size = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + size = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + size = 3; + break; + default: + assert(!"Should not get here."); + size = 1; + break; + } + + /* Array textures need an additional component for the array index, except + * for cubemap array images that behave like a 2D array of interleaved + * cubemap faces. + */ + if (sampler_array && + !(base_type == GLSL_TYPE_IMAGE && + sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)) + size += 1; + + return size; + } + + /** + * Declarations of type flyweights (glsl_type::_foo_type) and + * convenience pointers (glsl_type::foo_type). + * @{ + */ + #define DECL_TYPE(NAME, ...) \ + const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ + const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; + + #define STRUCT_TYPE(NAME) + + #include "compiler/builtin_type_macros.h" + /** @} */ diff --cc src/compiler/glsl_types.h index 00000000000,e63d7945c9f..a9b5281e774 mode 000000,100644..100644 --- a/src/compiler/glsl_types.h +++ b/src/compiler/glsl_types.h @@@ -1,0 -1,887 +1,913 @@@ + /* -*- c++ -*- */ + /* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + + #pragma once + #ifndef GLSL_TYPES_H + #define GLSL_TYPES_H + + #include + #include + + #ifdef __cplusplus + extern "C" { + #endif + + struct _mesa_glsl_parse_state; + struct glsl_symbol_table; + + extern void + _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state); + + extern void + _mesa_glsl_release_types(void); + + #ifdef __cplusplus + } + #endif + + enum glsl_base_type { + GLSL_TYPE_UINT = 0, + GLSL_TYPE_INT, + GLSL_TYPE_FLOAT, + GLSL_TYPE_DOUBLE, + GLSL_TYPE_BOOL, + GLSL_TYPE_SAMPLER, + GLSL_TYPE_IMAGE, + GLSL_TYPE_ATOMIC_UINT, + GLSL_TYPE_STRUCT, ++ GLSL_TYPE_FUNCTION, + GLSL_TYPE_INTERFACE, + GLSL_TYPE_ARRAY, + GLSL_TYPE_VOID, + GLSL_TYPE_SUBROUTINE, + GLSL_TYPE_ERROR + }; + + enum glsl_sampler_dim { + GLSL_SAMPLER_DIM_1D = 0, + GLSL_SAMPLER_DIM_2D, + GLSL_SAMPLER_DIM_3D, + GLSL_SAMPLER_DIM_CUBE, + GLSL_SAMPLER_DIM_RECT, + GLSL_SAMPLER_DIM_BUF, + GLSL_SAMPLER_DIM_EXTERNAL, + GLSL_SAMPLER_DIM_MS + }; + + enum glsl_interface_packing { + GLSL_INTERFACE_PACKING_STD140, + GLSL_INTERFACE_PACKING_SHARED, + GLSL_INTERFACE_PACKING_PACKED, + GLSL_INTERFACE_PACKING_STD430 + }; + + enum glsl_matrix_layout { + /** + * The layout of the matrix is inherited from the object containing the + * matrix (the top level structure or the uniform block). + */ + GLSL_MATRIX_LAYOUT_INHERITED, + + /** + * Explicit column-major layout + * + * If a uniform block doesn't have an explicit layout set, it will default + * to this layout. + */ + GLSL_MATRIX_LAYOUT_COLUMN_MAJOR, + + /** + * Row-major layout + */ + GLSL_MATRIX_LAYOUT_ROW_MAJOR + }; + + enum { + GLSL_PRECISION_NONE = 0, + GLSL_PRECISION_HIGH, + GLSL_PRECISION_MEDIUM, + GLSL_PRECISION_LOW + }; + + #ifdef __cplusplus + #include "GL/gl.h" + #include "util/ralloc.h" + #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */ + + struct glsl_type { + GLenum gl_type; + glsl_base_type base_type; + + unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */ + unsigned sampler_shadow:1; + unsigned sampler_array:1; + unsigned sampler_type:2; /**< Type of data returned using this + * sampler or image. Only \c + * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, + * and \c GLSL_TYPE_UINT are valid. + */ + unsigned interface_packing:2; + + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ + static void* operator new(size_t size) + { + mtx_lock(&glsl_type::mutex); + + /* mem_ctx should have been created by the static members */ + assert(glsl_type::mem_ctx != NULL); + + void *type; + + type = ralloc_size(glsl_type::mem_ctx, size); + assert(type != NULL); + + mtx_unlock(&glsl_type::mutex); + + return type; + } + + /* If the user *does* call delete, that's OK, we will just + * ralloc_free in that case. */ + static void operator delete(void *type) + { + mtx_lock(&glsl_type::mutex); + ralloc_free(type); + mtx_unlock(&glsl_type::mutex); + } + + /** + * \name Vector and matrix element counts + * + * For scalars, each of these values will be 1. For non-numeric types + * these will be 0. + */ + /*@{*/ + uint8_t vector_elements; /**< 1, 2, 3, or 4 vector elements. */ + uint8_t matrix_columns; /**< 1, 2, 3, or 4 matrix columns. */ + /*@}*/ + + /** + * For \c GLSL_TYPE_ARRAY, this is the length of the array. For + * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of + * elements in the structure and the number of values pointed to by + * \c fields.structure (below). 
+ */ + unsigned length; + + /** + * Name of the data type + * + * Will never be \c NULL. + */ + const char *name; + + /** + * Subtype of composite data types. + */ + union { + const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ ++ struct glsl_function_param *parameters; /**< Parameters to function. */ + struct glsl_struct_field *structure; /**< List of struct fields. */ + } fields; + + /** + * \name Pointers to various public type singletons + */ + /*@{*/ + #undef DECL_TYPE + #define DECL_TYPE(NAME, ...) \ + static const glsl_type *const NAME##_type; + #undef STRUCT_TYPE + #define STRUCT_TYPE(NAME) \ + static const glsl_type *const struct_##NAME##_type; + #include "compiler/builtin_type_macros.h" + /*@}*/ + + /** + * Convenience accessors for vector types (shorter than get_instance()). + * @{ + */ + static const glsl_type *vec(unsigned components); + static const glsl_type *dvec(unsigned components); + static const glsl_type *ivec(unsigned components); + static const glsl_type *uvec(unsigned components); + static const glsl_type *bvec(unsigned components); + /**@}*/ + + /** + * For numeric and boolean derived types returns the basic scalar type + * + * If the type is a numeric or boolean scalar, vector, or matrix type, + * this function gets the scalar type of the individual components. For + * all other types, including arrays of numeric or boolean types, the + * error type is returned. + */ + const glsl_type *get_base_type() const; + + /** + * Get the basic scalar type which this type aggregates. + * + * If the type is a numeric or boolean scalar, vector, or matrix, or an + * array of any of those, this function gets the scalar type of the + * individual components. For structs and arrays of structs, this function + * returns the struct type. For samplers and arrays of samplers, this + * function returns the sampler type. 
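+ *
+ * For example, vec4, mat3 and float[8] all map to float, int[2]
+ * maps to int, and a struct type maps to itself.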
+ */ + const glsl_type *get_scalar_type() const; + + /** + * Get the instance of a built-in scalar, vector, or matrix type + */ + static const glsl_type *get_instance(unsigned base_type, unsigned rows, + unsigned columns); + + /** + * Get the instance of a sampler type + */ + static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type); + ++ static const glsl_type *get_image_instance(enum glsl_sampler_dim dim, ++ bool array, glsl_base_type type); + + /** + * Get the instance of an array type + */ + static const glsl_type *get_array_instance(const glsl_type *base, + unsigned elements); + + /** + * Get the instance of a record type + */ + static const glsl_type *get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name); + + /** + * Get the instance of an interface block type + */ + static const glsl_type *get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *block_name); + + /** + * Get the instance of an subroutine type + */ + static const glsl_type *get_subroutine_instance(const char *subroutine_name); + ++ /** ++ * Get the instance of a function type ++ */ ++ static const glsl_type *get_function_instance(const struct glsl_type *return_type, ++ const glsl_function_param *parameters, ++ unsigned num_params); ++ + /** + * Get the type resulting from a multiplication of \p type_a * \p type_b + */ + static const glsl_type *get_mul_type(const glsl_type *type_a, + const glsl_type *type_b); + + /** + * Query the total number of scalars that make up a scalar, vector or matrix + */ + unsigned components() const + { + return vector_elements * matrix_columns; + } + + /** + * Calculate the number of components slots required to hold this type + * + * This is used to determine how many uniform or varying locations a type + * might occupy. + */ + unsigned component_slots() const; + + /** + * Calculate offset between the base location of the struct in + * uniform storage and a struct member. + * For the initial call, length is the index of the member to find the + * offset for. + */ + unsigned record_location_offset(unsigned length) const; + + /** + * Calculate the number of unique values from glGetUniformLocation for the + * elements of the type. + * + * This is used to allocate slots in the UniformRemapTable, the amount of + * locations may not match with actual used storage space by the driver. + */ + unsigned uniform_locations() const; + + /** + * Calculate the number of attribute slots required to hold this type + * + * This implements the language rules of GLSL 1.50 for counting the number + * of slots used by a vertex attribute. It also determines the number of + * varying slots the type will use up in the absence of varying packing + * (and thus, it can be used to measure the number of varying slots used by + * the varyings that are generated by lower_packed_varyings). + * + * For vertex shader attributes - doubles only take one slot. + * For inter-shader varyings - dvec3/dvec4 take two slots. + */ + unsigned count_attribute_slots(bool vertex_input_slots) const; + + /** + * Alignment in bytes of the start of this type in a std140 uniform + * block. + */ + unsigned std140_base_alignment(bool row_major) const; + + /** Size in bytes of this type in a std140 uniform block. 
+ * + * Note that this is not GL_UNIFORM_SIZE (which is the number of + * elements in the array) + */ + unsigned std140_size(bool row_major) const; + + /** + * Alignment in bytes of the start of this type in a std430 shader + * storage block. + */ + unsigned std430_base_alignment(bool row_major) const; + + /** + * Calculate array stride in bytes of this type in a std430 shader storage + * block. + */ + unsigned std430_array_stride(bool row_major) const; + + /** + * Size in bytes of this type in a std430 shader storage block. + * + * Note that this is not GL_BUFFER_SIZE + */ + unsigned std430_size(bool row_major) const; + + /** + * \brief Can this type be implicitly converted to another? + * + * \return True if the types are identical or if this type can be converted + * to \c desired according to Section 4.1.10 of the GLSL spec. + * + * \verbatim + * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10 + * Implicit Conversions: + * + * In some situations, an expression and its type will be implicitly + * converted to a different type. The following table shows all allowed + * implicit conversions: + * + * Type of expression | Can be implicitly converted to + * -------------------------------------------------- + * int float + * uint + * + * ivec2 vec2 + * uvec2 + * + * ivec3 vec3 + * uvec3 + * + * ivec4 vec4 + * uvec4 + * + * There are no implicit array or structure conversions. For example, + * an array of int cannot be implicitly converted to an array of float. + * There are no implicit conversions between signed and unsigned + * integers. + * \endverbatim + */ + bool can_implicitly_convert_to(const glsl_type *desired, + _mesa_glsl_parse_state *state) const; + + /** + * Query whether or not a type is a scalar (non-vector and non-matrix). + */ + bool is_scalar() const + { + return (vector_elements == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a vector + */ + bool is_vector() const + { + return (vector_elements > 1) + && (matrix_columns == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a matrix + */ + bool is_matrix() const + { + /* GLSL only has float matrices. */ + return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE); + } + + /** + * Query whether or not a type is a non-array numeric type + */ + bool is_numeric() const + { + return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE); + } + + /** + * Query whether or not a type is an integral type + */ + bool is_integer() const + { + return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT); + } + + /** + * Query whether or not type is an integral type, or for struct and array + * types, contains an integral type. + */ + bool contains_integer() const; + + /** + * Query whether or not type is a double type, or for struct and array + * types, contains a double type. + */ + bool contains_double() const; + + /** + * Query whether or not a type is a float type + */ + bool is_float() const + { + return base_type == GLSL_TYPE_FLOAT; + } + + /** + * Query whether or not a type is a double type + */ + bool is_double() const + { + return base_type == GLSL_TYPE_DOUBLE; + } + + /** + * Query whether a double takes two slots. 
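+ *
+ * That is, whether it is a double type with more than two
+ * components per vector, such as dvec3 or dvec4; double and dvec2
+ * still fit in a single slot.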
+ */ + bool is_dual_slot_double() const + { + return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2; + } + + /** + * Query whether or not a type is a non-array boolean type + */ + bool is_boolean() const + { + return base_type == GLSL_TYPE_BOOL; + } + + /** + * Query whether or not a type is a sampler + */ + bool is_sampler() const + { + return base_type == GLSL_TYPE_SAMPLER; + } + + /** + * Query whether or not type is a sampler, or for struct and array + * types, contains a sampler. + */ + bool contains_sampler() const; + + /** + * Get the Mesa texture target index for a sampler type. + */ + gl_texture_index sampler_index() const; + + /** + * Query whether or not type is an image, or for struct and array + * types, contains an image. + */ + bool contains_image() const; + + /** + * Query whether or not a type is an image + */ + bool is_image() const + { + return base_type == GLSL_TYPE_IMAGE; + } + + /** + * Query whether or not a type is an array + */ + bool is_array() const + { + return base_type == GLSL_TYPE_ARRAY; + } + + bool is_array_of_arrays() const + { + return is_array() && fields.array->is_array(); + } + + /** + * Query whether or not a type is a record + */ + bool is_record() const + { + return base_type == GLSL_TYPE_STRUCT; + } + + /** + * Query whether or not a type is an interface + */ + bool is_interface() const + { + return base_type == GLSL_TYPE_INTERFACE; + } + + /** + * Query whether or not a type is the void type singleton. + */ + bool is_void() const + { + return base_type == GLSL_TYPE_VOID; + } + + /** + * Query whether or not a type is the error type singleton. + */ + bool is_error() const + { + return base_type == GLSL_TYPE_ERROR; + } + + /** + * Query if a type is unnamed/anonymous (named by the parser) + */ + + bool is_subroutine() const + { + return base_type == GLSL_TYPE_SUBROUTINE; + } + bool contains_subroutine() const; + + bool is_anonymous() const + { + return !strncmp(name, "#anon", 5); + } + + /** + * Get the type stripped of any arrays + * + * \return + * Pointer to the type of elements of the first non-array type for array + * types, or pointer to itself for non-array types. + */ + const glsl_type *without_array() const + { + const glsl_type *t = this; + + while (t->is_array()) + t = t->fields.array; + + return t; + } + + /** + * Return the total number of elements in an array including the elements + * in arrays of arrays. + */ + unsigned arrays_of_arrays_size() const + { + if (!is_array()) + return 0; + + unsigned size = length; + const glsl_type *base_type = fields.array; + + while (base_type->is_array()) { + size = size * base_type->length; + base_type = base_type->fields.array; + } + return size; + } + + /** + * Return the amount of atomic counter storage required for a type. + */ + unsigned atomic_size() const + { + if (base_type == GLSL_TYPE_ATOMIC_UINT) + return ATOMIC_COUNTER_SIZE; + else if (is_array()) + return length * fields.array->atomic_size(); + else + return 0; + } + + /** + * Return whether a type contains any atomic counters. + */ + bool contains_atomic() const + { + return atomic_size() > 0; + } + + /** + * Return whether a type contains any opaque types. + */ + bool contains_opaque() const; + + /** + * Query the full type of a matrix row + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the rows of the matrix is returned. + */ + const glsl_type *row_type() const + { + return is_matrix() + ? 
get_instance(base_type, matrix_columns, 1) + : error_type; + } + + /** + * Query the full type of a matrix column + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the columns of the matrix is returned. + */ + const glsl_type *column_type() const + { + return is_matrix() + ? get_instance(base_type, vector_elements, 1) + : error_type; + } + + /** + * Get the type of a structure field + * + * \return + * Pointer to the type of the named field. If the type is not a structure + * or the named field does not exist, \c glsl_type::error_type is returned. + */ + const glsl_type *field_type(const char *name) const; + + /** + * Get the location of a field within a record type + */ + int field_index(const char *name) const; + + /** + * Query the number of elements in an array type + * + * \return + * The number of elements in the array for array types or -1 for non-array + * types. If the number of elements in the array has not yet been declared, + * zero is returned. + */ + int array_size() const + { + return is_array() ? length : -1; + } + + /** + * Query whether the array size for all dimensions has been declared. + */ + bool is_unsized_array() const + { + return is_array() && length == 0; + } + + /** + * Return the number of coordinate components needed for this + * sampler or image type. + * + * This is based purely on the sampler's dimensionality. For example, this + * returns 1 for sampler1D, and 3 for sampler2DArray. + * + * Note that this is often different than actual coordinate type used in + * a texturing built-in function, since those pack additional values (such + * as the shadow comparitor or projector) into the coordinate type. + */ + int coordinate_components() const; + + /** + * Compare a record type against another record type. + * + * This is useful for matching record types declared across shader stages. + */ + bool record_compare(const glsl_type *b) const; + + private: + + static mtx_t mutex; + + /** + * ralloc context for all glsl_type allocations + * + * Set on the first call to \c glsl_type::new. + */ + static void *mem_ctx; + + void init_ralloc_type_ctx(void); + + /** Constructor for vector and matrix types */ + glsl_type(GLenum gl_type, + glsl_base_type base_type, unsigned vector_elements, + unsigned matrix_columns, const char *name); + + /** Constructor for sampler or image types */ + glsl_type(GLenum gl_type, glsl_base_type base_type, + enum glsl_sampler_dim dim, bool shadow, bool array, + unsigned type, const char *name); + + /** Constructor for record types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name); + + /** Constructor for interface types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name); + ++ /** Constructor for interface types */ ++ glsl_type(const glsl_type *return_type, ++ const glsl_function_param *params, unsigned num_params); ++ + /** Constructor for array types */ + glsl_type(const glsl_type *array, unsigned length); + + /** Constructor for subroutine types */ + glsl_type(const char *name); + + /** Hash table containing the known array types. */ + static struct hash_table *array_types; + + /** Hash table containing the known record types. */ + static struct hash_table *record_types; + + /** Hash table containing the known interface types. */ + static struct hash_table *interface_types; + + /** Hash table containing the known subroutine types. 
*/ + static struct hash_table *subroutine_types; + ++ /** Hash table containing the known function types. */ ++ static struct hash_table *function_types; ++ + static bool record_key_compare(const void *a, const void *b); + static unsigned record_key_hash(const void *key); + + /** + * \name Built-in type flyweights + */ + /*@{*/ + #undef DECL_TYPE + #define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type; + #undef STRUCT_TYPE + #define STRUCT_TYPE(NAME) static const glsl_type _struct_##NAME##_type; + #include "compiler/builtin_type_macros.h" + /*@}*/ + + /** + * \name Friend functions. + * + * These functions are friends because they must have C linkage and the + * need to call various private methods or access various private static + * data. + */ + /*@{*/ + friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *); + friend void _mesa_glsl_release_types(void); + /*@}*/ + }; + ++#undef DECL_TYPE ++#undef STRUCT_TYPE ++#endif /* __cplusplus */ ++ + struct glsl_struct_field { + const struct glsl_type *type; + const char *name; + + /** + * For interface blocks, gl_varying_slot corresponding to the input/output + * if this is a built-in input/output (i.e. a member of the built-in + * gl_PerVertex interface block); -1 otherwise. + * + * Ignored for structs. + */ + int location; + + /** + * For interface blocks, the interpolation mode (as in + * ir_variable::interpolation). 0 otherwise. + */ + unsigned interpolation:2; + + /** + * For interface blocks, 1 if this variable uses centroid interpolation (as + * in ir_variable::centroid). 0 otherwise. + */ + unsigned centroid:1; + + /** + * For interface blocks, 1 if this variable uses sample interpolation (as + * in ir_variable::sample). 0 otherwise. + */ + unsigned sample:1; + + /** + * Layout of the matrix. Uses glsl_matrix_layout values. + */ + unsigned matrix_layout:2; + + /** + * For interface blocks, 1 if this variable is a per-patch input or output + * (as in ir_variable::patch). 0 otherwise. 
+ */ + unsigned patch:1; + + /** + * Precision qualifier + */ + unsigned precision:2; + + /** + * Image qualifiers, applicable to buffer variables defined in shader + * storage buffer objects (SSBOs) + */ + unsigned image_read_only:1; + unsigned image_write_only:1; + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + ++#ifdef __cplusplus + glsl_struct_field(const struct glsl_type *_type, const char *_name) + : type(_type), name(_name), location(-1), interpolation(0), centroid(0), + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) + { + /* empty */ + } + + glsl_struct_field() + { + /* empty */ + } ++#endif ++}; ++ ++struct glsl_function_param { ++ const struct glsl_type *type; ++ ++ bool in; ++ bool out; + }; + + static inline unsigned int + glsl_align(unsigned int a, unsigned int align) + { + return (a + align - 1) / align * align; + } + -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - + #endif /* GLSL_TYPES_H */ diff --cc src/compiler/nir/Makefile.sources index 00000000000,0755a100e65..04e8ab88a35 mode 000000,100644..100644 --- a/src/compiler/nir/Makefile.sources +++ b/src/compiler/nir/Makefile.sources @@@ -1,0 -1,71 +1,87 @@@ + NIR_GENERATED_FILES = \ + nir_builder_opcodes.h \ + nir_constant_expressions.c \ + nir_opcodes.c \ + nir_opcodes.h \ + nir_opt_algebraic.c + + NIR_FILES = \ + glsl_to_nir.cpp \ + glsl_to_nir.h \ + nir.c \ + nir.h \ + nir_array.h \ + nir_builder.h \ + nir_clone.c \ + nir_constant_expressions.h \ + nir_control_flow.c \ + nir_control_flow.h \ + nir_control_flow_private.h \ + nir_dominance.c \ + nir_from_ssa.c \ ++ nir_gather_info.c \ + nir_gs_count_vertices.c \ ++ nir_inline_functions.c \ + nir_intrinsics.c \ + nir_intrinsics.h \ + nir_instr_set.c \ + nir_instr_set.h \ + nir_liveness.c \ + nir_lower_alu_to_scalar.c \ + nir_lower_atomics.c \ + nir_lower_clip.c \ + nir_lower_global_vars_to_local.c \ + nir_lower_gs_intrinsics.c \ ++ nir_lower_indirect_derefs.c \ + nir_lower_load_const_to_scalar.c \ + nir_lower_locals_to_regs.c \ + nir_lower_idiv.c \ + nir_lower_io.c \ + nir_lower_outputs_to_temporaries.c \ + nir_lower_phis_to_scalar.c \ ++ nir_lower_returns.c \ + nir_lower_samplers.c \ + nir_lower_system_values.c \ + nir_lower_tex.c \ + nir_lower_to_source_mods.c \ + nir_lower_two_sided_color.c \ + nir_lower_vars_to_ssa.c \ + nir_lower_var_copies.c \ + nir_lower_vec_to_movs.c \ + nir_metadata.c \ + nir_move_vec_src_uses_to_dest.c \ + nir_normalize_cubemap_coords.c \ + nir_opt_constant_folding.c \ + nir_opt_copy_propagate.c \ + nir_opt_cse.c \ + nir_opt_dce.c \ + nir_opt_dead_cf.c \ + nir_opt_gcm.c \ + nir_opt_global_to_local.c \ + nir_opt_peephole_select.c \ + nir_opt_remove_phis.c \ + nir_opt_undef.c \ ++ nir_phi_builder.c \ ++ nir_phi_builder.h \ + nir_print.c \ + nir_remove_dead_variables.c \ ++ nir_repair_ssa.c \ + nir_search.c \ + nir_search.h \ + nir_split_var_copies.c \ + nir_sweep.c \ + nir_to_ssa.c \ + nir_validate.c \ + nir_vla.h \ + nir_worklist.c \ + nir_worklist.h + ++SPIRV_FILES = \ ++ spirv/nir_spirv.h \ ++ spirv/spirv_to_nir.c \ ++ spirv/vtn_alu.c \ ++ spirv/vtn_cfg.c \ ++ spirv/vtn_glsl450.c \ ++ spirv/vtn_private.h \ ++ spirv/vtn_variables.c ++ diff --cc src/compiler/nir/glsl_to_nir.cpp index 00000000000,4b76d234420..2a3047dd33c mode 000000,100644..100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@@ -1,0 -1,2031 +1,2035 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "glsl_to_nir.h" + #include "nir_control_flow.h" + #include "nir_builder.h" + #include "compiler/glsl/ir_visitor.h" + #include "compiler/glsl/ir_hierarchical_visitor.h" + #include "compiler/glsl/ir.h" + #include "main/imports.h" + + /* + * pass to lower GLSL IR to NIR + * + * This will lower variable dereferences to loads/stores of corresponding + * variables in NIR - the variables will be converted to registers in a later + * pass. + */ + + namespace { + + class nir_visitor : public ir_visitor + { + public: - nir_visitor(nir_shader *shader); ++ nir_visitor(nir_shader *shader, gl_shader *sh); + ~nir_visitor(); + + virtual void visit(ir_variable *); + virtual void visit(ir_function *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_loop *); + virtual void visit(ir_if *); + virtual void visit(ir_discard *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_return *); + virtual void visit(ir_call *); + virtual void visit(ir_assignment *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_texture *); + virtual void visit(ir_constant *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_barrier *); + + void create_function(ir_function_signature *ir); + + private: + void add_instr(nir_instr *instr, unsigned num_components); + nir_ssa_def *evaluate_rvalue(ir_rvalue *ir); + + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, + nir_ssa_def *src2); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, + nir_ssa_def *src2, nir_ssa_def *src3); + + bool supports_ints; + ++ struct gl_shader *sh; ++ + nir_shader *shader; + nir_function_impl *impl; + nir_builder b; + nir_ssa_def *result; /* result of the expression tree last visited */ + + nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + + /* the head of the dereference chain we're creating */ + nir_deref_var *deref_head; + /* the tail of the dereference chain we're creating */ + nir_deref *deref_tail; + + nir_variable *var; /* 
variable created by ir_variable visitor */ + + /* whether the IR we're operating on is per-function or global */ + bool is_global; + + /* map of ir_variable -> nir_variable */ + struct hash_table *var_table; + + /* map of ir_function_signature -> nir_function_overload */ + struct hash_table *overload_table; + }; + + /* + * This visitor runs before the main visitor, calling create_function() for + * each function so that the main visitor can resolve forward references in + * calls. + */ + + class nir_function_visitor : public ir_hierarchical_visitor + { + public: + nir_function_visitor(nir_visitor *v) : visitor(v) + { + } + virtual ir_visitor_status visit_enter(ir_function *); + + private: + nir_visitor *visitor; + }; + + }; /* end of anonymous namespace */ + + nir_shader * + glsl_to_nir(const struct gl_shader_program *shader_prog, + gl_shader_stage stage, + const nir_shader_compiler_options *options) + { + struct gl_shader *sh = shader_prog->_LinkedShaders[stage]; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + - nir_visitor v1(shader); ++ nir_visitor v1(shader, sh); + nir_function_visitor v2(&v1); + v2.run(sh->ir); + visit_exec_list(sh->ir, &v1); + - nir_lower_outputs_to_temporaries(shader); ++ nir_function *main = NULL; ++ nir_foreach_function(shader, func) { ++ if (strcmp(func->name, "main") == 0) { ++ main = func; ++ break; ++ } ++ } ++ assert(main); ++ ++ nir_lower_outputs_to_temporaries(shader, main); + + shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); + if (shader_prog->Label) + shader->info.label = ralloc_strdup(shader, shader_prog->Label); + shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); + shader->info.num_ubos = sh->NumUniformBlocks; + shader->info.num_abos = shader_prog->NumAtomicBuffers; + shader->info.num_ssbos = sh->NumShaderStorageBlocks; + shader->info.num_images = sh->NumImages; + shader->info.inputs_read = sh->Program->InputsRead; + shader->info.outputs_written = sh->Program->OutputsWritten; + shader->info.patch_inputs_read = sh->Program->PatchInputsRead; + shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten; + shader->info.system_values_read = sh->Program->SystemValuesRead; + shader->info.uses_texture_gather = sh->Program->UsesGather; + shader->info.uses_clip_distance_out = + sh->Program->ClipDistanceArraySize != 0; + shader->info.separate_shader = shader_prog->SeparateShader; + shader->info.has_transform_feedback_varyings = + shader_prog->TransformFeedback.NumVarying > 0; + + switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + + case MESA_SHADER_GEOMETRY: + shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; + shader->info.gs.output_primitive = sh->Geom.OutputType; + shader->info.gs.vertices_out = sh->Geom.VerticesOut; + shader->info.gs.invocations = sh->Geom.Invocations; + shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive; + shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams; + break; + + case MESA_SHADER_FRAGMENT: { + struct gl_fragment_program *fp = + (struct gl_fragment_program *)sh->Program; + + shader->info.fs.uses_discard = fp->UsesKill; + shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests; + shader->info.fs.depth_layout = fp->FragDepthLayout; + break; + } + + case MESA_SHADER_COMPUTE: { + struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program; + shader->info.cs.local_size[0] = cp->LocalSize[0]; + shader->info.cs.local_size[1] = 
cp->LocalSize[1]; + shader->info.cs.local_size[2] = cp->LocalSize[2]; + break; + } + + default: + break; /* No stage-specific info */ + } + + return shader; + } + -nir_visitor::nir_visitor(nir_shader *shader) ++nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh) + { + this->supports_ints = shader->options->native_integers; + this->shader = shader; ++ this->sh = sh; + this->is_global = true; + this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + + nir_visitor::~nir_visitor() + { + _mesa_hash_table_destroy(this->var_table, NULL); + _mesa_hash_table_destroy(this->overload_table, NULL); + } + + nir_deref_var * + nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) + { + ir->accept(this); + ralloc_steal(mem_ctx, this->deref_head); + return this->deref_head; + } + + static nir_constant * + constant_copy(ir_constant *ir, void *mem_ctx) + { + if (ir == NULL) + return NULL; + + nir_constant *ret = ralloc(mem_ctx, nir_constant); + + unsigned total_elems = ir->type->components(); + unsigned i; + + ret->num_elements = 0; + switch (ir->type->base_type) { + case GLSL_TYPE_UINT: + for (i = 0; i < total_elems; i++) + ret->value.u[i] = ir->value.u[i]; + break; + + case GLSL_TYPE_INT: + for (i = 0; i < total_elems; i++) + ret->value.i[i] = ir->value.i[i]; + break; + + case GLSL_TYPE_FLOAT: + for (i = 0; i < total_elems; i++) + ret->value.f[i] = ir->value.f[i]; + break; + + case GLSL_TYPE_BOOL: + for (i = 0; i < total_elems; i++) + ret->value.b[i] = ir->value.b[i]; + break; + + case GLSL_TYPE_STRUCT: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + ret->num_elements = ir->type->length; + + i = 0; + foreach_in_list(ir_constant, field, &ir->components) { + ret->elements[i] = constant_copy(field, mem_ctx); + i++; + } + break; + + case GLSL_TYPE_ARRAY: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + ret->num_elements = ir->type->length; + + for (i = 0; i < ir->type->length; i++) + ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); + break; + + default: + unreachable("not reached"); + } + + return ret; + } + + void + nir_visitor::visit(ir_variable *ir) + { + nir_variable *var = ralloc(shader, nir_variable); + var->type = ir->type; + var->name = ralloc_strdup(var, ir->name); + + var->data.read_only = ir->data.read_only; + var->data.centroid = ir->data.centroid; + var->data.sample = ir->data.sample; + var->data.patch = ir->data.patch; + var->data.invariant = ir->data.invariant; + var->data.location = ir->data.location; + + switch(ir->data.mode) { + case ir_var_auto: + case ir_var_temporary: + if (is_global) + var->data.mode = nir_var_global; + else + var->data.mode = nir_var_local; + break; + + case ir_var_function_in: + case ir_var_function_out: + case ir_var_function_inout: + case ir_var_const_in: + var->data.mode = nir_var_local; + break; + + case ir_var_shader_in: + if (shader->stage == MESA_SHADER_FRAGMENT && + ir->data.location == VARYING_SLOT_FACE) { + /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ + var->data.location = SYSTEM_VALUE_FRONT_FACE; + var->data.mode = nir_var_system_value; + } else if (shader->stage == MESA_SHADER_GEOMETRY && + ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { + /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ + var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; + var->data.mode = 
nir_var_system_value; + } else { + var->data.mode = nir_var_shader_in; + } + break; + + case ir_var_shader_out: + var->data.mode = nir_var_shader_out; + break; + + case ir_var_uniform: + var->data.mode = nir_var_uniform; + break; + + case ir_var_shader_storage: + var->data.mode = nir_var_shader_storage; + break; + + case ir_var_system_value: + var->data.mode = nir_var_system_value; + break; + + default: + unreachable("not reached"); + } + + var->data.interpolation = ir->data.interpolation; + var->data.origin_upper_left = ir->data.origin_upper_left; + var->data.pixel_center_integer = ir->data.pixel_center_integer; + var->data.explicit_location = ir->data.explicit_location; + var->data.explicit_index = ir->data.explicit_index; + var->data.explicit_binding = ir->data.explicit_binding; + var->data.has_initializer = ir->data.has_initializer; + var->data.location_frac = ir->data.location_frac; + var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array; + var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray; + + switch (ir->data.depth_layout) { + case ir_depth_layout_none: + var->data.depth_layout = nir_depth_layout_none; + break; + case ir_depth_layout_any: + var->data.depth_layout = nir_depth_layout_any; + break; + case ir_depth_layout_greater: + var->data.depth_layout = nir_depth_layout_greater; + break; + case ir_depth_layout_less: + var->data.depth_layout = nir_depth_layout_less; + break; + case ir_depth_layout_unchanged: + var->data.depth_layout = nir_depth_layout_unchanged; + break; + default: + unreachable("not reached"); + } + + var->data.index = ir->data.index; ++ var->data.descriptor_set = 0; + var->data.binding = ir->data.binding; + var->data.offset = ir->data.offset; + var->data.image.read_only = ir->data.image_read_only; + var->data.image.write_only = ir->data.image_write_only; + var->data.image.coherent = ir->data.image_coherent; + var->data.image._volatile = ir->data.image_volatile; + var->data.image.restrict_flag = ir->data.image_restrict; + var->data.image.format = ir->data.image_format; + var->data.max_array_access = ir->data.max_array_access; + + var->num_state_slots = ir->get_num_state_slots(); + if (var->num_state_slots > 0) { + var->state_slots = ralloc_array(var, nir_state_slot, + var->num_state_slots); + + ir_state_slot *state_slots = ir->get_state_slots(); + for (unsigned i = 0; i < var->num_state_slots; i++) { + for (unsigned j = 0; j < 5; j++) + var->state_slots[i].tokens[j] = state_slots[i].tokens[j]; + var->state_slots[i].swizzle = state_slots[i].swizzle; + } + } else { + var->state_slots = NULL; + } + + var->constant_initializer = constant_copy(ir->constant_initializer, var); + + var->interface_type = ir->get_interface_type(); + + if (var->data.mode == nir_var_local) + nir_function_impl_add_variable(impl, var); + else + nir_shader_add_variable(shader, var); + + _mesa_hash_table_insert(var_table, ir, var); + this->var = var; + } + + ir_visitor_status + nir_function_visitor::visit_enter(ir_function *ir) + { + foreach_in_list(ir_function_signature, sig, &ir->signatures) { + visitor->create_function(sig); + } + return visit_continue_with_parent; + } + + void + nir_visitor::create_function(ir_function_signature *ir) + { + if (ir->is_intrinsic) + return; + + nir_function *func = nir_function_create(shader, ir->function_name()); + + unsigned num_params = ir->parameters.length(); + func->num_params = num_params; + func->params = ralloc_array(shader, nir_parameter, num_params); + + unsigned i = 0; + foreach_in_list(ir_variable, 
param, &ir->parameters) { + switch (param->data.mode) { + case ir_var_function_in: + func->params[i].param_type = nir_parameter_in; + break; + + case ir_var_function_out: + func->params[i].param_type = nir_parameter_out; + break; + + case ir_var_function_inout: + func->params[i].param_type = nir_parameter_inout; + break; + + default: + unreachable("not reached"); + } + + func->params[i].type = param->type; + i++; + } + + func->return_type = ir->return_type; + + _mesa_hash_table_insert(this->overload_table, ir, func); + } + + void + nir_visitor::visit(ir_function *ir) + { + foreach_in_list(ir_function_signature, sig, &ir->signatures) + sig->accept(this); + } + + void + nir_visitor::visit(ir_function_signature *ir) + { + if (ir->is_intrinsic) + return; + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir); + + assert(entry); + nir_function *func = (nir_function *) entry->data; + + if (ir->is_defined) { + nir_function_impl *impl = nir_function_impl_create(func); + this->impl = impl; + + unsigned num_params = func->num_params; + impl->num_params = num_params; + impl->params = ralloc_array(this->shader, nir_variable *, num_params); + unsigned i = 0; + foreach_in_list(ir_variable, param, &ir->parameters) { + param->accept(this); + impl->params[i] = this->var; + i++; + } + + if (func->return_type == glsl_type::void_type) { + impl->return_var = NULL; + } else { + impl->return_var = ralloc(this->shader, nir_variable); + impl->return_var->name = ralloc_strdup(impl->return_var, + "return_var"); + impl->return_var->type = func->return_type; + } + + this->is_global = false; + + nir_builder_init(&b, impl); + b.cursor = nir_after_cf_list(&impl->body); + visit_exec_list(&ir->body, this); + + this->is_global = true; + } else { + func->impl = NULL; + } + } + + void + nir_visitor::visit(ir_loop *ir) + { + nir_loop *loop = nir_loop_create(this->shader); + nir_builder_cf_insert(&b, &loop->cf_node); + + b.cursor = nir_after_cf_list(&loop->body); + visit_exec_list(&ir->body_instructions, this); + b.cursor = nir_after_cf_node(&loop->cf_node); + } + + void + nir_visitor::visit(ir_if *ir) + { + nir_src condition = + nir_src_for_ssa(evaluate_rvalue(ir->condition)); + + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = condition; + nir_builder_cf_insert(&b, &if_stmt->cf_node); + + b.cursor = nir_after_cf_list(&if_stmt->then_list); + visit_exec_list(&ir->then_instructions, this); + + b.cursor = nir_after_cf_list(&if_stmt->else_list); + visit_exec_list(&ir->else_instructions, this); + + b.cursor = nir_after_cf_node(&if_stmt->cf_node); + } + + void + nir_visitor::visit(ir_discard *ir) + { + /* + * discards aren't treated as control flow, because before we lower them + * they can appear anywhere in the shader and the stuff after them may still + * be executed (yay, crazy GLSL rules!). However, after lowering, all the + * discards will be immediately followed by a return. 
+ */ + + nir_intrinsic_instr *discard; + if (ir->condition) { + discard = nir_intrinsic_instr_create(this->shader, + nir_intrinsic_discard_if); + discard->src[0] = + nir_src_for_ssa(evaluate_rvalue(ir->condition)); + } else { + discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard); + } + + nir_builder_instr_insert(&b, &discard->instr); + } + + void + nir_visitor::visit(ir_emit_vertex *ir) + { + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); + instr->const_index[0] = ir->stream_id(); + nir_builder_instr_insert(&b, &instr->instr); + } + + void + nir_visitor::visit(ir_end_primitive *ir) + { + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); + instr->const_index[0] = ir->stream_id(); + nir_builder_instr_insert(&b, &instr->instr); + } + + void + nir_visitor::visit(ir_loop_jump *ir) + { + nir_jump_type type; + switch (ir->mode) { + case ir_loop_jump::jump_break: + type = nir_jump_break; + break; + case ir_loop_jump::jump_continue: + type = nir_jump_continue; + break; + default: + unreachable("not reached"); + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, type); + nir_builder_instr_insert(&b, &instr->instr); + } + + void + nir_visitor::visit(ir_return *ir) + { + if (ir->value != NULL) { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); + copy->variables[1] = evaluate_deref(&copy->instr, ir->value); + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); + nir_builder_instr_insert(&b, &instr->instr); + } + + void + nir_visitor::visit(ir_call *ir) + { + if (ir->callee->is_intrinsic) { + nir_intrinsic_op op; + if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) { + op = nir_intrinsic_atomic_counter_read_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) { + op = nir_intrinsic_atomic_counter_inc_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { + op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) { + op 
= nir_intrinsic_image_size; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) { + op = nir_intrinsic_image_samples; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) { + op = nir_intrinsic_store_ssbo; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { + op = nir_intrinsic_load_ssbo; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umin; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { + op = nir_intrinsic_shader_clock; + } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) { + op = nir_intrinsic_group_memory_barrier; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) { + op = nir_intrinsic_memory_barrier_atomic_counter; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) { + op = nir_intrinsic_memory_barrier_buffer; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) { + op = nir_intrinsic_memory_barrier_image; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { + op = nir_intrinsic_memory_barrier_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { + op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umin; + else + unreachable("Invalid type"); + } else if 
(strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; + } else { + unreachable("not reached"); + } + + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); + nir_dest *dest = &instr->dest; + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_samples: + case nir_intrinsic_image_size: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_builder_instr_insert(&b, &instr_undef->instr); + + /* Set the image variable dereference. */ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the intrinsic destination. */ + if (ir->return_deref) { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + nir_ssa_dest_init(&instr->instr, &instr->dest, + info->dest_components, NULL); + } + + if (op == nir_intrinsic_image_size || + op == nir_intrinsic_image_samples) { + nir_builder_instr_insert(&b, &instr->instr); + break; + } + + /* Set the address argument, extending the coordinate vector to four + * components. + */ + nir_ssa_def *src_addr = + evaluate_rvalue((ir_dereference *)param); + nir_ssa_def *srcs[4]; + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) + srcs[i] = nir_channel(&b, src_addr, i); + else + srcs[i] = &instr_undef->def; + } + + instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. 
*/ + if (!param->is_tail_sentinel()) { + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + nir_builder_instr_insert(&b, &instr->instr); + break; + case nir_intrinsic_shader_clock: + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + case nir_intrinsic_store_ssbo: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = write_mask->value.u[0]; + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_load_ssbo: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + /* Insert the created nir instruction now since in the case of boolean + * result we will need to emit another instruction after it + */ + nir_builder_instr_insert(&b, &instr->instr); + + /* + * In SSBO/UBO's, a true boolean value is any non-zero value, but we + * consider a true boolean to be ~0. Fix this up with a != 0 + * comparison. 
+ */ + if (type->base_type == GLSL_TYPE_BOOL) { + nir_alu_instr *load_ssbo_compare = + nir_alu_instr_create(shader, nir_op_ine); + load_ssbo_compare->src[0].src.is_ssa = true; + load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa; + load_ssbo_compare->src[1].src = + nir_src_for_ssa(nir_imm_int(&b, 0)); + for (unsigned i = 0; i < type->vector_elements; i++) + load_ssbo_compare->src[1].swizzle[i] = 0; + nir_ssa_dest_init(&load_ssbo_compare->instr, + &load_ssbo_compare->dest.dest, + type->vector_elements, NULL); + load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1; + nir_builder_instr_insert(&b, &load_ssbo_compare->instr); + dest = &load_ssbo_compare->dest.dest; + } + break; + } + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 3 || param_count == 4); + + /* Block index */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* Offset */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 4) { + assert(op == nir_intrinsic_ssbo_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_load_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case 
nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_shared_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + default: + unreachable("not reached"); + } + + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; + store_instr->const_index[0] = (1 << store_instr->num_components) - 1; + + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&dest->ssa); + + nir_builder_instr_insert(&b, &store_instr->instr); + } + + return; + } + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir->callee); + assert(entry); + nir_function *callee = (nir_function *) entry->data; + + nir_call_instr *instr = nir_call_instr_create(this->shader, callee); + + unsigned i = 0; + foreach_in_list(ir_dereference, param, &ir->actual_parameters) { + instr->params[i] = evaluate_deref(&instr->instr, param); + i++; + } + + instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); + nir_builder_instr_insert(&b, &instr->instr); + } + + void + nir_visitor::visit(ir_assignment *ir) + { + unsigned num_components = ir->lhs->type->vector_elements; + + if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && + (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { + /* We're doing a plain-as-can-be copy, so emit a copy_var */ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs); + copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs); + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); + nir_builder_cf_insert(&b, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr); + b.cursor = nir_after_cf_node(&if_stmt->cf_node); + } else { + nir_builder_instr_insert(&b, &copy->instr); + } + return; + } + + assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); + + ir->lhs->accept(this); + nir_deref_var *lhs_deref = this->deref_head; + nir_ssa_def *src = evaluate_rvalue(ir->rhs); + + if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) { + /* GLSL IR will give 
us the input to the write-masked assignment in a + * single packed vector. So, for example, if the writemask is xzw, then + * we have to swizzle x -> x, y -> z, and z -> w and get the y component + * from the load. + */ + unsigned swiz[4]; + unsigned component = 0; + for (unsigned i = 0; i < 4; i++) { + swiz[i] = ir->write_mask & (1 << i) ? component++ : 0; + } + src = nir_swizzle(&b, src, swiz, num_components, !supports_ints); + } + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); + store->num_components = ir->lhs->type->vector_elements; + store->const_index[0] = ir->write_mask; + nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); + store->variables[0] = nir_deref_as_var(store_deref); + store->src[0] = nir_src_for_ssa(src); + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); + nir_builder_cf_insert(&b, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr); + b.cursor = nir_after_cf_node(&if_stmt->cf_node); + } else { + nir_builder_instr_insert(&b, &store->instr); + } + } + + /* + * Given an instruction, returns a pointer to its destination or NULL if there + * is no destination. + * + * Note that this only handles instructions we generate at this level. + */ + static nir_dest * + get_instr_dest(nir_instr *instr) + { + nir_alu_instr *alu_instr; + nir_intrinsic_instr *intrinsic_instr; + nir_tex_instr *tex_instr; + + switch (instr->type) { + case nir_instr_type_alu: + alu_instr = nir_instr_as_alu(instr); + return &alu_instr->dest.dest; + + case nir_instr_type_intrinsic: + intrinsic_instr = nir_instr_as_intrinsic(instr); + if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) + return &intrinsic_instr->dest; + else + return NULL; + + case nir_instr_type_tex: + tex_instr = nir_instr_as_tex(instr); + return &tex_instr->dest; + + default: + unreachable("not reached"); + } + + return NULL; + } + + void + nir_visitor::add_instr(nir_instr *instr, unsigned num_components) + { + nir_dest *dest = get_instr_dest(instr); + + if (dest) + nir_ssa_dest_init(instr, dest, num_components, NULL); + + nir_builder_instr_insert(&b, instr); + + if (dest) { + assert(dest->is_ssa); + this->result = &dest->ssa; + } + } + + nir_ssa_def * + nir_visitor::evaluate_rvalue(ir_rvalue* ir) + { + ir->accept(this); + if (ir->as_dereference() || ir->as_constant()) { + /* + * A dereference is being used on the right hand side, which means we + * must emit a variable load. + */ + + nir_intrinsic_instr *load_instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); + load_instr->num_components = ir->type->vector_elements; + load_instr->variables[0] = this->deref_head; + ralloc_steal(load_instr, load_instr->variables[0]); + add_instr(&load_instr->instr, ir->type->vector_elements); + } + + return this->result; + } + + void + nir_visitor::visit(ir_expression *ir) + { + /* Some special cases */ + switch (ir->operation) { + case ir_binop_ubo_load: { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); + load->num_components = ir->type->vector_elements; + load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + add_instr(&load->instr, ir->type->vector_elements); + + /* + * In UBO's, a true boolean value is any non-zero value, but we consider + * a true boolean to be ~0. 
Fix this up with a != 0 comparison. + */ + + if (ir->type->base_type == GLSL_TYPE_BOOL) + this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0)); + + return; + } + + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: { + ir_dereference *deref = ir->operands[0]->as_dereference(); + ir_swizzle *swizzle = NULL; + if (!deref) { + /* the api does not allow a swizzle here, but the varying packing code + * may have pushed one into here. + */ + swizzle = ir->operands[0]->as_swizzle(); + assert(swizzle); + deref = swizzle->val->as_dereference(); + assert(deref); + } + + deref->accept(this); + + nir_intrinsic_op op; + if (this->deref_head->var->data.mode == nir_var_shader_in) { + switch (ir->operation) { + case ir_unop_interpolate_at_centroid: + op = nir_intrinsic_interp_var_at_centroid; + break; + case ir_binop_interpolate_at_offset: + op = nir_intrinsic_interp_var_at_offset; + break; + case ir_binop_interpolate_at_sample: + op = nir_intrinsic_interp_var_at_sample; + break; + default: + unreachable("Invalid interpolation intrinsic"); + } + } else { + /* This case can happen if the vertex shader does not write the + * given varying. In this case, the linker will lower it to a + * global variable. Since interpolating a variable makes no + * sense, we'll just turn it into a load which will probably + * eventually end up as an SSA definition. + */ + assert(this->deref_head->var->data.mode == nir_var_global); + op = nir_intrinsic_load_var; + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); + intrin->num_components = deref->type->vector_elements; + intrin->variables[0] = this->deref_head; + ralloc_steal(intrin, intrin->variables[0]); + + if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || + intrin->intrinsic == nir_intrinsic_interp_var_at_sample) + intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + + add_instr(&intrin->instr, deref->type->vector_elements); + + if (swizzle) { + unsigned swiz[4] = { + swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w + }; + + result = nir_swizzle(&b, result, swiz, + swizzle->type->vector_elements, false); + } + + return; + } + + default: + break; + } + + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + srcs[i] = evaluate_rvalue(ir->operands[i]); + + glsl_base_type types[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + if (supports_ints) + types[i] = ir->operands[i]->type->base_type; + else + types[i] = GLSL_TYPE_FLOAT; + + glsl_base_type out_type; + if (supports_ints) + out_type = ir->type->base_type; + else + out_type = GLSL_TYPE_FLOAT; + + switch (ir->operation) { + case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; + case ir_unop_logic_not: + result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]); + break; + case ir_unop_neg: + result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0]) + : nir_ineg(&b, srcs[0]); + break; + case ir_unop_abs: + result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0]) + : nir_iabs(&b, srcs[0]); + break; + case ir_unop_saturate: + assert(types[0] == GLSL_TYPE_FLOAT); + result = nir_fsat(&b, srcs[0]); + break; + case ir_unop_sign: + result = (types[0] == GLSL_TYPE_FLOAT) ? 
nir_fsign(&b, srcs[0]) + : nir_isign(&b, srcs[0]); + break; + case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; + case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; + case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; + case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); + case ir_unop_log: unreachable("ir_unop_log should have been lowered"); + case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; + case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; + case ir_unop_i2f: + result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_u2f: + result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_b2f: + result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break; + case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break; + case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break; + case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break; + case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break; + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_f2i: + case ir_unop_bitcast_u2f: + case ir_unop_bitcast_f2u: + case ir_unop_subroutine_to_int: + /* no-op */ + result = nir_imov(&b, srcs[0]); + break; + case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; + case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; + case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; + case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; + case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; + case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; + case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; + case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; + case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; + case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; + case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; + case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; + case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; + case ir_unop_pack_snorm_2x16: + result = nir_pack_snorm_2x16(&b, srcs[0]); + break; + case ir_unop_pack_snorm_4x8: + result = nir_pack_snorm_4x8(&b, srcs[0]); + break; + case ir_unop_pack_unorm_2x16: + result = nir_pack_unorm_2x16(&b, srcs[0]); + break; + case ir_unop_pack_unorm_4x8: + result = nir_pack_unorm_4x8(&b, srcs[0]); + break; + case ir_unop_pack_half_2x16: + result = nir_pack_half_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_snorm_2x16: + result = nir_unpack_snorm_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_snorm_4x8: + result = nir_unpack_snorm_4x8(&b, srcs[0]); + break; + case ir_unop_unpack_unorm_2x16: + result = nir_unpack_unorm_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_unorm_4x8: + result = nir_unpack_unorm_4x8(&b, srcs[0]); + break; + case ir_unop_unpack_half_2x16: + result = nir_unpack_half_2x16(&b, srcs[0]); + break; - case ir_unop_unpack_half_2x16_split_x: - result = nir_unpack_half_2x16_split_x(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - result = nir_unpack_half_2x16_split_y(&b, srcs[0]); - break; + case ir_unop_bitfield_reverse: + result = nir_bitfield_reverse(&b, srcs[0]); + break; + case ir_unop_bit_count: + result = nir_bit_count(&b, srcs[0]); + break; + case ir_unop_find_msb: + switch (types[0]) { + case GLSL_TYPE_UINT: + result = nir_ufind_msb(&b, srcs[0]); + break; + 
case GLSL_TYPE_INT: + result = nir_ifind_msb(&b, srcs[0]); + break; + default: + unreachable("Invalid type for findMSB()"); + } + break; + case ir_unop_find_lsb: + result = nir_find_lsb(&b, srcs[0]); + break; + + case ir_unop_noise: + switch (ir->type->vector_elements) { + case 1: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise1_1(&b, srcs[0]); break; + case 2: result = nir_fnoise1_2(&b, srcs[0]); break; + case 3: result = nir_fnoise1_3(&b, srcs[0]); break; + case 4: result = nir_fnoise1_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 2: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise2_1(&b, srcs[0]); break; + case 2: result = nir_fnoise2_2(&b, srcs[0]); break; + case 3: result = nir_fnoise2_3(&b, srcs[0]); break; + case 4: result = nir_fnoise2_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 3: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise3_1(&b, srcs[0]); break; + case 2: result = nir_fnoise3_2(&b, srcs[0]); break; + case 3: result = nir_fnoise3_3(&b, srcs[0]); break; + case 4: result = nir_fnoise3_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 4: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise4_1(&b, srcs[0]); break; + case 2: result = nir_fnoise4_2(&b, srcs[0]); break; + case 3: result = nir_fnoise4_3(&b, srcs[0]); break; + case 4: result = nir_fnoise4_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + default: + unreachable("not reached"); + } + break; + case ir_unop_get_buffer_size: { + nir_intrinsic_instr *load = nir_intrinsic_instr_create( + this->shader, + nir_intrinsic_get_buffer_size); + load->num_components = ir->type->vector_elements; + load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); + add_instr(&load->instr, ir->type->vector_elements); + return; + } + + case ir_binop_add: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1]) + : nir_iadd(&b, srcs[0], srcs[1]); + break; + case ir_binop_sub: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1]) + : nir_isub(&b, srcs[0], srcs[1]); + break; + case ir_binop_mul: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1]) + : nir_imul(&b, srcs[0], srcs[1]); + break; + case ir_binop_div: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fdiv(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_idiv(&b, srcs[0], srcs[1]); + else + result = nir_udiv(&b, srcs[0], srcs[1]); + break; + case ir_binop_mod: + result = (out_type == GLSL_TYPE_FLOAT) ? 
nir_fmod(&b, srcs[0], srcs[1]) + : nir_umod(&b, srcs[0], srcs[1]); + break; + case ir_binop_min: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fmin(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_imin(&b, srcs[0], srcs[1]); + else + result = nir_umin(&b, srcs[0], srcs[1]); + break; + case ir_binop_max: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fmax(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_imax(&b, srcs[0], srcs[1]); + else + result = nir_umax(&b, srcs[0], srcs[1]); + break; + case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; + case ir_binop_logic_and: + result = supports_ints ? nir_iand(&b, srcs[0], srcs[1]) + : nir_fand(&b, srcs[0], srcs[1]); + break; + case ir_binop_logic_or: + result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) + : nir_for(&b, srcs[0], srcs[1]); + break; + case ir_binop_logic_xor: + result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) + : nir_fxor(&b, srcs[0], srcs[1]); + break; + case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; + case ir_binop_rshift: + result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1]) + : nir_ushr(&b, srcs[0], srcs[1]); + break; + case ir_binop_imul_high: + result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) + : nir_umul_high(&b, srcs[0], srcs[1]); + break; + case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; + case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; + case ir_binop_less: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_flt(&b, srcs[0], srcs[1]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ilt(&b, srcs[0], srcs[1]); + else + result = nir_ult(&b, srcs[0], srcs[1]); + } else { + result = nir_slt(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_greater: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_flt(&b, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ilt(&b, srcs[1], srcs[0]); + else + result = nir_ult(&b, srcs[1], srcs[0]); + } else { + result = nir_slt(&b, srcs[1], srcs[0]); + } + break; + case ir_binop_lequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fge(&b, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ige(&b, srcs[1], srcs[0]); + else + result = nir_uge(&b, srcs[1], srcs[0]); + } else { + result = nir_slt(&b, srcs[1], srcs[0]); + } + break; + case ir_binop_gequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fge(&b, srcs[0], srcs[1]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ige(&b, srcs[0], srcs[1]); + else + result = nir_uge(&b, srcs[0], srcs[1]); + } else { + result = nir_slt(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_equal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_feq(&b, srcs[0], srcs[1]); + else + result = nir_ieq(&b, srcs[0], srcs[1]); + } else { + result = nir_seq(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fne(&b, srcs[0], srcs[1]); + else + result = nir_ine(&b, srcs[0], srcs[1]); + } else { + result = nir_sne(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_all_equal: + if (supports_ints) { + 
if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_seq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_any_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_sne(&b, srcs[0], srcs[1]); break; + case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_dot: + switch (ir->operands[0]->type->vector_elements) { + case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + break; + - case ir_binop_pack_half_2x16_split: - result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]); - break; + case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; + case ir_triop_fma: + result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_lrp: + result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_csel: + if (supports_ints) + result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); + else + result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_bitfield_extract: + result = (out_type == GLSL_TYPE_INT) ? 
+ nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) : + nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_quadop_bitfield_insert: + result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]); + break; + case ir_quadop_vector: + result = nir_vec(&b, srcs, ir->type->vector_elements); + break; + + default: + unreachable("not reached"); + } + } + + void + nir_visitor::visit(ir_swizzle *ir) + { + unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; + result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, + ir->type->vector_elements, !supports_ints); + } + + void + nir_visitor::visit(ir_texture *ir) + { + unsigned num_srcs; + nir_texop op; + switch (ir->op) { + case ir_tex: + op = nir_texop_tex; + num_srcs = 1; /* coordinate */ + break; + + case ir_txb: + case ir_txl: + op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl; + num_srcs = 2; /* coordinate, bias/lod */ + break; + + case ir_txd: + op = nir_texop_txd; /* coordinate, dPdx, dPdy */ + num_srcs = 3; + break; + + case ir_txf: + op = nir_texop_txf; + if (ir->lod_info.lod != NULL) + num_srcs = 2; /* coordinate, lod */ + else + num_srcs = 1; /* coordinate */ + break; + + case ir_txf_ms: + op = nir_texop_txf_ms; + num_srcs = 2; /* coordinate, sample_index */ + break; + + case ir_txs: + op = nir_texop_txs; + if (ir->lod_info.lod != NULL) + num_srcs = 1; /* lod */ + else + num_srcs = 0; + break; + + case ir_lod: + op = nir_texop_lod; + num_srcs = 1; /* coordinate */ + break; + + case ir_tg4: + op = nir_texop_tg4; + num_srcs = 1; /* coordinate */ + break; + + case ir_query_levels: + op = nir_texop_query_levels; + num_srcs = 0; + break; + + case ir_texture_samples: + op = nir_texop_texture_samples; + num_srcs = 0; + break; + + case ir_samples_identical: + op = nir_texop_samples_identical; + num_srcs = 1; /* coordinate */ + break; + + default: + unreachable("not reached"); + } + + if (ir->projector != NULL) + num_srcs++; + if (ir->shadow_comparitor != NULL) + num_srcs++; + if (ir->offset != NULL && ir->offset->as_constant() == NULL) + num_srcs++; + + nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); + + instr->op = op; + instr->sampler_dim = + (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; + instr->is_array = ir->sampler->type->sampler_array; + instr->is_shadow = ir->sampler->type->sampler_shadow; + if (instr->is_shadow) + instr->is_new_style_shadow = (ir->type->vector_elements == 1); + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + instr->dest_type = nir_type_float; + break; + case GLSL_TYPE_INT: + instr->dest_type = nir_type_int; + break; + case GLSL_TYPE_BOOL: + case GLSL_TYPE_UINT: + instr->dest_type = nir_type_uint; + break; + default: + unreachable("not reached"); + } + + instr->sampler = evaluate_deref(&instr->instr, ir->sampler); + + unsigned src_number = 0; + + if (ir->coordinate != NULL) { + instr->coord_components = ir->coordinate->type->vector_elements; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->coordinate)); + instr->src[src_number].src_type = nir_tex_src_coord; + src_number++; + } + + if (ir->projector != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->projector)); + instr->src[src_number].src_type = nir_tex_src_projector; + src_number++; + } + + if (ir->shadow_comparitor != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor)); + instr->src[src_number].src_type = nir_tex_src_comparitor; + src_number++; + } + + if (ir->offset != 
NULL) { + /* we don't support multiple offsets yet */ + assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); + + ir_constant *const_offset = ir->offset->as_constant(); + if (const_offset != NULL) { + for (unsigned i = 0; i < const_offset->type->vector_elements; i++) + instr->const_offset[i] = const_offset->value.i[i]; + } else { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } + } + + switch (ir->op) { + case ir_txb: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias)); + instr->src[src_number].src_type = nir_tex_src_bias; + src_number++; + break; + + case ir_txl: + case ir_txf: + case ir_txs: + if (ir->lod_info.lod != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod)); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + break; + + case ir_txd: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx)); + instr->src[src_number].src_type = nir_tex_src_ddx; + src_number++; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy)); + instr->src[src_number].src_type = nir_tex_src_ddy; + src_number++; + break; + + case ir_txf_ms: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index)); + instr->src[src_number].src_type = nir_tex_src_ms_index; + src_number++; + break; + + case ir_tg4: + instr->component = ir->lod_info.component->as_constant()->value.u[0]; + break; + + default: + break; + } + + assert(src_number == num_srcs); + + add_instr(&instr->instr, nir_tex_instr_dest_size(instr)); + } + + void + nir_visitor::visit(ir_constant *ir) + { + /* + * We don't know if this variable is an array or struct that gets + * dereferenced, so do the safe thing and make it a variable with a + * constant initializer and return a dereference. 
+ */ + + nir_variable *var = + nir_local_variable_create(this->impl, ir->type, "const_temp"); + var->data.read_only = true; + var->constant_initializer = constant_copy(ir, var); + + this->deref_head = nir_deref_var_create(this->shader, var); + this->deref_tail = &this->deref_head->deref; + } + + void + nir_visitor::visit(ir_dereference_variable *ir) + { + struct hash_entry *entry = + _mesa_hash_table_search(this->var_table, ir->var); + assert(entry); + nir_variable *var = (nir_variable *) entry->data; + + nir_deref_var *deref = nir_deref_var_create(this->shader, var); + this->deref_head = deref; + this->deref_tail = &deref->deref; + } + + void + nir_visitor::visit(ir_dereference_record *ir) + { + ir->record->accept(this); + + int field_index = this->deref_tail->type->field_index(ir->field); + assert(field_index >= 0); + + nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); + deref->deref.type = ir->type; + this->deref_tail->child = &deref->deref; + this->deref_tail = &deref->deref; + } + + void + nir_visitor::visit(ir_dereference_array *ir) + { + nir_deref_array *deref = nir_deref_array_create(this->shader); + deref->deref.type = ir->type; + + ir_constant *const_index = ir->array_index->as_constant(); + if (const_index != NULL) { + deref->deref_array_type = nir_deref_array_type_direct; + deref->base_offset = const_index->value.u[0]; + } else { + deref->deref_array_type = nir_deref_array_type_indirect; + deref->indirect = + nir_src_for_ssa(evaluate_rvalue(ir->array_index)); + } + + ir->array->accept(this); + + this->deref_tail->child = &deref->deref; + ralloc_steal(this->deref_tail, deref); + this->deref_tail = &deref->deref; + } + + void + nir_visitor::visit(ir_barrier *ir) + { + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); + nir_builder_instr_insert(&b, &instr->instr); + } diff --cc src/compiler/nir/nir.c index 00000000000,21bf678c04e..42a53f6f3db mode 000000,100644..100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@@ -1,0 -1,1665 +1,1753 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + #include "nir_control_flow_private.h" + #include <assert.h> + + nir_shader * + nir_shader_create(void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options) + { + nir_shader *shader = ralloc(mem_ctx, nir_shader); + + exec_list_make_empty(&shader->uniforms); + exec_list_make_empty(&shader->inputs); + exec_list_make_empty(&shader->outputs); ++ exec_list_make_empty(&shader->shared); + + shader->options = options; + memset(&shader->info, 0, sizeof(shader->info)); + + exec_list_make_empty(&shader->functions); + exec_list_make_empty(&shader->registers); + exec_list_make_empty(&shader->globals); + exec_list_make_empty(&shader->system_values); + shader->reg_alloc = 0; + + shader->num_inputs = 0; + shader->num_outputs = 0; + shader->num_uniforms = 0; ++ shader->num_shared = 0; + + shader->stage = stage; + + return shader; + } + + static nir_register * + reg_create(void *mem_ctx, struct exec_list *list) + { + nir_register *reg = ralloc(mem_ctx, nir_register); + + list_inithead(&reg->uses); + list_inithead(&reg->defs); + list_inithead(&reg->if_uses); + + reg->num_components = 0; + reg->num_array_elems = 0; + reg->is_packed = false; + reg->name = NULL; + + exec_list_push_tail(list, &reg->node); + + return reg; + } + + nir_register * + nir_global_reg_create(nir_shader *shader) + { + nir_register *reg = reg_create(shader, &shader->registers); + reg->index = shader->reg_alloc++; + reg->is_global = true; + + return reg; + } + + nir_register * + nir_local_reg_create(nir_function_impl *impl) + { + nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); + reg->index = impl->reg_alloc++; + reg->is_global = false; + + return reg; + } + + void + nir_reg_remove(nir_register *reg) + { + exec_node_remove(&reg->node); + } + + void + nir_shader_add_variable(nir_shader *shader, nir_variable *var) + { + switch (var->data.mode) { + case nir_var_all: + assert(!"invalid mode"); + break; + + case nir_var_local: + assert(!"nir_shader_add_variable cannot be used for local variables"); + break; + + case nir_var_global: + exec_list_push_tail(&shader->globals, &var->node); + break; + + case nir_var_shader_in: + exec_list_push_tail(&shader->inputs, &var->node); + break; + + case nir_var_shader_out: + exec_list_push_tail(&shader->outputs, &var->node); + break; + + case nir_var_uniform: + case nir_var_shader_storage: + exec_list_push_tail(&shader->uniforms, &var->node); + break; + ++ case nir_var_shared: ++ assert(shader->stage == MESA_SHADER_COMPUTE); ++ exec_list_push_tail(&shader->shared, &var->node); ++ break; ++ + case nir_var_system_value: + exec_list_push_tail(&shader->system_values, &var->node); + break; + } + } + + nir_variable * + nir_variable_create(nir_shader *shader, nir_variable_mode mode, + const struct glsl_type *type, const char *name) + { + nir_variable *var = rzalloc(shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = mode; + + if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + + if (mode == nir_var_shader_in || mode == nir_var_uniform) + var->data.read_only = true; + + nir_shader_add_variable(shader, var); + + return var; + } + + nir_variable * + nir_local_variable_create(nir_function_impl *impl, + const struct glsl_type *type, const char *name) + { + nir_variable *var = rzalloc(impl->function->shader,
nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = nir_var_local; + + nir_function_impl_add_variable(impl, var); + + return var; + } + + nir_function * + nir_function_create(nir_shader *shader, const char *name) + { + nir_function *func = ralloc(shader, nir_function); + + exec_list_push_tail(&shader->functions, &func->node); + + func->name = ralloc_strdup(func, name); + func->shader = shader; + func->num_params = 0; + func->params = NULL; + func->return_type = glsl_void_type(); + func->impl = NULL; + + return func; + } + + void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) + { + dest->is_ssa = src->is_ssa; + if (src->is_ssa) { + dest->ssa = src->ssa; + } else { + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(mem_ctx, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); + } else { + dest->reg.indirect = NULL; + } + } + } + + void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr) + { + /* Copying an SSA definition makes no sense whatsoever. */ + assert(!src->is_ssa); + + dest->is_ssa = false; + + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(instr, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, instr); + } else { + dest->reg.indirect = NULL; + } + } + + void + nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, + nir_alu_instr *instr) + { + nir_src_copy(&dest->src, &src->src, &instr->instr); + dest->abs = src->abs; + dest->negate = src->negate; + for (unsigned i = 0; i < 4; i++) + dest->swizzle[i] = src->swizzle[i]; + } + + void + nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, + nir_alu_instr *instr) + { + nir_dest_copy(&dest->dest, &src->dest, &instr->instr); + dest->write_mask = src->write_mask; + dest->saturate = src->saturate; + } + + + static void + cf_init(nir_cf_node *node, nir_cf_node_type type) + { + exec_node_init(&node->node); + node->parent = NULL; + node->type = type; + } + + nir_function_impl * -nir_function_impl_create(nir_function *function) ++nir_function_impl_create_bare(nir_shader *shader) + { - assert(function->impl == NULL); - - void *mem_ctx = ralloc_parent(function); - - nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); ++ nir_function_impl *impl = ralloc(shader, nir_function_impl); + - function->impl = impl; - impl->function = function; ++ impl->function = NULL; + + cf_init(&impl->cf_node, nir_cf_node_function); + + exec_list_make_empty(&impl->body); + exec_list_make_empty(&impl->registers); + exec_list_make_empty(&impl->locals); + impl->num_params = 0; + impl->params = NULL; + impl->return_var = NULL; + impl->reg_alloc = 0; + impl->ssa_alloc = 0; + impl->valid_metadata = nir_metadata_none; + + /* create start & end blocks */ - nir_block *start_block = nir_block_create(mem_ctx); - nir_block *end_block = nir_block_create(mem_ctx); ++ nir_block *start_block = nir_block_create(shader); ++ nir_block *end_block = nir_block_create(shader); + start_block->cf_node.parent = &impl->cf_node; + end_block->cf_node.parent = &impl->cf_node; + impl->end_block = end_block; + + exec_list_push_tail(&impl->body, &start_block->cf_node.node); + + start_block->successors[0] = end_block; + _mesa_set_add(end_block->predecessors, start_block); + return impl; + } + ++nir_function_impl * ++nir_function_impl_create(nir_function *function) ++{ ++ assert(function->impl == 
NULL); ++ ++ nir_function_impl *impl = nir_function_impl_create_bare(function->shader); ++ ++ function->impl = impl; ++ impl->function = function; ++ ++ impl->num_params = function->num_params; ++ impl->params = ralloc_array(function->shader, ++ nir_variable *, impl->num_params); ++ ++ return impl; ++} ++ + nir_block * + nir_block_create(nir_shader *shader) + { + nir_block *block = ralloc(shader, nir_block); + + cf_init(&block->cf_node, nir_cf_node_block); + + block->successors[0] = block->successors[1] = NULL; + block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, + _mesa_key_pointer_equal); + block->imm_dom = NULL; + /* XXX maybe it would be worth it to defer allocation? This + * way it doesn't get allocated for shader ref's that never run + * nir_calc_dominance? For example, state-tracker creates an + * initial IR, clones that, runs appropriate lowering pass, passes + * to driver which does common lowering/opt, and then stores ref + * which is later used to do state specific lowering and futher + * opt. Do any of the references not need dominance metadata? + */ + block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + exec_list_make_empty(&block->instr_list); + + return block; + } + + static inline void + src_init(nir_src *src) + { + src->is_ssa = false; + src->reg.reg = NULL; + src->reg.indirect = NULL; + src->reg.base_offset = 0; + } + + nir_if * + nir_if_create(nir_shader *shader) + { + nir_if *if_stmt = ralloc(shader, nir_if); + + cf_init(&if_stmt->cf_node, nir_cf_node_if); + src_init(&if_stmt->condition); + + nir_block *then = nir_block_create(shader); + exec_list_make_empty(&if_stmt->then_list); + exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); + then->cf_node.parent = &if_stmt->cf_node; + + nir_block *else_stmt = nir_block_create(shader); + exec_list_make_empty(&if_stmt->else_list); + exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); + else_stmt->cf_node.parent = &if_stmt->cf_node; + + return if_stmt; + } + + nir_loop * + nir_loop_create(nir_shader *shader) + { + nir_loop *loop = ralloc(shader, nir_loop); + + cf_init(&loop->cf_node, nir_cf_node_loop); + + nir_block *body = nir_block_create(shader); + exec_list_make_empty(&loop->body); + exec_list_push_tail(&loop->body, &body->cf_node.node); + body->cf_node.parent = &loop->cf_node; + + body->successors[0] = body; + _mesa_set_add(body->predecessors, body); + + return loop; + } + + static void + instr_init(nir_instr *instr, nir_instr_type type) + { + instr->type = type; + instr->block = NULL; + exec_node_init(&instr->node); + } + + static void + dest_init(nir_dest *dest) + { + dest->is_ssa = false; + dest->reg.reg = NULL; + dest->reg.indirect = NULL; + dest->reg.base_offset = 0; + } + + static void + alu_dest_init(nir_alu_dest *dest) + { + dest_init(&dest->dest); + dest->saturate = false; + dest->write_mask = 0xf; + } + + static void + alu_src_init(nir_alu_src *src) + { + src_init(&src->src); + src->abs = src->negate = false; + src->swizzle[0] = 0; + src->swizzle[1] = 1; + src->swizzle[2] = 2; + src->swizzle[3] = 3; + } + + nir_alu_instr * + nir_alu_instr_create(nir_shader *shader, nir_op op) + { + unsigned num_srcs = nir_op_infos[op].num_inputs; + nir_alu_instr *instr = + ralloc_size(shader, + sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); + + instr_init(&instr->instr, nir_instr_type_alu); + instr->op = op; + alu_dest_init(&instr->dest); + for (unsigned i = 0; i < num_srcs; i++) + alu_src_init(&instr->src[i]); + + return instr; + } + + 
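The ALU constructor above follows the same pattern as the other *_create helpers in this file: allocate the instruction, point its sources at existing values, give it a destination, then insert it at a cursor so the use/def lists get wired up. The snippet below is a minimal sketch of that flow, not part of this patch; emit_fadd is a made-up helper name, and it assumes the nir_after_block() cursor helper declared in nir.h.

#include "nir.h"

/* Hypothetical helper, not part of this patch: emit "a + b" at the end of
 * a block using the constructors defined above. */
static nir_ssa_def *
emit_fadd(nir_shader *shader, nir_block *block,
          nir_ssa_def *a, nir_ssa_def *b)
{
   nir_alu_instr *add = nir_alu_instr_create(shader, nir_op_fadd);

   /* Sources point at existing SSA defs. */
   add->src[0].src = nir_src_for_ssa(a);
   add->src[1].src = nir_src_for_ssa(b);

   /* SSA destination with the same width as the first operand. */
   nir_ssa_dest_init(&add->instr, &add->dest.dest, a->num_components, "sum");
   add->dest.write_mask = (1 << a->num_components) - 1;

   /* Inserting via a cursor is what hooks up the use/def lists. */
   nir_instr_insert(nir_after_block(block), &add->instr);

   return &add->dest.dest.ssa;
}

In practice nir_builder wraps this same bookkeeping; the point here is only how the raw constructors fit together.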
nir_jump_instr * + nir_jump_instr_create(nir_shader *shader, nir_jump_type type) + { + nir_jump_instr *instr = ralloc(shader, nir_jump_instr); + instr_init(&instr->instr, nir_instr_type_jump); + instr->type = type; + return instr; + } + + nir_load_const_instr * + nir_load_const_instr_create(nir_shader *shader, unsigned num_components) + { + nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); + instr_init(&instr->instr, nir_instr_type_load_const); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; + } + + nir_intrinsic_instr * + nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) + { + unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; + nir_intrinsic_instr *instr = + ralloc_size(shader, + sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); + + instr_init(&instr->instr, nir_instr_type_intrinsic); + instr->intrinsic = op; + + if (nir_intrinsic_infos[op].has_dest) + dest_init(&instr->dest); + + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i]); + + return instr; + } + + nir_call_instr * + nir_call_instr_create(nir_shader *shader, nir_function *callee) + { + nir_call_instr *instr = ralloc(shader, nir_call_instr); + instr_init(&instr->instr, nir_instr_type_call); + + instr->callee = callee; + instr->num_params = callee->num_params; + instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); + instr->return_deref = NULL; + + return instr; + } + + nir_tex_instr * + nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) + { + nir_tex_instr *instr = rzalloc(shader, nir_tex_instr); + instr_init(&instr->instr, nir_instr_type_tex); + + dest_init(&instr->dest); + + instr->num_srcs = num_srcs; + instr->src = ralloc_array(instr, nir_tex_src, num_srcs); + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i].src); + ++ instr->texture_index = 0; ++ instr->texture_array_size = 0; ++ instr->texture = NULL; + instr->sampler_index = 0; - instr->sampler_array_size = 0; + instr->sampler = NULL; + + return instr; + } + + nir_phi_instr * + nir_phi_instr_create(nir_shader *shader) + { + nir_phi_instr *instr = ralloc(shader, nir_phi_instr); + instr_init(&instr->instr, nir_instr_type_phi); + + dest_init(&instr->dest); + exec_list_make_empty(&instr->srcs); + return instr; + } + + nir_parallel_copy_instr * + nir_parallel_copy_instr_create(nir_shader *shader) + { + nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); + instr_init(&instr->instr, nir_instr_type_parallel_copy); + + exec_list_make_empty(&instr->entries); + + return instr; + } + + nir_ssa_undef_instr * + nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) + { + nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); + instr_init(&instr->instr, nir_instr_type_ssa_undef); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; + } + + nir_deref_var * + nir_deref_var_create(void *mem_ctx, nir_variable *var) + { + nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); + deref->deref.deref_type = nir_deref_type_var; + deref->deref.child = NULL; + deref->deref.type = var->type; + deref->var = var; + return deref; + } + + nir_deref_array * + nir_deref_array_create(void *mem_ctx) + { + nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); + deref->deref.deref_type = nir_deref_type_array; + deref->deref.child = NULL; + deref->deref_array_type = nir_deref_array_type_direct; + src_init(&deref->indirect); + deref->base_offset = 0; + return deref; + } + + 
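The deref constructors here (and nir_deref_struct_create just below) compose into chains the same way the glsl-to-nir visitor builds them: create the head from a variable, hang array or struct nodes off deref.child, and fill in each node's type. A minimal sketch for "v[idx]", assuming an array-typed variable v; build_array_deref is a hypothetical helper, not part of this patch.

#include "nir.h"

/* Hypothetical sketch, not part of this patch: build a deref chain for
 * "v[idx]" where v is an array-typed nir_variable. */
static nir_deref_var *
build_array_deref(void *mem_ctx, nir_variable *v, unsigned idx)
{
   nir_deref_var *head = nir_deref_var_create(mem_ctx, v);

   /* A direct (constant-index) array dereference of the variable. */
   nir_deref_array *elem = nir_deref_array_create(head);
   elem->deref_array_type = nir_deref_array_type_direct;
   elem->base_offset = idx;
   elem->deref.type = glsl_get_array_element(v->type);

   head->deref.child = &elem->deref;
   return head;
}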
nir_deref_struct * + nir_deref_struct_create(void *mem_ctx, unsigned field_index) + { + nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); + deref->deref.deref_type = nir_deref_type_struct; + deref->deref.child = NULL; + deref->index = field_index; + return deref; + } + + static nir_deref_var * + copy_deref_var(void *mem_ctx, nir_deref_var *deref) + { + nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; + } + + static nir_deref_array * + copy_deref_array(void *mem_ctx, nir_deref_array *deref) + { + nir_deref_array *ret = nir_deref_array_create(mem_ctx); + ret->base_offset = deref->base_offset; + ret->deref_array_type = deref->deref_array_type; + if (deref->deref_array_type == nir_deref_array_type_indirect) { + nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); + } + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; + } + + static nir_deref_struct * + copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) + { + nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; + } + + nir_deref * + nir_copy_deref(void *mem_ctx, nir_deref *deref) + { + switch (deref->deref_type) { + case nir_deref_type_var: + return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref; + case nir_deref_type_array: + return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref; + case nir_deref_type_struct: + return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref; + default: + unreachable("Invalid dereference type"); + } + + return NULL; + } + + /* Returns a load_const instruction that represents the constant + * initializer for the given deref chain. The caller is responsible for + * ensuring that there actually is a constant initializer. + */ + nir_load_const_instr * + nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) + { + nir_constant *constant = deref->var->constant_initializer; + assert(constant); + + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = + nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type)); + + matrix_offset *= load->def.num_components; + for (unsigned i = 0; i < load->def.num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.b[matrix_offset + i] ?
+ NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; + } + + nir_function_impl * + nir_cf_node_get_function(nir_cf_node *node) + { + while (node->type != nir_cf_node_function) { + node = node->parent; + } + + return nir_cf_node_as_function(node); + } + ++/* Reduces a cursor by trying to convert everything to after and trying to ++ * go up to block granularity when possible. ++ */ ++static nir_cursor ++reduce_cursor(nir_cursor cursor) ++{ ++ switch (cursor.option) { ++ case nir_cursor_before_block: ++ if (exec_list_is_empty(&cursor.block->instr_list)) { ++ /* Empty block. After is as good as before. */ ++ cursor.option = nir_cursor_after_block; ++ } else { ++ /* Try to switch to after the previous block if there is one. ++ * (This isn't likely, but it can happen.) ++ */ ++ nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node); ++ if (prev_node && prev_node->type == nir_cf_node_block) { ++ cursor.block = nir_cf_node_as_block(prev_node); ++ cursor.option = nir_cursor_after_block; ++ } ++ } ++ return cursor; ++ ++ case nir_cursor_after_block: ++ return cursor; ++ ++ case nir_cursor_before_instr: { ++ nir_instr *prev_instr = nir_instr_prev(cursor.instr); ++ if (prev_instr) { ++ /* Before this instruction is after the previous */ ++ cursor.instr = prev_instr; ++ cursor.option = nir_cursor_after_instr; ++ } else { ++ /* No previous instruction. Switch to before block */ ++ cursor.block = cursor.instr->block; ++ cursor.option = nir_cursor_before_block; ++ } ++ return reduce_cursor(cursor); ++ } ++ ++ case nir_cursor_after_instr: ++ if (nir_instr_next(cursor.instr) == NULL) { ++ /* This is the last instruction, switch to after block */ ++ cursor.option = nir_cursor_after_block; ++ cursor.block = cursor.instr->block; ++ } ++ return cursor; ++ ++ default: ++ unreachable("Inavlid cursor option"); ++ } ++} ++ ++bool ++nir_cursors_equal(nir_cursor a, nir_cursor b) ++{ ++ /* Reduced cursors should be unique */ ++ a = reduce_cursor(a); ++ b = reduce_cursor(b); ++ ++ return a.block == b.block && a.option == b.option; ++} ++ + static bool + add_use_cb(nir_src *src, void *state) + { + nir_instr *instr = state; + + src->parent_instr = instr; + list_addtail(&src->use_link, + src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses); + + return true; + } + + static bool + add_ssa_def_cb(nir_ssa_def *def, void *state) + { + nir_instr *instr = state; + + if (instr->block && def->index == UINT_MAX) { + nir_function_impl *impl = + nir_cf_node_get_function(&instr->block->cf_node); + + def->index = impl->ssa_alloc++; + } + + return true; + } + + static bool + add_reg_def_cb(nir_dest *dest, void *state) + { + nir_instr *instr = state; + + if (!dest->is_ssa) { + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); + } + + return true; + } + + static void + add_defs_uses(nir_instr *instr) + { + nir_foreach_src(instr, add_use_cb, instr); + nir_foreach_dest(instr, add_reg_def_cb, instr); + nir_foreach_ssa_def(instr, add_ssa_def_cb, instr); + } + + void + nir_instr_insert(nir_cursor cursor, nir_instr *instr) + { + switch (cursor.option) { + case nir_cursor_before_block: + /* Only allow inserting jumps into empty blocks. 
*/ + if (instr->type == nir_instr_type_jump) + assert(exec_list_is_empty(&cursor.block->instr_list)); + + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_head(&cursor.block->instr_list, &instr->node); + break; + case nir_cursor_after_block: { + /* Inserting instructions after a jump is illegal. */ + nir_instr *last = nir_block_last_instr(cursor.block); + assert(last == NULL || last->type != nir_instr_type_jump); + (void) last; + + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_tail(&cursor.block->instr_list, &instr->node); + break; + } + case nir_cursor_before_instr: + assert(instr->type != nir_instr_type_jump); + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_node_before(&cursor.instr->node, &instr->node); + break; + case nir_cursor_after_instr: + /* Inserting instructions after a jump is illegal. */ + assert(cursor.instr->type != nir_instr_type_jump); + + /* Only allow inserting jumps at the end of the block. */ + if (instr->type == nir_instr_type_jump) + assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); + + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_after(&cursor.instr->node, &instr->node); + break; + } + + if (instr->type == nir_instr_type_jump) + nir_handle_add_jump(instr->block); + } + + static bool + src_is_valid(const nir_src *src) + { + return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL); + } + + static bool + remove_use_cb(nir_src *src, void *state) + { + if (src_is_valid(src)) + list_del(&src->use_link); + + return true; + } + + static bool + remove_def_cb(nir_dest *dest, void *state) + { + if (!dest->is_ssa) + list_del(&dest->reg.def_link); + + return true; + } + + static void + remove_defs_uses(nir_instr *instr) + { + nir_foreach_dest(instr, remove_def_cb, instr); + nir_foreach_src(instr, remove_use_cb, instr); + } + + void nir_instr_remove(nir_instr *instr) + { + remove_defs_uses(instr); + exec_node_remove(&instr->node); + + if (instr->type == nir_instr_type_jump) { + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + nir_handle_remove_jump(instr->block, jump_instr->type); + } + } + + /*@}*/ + + void + nir_index_local_regs(nir_function_impl *impl) + { + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &impl->registers) { + reg->index = index++; + } + impl->reg_alloc = index; + } + + void + nir_index_global_regs(nir_shader *shader) + { + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &shader->registers) { + reg->index = index++; + } + shader->reg_alloc = index; + } + + static bool + visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) + { + return cb(&instr->dest.dest, state); + } + + static bool + visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, + void *state) + { + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + return cb(&instr->dest, state); + + return true; + } + + static bool + visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb, + void *state) + { + return cb(&instr->dest, state); + } + + static bool + visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state) + { + return cb(&instr->dest, state); + } + + static bool + visit_parallel_copy_dest(nir_parallel_copy_instr *instr, + nir_foreach_dest_cb cb, void *state) + { + nir_foreach_parallel_copy_entry(instr, entry) { + if (!cb(&entry->dest, state)) + return false; + } + + return true; + } + + bool + nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void 
*state) + { + switch (instr->type) { + case nir_instr_type_alu: + return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_intrinsic: + return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); + case nir_instr_type_tex: + return visit_texture_dest(nir_instr_as_tex(instr), cb, state); + case nir_instr_type_phi: + return visit_phi_dest(nir_instr_as_phi(instr), cb, state); + case nir_instr_type_parallel_copy: + return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr), + cb, state); + + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + case nir_instr_type_call: + case nir_instr_type_jump: + break; + + default: + unreachable("Invalid instruction type"); + break; + } + + return true; + } + + struct foreach_ssa_def_state { + nir_foreach_ssa_def_cb cb; + void *client_state; + }; + + static inline bool + nir_ssa_def_visitor(nir_dest *dest, void *void_state) + { + struct foreach_ssa_def_state *state = void_state; + + if (dest->is_ssa) + return state->cb(&dest->ssa, state->client_state); + else + return true; + } + + bool + nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) + { + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + case nir_instr_type_intrinsic: + case nir_instr_type_phi: + case nir_instr_type_parallel_copy: { + struct foreach_ssa_def_state foreach_state = {cb, state}; + return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state); + } + + case nir_instr_type_load_const: + return cb(&nir_instr_as_load_const(instr)->def, state); + case nir_instr_type_ssa_undef: + return cb(&nir_instr_as_ssa_undef(instr)->def, state); + case nir_instr_type_call: + case nir_instr_type_jump: + return true; + default: + unreachable("Invalid instruction type"); + } + } + + static bool + visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) + { + if (!cb(src, state)) + return false; + if (!src->is_ssa && src->reg.indirect) + return cb(src->reg.indirect, state); + return true; + } + + static bool + visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, + void *state) + { + if (deref->deref_array_type == nir_deref_array_type_indirect) + return visit_src(&deref->indirect, cb, state); + return true; + } + + static bool + visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) + { + nir_deref *cur = &deref->deref; + while (cur != NULL) { + if (cur->deref_type == nir_deref_type_array) + if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) + return false; + + cur = cur->child; + } + + return true; + } + + static bool + visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) + { + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + + return true; + } + + static bool + visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) + { + for (unsigned i = 0; i < instr->num_srcs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + ++ if (instr->texture != NULL) ++ if (!visit_deref_src(instr->texture, cb, state)) ++ return false; ++ + if (instr->sampler != NULL) + if (!visit_deref_src(instr->sampler, cb, state)) + return false; + + return true; + } + + static bool + visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, + void *state) + { + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) + if (!visit_src(&instr->src[i], cb, state)) + return false; + 
+ unsigned num_vars = + nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) + if (!visit_deref_src(instr->variables[i], cb, state)) + return false; + + return true; + } + + static bool + visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) + { + return true; + } + + static bool + visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb, + void *state) + { + return true; + } + + static bool + visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state) + { + nir_foreach_phi_src(instr, src) { + if (!visit_src(&src->src, cb, state)) + return false; + } + + return true; + } + + static bool + visit_parallel_copy_src(nir_parallel_copy_instr *instr, + nir_foreach_src_cb cb, void *state) + { + nir_foreach_parallel_copy_entry(instr, entry) { + if (!visit_src(&entry->src, cb, state)) + return false; + } + + return true; + } + + typedef struct { + void *state; + nir_foreach_src_cb cb; + } visit_dest_indirect_state; + + static bool + visit_dest_indirect(nir_dest *dest, void *_state) + { + visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state; + + if (!dest->is_ssa && dest->reg.indirect) + return state->cb(dest->reg.indirect, state->state); + + return true; + } + + bool + nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) + { + switch (instr->type) { + case nir_instr_type_alu: + if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) + return false; + break; + case nir_instr_type_intrinsic: + if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) + return false; + break; + case nir_instr_type_tex: + if (!visit_tex_src(nir_instr_as_tex(instr), cb, state)) + return false; + break; + case nir_instr_type_call: + if (!visit_call_src(nir_instr_as_call(instr), cb, state)) + return false; + break; + case nir_instr_type_load_const: + if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state)) + return false; + break; + case nir_instr_type_phi: + if (!visit_phi_src(nir_instr_as_phi(instr), cb, state)) + return false; + break; + case nir_instr_type_parallel_copy: + if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr), + cb, state)) + return false; + break; + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return true; + + default: + unreachable("Invalid instruction type"); + break; + } + + visit_dest_indirect_state dest_state; + dest_state.state = state; + dest_state.cb = cb; + return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); + } + + nir_const_value * + nir_src_as_const_value(nir_src src) + { + if (!src.is_ssa) + return NULL; + + if (src.ssa->parent_instr->type != nir_instr_type_load_const) + return NULL; + + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + return &load->value; + } + + /** + * Returns true if the source is known to be dynamically uniform. Otherwise it + * returns false which means it may or may not be dynamically uniform but it + * can't be determined. 
+ */ + bool + nir_src_is_dynamically_uniform(nir_src src) + { + if (!src.is_ssa) + return false; + + /* Constants are trivially dynamically uniform */ + if (src.ssa->parent_instr->type == nir_instr_type_load_const) + return true; + + /* As are uniform variables */ + if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); + + if (intr->intrinsic == nir_intrinsic_load_uniform) + return true; + } + + /* XXX: this could have many more tests, such as when a sampler function is + * called with dynamically uniform arguments. + */ + return false; + } + + static void + src_remove_all_uses(nir_src *src) + { + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + list_del(&src->use_link); + } + } + + static void + src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) + { + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + if (parent_instr) { + src->parent_instr = parent_instr; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->uses); + else + list_addtail(&src->use_link, &src->reg.reg->uses); + } else { + assert(parent_if); + src->parent_if = parent_if; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->if_uses); + else + list_addtail(&src->use_link, &src->reg.reg->if_uses); + } + } + } + + void + nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) + { + assert(!src_is_valid(src) || src->parent_instr == instr); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, instr, NULL); + } + + void + nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) + { + assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); + + src_remove_all_uses(dest); + src_remove_all_uses(src); + *dest = *src; + *src = NIR_SRC_INIT; + src_add_all_uses(dest, dest_instr, NULL); + } + + void + nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) + { + nir_src *src = &if_stmt->condition; + assert(!src_is_valid(src) || src->parent_if == if_stmt); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, NULL, if_stmt); + } + + void + nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) + { + if (dest->is_ssa) { + /* We can only overwrite an SSA destination if it has no uses. 
*/ + assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses)); + } else { + list_del(&dest->reg.def_link); + if (dest->reg.indirect) + src_remove_all_uses(dest->reg.indirect); + } + + /* We can't re-write with an SSA def */ + assert(!new_dest.is_ssa); + + nir_dest_copy(dest, &new_dest, instr); + + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs); + + if (dest->reg.indirect) + src_add_all_uses(dest->reg.indirect, instr, NULL); + } + + void + nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name) + { + def->name = name; + def->parent_instr = instr; + list_inithead(&def->uses); + list_inithead(&def->if_uses); + def->num_components = num_components; + + if (instr->block) { + nir_function_impl *impl = + nir_cf_node_get_function(&instr->block->cf_node); + + def->index = impl->ssa_alloc++; + } else { + def->index = UINT_MAX; + } + } + + void + nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name) + { + dest->is_ssa = true; + nir_ssa_def_init(instr, &dest->ssa, num_components, name); + } + + void + nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) + { + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); + } + + static bool + is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) + { + assert(start->block == end->block); + + if (between->block != start->block) + return false; + + /* Search backwards looking for "between" */ + while (start != end) { + if (between == end) + return true; + + end = nir_instr_prev(end); + assert(end); + } + + return false; + } + + /* Replaces all uses of the given SSA def with the given source but only if + * the use comes after the after_me instruction. This can be useful if you + * are emitting code to fix up the result of some instruction: you can freely + * use the result in that code and then call rewrite_uses_after and pass the + * last fixup instruction as after_me and it will replace all of the uses you + * want without touching the fixup code. + * + * This function assumes that after_me is in the same block as + * def->parent_instr and that after_me comes after def->parent_instr. + */ + void + nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me) + { + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) { + assert(use_src->parent_instr != def->parent_instr); + /* Since def already dominates all of its uses, the only way a use can + * not be dominated by after_me is if it is between def and after_me in + * the instruction list. 
+ */ + if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + } + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); + } + + static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void *state); + + static inline bool + foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) + { + if (reverse) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, + &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_reverse_safe(nir_cf_node, node, node, + &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; + } + + static inline bool + foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) + { + if (reverse) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; + } + + static bool + foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void *state) + { + switch (node->type) { + case nir_cf_node_block: + return cb(nir_cf_node_as_block(node), state); + case nir_cf_node_if: + return foreach_if(nir_cf_node_as_if(node), cb, reverse, state); + case nir_cf_node_loop: + return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state); + break; + + default: + unreachable("Invalid CFG node type"); + break; + } + + return false; + } + + bool + nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + void *state) + { + return foreach_cf_node(node, cb, false, state); + } + + bool + nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state) + { + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { + if (!foreach_cf_node(node, cb, false, state)) + return false; + } + + return cb(impl->end_block, state); + } + + bool + nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state) + { + if (!cb(impl->end_block, state)) + return false; + + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) { + if (!foreach_cf_node(node, cb, true, state)) + return false; + } + + return true; + } + + nir_if * + nir_block_get_following_if(nir_block *block) + { + if (exec_node_is_tail_sentinel(&block->cf_node.node)) + return NULL; + + if (nir_cf_node_is_last(&block->cf_node)) + return NULL; + + nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); + + if (next_node->type != nir_cf_node_if) + return NULL; + + return nir_cf_node_as_if(next_node); + } + + nir_loop * + nir_block_get_following_loop(nir_block *block) + { + if (exec_node_is_tail_sentinel(&block->cf_node.node)) + return NULL; + + if (nir_cf_node_is_last(&block->cf_node)) + return NULL; + + nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); + + if (next_node->type != nir_cf_node_loop) + return NULL; + + return nir_cf_node_as_loop(next_node); + } + static bool 
+ index_block(nir_block *block, void *state) + { + unsigned *index = state; + block->index = (*index)++; + return true; + } + + void + nir_index_blocks(nir_function_impl *impl) + { + unsigned index = 0; + + if (impl->valid_metadata & nir_metadata_block_index) + return; + + nir_foreach_block(impl, index_block, &index); + + impl->num_blocks = index; + } + + static bool + index_ssa_def_cb(nir_ssa_def *def, void *state) + { + unsigned *index = (unsigned *) state; + def->index = (*index)++; + + return true; + } + + static bool + index_ssa_block(nir_block *block, void *state) + { + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def_cb, state); + + return true; + } + + /** + * The indices are applied top-to-bottom which has the very nice property + * that, if A dominates B, then A->index <= B->index. + */ + void + nir_index_ssa_defs(nir_function_impl *impl) + { + unsigned index = 0; + nir_foreach_block(impl, index_ssa_block, &index); + impl->ssa_alloc = index; + } + + static bool + index_instrs_block(nir_block *block, void *state) + { + unsigned *index = state; + nir_foreach_instr(block, instr) + instr->index = (*index)++; + + return true; + } + + /** + * The indices are applied top-to-bottom which has the very nice property + * that, if A dominates B, then A->index <= B->index. + */ + unsigned + nir_index_instrs(nir_function_impl *impl) + { + unsigned index = 0; + nir_foreach_block(impl, index_instrs_block, &index); + return index; + } + + nir_intrinsic_op + nir_intrinsic_from_system_value(gl_system_value val) + { + switch (val) { + case SYSTEM_VALUE_VERTEX_ID: + return nir_intrinsic_load_vertex_id; + case SYSTEM_VALUE_INSTANCE_ID: + return nir_intrinsic_load_instance_id; + case SYSTEM_VALUE_DRAW_ID: + return nir_intrinsic_load_draw_id; + case SYSTEM_VALUE_BASE_INSTANCE: + return nir_intrinsic_load_base_instance; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + return nir_intrinsic_load_vertex_id_zero_base; + case SYSTEM_VALUE_BASE_VERTEX: + return nir_intrinsic_load_base_vertex; + case SYSTEM_VALUE_INVOCATION_ID: + return nir_intrinsic_load_invocation_id; + case SYSTEM_VALUE_FRONT_FACE: + return nir_intrinsic_load_front_face; + case SYSTEM_VALUE_SAMPLE_ID: + return nir_intrinsic_load_sample_id; + case SYSTEM_VALUE_SAMPLE_POS: + return nir_intrinsic_load_sample_pos; + case SYSTEM_VALUE_SAMPLE_MASK_IN: + return nir_intrinsic_load_sample_mask_in; + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: + return nir_intrinsic_load_local_invocation_id; + case SYSTEM_VALUE_WORK_GROUP_ID: + return nir_intrinsic_load_work_group_id; + case SYSTEM_VALUE_NUM_WORK_GROUPS: + return nir_intrinsic_load_num_work_groups; + case SYSTEM_VALUE_PRIMITIVE_ID: + return nir_intrinsic_load_primitive_id; + case SYSTEM_VALUE_TESS_COORD: + return nir_intrinsic_load_tess_coord; + case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return nir_intrinsic_load_tess_level_outer; + case SYSTEM_VALUE_TESS_LEVEL_INNER: + return nir_intrinsic_load_tess_level_inner; + case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_patch_vertices_in; + case SYSTEM_VALUE_HELPER_INVOCATION: + return nir_intrinsic_load_helper_invocation; + default: + unreachable("system value does not directly correspond to intrinsic"); + } + } + + gl_system_value + nir_system_value_from_intrinsic(nir_intrinsic_op intrin) + { + switch (intrin) { + case nir_intrinsic_load_vertex_id: + return SYSTEM_VALUE_VERTEX_ID; + case nir_intrinsic_load_instance_id: + return SYSTEM_VALUE_INSTANCE_ID; + case nir_intrinsic_load_draw_id: + return SYSTEM_VALUE_DRAW_ID; + case 
nir_intrinsic_load_base_instance: + return SYSTEM_VALUE_BASE_INSTANCE; + case nir_intrinsic_load_vertex_id_zero_base: + return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + case nir_intrinsic_load_base_vertex: + return SYSTEM_VALUE_BASE_VERTEX; + case nir_intrinsic_load_invocation_id: + return SYSTEM_VALUE_INVOCATION_ID; + case nir_intrinsic_load_front_face: + return SYSTEM_VALUE_FRONT_FACE; + case nir_intrinsic_load_sample_id: + return SYSTEM_VALUE_SAMPLE_ID; + case nir_intrinsic_load_sample_pos: + return SYSTEM_VALUE_SAMPLE_POS; + case nir_intrinsic_load_sample_mask_in: + return SYSTEM_VALUE_SAMPLE_MASK_IN; + case nir_intrinsic_load_local_invocation_id: + return SYSTEM_VALUE_LOCAL_INVOCATION_ID; + case nir_intrinsic_load_num_work_groups: + return SYSTEM_VALUE_NUM_WORK_GROUPS; + case nir_intrinsic_load_work_group_id: + return SYSTEM_VALUE_WORK_GROUP_ID; + case nir_intrinsic_load_primitive_id: + return SYSTEM_VALUE_PRIMITIVE_ID; + case nir_intrinsic_load_tess_coord: + return SYSTEM_VALUE_TESS_COORD; + case nir_intrinsic_load_tess_level_outer: + return SYSTEM_VALUE_TESS_LEVEL_OUTER; + case nir_intrinsic_load_tess_level_inner: + return SYSTEM_VALUE_TESS_LEVEL_INNER; + case nir_intrinsic_load_patch_vertices_in: + return SYSTEM_VALUE_VERTICES_IN; + case nir_intrinsic_load_helper_invocation: + return SYSTEM_VALUE_HELPER_INVOCATION; + default: + unreachable("intrinsic doesn't produce a system value"); + } + } diff --cc src/compiler/nir/nir.h index 00000000000,aec75fb930c..f130e5e0eb1 mode 000000,100644..100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@@ -1,0 -1,2111 +1,2239 @@@ + /* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #pragma once + + #include "util/hash_table.h" + #include "compiler/glsl/list.h" + #include "GL/gl.h" /* GLenum */ + #include "util/list.h" + #include "util/ralloc.h" + #include "util/set.h" + #include "util/bitset.h" + #include "compiler/nir_types.h" + #include "compiler/shader_enums.h" + #include + + #include "nir_opcodes.h" + + #ifdef __cplusplus + extern "C" { + #endif + + struct gl_program; + struct gl_shader_program; + + #define NIR_FALSE 0u + #define NIR_TRUE (~0u) + + /** Defines a cast function + * + * This macro defines a cast function from in_type to out_type where + * out_type is some structure type that contains a field of type out_type. 
+ * + * Note that you have to be a bit careful as the generated cast function + * destroys constness. + */ + #define NIR_DEFINE_CAST(name, in_type, out_type, field) \ + static inline out_type * \ + name(const in_type *parent) \ + { \ + return exec_node_data(out_type, parent, field); \ + } + + struct nir_function; + struct nir_shader; + struct nir_instr; + + + /** + * Description of built-in state associated with a uniform + * + * \sa nir_variable::state_slots + */ + typedef struct { + int tokens[5]; + int swizzle; + } nir_state_slot; + + typedef enum { + nir_var_all = -1, + nir_var_shader_in, + nir_var_shader_out, + nir_var_global, + nir_var_local, + nir_var_uniform, + nir_var_shader_storage, ++ nir_var_shared, + nir_var_system_value + } nir_variable_mode; + + /** + * Data stored in an nir_constant + */ + union nir_constant_data { + unsigned u[16]; + int i[16]; + float f[16]; + bool b[16]; + }; + + typedef struct nir_constant { + /** + * Value of the constant. + * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c nir_variable. Constants may be + * scalars, vectors, or matrices. + */ + union nir_constant_data value; + + /* we could get this from the var->type but makes clone *much* easier to + * not have to care about the type. + */ + unsigned num_elements; + + /* Array elements / Structure Fields */ + struct nir_constant **elements; + } nir_constant; + + /** + * \brief Layout qualifiers for gl_FragDepth. + * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ + typedef enum { + nir_depth_layout_none, /**< No depth layout is specified. */ + nir_depth_layout_any, + nir_depth_layout_greater, + nir_depth_layout_less, + nir_depth_layout_unchanged + } nir_depth_layout; + + /** + * Either a uniform, global variable, shader input, or shader output. Based on + * ir_variable - it should be easy to translate between the two. + */ + + typedef struct nir_variable { + struct exec_node node; + + /** + * Declared type of the variable + */ + const struct glsl_type *type; + + /** + * Declared name of the variable + */ + char *name; + + struct nir_variable_data { + + /** + * Is the variable read-only? + * + * This is set for variables declared as \c const, shader inputs, + * and uniforms. + */ + unsigned read_only:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned invariant:1; + + /** + * Storage class of the variable. + * + * \sa nir_variable_mode + */ + nir_variable_mode mode:4; + + /** + * Interpolation mode for shader inputs / outputs + * + * \sa glsl_interp_qualifier + */ + unsigned interpolation:2; + + /** + * \name ARB_fragment_coord_conventions + * @{ + */ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + unsigned explicit_index:1; + + /** + * Was an initial binding explicitly set in the shader? + * + * If so, constant_initializer contains an integer nir_constant + * representing the initial binding point. + */ + unsigned explicit_binding:1; + + /** + * Does this variable have an initializer? + * + * This is used by the linker to cross-validiate initializers of global + * variables. 
+ */ + unsigned has_initializer:1; + + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + */ + unsigned location_frac:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was not an array. + * + * Note that this variable and \c from_named_ifc_block_array will never + * both be non-zero. + */ + unsigned from_named_ifc_block_nonarray:1; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was an array. + * + * Note that this variable and \c from_named_ifc_block_nonarray will never + * both be non-zero. + */ + unsigned from_named_ifc_block_array:1; + + /** + * \brief Layout qualifier for gl_FragDepth. + * + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. + */ + nir_depth_layout depth_layout; + + /** + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. + * - Non-UBO Uniforms: uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. + */ + int location; + + /** + * The actual location of the variable in the IR. Only valid for inputs + * and outputs. + */ + unsigned int driver_location; + + /** + * output index for dual source blending. + */ + int index; + ++ /** ++ * Descriptor set binding for sampler or UBO. ++ */ ++ int descriptor_set; ++ + /** + * Initial binding point for a sampler or UBO. + * + * For array types, this represents the binding point for the first element. + */ + int binding; + + /** + * Location an atomic counter is stored at. + */ + unsigned offset; + + /** + * ARB_shader_image_load_store qualifiers. + */ + struct { + bool read_only; /**< "readonly" qualifier. */ + bool write_only; /**< "writeonly" qualifier. */ + bool coherent; + bool _volatile; + bool restrict_flag; + + /** Image internal format if specified explicitly, otherwise GL_NONE. */ + GLenum format; + } image; + + /** + * Highest element accessed with a constant expression array index + * + * Not used for non-array variables. + */ + unsigned max_array_access; + + } data; + + /** + * Built-in state that backs this uniform + * + * Once set at variable creation, \c state_slots must remain invariant. + * This is because, ideally, this array would be shared by all clones of + * this variable in the IR tree. In other words, we'd really like for it + * to be a fly-weight. 
+ * + * If the variable is not a uniform, \c num_state_slots will be zero and + * \c state_slots will be \c NULL. + */ + /*@{*/ + unsigned num_state_slots; /**< Number of state slots used */ + nir_state_slot *state_slots; /**< State descriptors. */ + /*@}*/ + + /** + * Constant expression assigned in the initializer of the variable + */ + nir_constant *constant_initializer; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. + * + * \sa ir_variable::location + */ + const struct glsl_type *interface_type; + } nir_variable; + + #define nir_foreach_variable(var, var_list) \ + foreach_list_typed(nir_variable, var, node, var_list) + ++/** ++ * Returns the bits in the inputs_read, outputs_written, or ++ * system_values_read bitfield corresponding to this variable. ++ */ ++static inline uint64_t ++nir_variable_get_io_mask(nir_variable *var, gl_shader_stage stage) ++{ ++ assert(var->data.mode == nir_var_shader_in || ++ var->data.mode == nir_var_shader_out || ++ var->data.mode == nir_var_system_value); ++ assert(var->data.location >= 0); ++ ++ const struct glsl_type *var_type = var->type; ++ if (stage == MESA_SHADER_GEOMETRY && var->data.mode == nir_var_shader_in) { ++ /* Most geometry shader inputs are per-vertex arrays */ ++ if (var->data.location >= VARYING_SLOT_VAR0) ++ assert(glsl_type_is_array(var_type)); ++ ++ if (glsl_type_is_array(var_type)) ++ var_type = glsl_get_array_element(var_type); ++ } ++ ++ bool is_vertex_input = (var->data.mode == nir_var_shader_in && ++ stage == MESA_SHADER_VERTEX); ++ unsigned slots = glsl_count_attribute_slots(var_type, is_vertex_input); ++ return ((1ull << slots) - 1) << var->data.location; ++} ++ + typedef struct nir_register { + struct exec_node node; + + unsigned num_components; /** < number of vector components */ + unsigned num_array_elems; /** < size of array (0 for no array) */ + + /** generic register index. */ + unsigned index; + + /** only for debug purposes, can be NULL */ + const char *name; + + /** whether this register is local (per-function) or global (per-shader) */ + bool is_global; + + /** + * If this flag is set to true, then accessing channels >= num_components + * is well-defined, and simply spills over to the next array element. This + * is useful for backends that can do per-component accessing, in + * particular scalar backends. By setting this flag and making + * num_components equal to 1, structures can be packed tightly into + * registers and then registers can be accessed per-component to get to + * each structure member, even if it crosses vec4 boundaries. + */ + bool is_packed; + + /** set of nir_src's where this register is used (read from) */ + struct list_head uses; + + /** set of nir_dest's where this register is defined (written to) */ + struct list_head defs; + + /** set of nir_if's where this register is used as a condition */ + struct list_head if_uses; + } nir_register; + + typedef enum { + nir_instr_type_alu, + nir_instr_type_call, + nir_instr_type_tex, + nir_instr_type_intrinsic, + nir_instr_type_load_const, + nir_instr_type_jump, + nir_instr_type_ssa_undef, + nir_instr_type_phi, + nir_instr_type_parallel_copy, + } nir_instr_type; + + typedef struct nir_instr { + struct exec_node node; + nir_instr_type type; + struct nir_block *block; + + /** generic instruction index. */ + unsigned index; + + /* A temporary for optimization and analysis passes to use for storing + * flags. 
For instance, DCE uses this to store the "dead/live" info. + */ + uint8_t pass_flags; + } nir_instr; + + static inline nir_instr * + nir_instr_next(nir_instr *instr) + { + struct exec_node *next = exec_node_get_next(&instr->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_instr, next, node); + } + + static inline nir_instr * + nir_instr_prev(nir_instr *instr) + { + struct exec_node *prev = exec_node_get_prev(&instr->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_instr, prev, node); + } + + static inline bool + nir_instr_is_first(nir_instr *instr) + { + return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node)); + } + + static inline bool + nir_instr_is_last(nir_instr *instr) + { + return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node)); + } + + typedef struct nir_ssa_def { + /** for debugging only, can be NULL */ + const char* name; + + /** generic SSA definition index. */ + unsigned index; + + /** Index into the live_in and live_out bitfields */ + unsigned live_index; + + nir_instr *parent_instr; + + /** set of nir_instr's where this register is used (read from) */ + struct list_head uses; + + /** set of nir_if's where this register is used as a condition */ + struct list_head if_uses; + + uint8_t num_components; + } nir_ssa_def; + + struct nir_src; + + typedef struct { + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO use-def chain goes here */ + } nir_reg_src; + + typedef struct { + nir_instr *parent_instr; + struct list_head def_link; + + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO def-use chain goes here */ + } nir_reg_dest; + + struct nir_if; + + typedef struct nir_src { + union { + nir_instr *parent_instr; + struct nir_if *parent_if; + }; + + struct list_head use_link; + + union { + nir_reg_src reg; + nir_ssa_def *ssa; + }; + + bool is_ssa; + } nir_src; + -#define NIR_SRC_INIT (nir_src) { { NULL } } ++#ifdef __cplusplus ++# define NIR_SRC_INIT nir_src() ++#else ++# define NIR_SRC_INIT (nir_src) { { NULL } } ++#endif + + #define nir_foreach_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + + #define nir_foreach_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + + #define nir_foreach_if_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + + #define nir_foreach_if_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + + typedef struct { + union { + nir_reg_dest reg; + nir_ssa_def ssa; + }; + + bool is_ssa; + } nir_dest; + -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } ++#ifdef __cplusplus ++# define NIR_DEST_INIT nir_dest() ++#else ++# define NIR_DEST_INIT (nir_dest) { { { NULL } } } ++#endif + + #define nir_foreach_def(reg, dest) \ + list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) + + #define nir_foreach_def_safe(reg, dest) \ + list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) + + static inline nir_src + nir_src_for_ssa(nir_ssa_def *def) + { + nir_src src = NIR_SRC_INIT; + + src.is_ssa = true; + src.ssa = def; + + return src; + } + + static inline nir_src + nir_src_for_reg(nir_register *reg) + { + nir_src src = NIR_SRC_INIT; + + src.is_ssa = false; + src.reg.reg = reg; + 
src.reg.indirect = NULL; + src.reg.base_offset = 0; + + return src; + } + + static inline nir_dest + nir_dest_for_reg(nir_register *reg) + { + nir_dest dest = NIR_DEST_INIT; + + dest.reg.reg = reg; + + return dest; + } + + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); + void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); + + typedef struct { + nir_src src; + + /** + * \name input modifiers + */ + /*@{*/ + /** + * For inputs interpreted as floating point, flips the sign bit. For + * inputs interpreted as integers, performs the two's complement negation. + */ + bool negate; + + /** + * Clears the sign bit for floating point values, and computes the integer + * absolute value for integers. Note that the negate modifier acts after + * the absolute value modifier, therefore if both are set then all inputs + * will become negative. + */ + bool abs; + /*@}*/ + + /** + * For each input component, says which component of the register it is + * chosen from. Note that which elements of the swizzle are used and which + * are ignored are based on the write mask for most opcodes - for example, + * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and + * a swizzle of {2, x, 1, 0} where x means "don't care." + */ + uint8_t swizzle[4]; + } nir_alu_src; + + typedef struct { + nir_dest dest; + + /** + * \name saturate output modifier + * + * Only valid for opcodes that output floating-point numbers. Clamps the + * output to between 0.0 and 1.0 inclusive. + */ + + bool saturate; + + unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ + } nir_alu_dest; + + typedef enum { + nir_type_invalid = 0, /* Not a valid type */ + nir_type_float, + nir_type_int, + nir_type_uint, + nir_type_bool + } nir_alu_type; + + typedef enum { + NIR_OP_IS_COMMUTATIVE = (1 << 0), + NIR_OP_IS_ASSOCIATIVE = (1 << 1), + } nir_op_algebraic_property; + + typedef struct { + const char *name; + + unsigned num_inputs; + + /** + * The number of components in the output + * + * If non-zero, this is the size of the output and input sizes are + * explicitly given; swizzle and writemask are still in effect, but if + * the output component is masked out, then the input component may + * still be in use. + * + * If zero, the opcode acts in the standard, per-component manner; the + * operation is performed on each component (except the ones that are + * masked out) with the input being taken from the input swizzle for + * that component. + * + * The size of some of the inputs may be given (i.e. non-zero) even + * though output_size is zero; in that case, the inputs with a zero + * size act per-component, while the inputs with non-zero size don't. + */ + unsigned output_size; + + /** + * The type of vector that the instruction outputs. Note that the + * staurate modifier is only allowed on outputs with the float type. + */ + + nir_alu_type output_type; + + /** + * The number of components in each input + */ + unsigned input_sizes[4]; + + /** + * The type of vector that each input takes. Note that negate and + * absolute value are only allowed on inputs with int or float type and + * behave differently on the two. 
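To make the swizzle and modifier fields described above concrete, here is a minimal sketch that fills in a nir_alu_src by hand; it assumes nir.h and <string.h> are included and that `def` is an existing four-component SSA value. Treat the field assignments as an illustration of the semantics rather than a recommended pattern, since real passes normally build sources through higher-level helpers.

/* Read def.zyxw with the negate modifier applied (sign flip for floats,
 * two's complement negation for integers).
 */
static nir_alu_src
example_alu_src(nir_ssa_def *def)
{
   nir_alu_src asrc;
   memset(&asrc, 0, sizeof(asrc));

   asrc.src = nir_src_for_ssa(def);
   asrc.negate = true;
   asrc.abs = false;
   asrc.swizzle[0] = 2;   /* component x reads def.z */
   asrc.swizzle[1] = 1;   /* component y reads def.y */
   asrc.swizzle[2] = 0;   /* component z reads def.x */
   asrc.swizzle[3] = 3;   /* component w reads def.w */

   return asrc;
}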
+ */ + nir_alu_type input_types[4]; + + nir_op_algebraic_property algebraic_properties; + } nir_op_info; + + extern const nir_op_info nir_op_infos[nir_num_opcodes]; + + typedef struct nir_alu_instr { + nir_instr instr; + nir_op op; + nir_alu_dest dest; + nir_alu_src src[]; + } nir_alu_instr; + + void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, + nir_alu_instr *instr); + void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, + nir_alu_instr *instr); + + /* is this source channel used? */ + static inline bool + nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) + { + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return channel < nir_op_infos[instr->op].input_sizes[src]; + + return (instr->dest.write_mask >> channel) & 1; + } + + /* + * For instructions whose destinations are SSA, get the number of channels + * used for a source + */ + static inline unsigned + nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) + { + assert(instr->dest.dest.is_ssa); + + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return nir_op_infos[instr->op].input_sizes[src]; + + return instr->dest.dest.ssa.num_components; + } + + typedef enum { + nir_deref_type_var, + nir_deref_type_array, + nir_deref_type_struct + } nir_deref_type; + + typedef struct nir_deref { + nir_deref_type deref_type; + struct nir_deref *child; + const struct glsl_type *type; + } nir_deref; + + typedef struct { + nir_deref deref; + + nir_variable *var; + } nir_deref_var; + + /* This enum describes how the array is referenced. If the deref is + * direct then the base_offset is used. If the deref is indirect then then + * offset is given by base_offset + indirect. If the deref is a wildcard + * then the deref refers to all of the elements of the array at the same + * time. Wildcard dereferences are only ever allowed in copy_var + * intrinsics and the source and destination derefs must have matching + * wildcards. + */ + typedef enum { + nir_deref_array_type_direct, + nir_deref_array_type_indirect, + nir_deref_array_type_wildcard, + } nir_deref_array_type; + + typedef struct { + nir_deref deref; + + nir_deref_array_type deref_array_type; + unsigned base_offset; + nir_src indirect; + } nir_deref_array; + + typedef struct { + nir_deref deref; + + unsigned index; + } nir_deref_struct; + + NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) + NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) + NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) + + /* Returns the last deref in the chain. */ + static inline nir_deref * + nir_deref_tail(nir_deref *deref) + { + while (deref->child) + deref = deref->child; + return deref; + } + + typedef struct { + nir_instr instr; + + unsigned num_params; + nir_deref_var **params; + nir_deref_var *return_deref; + + struct nir_function *callee; + } nir_call_instr; + + #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ + num_variables, num_indices, flags) \ + nir_intrinsic_##name, + + #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, + + typedef enum { + #include "nir_intrinsics.h" + nir_num_intrinsics = nir_last_intrinsic + 1 + } nir_intrinsic_op; + + #undef INTRINSIC + #undef LAST_INTRINSIC + + /** Represents an intrinsic + * + * An intrinsic is an instruction type for handling things that are + * more-or-less regular operations but don't just consume and produce SSA + * values like ALU operations do. 
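Before moving on to intrinsics, a short sketch of how the deref types above are meant to be consumed: walk the chain from the nir_deref_var at its head and dispatch on deref_type using the NIR_DEFINE_CAST helpers. The function below only counts indirect array links and is purely illustrative; it assumes nir.h is included.

/* Count how many links in a deref chain index an array with a non-constant
 * (indirect) offset.
 */
static unsigned
count_indirect_array_derefs(nir_deref_var *head)
{
   unsigned count = 0;

   for (nir_deref *d = &head->deref; d != NULL; d = d->child) {
      if (d->deref_type != nir_deref_type_array)
         continue;

      nir_deref_array *arr = nir_deref_as_array(d);
      if (arr->deref_array_type == nir_deref_array_type_indirect)
         count++;
   }

   return count;
}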
Intrinsics are not for things that have + * special semantic meaning such as phi nodes and parallel copies. + * Examples of intrinsics include variable load/store operations, system + * value loads, and the like. Even though texturing more-or-less falls + * under this category, texturing is its own instruction type because + * trying to represent texturing with intrinsics would lead to a + * combinatorial explosion of intrinsic opcodes. + * + * By having a single instruction type for handling a lot of different + * cases, optimization passes can look for intrinsics and, for the most + * part, completely ignore them. Each intrinsic type also has a few + * possible flags that govern whether or not they can be reordered or + * eliminated. That way passes like dead code elimination can still work + * on intrisics without understanding the meaning of each. + * + * Each intrinsic has some number of constant indices, some number of + * variables, and some number of sources. What these sources, variables, + * and indices mean depends on the intrinsic and is documented with the + * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture + * instructions are the only types of instruction that can operate on + * variables. + */ + typedef struct { + nir_instr instr; + + nir_intrinsic_op intrinsic; + + nir_dest dest; + + /** number of components if this is a vectorized intrinsic + * + * Similarly to ALU operations, some intrinsics are vectorized. + * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. + * For vectorized intrinsics, the num_components field specifies the + * number of destination components and the number of source components + * for all sources with nir_intrinsic_infos.src_components[i] == 0. + */ + uint8_t num_components; + + int const_index[3]; + + nir_deref_var *variables[2]; + + nir_src src[]; + } nir_intrinsic_instr; + + /** + * \name NIR intrinsics semantic flags + * + * information about what the compiler can do with the intrinsics. + * + * \sa nir_intrinsic_info::flags + */ + typedef enum { + /** + * whether the intrinsic can be safely eliminated if none of its output + * value is not being used. + */ + NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), + + /** + * Whether the intrinsic can be reordered with respect to any other + * intrinsic, i.e. whether the only reordering dependencies of the + * intrinsic are due to the register reads/writes. + */ + NIR_INTRINSIC_CAN_REORDER = (1 << 1), + } nir_intrinsic_semantic_flag; + + #define NIR_INTRINSIC_MAX_INPUTS 4 + + typedef struct { + const char *name; + + unsigned num_srcs; /** < number of register/SSA inputs */ + + /** number of components of each input register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. + */ + unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; + + bool has_dest; + + /** number of components of the output register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. 
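The semantic flags and the nir_intrinsic_info table completed just below are what generic passes consult instead of understanding each opcode. As a hedged sketch (assuming nir.h is included), a DCE-style pass might decide whether an unused intrinsic can be dropped like this:

/* True if the intrinsic has no side effects the optimizer must preserve,
 * i.e. it may be removed once its destination is unused.
 */
static bool
intrinsic_is_removable(const nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
   return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) != 0;
}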
+ */ + unsigned dest_components; + + /** the number of inputs/outputs that are variables */ + unsigned num_variables; + + /** the number of constant indices used by the intrinsic */ + unsigned num_indices; + + /** semantic flags for calls to this intrinsic */ + nir_intrinsic_semantic_flag flags; + } nir_intrinsic_info; + + extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; + + /** + * \group texture information + * + * This gives semantic information about textures which is useful to the + * frontend, the backend, and lowering passes, but not the optimizer. + */ + + typedef enum { + nir_tex_src_coord, + nir_tex_src_projector, + nir_tex_src_comparitor, /* shadow comparitor */ + nir_tex_src_offset, + nir_tex_src_bias, + nir_tex_src_lod, + nir_tex_src_ms_index, /* MSAA sample index */ + nir_tex_src_ddx, + nir_tex_src_ddy, ++ nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ + nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ + nir_num_tex_src_types + } nir_tex_src_type; + + typedef struct { + nir_src src; + nir_tex_src_type src_type; + } nir_tex_src; + + typedef enum { + nir_texop_tex, /**< Regular texture look-up */ + nir_texop_txb, /**< Texture look-up with LOD bias */ + nir_texop_txl, /**< Texture look-up with explicit LOD */ + nir_texop_txd, /**< Texture look-up with partial derivatvies */ + nir_texop_txf, /**< Texel fetch with explicit LOD */ + nir_texop_txf_ms, /**< Multisample texture fetch */ + nir_texop_txs, /**< Texture size */ + nir_texop_lod, /**< Texture lod query */ + nir_texop_tg4, /**< Texture gather */ + nir_texop_query_levels, /**< Texture levels query */ + nir_texop_texture_samples, /**< Texture samples query */ + nir_texop_samples_identical, /**< Query whether all samples are definitely + * identical. + */ + } nir_texop; + + typedef struct { + nir_instr instr; + + enum glsl_sampler_dim sampler_dim; + nir_alu_type dest_type; + + nir_texop op; + nir_dest dest; + nir_tex_src *src; + unsigned num_srcs, coord_components; + bool is_array, is_shadow; + + /** + * If is_shadow is true, whether this is the old-style shadow that outputs 4 + * components or the new-style shadow that outputs 1 component. + */ + bool is_new_style_shadow; + + /* constant offset - must be 0 if the offset source is used */ + int const_offset[4]; + + /* gather component selector */ + unsigned component : 2; + ++ /** The texture index ++ * ++ * If this texture instruction has a nir_tex_src_texture_offset source, ++ * then the texture index is given by texture_index + texture_offset. ++ */ ++ unsigned texture_index; ++ ++ /** The size of the texture array or 0 if it's not an array */ ++ unsigned texture_array_size; ++ ++ /** The texture deref ++ * ++ * If both this and `sampler` are both NULL, use texture_index instead. ++ * If `texture` is NULL, but `sampler` is non-NULL, then the texture is ++ * implied from the sampler. ++ */ ++ nir_deref_var *texture; ++ + /** The sampler index + * + * If this texture instruction has a nir_tex_src_sampler_offset source, + * then the sampler index is given by sampler_index + sampler_offset. + */ + unsigned sampler_index; + - /** The size of the sampler array or 0 if it's not an array */ - unsigned sampler_array_size; - - nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ ++ /** The sampler deref ++ * ++ * If this is null, use sampler_index instead. 
++ */ ++ nir_deref_var *sampler; + } nir_tex_instr; + + static inline unsigned + nir_tex_instr_dest_size(nir_tex_instr *instr) + { + switch (instr->op) { + case nir_texop_txs: { + unsigned ret; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + ret = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_CUBE: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + ret = 2; + break; + case GLSL_SAMPLER_DIM_3D: + ret = 3; + break; + default: + unreachable("not reached"); + } + if (instr->is_array) + ret++; + return ret; + } + + case nir_texop_lod: + return 2; + + case nir_texop_texture_samples: + case nir_texop_query_levels: + case nir_texop_samples_identical: + return 1; + + default: + if (instr->is_shadow && instr->is_new_style_shadow) + return 1; + + return 4; + } + } + + /* Returns true if this texture operation queries something about the texture + * rather than actually sampling it. + */ + static inline bool + nir_tex_instr_is_query(nir_tex_instr *instr) + { + switch (instr->op) { + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_texture_samples: + case nir_texop_query_levels: + return true; + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_tg4: + return false; + default: + unreachable("Invalid texture opcode"); + } + } + + static inline unsigned + nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) + { + if (instr->src[src].src_type == nir_tex_src_coord) + return instr->coord_components; + + + if (instr->src[src].src_type == nir_tex_src_offset || + instr->src[src].src_type == nir_tex_src_ddx || + instr->src[src].src_type == nir_tex_src_ddy) { + if (instr->is_array) + return instr->coord_components - 1; + else + return instr->coord_components; + } + + return 1; + } + + static inline int + nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) + { + for (unsigned i = 0; i < instr->num_srcs; i++) + if (instr->src[i].src_type == type) + return (int) i; + + return -1; + } + + typedef struct { + union { + float f[4]; + int32_t i[4]; + uint32_t u[4]; + }; + } nir_const_value; + + typedef struct { + nir_instr instr; + + nir_const_value value; + + nir_ssa_def def; + } nir_load_const_instr; + + typedef enum { + nir_jump_return, + nir_jump_break, + nir_jump_continue, + } nir_jump_type; + + typedef struct { + nir_instr instr; + nir_jump_type type; + } nir_jump_instr; + + /* creates a new SSA variable in an undefined state */ + + typedef struct { + nir_instr instr; + nir_ssa_def def; + } nir_ssa_undef_instr; + + typedef struct { + struct exec_node node; + + /* The predecessor block corresponding to this source */ + struct nir_block *pred; + + nir_src src; + } nir_phi_src; + + #define nir_foreach_phi_src(phi, entry) \ + foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs) + #define nir_foreach_phi_src_safe(phi, entry) \ + foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs) + + typedef struct { + nir_instr instr; + + struct exec_list srcs; /** < list of nir_phi_src */ + + nir_dest dest; + } nir_phi_instr; + + typedef struct { + struct exec_node node; + nir_src src; + nir_dest dest; + } nir_parallel_copy_entry; + + #define nir_foreach_parallel_copy_entry(pcopy, entry) \ + foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) + + typedef struct { + nir_instr instr; + + /* A list of nir_parallel_copy_entry's. 
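Passes rarely index nir_tex_instr::src directly; the helpers above, in particular nir_tex_instr_src_index, are the intended way to locate an optional source. A small sketch, assuming nir.h is included:

/* Return the explicit LOD source of a texture instruction, or NULL if the
 * instruction does not carry one.
 */
static nir_src *
find_explicit_lod(nir_tex_instr *tex)
{
   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   return (idx >= 0) ? &tex->src[idx].src : NULL;
}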
The sources of all of the + * entries are copied to the corresponding destinations "in parallel". + * In other words, if we have two entries: a -> b and b -> a, the values + * get swapped. + */ + struct exec_list entries; + } nir_parallel_copy_instr; + + NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr) + NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, + nir_parallel_copy_instr, instr) + + /* + * Control flow + * + * Control flow consists of a tree of control flow nodes, which include + * if-statements and loops. The leaves of the tree are basic blocks, lists of + * instructions that always run start-to-finish. Each basic block also keeps + * track of its successors (blocks which may run immediately after the current + * block) and predecessors (blocks which could have run immediately before the + * current block). Each function also has a start block and an end block which + * all return statements point to (which is always empty). Together, all the + * blocks with their predecessors and successors make up the control flow + * graph (CFG) of the function. There are helpers that modify the tree of + * control flow nodes while modifying the CFG appropriately; these should be + * used instead of modifying the tree directly. + */ + + typedef enum { + nir_cf_node_block, + nir_cf_node_if, + nir_cf_node_loop, + nir_cf_node_function + } nir_cf_node_type; + + typedef struct nir_cf_node { + struct exec_node node; + nir_cf_node_type type; + struct nir_cf_node *parent; + } nir_cf_node; + + typedef struct nir_block { + nir_cf_node cf_node; + + struct exec_list instr_list; /** < list of nir_instr */ + + /** generic block index; generated by nir_index_blocks */ + unsigned index; + + /* + * Each block can only have up to 2 successors, so we put them in a simple + * array - no need for anything more complicated. + */ + struct nir_block *successors[2]; + + /* Set of nir_block predecessors in the CFG */ + struct set *predecessors; + + /* + * this node's immediate dominator in the dominance tree - set to NULL for + * the start block. + */ + struct nir_block *imm_dom; + + /* This node's children in the dominance tree */ + unsigned num_dom_children; + struct nir_block **dom_children; + + /* Set of nir_block's on the dominance frontier of this block */ + struct set *dom_frontier; + + /* + * These two indices have the property that dom_{pre,post}_index for each + * child of this block in the dominance tree will always be between + * dom_pre_index and dom_post_index for this block, which makes testing if + * a given block is dominated by another block an O(1) operation. 
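The O(1) test that the dom_pre_index/dom_post_index comment describes looks roughly like the sketch below. It assumes dominance metadata is up to date (see nir_metadata_require later in this header); real code should prefer the library's own nir_block_dominates(), which is declared further down.

static bool
block_dominates(const nir_block *parent, const nir_block *child)
{
   /* Every block in parent's dominance subtree has its pre/post indices
    * nested inside parent's, so containment is a two-comparison check.
    */
   return parent->dom_pre_index <= child->dom_pre_index &&
          child->dom_post_index <= parent->dom_post_index;
}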
+ */ + unsigned dom_pre_index, dom_post_index; + + /* live in and out for this block; used for liveness analysis */ + BITSET_WORD *live_in; + BITSET_WORD *live_out; + } nir_block; + + static inline nir_instr * + nir_block_first_instr(nir_block *block) + { + struct exec_node *head = exec_list_get_head(&block->instr_list); + return exec_node_data(nir_instr, head, node); + } + + static inline nir_instr * + nir_block_last_instr(nir_block *block) + { + struct exec_node *tail = exec_list_get_tail(&block->instr_list); + return exec_node_data(nir_instr, tail, node); + } + + #define nir_foreach_instr(block, instr) \ + foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) + #define nir_foreach_instr_reverse(block, instr) \ + foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) + #define nir_foreach_instr_safe(block, instr) \ + foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) + #define nir_foreach_instr_reverse_safe(block, instr) \ + foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) + + typedef struct nir_if { + nir_cf_node cf_node; + nir_src condition; + + struct exec_list then_list; /** < list of nir_cf_node */ + struct exec_list else_list; /** < list of nir_cf_node */ + } nir_if; + + static inline nir_cf_node * + nir_if_first_then_node(nir_if *if_stmt) + { + struct exec_node *head = exec_list_get_head(&if_stmt->then_list); + return exec_node_data(nir_cf_node, head, node); + } + + static inline nir_cf_node * + nir_if_last_then_node(nir_if *if_stmt) + { + struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); + return exec_node_data(nir_cf_node, tail, node); + } + + static inline nir_cf_node * + nir_if_first_else_node(nir_if *if_stmt) + { + struct exec_node *head = exec_list_get_head(&if_stmt->else_list); + return exec_node_data(nir_cf_node, head, node); + } + + static inline nir_cf_node * + nir_if_last_else_node(nir_if *if_stmt) + { + struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); + return exec_node_data(nir_cf_node, tail, node); + } + + typedef struct { + nir_cf_node cf_node; + + struct exec_list body; /** < list of nir_cf_node */ + } nir_loop; + + static inline nir_cf_node * + nir_loop_first_cf_node(nir_loop *loop) + { + return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node); + } + + static inline nir_cf_node * + nir_loop_last_cf_node(nir_loop *loop) + { + return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node); + } + + /** + * Various bits of metadata that can may be created or required by + * optimization and analysis passes + */ + typedef enum { + nir_metadata_none = 0x0, + nir_metadata_block_index = 0x1, + nir_metadata_dominance = 0x2, + nir_metadata_live_ssa_defs = 0x4, + nir_metadata_not_properly_reset = 0x8, + } nir_metadata; + + typedef struct { + nir_cf_node cf_node; + + /** pointer to the function of which this is an implementation */ + struct nir_function *function; + + struct exec_list body; /** < list of nir_cf_node */ + + nir_block *end_block; + + /** list for all local variables in the function */ + struct exec_list locals; + + /** array of variables used as parameters */ + unsigned num_params; + nir_variable **params; + + /** variable used to hold the result of the function */ + nir_variable *return_var; + + /** list of local registers in the function */ + struct exec_list registers; + + /** next available local register index */ + unsigned reg_alloc; + + /** next available SSA value index */ + unsigned ssa_alloc; + + /* total number of 
basic blocks, only valid when block_index_dirty = false */ + unsigned num_blocks; + + nir_metadata valid_metadata; + } nir_function_impl; + + static inline nir_block * + nir_start_block(nir_function_impl *impl) + { + return (nir_block *) exec_list_get_head(&impl->body); + } + + static inline nir_cf_node * + nir_cf_node_next(nir_cf_node *node) + { + struct exec_node *next = exec_node_get_next(&node->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_cf_node, next, node); + } + + static inline nir_cf_node * + nir_cf_node_prev(nir_cf_node *node) + { + struct exec_node *prev = exec_node_get_prev(&node->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_cf_node, prev, node); + } + + static inline bool + nir_cf_node_is_first(const nir_cf_node *node) + { + return exec_node_is_head_sentinel(node->node.prev); + } + + static inline bool + nir_cf_node_is_last(const nir_cf_node *node) + { + return exec_node_is_tail_sentinel(node->node.next); + } + + NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node) + NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node) + NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node) + NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node) + + typedef enum { + nir_parameter_in, + nir_parameter_out, + nir_parameter_inout, + } nir_parameter_type; + + typedef struct { + nir_parameter_type param_type; + const struct glsl_type *type; + } nir_parameter; + + typedef struct nir_function { + struct exec_node node; + + const char *name; + struct nir_shader *shader; + + unsigned num_params; + nir_parameter *params; + const struct glsl_type *return_type; + + /** The implementation of this function. + * + * If the function is only declared and not implemented, this is NULL. + */ + nir_function_impl *impl; + } nir_function; + + typedef struct nir_shader_compiler_options { + bool lower_fdiv; + bool lower_ffma; + bool lower_flrp; + bool lower_fpow; + bool lower_fsat; + bool lower_fsqrt; + bool lower_fmod; + bool lower_bitfield_extract; + bool lower_bitfield_insert; + bool lower_uadd_carry; + bool lower_usub_borrow; + /** lowers fneg and ineg to fsub and isub. */ + bool lower_negate; + /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ + bool lower_sub; + + /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ + bool lower_scmp; + + /* Does the native fdot instruction replicate its result for four + * components? If so, then opt_algebraic_late will turn all fdotN + * instructions into fdot_replicatedN instructions. + */ + bool fdot_replicates; + + /** lowers ffract to fsub+ffloor: */ + bool lower_ffract; + ++ bool lower_pack_half_2x16; ++ bool lower_pack_unorm_2x16; ++ bool lower_pack_snorm_2x16; ++ bool lower_pack_unorm_4x8; ++ bool lower_pack_snorm_4x8; ++ bool lower_unpack_half_2x16; ++ bool lower_unpack_unorm_2x16; ++ bool lower_unpack_snorm_2x16; ++ bool lower_unpack_unorm_4x8; ++ bool lower_unpack_snorm_4x8; ++ ++ bool lower_extract_byte; ++ bool lower_extract_word; ++ + /** + * Does the driver support real 32-bit integers? (Otherwise, integers + * are simulated by floats.) 
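Once the struct definition just below is complete, a driver typically describes its lowering needs with a single static instance that it passes to nir_shader_create(). The combination of flags here is purely illustrative (and assumes C99 designated initializers), not taken from any real backend:

static const nir_shader_compiler_options example_backend_options = {
   .lower_fpow = true,          /* no native pow instruction */
   .lower_scmp = true,          /* emit flt/fge/feq/fne + b2f instead */
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .fdot_replicates = true,     /* native fdotN broadcasts its result */
};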
+ */ + bool native_integers; ++ ++ /* Indicates that the driver only has zero-based vertex id */ ++ bool vertex_id_zero_based; + } nir_shader_compiler_options; + + typedef struct nir_shader_info { + const char *name; + + /* Descriptive name provided by the client; may be NULL */ + const char *label; + + /* Number of textures used by this shader */ + unsigned num_textures; + /* Number of uniform buffers used by this shader */ + unsigned num_ubos; + /* Number of atomic buffers used by this shader */ + unsigned num_abos; + /* Number of shader storage buffers used by this shader */ + unsigned num_ssbos; + /* Number of images used by this shader */ + unsigned num_images; + + /* Which inputs are actually read */ + uint64_t inputs_read; + /* Which outputs are actually written */ + uint64_t outputs_written; + /* Which system values are actually read */ + uint64_t system_values_read; + + /* Which patch inputs are actually read */ + uint32_t patch_inputs_read; + /* Which patch outputs are actually written */ + uint32_t patch_outputs_written; + + /* Whether or not this shader ever uses textureGather() */ + bool uses_texture_gather; + + /* Whether or not this shader uses the gl_ClipDistance output */ + bool uses_clip_distance_out; + + /* Whether or not separate shader objects were used */ + bool separate_shader; + + /** Was this shader linked with any transform feedback varyings? */ + bool has_transform_feedback_varyings; + + union { + struct { + /** The number of vertices recieves per input primitive */ + unsigned vertices_in; + + /** The output primitive type (GL enum value) */ + unsigned output_primitive; + + /** The maximum number of vertices the geometry shader might write. */ + unsigned vertices_out; + + /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ + unsigned invocations; + + /** Whether or not this shader uses EndPrimitive */ + bool uses_end_primitive; + + /** Whether or not this shader uses non-zero streams */ + bool uses_streams; + } gs; + + struct { + bool uses_discard; + + /** + * Whether early fragment tests are enabled as defined by + * ARB_shader_image_load_store. + */ + bool early_fragment_tests; + + /** gl_FragDepth layout for ARB_conservative_depth. */ + enum gl_frag_depth_layout depth_layout; + } fs; + + struct { + unsigned local_size[3]; + } cs; + + struct { + /** The number of vertices in the TCS output patch. */ + unsigned vertices_out; + } tcs; + }; + } nir_shader_info; + + typedef struct nir_shader { + /** list of uniforms (nir_variable) */ + struct exec_list uniforms; + + /** list of inputs (nir_variable) */ + struct exec_list inputs; + + /** list of outputs (nir_variable) */ + struct exec_list outputs; + ++ /** list of shared compute variables (nir_variable) */ ++ struct exec_list shared; ++ + /** Set of driver-specific options for the shader. + * + * The memory for the options is expected to be kept in a single static + * copy by the driver. + */ + const struct nir_shader_compiler_options *options; + + /** Various bits of compile-time information about a given shader */ + struct nir_shader_info info; + + /** list of global variables in the shader (nir_variable) */ + struct exec_list globals; + + /** list of system value variables in the shader (nir_variable) */ + struct exec_list system_values; + + struct exec_list functions; /** < list of nir_function */ + + /** list of global register in the shader */ + struct exec_list registers; + + /** next available global register index */ + unsigned reg_alloc; + + /** + * the highest index a load_input_*, load_uniform_*, etc. 
intrinsic can + * access plus one + */ - unsigned num_inputs, num_uniforms, num_outputs; ++ unsigned num_inputs, num_uniforms, num_outputs, num_shared; + + /** The shader stage, such as MESA_SHADER_VERTEX. */ + gl_shader_stage stage; + } nir_shader; + + #define nir_foreach_function(shader, func) \ + foreach_list_typed(nir_function, func, node, &(shader)->functions) + + nir_shader *nir_shader_create(void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options); + + /** creates a register, including assigning it an index and adding it to the list */ + nir_register *nir_global_reg_create(nir_shader *shader); + + nir_register *nir_local_reg_create(nir_function_impl *impl); + + void nir_reg_remove(nir_register *reg); + + /** Adds a variable to the appropreate list in nir_shader */ + void nir_shader_add_variable(nir_shader *shader, nir_variable *var); + + static inline void + nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) + { + assert(var->data.mode == nir_var_local); + exec_list_push_tail(&impl->locals, &var->node); + } + + /** creates a variable, sets a few defaults, and adds it to the list */ + nir_variable *nir_variable_create(nir_shader *shader, + nir_variable_mode mode, + const struct glsl_type *type, + const char *name); + /** creates a local variable and adds it to the list */ + nir_variable *nir_local_variable_create(nir_function_impl *impl, + const struct glsl_type *type, + const char *name); + + /** creates a function and adds it to the shader's list of functions */ + nir_function *nir_function_create(nir_shader *shader, const char *name); + + nir_function_impl *nir_function_impl_create(nir_function *func); ++/** creates a function_impl that isn't tied to any particular function */ ++nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); + + nir_block *nir_block_create(nir_shader *shader); + nir_if *nir_if_create(nir_shader *shader); + nir_loop *nir_loop_create(nir_shader *shader); + + nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); + + /** requests that the given pieces of metadata be generated */ + void nir_metadata_require(nir_function_impl *impl, nir_metadata required); + /** dirties all but the preserved metadata */ + void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); + + /** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ + nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); + + nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); + + nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, + unsigned num_components); + + nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, + nir_intrinsic_op op); + + nir_call_instr *nir_call_instr_create(nir_shader *shader, + nir_function *callee); + + nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); + + nir_phi_instr *nir_phi_instr_create(nir_shader *shader); + + nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); + + nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, + unsigned num_components); + + nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); + nir_deref_array *nir_deref_array_create(void *mem_ctx); + nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); + + nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); + + nir_load_const_instr * + nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); + + /** + * NIR Cursors and Instruction Insertion API + * @{ + * + * A tiny struct representing a point to insert/extract instructions or + * control flow nodes. Helps reduce the combinatorial explosion of possible + * points to insert/extract. + * + * \sa nir_control_flow.h + */ + typedef enum { + nir_cursor_before_block, + nir_cursor_after_block, + nir_cursor_before_instr, + nir_cursor_after_instr, + } nir_cursor_option; + + typedef struct { + nir_cursor_option option; + union { + nir_block *block; + nir_instr *instr; + }; + } nir_cursor; + ++static inline nir_block * ++nir_cursor_current_block(nir_cursor cursor) ++{ ++ if (cursor.option == nir_cursor_before_instr || ++ cursor.option == nir_cursor_after_instr) { ++ return cursor.instr->block; ++ } else { ++ return cursor.block; ++ } ++} ++ ++bool nir_cursors_equal(nir_cursor a, nir_cursor b); ++ + static inline nir_cursor + nir_before_block(nir_block *block) + { + nir_cursor cursor; + cursor.option = nir_cursor_before_block; + cursor.block = block; + return cursor; + } + + static inline nir_cursor + nir_after_block(nir_block *block) + { + nir_cursor cursor; + cursor.option = nir_cursor_after_block; + cursor.block = block; + return cursor; + } + + static inline nir_cursor + nir_before_instr(nir_instr *instr) + { + nir_cursor cursor; + cursor.option = nir_cursor_before_instr; + cursor.instr = instr; + return cursor; + } + + static inline nir_cursor + nir_after_instr(nir_instr *instr) + { + nir_cursor cursor; + cursor.option = nir_cursor_after_instr; + cursor.instr = instr; + return cursor; + } + + static inline nir_cursor + nir_after_block_before_jump(nir_block *block) + { + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + return nir_before_instr(last_instr); + } else { + return nir_after_block(block); + } + } + + static inline nir_cursor + nir_before_cf_node(nir_cf_node *node) + { + if (node->type == nir_cf_node_block) + return nir_before_block(nir_cf_node_as_block(node)); + + return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); + } + + static inline nir_cursor + nir_after_cf_node(nir_cf_node *node) + { + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); + } + ++static inline nir_cursor 
++nir_after_cf_node_and_phis(nir_cf_node *node) ++{ ++ if (node->type == nir_cf_node_block) ++ return nir_after_block(nir_cf_node_as_block(node)); ++ ++ nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); ++ assert(block->cf_node.type == nir_cf_node_block); ++ ++ nir_foreach_instr(block, instr) { ++ if (instr->type != nir_instr_type_phi) ++ return nir_before_instr(instr); ++ } ++ return nir_after_block(block); ++} ++ + static inline nir_cursor + nir_before_cf_list(struct exec_list *cf_list) + { + nir_cf_node *first_node = exec_node_data(nir_cf_node, + exec_list_get_head(cf_list), node); + return nir_before_cf_node(first_node); + } + + static inline nir_cursor + nir_after_cf_list(struct exec_list *cf_list) + { + nir_cf_node *last_node = exec_node_data(nir_cf_node, + exec_list_get_tail(cf_list), node); + return nir_after_cf_node(last_node); + } + + /** + * Insert a NIR instruction at the given cursor. + * + * Note: This does not update the cursor. + */ + void nir_instr_insert(nir_cursor cursor, nir_instr *instr); + + static inline void + nir_instr_insert_before(nir_instr *instr, nir_instr *before) + { + nir_instr_insert(nir_before_instr(instr), before); + } + + static inline void + nir_instr_insert_after(nir_instr *instr, nir_instr *after) + { + nir_instr_insert(nir_after_instr(instr), after); + } + + static inline void + nir_instr_insert_before_block(nir_block *block, nir_instr *before) + { + nir_instr_insert(nir_before_block(block), before); + } + + static inline void + nir_instr_insert_after_block(nir_block *block, nir_instr *after) + { + nir_instr_insert(nir_after_block(block), after); + } + + static inline void + nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) + { + nir_instr_insert(nir_before_cf_node(node), before); + } + + static inline void + nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) + { + nir_instr_insert(nir_after_cf_node(node), after); + } + + static inline void + nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) + { + nir_instr_insert(nir_before_cf_list(list), before); + } + + static inline void + nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) + { + nir_instr_insert(nir_after_cf_list(list), after); + } + + void nir_instr_remove(nir_instr *instr); + + /** @} */ + + typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); + typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); + typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); + bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, + void *state); + bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); + bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); + + nir_const_value *nir_src_as_const_value(nir_src src); + bool nir_src_is_dynamically_uniform(nir_src src); + bool nir_srcs_equal(nir_src src1, nir_src src2); + void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); + void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); + void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); + void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, + nir_dest new_dest); + + void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name); + void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name); + void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); + void 
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me); + + /* visits basic blocks in source-code order */ + typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); + bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); + bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); + bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + void *state); + + /* If the following CF node is an if, this function returns that if. + * Otherwise, it returns NULL. + */ + nir_if *nir_block_get_following_if(nir_block *block); + + nir_loop *nir_block_get_following_loop(nir_block *block); + + void nir_index_local_regs(nir_function_impl *impl); + void nir_index_global_regs(nir_shader *shader); + void nir_index_ssa_defs(nir_function_impl *impl); + unsigned nir_index_instrs(nir_function_impl *impl); + + void nir_index_blocks(nir_function_impl *impl); + + void nir_print_shader(nir_shader *shader, FILE *fp); + void nir_print_instr(const nir_instr *instr, FILE *fp); + -nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); ++nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); ++nir_function_impl *nir_function_impl_clone(const nir_function_impl *impl); ++nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); + + #ifdef DEBUG + void nir_validate_shader(nir_shader *shader); + void nir_metadata_set_validation_flag(nir_shader *shader); + void nir_metadata_check_validation_flag(nir_shader *shader); + + #include "util/debug.h" + static inline bool + should_clone_nir(void) + { + static int should_clone = -1; + if (should_clone < 0) + should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); + + return should_clone; + } + #else + static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } + static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } + static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } + static inline bool should_clone_nir(void) { return false; } + #endif /* DEBUG */ + + #define _PASS(nir, do_pass) do { \ + do_pass \ + nir_validate_shader(nir); \ + if (should_clone_nir()) { \ + nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ + ralloc_free(nir); \ + nir = clone; \ + } \ + } while (0) + + #define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \ + nir_metadata_set_validation_flag(nir); \ + if (pass(nir, ##__VA_ARGS__)) { \ + progress = true; \ + nir_metadata_check_validation_flag(nir); \ + } \ + ) + + #define NIR_PASS_V(nir, pass, ...) 
_PASS(nir, \ + pass(nir, ##__VA_ARGS__); \ + ) + + void nir_calc_dominance_impl(nir_function_impl *impl); + void nir_calc_dominance(nir_shader *shader); + + nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); + bool nir_block_dominates(nir_block *parent, nir_block *child); + + void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); + void nir_dump_dom_tree(nir_shader *shader, FILE *fp); + + void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); + void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); + + void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); + void nir_dump_cfg(nir_shader *shader, FILE *fp); + + int nir_gs_count_vertices(const nir_shader *shader); + + bool nir_split_var_copies(nir_shader *shader); + ++bool nir_lower_returns_impl(nir_function_impl *impl); ++bool nir_lower_returns(nir_shader *shader); ++ ++bool nir_inline_functions(nir_shader *shader); ++ + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); + void nir_lower_var_copies(nir_shader *shader); + + bool nir_lower_global_vars_to_local(nir_shader *shader); + ++bool nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask); ++ + bool nir_lower_locals_to_regs(nir_shader *shader); + -void nir_lower_outputs_to_temporaries(nir_shader *shader); ++void nir_lower_outputs_to_temporaries(nir_shader *shader, ++ nir_function *entrypoint); ++ ++void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); + + void nir_assign_var_locations(struct exec_list *var_list, + unsigned *size, + int (*type_size)(const struct glsl_type *)); + + void nir_lower_io(nir_shader *shader, + nir_variable_mode mode, + int (*type_size)(const struct glsl_type *)); + nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); + nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); + + void nir_lower_vars_to_ssa(nir_shader *shader); + -bool nir_remove_dead_variables(nir_shader *shader); ++bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode); + + void nir_move_vec_src_uses_to_dest(nir_shader *shader); + bool nir_lower_vec_to_movs(nir_shader *shader); + void nir_lower_alu_to_scalar(nir_shader *shader); + void nir_lower_load_const_to_scalar(nir_shader *shader); + + void nir_lower_phis_to_scalar(nir_shader *shader); + + void nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program); + + bool nir_lower_system_values(nir_shader *shader); + + typedef struct nir_lower_tex_options { + /** + * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which + * sampler types a texture projector is lowered. + */ + unsigned lower_txp; + + /** + * If true, lower rect textures to 2D, using txs to fetch the + * texture dimensions and dividing the texture coords by the + * texture dims to normalize. + */ + bool lower_rect; + + /** + * To emulate certain texture wrap modes, this can be used + * to saturate the specified tex coord to [0.0, 1.0]. The + * bits are according to sampler #, ie. if, for example: + * + * (conf->saturate_s & (1 << n)) + * + * is true, then the s coord for sampler n is saturated. + * + * Note that clamping must happen *after* projector lowering + * so any projected texture sample instruction with a clamped + * coordinate gets automatically lowered, regardless of the + * 'lower_txp' setting. + */ + unsigned saturate_s; + unsigned saturate_t; + unsigned saturate_r; + + /* Bitmask of samplers that need swizzling. 
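As a usage sketch for the texture lowering options being defined here: zero the struct, set the lowerings the backend needs, and hand it to nir_lower_tex(), which is declared just after the struct. The particular flags and the sampler number used with saturate_s are illustrative only; nir.h and <string.h> are assumed to be included.

static void
run_tex_lowering(nir_shader *shader)
{
   nir_lower_tex_options opts;
   memset(&opts, 0, sizeof(opts));

   opts.lower_rect = true;                         /* rect -> 2D via txs */
   opts.lower_txp = (1 << GLSL_SAMPLER_DIM_2D) |
                    (1 << GLSL_SAMPLER_DIM_RECT);  /* lower projectors */
   opts.saturate_s = 1 << 0;                       /* clamp s on sampler 0 */

   nir_lower_tex(shader, &opts);
}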
+ * + * If (swizzle_result & (1 << sampler_index)), then the swizzle in + * swizzles[sampler_index] is applied to the result of the texturing + * operation. + */ + unsigned swizzle_result; + + /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles + * while 4 and 5 represent 0 and 1 respectively. + */ + uint8_t swizzles[32][4]; + } nir_lower_tex_options; + + bool nir_lower_tex(nir_shader *shader, + const nir_lower_tex_options *options); + + void nir_lower_idiv(nir_shader *shader); + + void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables); + void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); + + void nir_lower_two_sided_color(nir_shader *shader); + + void nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program); + void nir_lower_to_source_mods(nir_shader *shader); + + bool nir_lower_gs_intrinsics(nir_shader *shader); + + bool nir_normalize_cubemap_coords(nir_shader *shader); + + void nir_live_ssa_defs_impl(nir_function_impl *impl); + bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); + + void nir_convert_to_ssa_impl(nir_function_impl *impl); + void nir_convert_to_ssa(nir_shader *shader); ++ ++bool nir_repair_ssa_impl(nir_function_impl *impl); ++bool nir_repair_ssa(nir_shader *shader); + + /* If phi_webs_only is true, only convert SSA values involved in phi nodes to + * registers. If false, convert all values (even those not involved in a phi + * node) to registers. + */ + void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); + + bool nir_opt_algebraic(nir_shader *shader); + bool nir_opt_algebraic_late(nir_shader *shader); + bool nir_opt_constant_folding(nir_shader *shader); + + bool nir_opt_global_to_local(nir_shader *shader); + + bool nir_copy_prop(nir_shader *shader); + + bool nir_opt_cse(nir_shader *shader); + + bool nir_opt_dce(nir_shader *shader); + + bool nir_opt_dead_cf(nir_shader *shader); + + void nir_opt_gcm(nir_shader *shader); + + bool nir_opt_peephole_select(nir_shader *shader); + + bool nir_opt_remove_phis(nir_shader *shader); + + bool nir_opt_undef(nir_shader *shader); + + void nir_sweep(nir_shader *shader); + + nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); + gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); + + #ifdef __cplusplus + } /* extern "C" */ + #endif diff --cc src/compiler/nir/nir_algebraic.py index 00000000000,a30652f2afd..14c0e822ad8 mode 000000,100644..100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@@ -1,0 -1,305 +1,305 @@@ + #! /usr/bin/env python + # + # Copyright (C) 2014 Intel Corporation + # + # Permission is hereby granted, free of charge, to any person obtaining a + # copy of this software and associated documentation files (the "Software"), + # to deal in the Software without restriction, including without limitation + # the rights to use, copy, modify, merge, publish, distribute, sublicense, + # and/or sell copies of the Software, and to permit persons to whom the + # Software is furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice (including the next + # paragraph) shall be included in all copies or substantial portions of the + # Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + # Authors: + # Jason Ekstrand (jason@jlekstrand.net) + + import itertools + import struct + import sys + import mako.template + import re + + # Represents a set of variables, each with a unique id + class VarSet(object): + def __init__(self): + self.names = {} + self.ids = itertools.count() + self.immutable = False; + + def __getitem__(self, name): + if name not in self.names: + assert not self.immutable, "Unknown replacement variable: " + name + self.names[name] = self.ids.next() + + return self.names[name] + + def lock(self): + self.immutable = True + + class Value(object): + @staticmethod + def create(val, name_base, varset): + if isinstance(val, tuple): + return Expression(val, name_base, varset) + elif isinstance(val, Expression): + return val + elif isinstance(val, (str, unicode)): + return Variable(val, name_base, varset) + elif isinstance(val, (bool, int, long, float)): + return Constant(val, name_base) + + __template = mako.template.Template(""" + static const ${val.c_type} ${val.name} = { + { ${val.type_enum} }, + % if isinstance(val, Constant): + { ${hex(val)} /* ${val.value} */ }, + % elif isinstance(val, Variable): + ${val.index}, /* ${val.var_name} */ + ${'true' if val.is_constant else 'false'}, + nir_type_${ val.required_type or 'invalid' }, + % elif isinstance(val, Expression): + nir_op_${val.opcode}, + { ${', '.join(src.c_ptr for src in val.sources)} }, + % endif + };""") + + def __init__(self, name, type_str): + self.name = name + self.type_str = type_str + + @property + def type_enum(self): + return "nir_search_value_" + self.type_str + + @property + def c_type(self): + return "nir_search_" + self.type_str + + @property + def c_ptr(self): + return "&{0}.value".format(self.name) + + def render(self): + return self.__template.render(val=self, + Constant=Constant, + Variable=Variable, + Expression=Expression) + + class Constant(Value): + def __init__(self, val, name): + Value.__init__(self, name, "constant") + self.value = val + + def __hex__(self): + # Even if it's an integer, we still need to unpack as an unsigned + # int. This is because, without C99, we can only assign to the first + # element of a union in an initializer. 
+ if isinstance(self.value, (bool)): + return 'NIR_TRUE' if self.value else 'NIR_FALSE' + if isinstance(self.value, (int, long)): - return hex(struct.unpack('I', struct.pack('i', self.value))[0]) ++ return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 'I', self.value))[0]) + elif isinstance(self.value, float): + return hex(struct.unpack('I', struct.pack('f', self.value))[0]) + else: + assert False + + _var_name_re = re.compile(r"(?P#)?(?P\w+)(?:@(?P\w+))?") + + class Variable(Value): + def __init__(self, val, name, varset): + Value.__init__(self, name, "variable") + + m = _var_name_re.match(val) + assert m and m.group('name') is not None + + self.var_name = m.group('name') + self.is_constant = m.group('const') is not None + self.required_type = m.group('type') + + if self.required_type is not None: + assert self.required_type in ('float', 'bool', 'int', 'unsigned') + + self.index = varset[self.var_name] + + class Expression(Value): + def __init__(self, expr, name_base, varset): + Value.__init__(self, name_base, "expression") + assert isinstance(expr, tuple) + + self.opcode = expr[0] + self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset) + for (i, src) in enumerate(expr[1:]) ] + + def render(self): + srcs = "\n".join(src.render() for src in self.sources) + return srcs + super(Expression, self).render() + + _optimization_ids = itertools.count() + + condition_list = ['true'] + + class SearchAndReplace(object): + def __init__(self, transform): + self.id = _optimization_ids.next() + + search = transform[0] + replace = transform[1] + if len(transform) > 2: + self.condition = transform[2] + else: + self.condition = 'true' + + if self.condition not in condition_list: + condition_list.append(self.condition) + self.condition_index = condition_list.index(self.condition) + + varset = VarSet() + if isinstance(search, Expression): + self.search = search + else: + self.search = Expression(search, "search{0}".format(self.id), varset) + + varset.lock() + + if isinstance(replace, Value): + self.replace = replace + else: + self.replace = Value.create(replace, "replace{0}".format(self.id), varset) + + _algebraic_pass_template = mako.template.Template(""" + #include "nir.h" + #include "nir_search.h" + + #ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS + #define NIR_OPT_ALGEBRAIC_STRUCT_DEFS + + struct transform { + const nir_search_expression *search; + const nir_search_value *replace; + unsigned condition_offset; + }; + + struct opt_state { + void *mem_ctx; + bool progress; + const bool *condition_flags; + }; + + #endif + + % for (opcode, xform_list) in xform_dict.iteritems(): + % for xform in xform_list: + ${xform.search.render()} + ${xform.replace.render()} + % endfor + + static const struct transform ${pass_name}_${opcode}_xforms[] = { + % for xform in xform_list: + { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} }, + % endfor + }; + % endfor + + static bool + ${pass_name}_block(nir_block *block, void *void_state) + { + struct opt_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!alu->dest.dest.is_ssa) + continue; + + switch (alu->op) { + % for opcode in xform_dict.keys(): + case nir_op_${opcode}: + for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) { + const struct transform *xform = &${pass_name}_${opcode}_xforms[i]; + if (state->condition_flags[xform->condition_offset] && + nir_replace_instr(alu, 
xform->search, xform->replace, + state->mem_ctx)) { + state->progress = true; + break; + } + } + break; + % endfor + default: + break; + } + } + + return true; + } + + static bool + ${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags) + { + struct opt_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + state.condition_flags = condition_flags; + + nir_foreach_block(impl, ${pass_name}_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; + } + + + bool + ${pass_name}(nir_shader *shader) + { + bool progress = false; + bool condition_flags[${len(condition_list)}]; + const nir_shader_compiler_options *options = shader->options; + + % for index, condition in enumerate(condition_list): + condition_flags[${index}] = ${condition}; + % endfor + + nir_foreach_function(shader, function) { + if (function->impl) + progress |= ${pass_name}_impl(function->impl, condition_flags); + } + + return progress; + } + """) + + class AlgebraicPass(object): + def __init__(self, pass_name, transforms): + self.xform_dict = {} + self.pass_name = pass_name + + for xform in transforms: + if not isinstance(xform, SearchAndReplace): + xform = SearchAndReplace(xform) + + if xform.search.opcode not in self.xform_dict: + self.xform_dict[xform.search.opcode] = [] + + self.xform_dict[xform.search.opcode].append(xform) + + def render(self): + return _algebraic_pass_template.render(pass_name=self.pass_name, + xform_dict=self.xform_dict, + condition_list=condition_list) diff --cc src/compiler/nir/nir_builder.h index 00000000000,88ba3a1c269..1c7c78acae8 mode 000000,100644..100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@@ -1,0 -1,364 +1,441 @@@ + /* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + + #ifndef NIR_BUILDER_H + #define NIR_BUILDER_H + + #include "nir_control_flow.h" + + struct exec_list; + + typedef struct nir_builder { + nir_cursor cursor; + + nir_shader *shader; + nir_function_impl *impl; + } nir_builder; + + static inline void + nir_builder_init(nir_builder *build, nir_function_impl *impl) + { + memset(build, 0, sizeof(*build)); + build->impl = impl; + build->shader = impl->function->shader; + } + + static inline void + nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options) + { + build->shader = nir_shader_create(mem_ctx, stage, options); + nir_function *func = nir_function_create(build->shader, "main"); + build->impl = nir_function_impl_create(func); + build->cursor = nir_after_cf_list(&build->impl->body); + } + + static inline void + nir_builder_instr_insert(nir_builder *build, nir_instr *instr) + { + nir_instr_insert(build->cursor, instr); + + /* Move the cursor forward. */ + build->cursor = nir_after_instr(instr); + } + + static inline void + nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) + { + nir_cf_node_insert(build->cursor, cf); + } + ++static inline nir_ssa_def * ++nir_ssa_undef(nir_builder *build, unsigned num_components) ++{ ++ nir_ssa_undef_instr *undef = ++ nir_ssa_undef_instr_create(build->shader, num_components); ++ if (!undef) ++ return NULL; ++ ++ nir_instr_insert(nir_before_block(nir_start_block(build->impl)), ++ &undef->instr); ++ ++ return &undef->def; ++} ++ + static inline nir_ssa_def * + nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) + { + nir_load_const_instr *load_const = + nir_load_const_instr_create(build->shader, num_components); + if (!load_const) + return NULL; + + load_const->value = value; + + nir_builder_instr_insert(build, &load_const->instr); + + return &load_const->def; + } + + static inline nir_ssa_def * + nir_imm_float(nir_builder *build, float x) + { + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.f[0] = x; + + return nir_build_imm(build, 1, v); + } + + static inline nir_ssa_def * + nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) + { + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.f[0] = x; + v.f[1] = y; + v.f[2] = z; + v.f[3] = w; + + return nir_build_imm(build, 4, v); + } + + static inline nir_ssa_def * + nir_imm_int(nir_builder *build, int x) + { + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.i[0] = x; + + return nir_build_imm(build, 1, v); + } + + static inline nir_ssa_def * + nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w) + { + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.i[0] = x; + v.i[1] = y; + v.i[2] = z; + v.i[3] = w; + + return nir_build_imm(build, 4, v); + } + + static inline nir_ssa_def * + nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) + { + const nir_op_info *op_info = &nir_op_infos[op]; + nir_alu_instr *instr = nir_alu_instr_create(build->shader, op); + if (!instr) + return NULL; + + instr->src[0].src = nir_src_for_ssa(src0); + if (src1) + instr->src[1].src = nir_src_for_ssa(src1); + if (src2) + instr->src[2].src = nir_src_for_ssa(src2); + if (src3) + instr->src[3].src = nir_src_for_ssa(src3); + + /* Guess the number of components the destination temporary should have + * based on our input sizes, if it's not fixed for the op. 
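+ * For example (a sketch of the rule, not an exhaustive list): nir_fadd
+ * on two vec3 sources gets a vec3 destination, while an op with a fixed
+ * output size such as nir_fdot3 stays scalar regardless of source width.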
+ */ + unsigned num_components = op_info->output_size; + if (num_components == 0) { + for (unsigned i = 0; i < op_info->num_inputs; i++) { + if (op_info->input_sizes[i] == 0) + num_components = MAX2(num_components, + instr->src[i].src.ssa->num_components); + } + } + assert(num_components != 0); + + /* Make sure we don't swizzle from outside of our source vector (like if a + * scalar value was passed into a multiply with a vector). + */ + for (unsigned i = 0; i < op_info->num_inputs; i++) { + for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) { + instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1; + } + } + + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; + + nir_builder_instr_insert(build, &instr->instr); + + return &instr->dest.dest.ssa; + } + + #define ALU1(op) \ + static inline nir_ssa_def * \ + nir_##op(nir_builder *build, nir_ssa_def *src0) \ + { \ + return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \ + } + + #define ALU2(op) \ + static inline nir_ssa_def * \ + nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \ + { \ + return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \ + } + + #define ALU3(op) \ + static inline nir_ssa_def * \ + nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2) \ + { \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \ + } + + #define ALU4(op) \ + static inline nir_ssa_def * \ + nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \ + { \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \ + } + + #include "nir_builder_opcodes.h" + + static inline nir_ssa_def * + nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components) + { + switch (num_components) { + case 4: + return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]); + case 3: + return nir_vec3(build, comp[0], comp[1], comp[2]); + case 2: + return nir_vec2(build, comp[0], comp[1]); + case 1: + return comp[0]; + default: + unreachable("bad component count"); + return NULL; + } + } + + /** + * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def. + */ + static inline nir_ssa_def * + nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) + { + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; + } + + static inline nir_ssa_def * + nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) + { + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; + } + + /** + * Construct an fmov or imov that reswizzles the source's components. + */ + static inline nir_ssa_def * + nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], + unsigned num_components, bool use_fmov) + { + nir_alu_src alu_src = { NIR_SRC_INIT }; + alu_src.src = nir_src_for_ssa(src); + for (unsigned i = 0; i < num_components; i++) + alu_src.swizzle[i] = swiz[i]; + + return use_fmov ? 
nir_fmov_alu(build, alu_src, num_components) : + nir_imov_alu(build, alu_src, num_components); + } + ++/* Selects the right fdot given the number of components in each source. */ ++static inline nir_ssa_def * ++nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) ++{ ++ assert(src0->num_components == src1->num_components); ++ switch (src0->num_components) { ++ case 1: return nir_fmul(build, src0, src1); ++ case 2: return nir_fdot2(build, src0, src1); ++ case 3: return nir_fdot3(build, src0, src1); ++ case 4: return nir_fdot4(build, src0, src1); ++ default: ++ unreachable("bad component size"); ++ } ++ ++ return NULL; ++} ++ + static inline nir_ssa_def * + nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) + { + unsigned swizzle[4] = {c, c, c, c}; + return nir_swizzle(b, def, swizzle, 1, false); + } + + /** + * Turns a nir_src into a nir_ssa_def * so it can be passed to + * nir_build_alu()-based builder calls. + * + * See nir_ssa_for_alu_src() for alu instructions. + */ + static inline nir_ssa_def * + nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) + { + if (src.is_ssa && src.ssa->num_components == num_components) + return src.ssa; + + nir_alu_src alu = { NIR_SRC_INIT }; + alu.src = src; + for (int j = 0; j < 4; j++) + alu.swizzle[j] = j; + + return nir_imov_alu(build, alu, num_components); + } + + /** + * Similar to nir_ssa_for_src(), but for alu src's, respecting the + * nir_alu_src's swizzle. + */ + static inline nir_ssa_def * + nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) + { + static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; + nir_alu_src *src = &instr->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); + + if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) + return src->src.ssa; + + return nir_imov_alu(build, *src, num_components); + } + + static inline nir_ssa_def * + nir_load_var(nir_builder *build, nir_variable *var) + { + const unsigned num_components = glsl_get_vector_elements(var->type); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var); + load->num_components = num_components; + load->variables[0] = nir_deref_var_create(load, var); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; + } + + static inline void + nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, + unsigned writemask) + { + const unsigned num_components = glsl_get_vector_elements(var->type); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); + store->num_components = num_components; + store->const_index[0] = writemask; + store->variables[0] = nir_deref_var_create(store, var); + store->src[0] = nir_src_for_ssa(value); + nir_builder_instr_insert(build, &store->instr); + } + ++static inline void ++nir_store_deref_var(nir_builder *build, nir_deref_var *deref, ++ nir_ssa_def *value, unsigned writemask) ++{ ++ const unsigned num_components = ++ glsl_get_vector_elements(nir_deref_tail(&deref->deref)->type); ++ ++ nir_intrinsic_instr *store = ++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); ++ store->num_components = num_components; ++ store->const_index[0] = writemask & ((1 << num_components) - 1); ++ store->variables[0] = 
nir_deref_as_var(nir_copy_deref(store, &deref->deref)); ++ store->src[0] = nir_src_for_ssa(value); ++ nir_builder_instr_insert(build, &store->instr); ++} ++ ++static inline void ++nir_copy_deref_var(nir_builder *build, nir_deref_var *dest, nir_deref_var *src) ++{ ++ assert(nir_deref_tail(&dest->deref)->type == ++ nir_deref_tail(&src->deref)->type); ++ ++ nir_intrinsic_instr *copy = ++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); ++ copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); ++ copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); ++ nir_builder_instr_insert(build, &copy->instr); ++} ++ ++static inline void ++nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src) ++{ ++ nir_intrinsic_instr *copy = ++ nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_var); ++ copy->variables[0] = nir_deref_var_create(copy, dest); ++ copy->variables[1] = nir_deref_var_create(copy, src); ++ nir_builder_instr_insert(build, &copy->instr); ++} ++ + static inline nir_ssa_def * + nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) + { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op); + load->num_components = nir_intrinsic_infos[op].dest_components; + load->const_index[0] = index; + nir_ssa_dest_init(&load->instr, &load->dest, + nir_intrinsic_infos[op].dest_components, NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; + } + ++static inline void ++nir_jump(nir_builder *build, nir_jump_type jump_type) ++{ ++ nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type); ++ nir_builder_instr_insert(build, &jump->instr); ++} ++ + #endif /* NIR_BUILDER_H */ diff --cc src/compiler/nir/nir_clone.c index 00000000000,5eff743d835..bc6df56b753 mode 000000,100644..100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@@ -1,0 -1,659 +1,711 @@@ + /* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + #include "nir.h" + #include "nir_control_flow_private.h" + + /* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + + typedef struct { ++ /* True if we are cloning an entire shader.
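++ * If false we are cloning a single function_impl in place, and
++ * pointers to shader-level objects (non-local variables, global
++ * registers, functions) are left as-is instead of being remapped.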
*/ ++ bool global_clone; ++ + /* maps orig ptr -> cloned ptr: */ - struct hash_table *ptr_table; ++ struct hash_table *remap_table; + + /* List of phi sources. */ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; + } clone_state; + + static void -init_clone_state(clone_state *state) ++init_clone_state(clone_state *state, bool global) + { - state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); ++ state->global_clone = global; ++ state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, ++ _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); + } + + static void + free_clone_state(clone_state *state) + { - _mesa_hash_table_destroy(state->ptr_table, NULL); ++ _mesa_hash_table_destroy(state->remap_table, NULL); + } + -static void * -lookup_ptr(clone_state *state, const void *ptr) ++static inline void * ++_lookup_ptr(clone_state *state, const void *ptr, bool global) + { + struct hash_entry *entry; + + if (!ptr) + return NULL; + - entry = _mesa_hash_table_search(state->ptr_table, ptr); ++ if (!state->global_clone && global) ++ return (void *)ptr; ++ ++ entry = _mesa_hash_table_search(state->remap_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; + } + + static void -store_ptr(clone_state *state, void *nptr, const void *ptr) ++add_remap(clone_state *state, void *nptr, const void *ptr) ++{ ++ _mesa_hash_table_insert(state->remap_table, ptr, nptr); ++} ++ ++static void * ++remap_local(clone_state *state, const void *ptr) + { - _mesa_hash_table_insert(state->ptr_table, ptr, nptr); ++ return _lookup_ptr(state, ptr, false); + } + -static nir_constant * -clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) ++static void * ++remap_global(clone_state *state, const void *ptr) ++{ ++ return _lookup_ptr(state, ptr, true); ++} ++ ++static nir_register * ++remap_reg(clone_state *state, const nir_register *reg) ++{ ++ return _lookup_ptr(state, reg, reg->is_global); ++} ++ ++static nir_variable * ++remap_var(clone_state *state, const nir_variable *var) ++{ ++ return _lookup_ptr(state, var, var->data.mode != nir_var_local); ++} ++ ++nir_constant * ++nir_constant_clone(const nir_constant *c, nir_variable *nvar) + { + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); + for (unsigned i = 0; i < c->num_elements; i++) { - nc->elements[i] = clone_constant(state, c->elements[i], nvar); ++ nc->elements[i] = nir_constant_clone(c->elements[i], nvar); + } + + return nc; + } + + /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ + static nir_variable * + clone_variable(clone_state *state, const nir_variable *var) + { + nir_variable *nvar = rzalloc(state->ns, nir_variable); - store_ptr(state, nvar, var); ++ add_remap(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = - clone_constant(state, var->constant_initializer, nvar); ++ 
nir_constant_clone(var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; + } + + /* clone list of nir_variable: */ + static void + clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) + { + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } + } + + /* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ + static nir_register * + clone_register(clone_state *state, const nir_register *reg) + { + nir_register *nreg = rzalloc(state->ns, nir_register); - store_ptr(state, nreg, reg); ++ add_remap(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + list_inithead(&nreg->defs); + list_inithead(&nreg->if_uses); + + return nreg; + } + + /* clone list of nir_register: */ + static void + clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) + { + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } + } + + static void + __clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) + { + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { - nsrc->ssa = lookup_ptr(state, src->ssa); ++ nsrc->ssa = remap_local(state, src->ssa); + } else { - nsrc->reg.reg = lookup_ptr(state, src->reg.reg); ++ nsrc->reg.reg = remap_reg(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } + } + + static void + __clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) + { + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); - store_ptr(state, &ndst->ssa, &dst->ssa); ++ add_remap(state, &ndst->ssa, &dst->ssa); + } else { - ndst->reg.reg = lookup_ptr(state, dst->reg.reg); ++ ndst->reg.reg = remap_reg(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } + } + + static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + + static nir_deref_var * + clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) + { - nir_variable *nvar = lookup_ptr(state, dvar->var); ++ nir_variable *nvar = remap_var(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; + } + + static nir_deref_array * + clone_deref_array(clone_state *state, const nir_deref_array *darr, + nir_instr *ninstr, nir_deref *parent) + { + nir_deref_array *ndarr = nir_deref_array_create(parent); + + 
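+ /* An indirect array index carries a nir_src of its own, so it is
+ * cloned through __clone_src() below rather than copied directly. */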
ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; + } + + static nir_deref_struct * + clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) + { + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; + } + + static nir_deref * + clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) + { + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } + } + + static nir_alu_instr * + clone_alu(clone_state *state, const nir_alu_instr *alu) + { + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + sizeof(nalu->src[i].swizzle)); + } + + return nalu; + } + + static nir_intrinsic_instr * + clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) + { + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; + } + + static nir_load_const_instr * + clone_load_const(clone_state *state, const nir_load_const_instr *lc) + { + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + - store_ptr(state, &nlc->def, &lc->def); ++ add_remap(state, &nlc->def, &lc->def); + + return nlc; + } + + static nir_ssa_undef_instr * + clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) + { + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + - store_ptr(state, &nsa->def, &sa->def); ++ add_remap(state, &nsa->def, &sa->def); + + return nsa; + } + + static nir_tex_instr * + clone_tex(clone_state *state, const nir_tex_instr *tex) + { + nir_tex_instr *ntex = 
nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; ++ ntex->texture_index = tex->texture_index; ++ ntex->texture_array_size = tex->texture_array_size; ++ if (tex->texture) ++ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); + ntex->sampler_index = tex->sampler_index; - ntex->sampler_array_size = tex->sampler_array_size; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; + } + + static nir_phi_instr * + clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) + { + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instrutions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources. + */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. */ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_insert_instr handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. + */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. We'll walk this list and fix up + * sources at the very end of clone_function_impl. 
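+ * (That final pass remaps both src->pred and src->src.ssa and moves the
+ * use link onto the cloned SSA def's use list.)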
+ */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; + } + + static nir_jump_instr * + clone_jump(clone_state *state, const nir_jump_instr *jmp) + { + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; + } + + static nir_call_instr * + clone_call(clone_state *state, const nir_call_instr *call) + { - nir_function *ncallee = lookup_ptr(state, call->callee); ++ nir_function *ncallee = remap_global(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; + } + + static nir_instr * + clone_instr(clone_state *state, const nir_instr *instr) + { + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } + } + + static nir_block * + clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) + { + /* Don't actually create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; It should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ - store_ptr(state, nblk, blk); ++ add_remap(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up. 
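+ * (clone_phi() above inserts the instruction itself and stashes the
+ * copied sources on state->phi_srcs for that later fix-up.)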
+ */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; + } + + static void + clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + + static nir_if * + clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) + { + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; + } + + static nir_loop * + clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) + { + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; + } + + /* clone list of nir_cf_node: */ + static void + clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) + { + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } + } + + static nir_function_impl * -clone_function_impl(clone_state *state, const nir_function_impl *fi, - nir_function *nfxn) ++clone_function_impl(clone_state *state, const nir_function_impl *fi) + { - nir_function_impl *nfi = nir_function_impl_create(nfxn); ++ nir_function_impl *nfi = nir_function_impl_create_bare(state->ns); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { - nfi->params[i] = lookup_ptr(state, fi->params[i]); ++ nfi->params[i] = remap_local(state, fi->params[i]); + } - nfi->return_var = lookup_ptr(state, fi->return_var); ++ nfi->return_var = remap_local(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. 
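+ * (By this point every block and SSA def in the body has an entry in
+ * the remap table, so the lookups below cannot fail.)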
+ */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { - src->pred = lookup_ptr(state, src->pred); ++ src->pred = remap_local(state, src->pred); + assert(src->src.is_ssa); - src->src.ssa = lookup_ptr(state, src->src.ssa); ++ src->src.ssa = remap_local(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; + } + ++nir_function_impl * ++nir_function_impl_clone(const nir_function_impl *fi) ++{ ++ clone_state state; ++ init_clone_state(&state, false); ++ ++ /* We use the same shader */ ++ state.ns = fi->function->shader; ++ ++ nir_function_impl *nfi = clone_function_impl(&state, fi); ++ ++ free_clone_state(&state); ++ ++ return nfi; ++} ++ + static nir_function * + clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) + { + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + /* Needed for call instructions */ - store_ptr(state, nfxn, fxn); ++ add_remap(state, nfxn, fxn); + + nfxn->num_params = fxn->num_params; + nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params); + memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params); + + nfxn->return_type = fxn->return_type; + + /* At first glance, it looks like we should clone the function_impl here. + * However, call instructions need to be able to reference at least the + * function and those will get processed as we clone the function_impl's. + * We stop here and do function_impls as a second pass. + */ + + return nfxn; + } + + nir_shader * + nir_shader_clone(void *mem_ctx, const nir_shader *s) + { + clone_state state; - init_clone_state(&state); ++ init_clone_state(&state, true); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); ++ clone_var_list(&state, &ns->shared, &s->shared); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all functions are cloned can we clone the actual function + * implementations. This is because nir_call_instr's need to reference the + * functions of other functions and we don't know what order the functions + * will have in the list. 
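+ * (A call in an earlier function may target a function that only
+ * appears later in the list, so every nir_function must already be in
+ * the remap table before any function_impl is cloned.)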
+ */ + nir_foreach_function(s, fxn) { - nir_function *nfxn = lookup_ptr(&state, fxn); - clone_function_impl(&state, fxn->impl, nfxn); ++ nir_function *nfxn = remap_global(&state, fxn); ++ nfxn->impl = clone_function_impl(&state, fxn->impl); ++ nfxn->impl->function = nfxn; + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; ++ ns->num_shared = s->num_shared; + + free_clone_state(&state); + + return ns; + } diff --cc src/compiler/nir/nir_control_flow.c index 00000000000,96395a41615..33b06d0cc84 mode 000000,100644..100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@@ -1,0 -1,808 +1,820 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir_control_flow_private.h" + + /** + * \name Control flow modification + * + * These functions modify the control flow tree while keeping the control flow + * graph up-to-date. The invariants respected are: + * 1. Each then statement, else statement, or loop body must have at least one + * control flow node. + * 2. Each if-statement and loop must have one basic block before it and one + * after. + * 3. Two basic blocks cannot be directly next to each other. + * 4. If a basic block has a jump instruction, there must be only one and it + * must be at the end of the block. + * 5. The CFG must always be connected - this means that we must insert a fake + * CFG edge for loops with no break statement. + * + * The purpose of the second one is so that we have places to insert code during + * GCM, as well as eliminating the possibility of critical edges. 
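+ *
+ * For example, nir_cf_node_insert() below keeps invariants #2 and #3 by
+ * splitting the block at the cursor, so an inserted if or loop always
+ * ends up with a basic block on either side of it.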
+ */ + /*@{*/ + + static bool + block_ends_in_jump(nir_block *block) + { + return !exec_list_is_empty(&block->instr_list) && + nir_block_last_instr(block)->type == nir_instr_type_jump; + } + + static inline void + block_add_pred(nir_block *block, nir_block *pred) + { + _mesa_set_add(block->predecessors, pred); + } + + static inline void + block_remove_pred(nir_block *block, nir_block *pred) + { + struct set_entry *entry = _mesa_set_search(block->predecessors, pred); + + assert(entry); + + _mesa_set_remove(block->predecessors, entry); + } + + static void + link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) + { + pred->successors[0] = succ1; + if (succ1 != NULL) + block_add_pred(succ1, pred); + + pred->successors[1] = succ2; + if (succ2 != NULL) + block_add_pred(succ2, pred); + } + + static void + unlink_blocks(nir_block *pred, nir_block *succ) + { + if (pred->successors[0] == succ) { + pred->successors[0] = pred->successors[1]; + pred->successors[1] = NULL; + } else { + assert(pred->successors[1] == succ); + pred->successors[1] = NULL; + } + + block_remove_pred(succ, pred); + } + + static void + unlink_block_successors(nir_block *block) + { + if (block->successors[1] != NULL) + unlink_blocks(block, block->successors[1]); + if (block->successors[0] != NULL) + unlink_blocks(block, block->successors[0]); + } + + static void + link_non_block_to_block(nir_cf_node *node, nir_block *block) + { + if (node->type == nir_cf_node_if) { + /* + * We're trying to link an if to a block after it; this just means linking + * the last block of the then and else branches. + */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *last_then = nir_if_last_then_node(if_stmt); + assert(last_then->type == nir_cf_node_block); + nir_block *last_then_block = nir_cf_node_as_block(last_then); + + nir_cf_node *last_else = nir_if_last_else_node(if_stmt); + assert(last_else->type == nir_cf_node_block); + nir_block *last_else_block = nir_cf_node_as_block(last_else); + + if (!block_ends_in_jump(last_then_block)) { + unlink_block_successors(last_then_block); + link_blocks(last_then_block, block, NULL); + } + + if (!block_ends_in_jump(last_else_block)) { + unlink_block_successors(last_else_block); + link_blocks(last_else_block, block, NULL); + } + } else { + assert(node->type == nir_cf_node_loop); + + /* + * We can only get to this codepath if we're inserting a new loop, or + * at least a loop with no break statements; we can't insert break + * statements into a loop when we haven't inserted it into the CFG + * because we wouldn't know which block comes after the loop + * and therefore, which block should be the successor of the block with + * the break). Therefore, we need to insert a fake edge (see invariant + * #5). + */ + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = block; + block_add_pred(block, last_block); + } + } + + static void + link_block_to_non_block(nir_block *block, nir_cf_node *node) + { + if (node->type == nir_cf_node_if) { + /* + * We're trying to link a block to an if after it; this just means linking + * the block to the first block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *first_then = nir_if_first_then_node(if_stmt); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(if_stmt); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + unlink_block_successors(block); + link_blocks(block, first_then_block, first_else_block); + } else { + /* + * For similar reasons as the corresponding case in + * link_non_block_to_block(), don't worry about if the loop header has + * any predecessors that need to be unlinked. + */ + + assert(node->type == nir_cf_node_loop); + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *loop_header = nir_loop_first_cf_node(loop); + assert(loop_header->type == nir_cf_node_block); + nir_block *loop_header_block = nir_cf_node_as_block(loop_header); + + unlink_block_successors(block); + link_blocks(block, loop_header_block, NULL); + } + + } + + /** + * Replace a block's successor with a different one. + */ + static void + replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ) + { + if (block->successors[0] == old_succ) { + block->successors[0] = new_succ; + } else { + assert(block->successors[1] == old_succ); + block->successors[1] = new_succ; + } + + block_remove_pred(old_succ, block); + block_add_pred(new_succ, block); + } + + /** + * Takes a basic block and inserts a new empty basic block before it, making its + * predecessors point to the new block. This essentially splits the block into + * an empty header and a body so that another non-block CF node can be inserted + * between the two. Note that this does *not* link the two basic blocks, so + * some kind of cleanup *must* be performed after this call. + */ + + static nir_block * + split_block_beginning(nir_block *block) + { + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node); + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + replace_successor(pred, block, new_block); + } + + /* Any phi nodes must stay part of the new block, or else their + * sourcse will be messed up. This will reverse the order of the phi's, but + * order shouldn't matter. 
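+ * (The phis at the top of a block are effectively an unordered set;
+ * only their predecessor/value pairs are significant.)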
+ */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + exec_node_remove(&instr->node); + instr->block = new_block; + exec_list_push_head(&new_block->instr_list, &instr->node); + } + + return new_block; + } + + static void + rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred) + { + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi, src) { + if (src->pred == old_pred) { + src->pred = new_pred; + break; + } + } + } + } + + static void + insert_phi_undef(nir_block *block, nir_block *pred) + { + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(ralloc_parent(phi), + phi->dest.ssa.num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = pred; + src->src.parent_instr = &phi->instr; + src->src.is_ssa = true; + src->src.ssa = &undef->def; + + list_addtail(&src->src.use_link, &undef->def.uses); + + exec_list_push_tail(&phi->srcs, &src->node); + } + } + + /** + * Moves the successors of source to the successors of dest, leaving both + * successors of source NULL. + */ + + static void + move_successors(nir_block *source, nir_block *dest) + { + nir_block *succ1 = source->successors[0]; + nir_block *succ2 = source->successors[1]; + + if (succ1) { + unlink_blocks(source, succ1); + rewrite_phi_preds(succ1, source, dest); + } + + if (succ2) { + unlink_blocks(source, succ2); + rewrite_phi_preds(succ2, source, dest); + } + + unlink_block_successors(dest); + link_blocks(dest, succ1, succ2); + } + + /* Given a basic block with no successors that has been inserted into the + * control flow tree, gives it the successors it would normally have assuming + * it doesn't end in a jump instruction. Also inserts phi sources with undefs + * if necessary. 
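+ * (Each phi in a successor block needs exactly one source per
+ * predecessor, so the freshly created edge gets an undef source.)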
+ */ + static void + block_add_normal_succs(nir_block *block) + { + if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { + nir_cf_node *parent = block->cf_node.parent; + if (parent->type == nir_cf_node_if) { + nir_cf_node *next = nir_cf_node_next(parent); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + link_blocks(block, next_block, NULL); - } else { - assert(parent->type == nir_cf_node_loop); ++ } else if (parent->type == nir_cf_node_loop) { + nir_loop *loop = nir_cf_node_as_loop(parent); + + nir_cf_node *head = nir_loop_first_cf_node(loop); + assert(head->type == nir_cf_node_block); + nir_block *head_block = nir_cf_node_as_block(head); + + link_blocks(block, head_block, NULL); + insert_phi_undef(head_block, block); ++ } else { ++ assert(parent->type == nir_cf_node_function); ++ nir_function_impl *impl = nir_cf_node_as_function(parent); ++ link_blocks(block, impl->end_block, NULL); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next->type == nir_cf_node_if) { + nir_if *next_if = nir_cf_node_as_if(next); + + nir_cf_node *first_then = nir_if_first_then_node(next_if); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(next_if); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + link_blocks(block, first_then_block, first_else_block); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *next_loop = nir_cf_node_as_loop(next); + + nir_cf_node *first = nir_loop_first_cf_node(next_loop); + assert(first->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first); + + link_blocks(block, first_block, NULL); + insert_phi_undef(first_block, block); + } + } + } + + static nir_block * + split_block_end(nir_block *block) + { + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); + + if (block_ends_in_jump(block)) { + /* Figure out what successor block would've had if it didn't have a jump + * instruction, and make new_block have that successor. + */ + block_add_normal_succs(new_block); + } else { + move_successors(block, new_block); + } + + return new_block; + } + + static nir_block * + split_block_before_instr(nir_instr *instr) + { + assert(instr->type != nir_instr_type_phi); + nir_block *new_block = split_block_beginning(instr->block); + + nir_foreach_instr_safe(instr->block, cur_instr) { + if (cur_instr == instr) + break; + + exec_node_remove(&cur_instr->node); + cur_instr->block = new_block; + exec_list_push_tail(&new_block->instr_list, &cur_instr->node); + } + + return new_block; + } + + /* Splits a basic block at the point specified by the cursor. The "before" and + * "after" arguments are filled out with the blocks resulting from the split + * if non-NULL. Note that the "beginning" of the block is actually interpreted + * as before the first non-phi instruction, and it's illegal to split a block + * before a phi instruction. 
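+ * (See split_block_beginning() above for how the phis are kept with the
+ * block that inherits the predecessors.)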
+ */ + + static void + split_block_cursor(nir_cursor cursor, + nir_block **_before, nir_block **_after) + { + nir_block *before, *after; + switch (cursor.option) { + case nir_cursor_before_block: + after = cursor.block; + before = split_block_beginning(cursor.block); + break; + + case nir_cursor_after_block: + before = cursor.block; + after = split_block_end(cursor.block); + break; + + case nir_cursor_before_instr: + after = cursor.instr->block; + before = split_block_before_instr(cursor.instr); + break; + + case nir_cursor_after_instr: + /* We lower this to split_block_before_instr() so that we can keep the + * after-a-jump-instr case contained to split_block_end(). + */ + if (nir_instr_is_last(cursor.instr)) { + before = cursor.instr->block; + after = split_block_end(cursor.instr->block); + } else { + after = cursor.instr->block; + before = split_block_before_instr(nir_instr_next(cursor.instr)); + } + break; + + default: + unreachable("not reached"); + } + + if (_before) + *_before = before; + if (_after) + *_after = after; + } + + /** + * Inserts a non-basic block between two basic blocks and links them together. + */ + + static void + insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) + { + node->parent = before->cf_node.parent; + exec_node_insert_after(&before->cf_node.node, &node->node); + link_block_to_non_block(before, node); + link_non_block_to_block(node, after); + } + + /* walk up the control flow tree to find the innermost enclosed loop */ + static nir_loop * + nearest_loop(nir_cf_node *node) + { + while (node->type != nir_cf_node_loop) { + node = node->parent; + } + + return nir_cf_node_as_loop(node); + } + + /* + * update the CFG after a jump instruction has been added to the end of a block + */ + + void + nir_handle_add_jump(nir_block *block) + { + nir_instr *instr = nir_block_last_instr(block); + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + + unlink_block_successors(block); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); + + if (jump_instr->type == nir_jump_break || + jump_instr->type == nir_jump_continue) { + nir_loop *loop = nearest_loop(&block->cf_node); + + if (jump_instr->type == nir_jump_continue) { + nir_cf_node *first_node = nir_loop_first_cf_node(loop); + assert(first_node->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first_node); + link_blocks(block, first_block, NULL); + } else { + nir_cf_node *after = nir_cf_node_next(&loop->cf_node); + assert(after->type == nir_cf_node_block); + nir_block *after_block = nir_cf_node_as_block(after); + link_blocks(block, after_block, NULL); + + /* If we inserted a fake link, remove it */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + if (last_block->successors[1] != NULL) + unlink_blocks(last_block, after_block); + } + } else { + assert(jump_instr->type == nir_jump_return); + link_blocks(block, impl->end_block, NULL); + } + } + + static void + remove_phi_src(nir_block *block, nir_block *pred) + { + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(phi, src) { + if (src->pred == pred) { + list_del(&src->src.use_link); + exec_node_remove(&src->node); + } + } + } + } + + /* Removes the successor of a block with a jump, and inserts a fake edge for + * infinite loops. 
Note that the jump to be eliminated may be free-floating. + */ + + static void + unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors) + { + nir_block *next = block->successors[0]; + + if (block->successors[0]) + remove_phi_src(block->successors[0], block); + if (block->successors[1]) + remove_phi_src(block->successors[1], block); + + unlink_block_successors(block); + if (add_normal_successors) + block_add_normal_succs(block); + + /* If we've just removed a break, and the block we were jumping to (after + * the loop) now has zero predecessors, we've created a new infinite loop. + * + * NIR doesn't allow blocks (other than the start block) to have zero + * predecessors. In particular, dominance assumes all blocks are reachable. + * So, we insert a "fake link" by making successors[1] point after the loop. + * + * Note that we have to do this after unlinking/recreating the block's + * successors. If we removed a "break" at the end of the loop, then + * block == last_block, so block->successors[0] would already be "next", + * and adding a fake link would create two identical successors. Doing + * this afterward works, as we'll have changed block->successors[0] to + * be the top of the loop. + */ + if (type == nir_jump_break && next->predecessors->entries == 0) { + nir_loop *loop = + nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node)); + + /* insert fake link */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = next; + block_add_pred(next, last_block); + } + } + + void + nir_handle_remove_jump(nir_block *block, nir_jump_type type) + { + unlink_jump(block, type, true); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); + } + + static void + update_if_uses(nir_cf_node *node) + { + if (node->type != nir_cf_node_if) + return; + + nir_if *if_stmt = nir_cf_node_as_if(node); + + if_stmt->condition.parent_if = if_stmt; + if (if_stmt->condition.is_ssa) { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.ssa->if_uses); + } else { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.reg.reg->if_uses); + } + } + + /** + * Stitch two basic blocks together into one. The aggregate must have the same + * predecessors as the first and the same successors as the second. + */ + + static void + stitch_blocks(nir_block *before, nir_block *after) + { + /* + * We move after into before, so we have to deal with up to 2 successors vs. + * possibly a large number of predecessors. + * + * TODO: special case when before is empty and after isn't? 
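+ * (One way would be to repoint "before"'s predecessors at "after" and
+ * drop the empty block, but that is not done here.)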
+ */ + + if (block_ends_in_jump(before)) { + assert(exec_list_is_empty(&after->instr_list)); + if (after->successors[0]) + remove_phi_src(after->successors[0], after); + if (after->successors[1]) + remove_phi_src(after->successors[1], after); + unlink_block_successors(after); + exec_node_remove(&after->cf_node.node); + } else { + move_successors(after, before); + + foreach_list_typed(nir_instr, instr, node, &after->instr_list) { + instr->block = before; + } + + exec_list_append(&before->instr_list, &after->instr_list); + exec_node_remove(&after->cf_node.node); + } + } + + void + nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node) + { + nir_block *before, *after; + + split_block_cursor(cursor, &before, &after); + + if (node->type == nir_cf_node_block) { + nir_block *block = nir_cf_node_as_block(node); + exec_node_insert_after(&before->cf_node.node, &block->cf_node.node); + block->cf_node.parent = before->cf_node.parent; + /* stitch_blocks() assumes that any block that ends with a jump has + * already been setup with the correct successors, so we need to set + * up jumps here as the block is being inserted. + */ + if (block_ends_in_jump(block)) + nir_handle_add_jump(block); + + stitch_blocks(block, after); + stitch_blocks(before, block); + } else { + update_if_uses(node); + insert_non_block(before, node, after); + } + } + + static bool + replace_ssa_def_uses(nir_ssa_def *def, void *void_impl) + { + nir_function_impl *impl = void_impl; + void *mem_ctx = ralloc_parent(impl); + + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(mem_ctx, def->num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def)); + return true; + } + + static void + cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl) + { + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + /* We need to walk the instructions and clean up defs/uses */ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_jump) { + nir_jump_type jump_type = nir_instr_as_jump(instr)->type; + unlink_jump(block, jump_type, false); + } else { + nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl); + nir_instr_remove(instr); + } + } + break; + } + + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) + cleanup_cf_node(child, impl); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) + cleanup_cf_node(child, impl); + + list_del(&if_stmt->condition.use_link); + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + foreach_list_typed(nir_cf_node, child, node, &loop->body) + cleanup_cf_node(child, impl); + break; + } + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + foreach_list_typed(nir_cf_node, child, node, &impl->body) + cleanup_cf_node(child, impl); + break; + } + default: + unreachable("Invalid CF node type"); + } + } + + void + nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) + { + nir_block *block_begin, *block_end, *block_before, *block_after; + ++ if (nir_cursors_equal(begin, end)) { ++ exec_list_make_empty(&extracted->list); ++ extracted->impl = NULL; /* we shouldn't need this */ ++ return; ++ } ++ + /* In the case where begin points to an instruction in some basic block and + * end points to the end of the same basic block, we rely on the fact that + * splitting on an instruction 
moves earlier instructions into a new basic + * block. If the later instructions were moved instead, then the end cursor + * would be pointing to the same place that begin used to point to, which + * is obviously not what we want. + */ + split_block_cursor(begin, &block_before, &block_begin); + split_block_cursor(end, &block_end, &block_after); + + extracted->impl = nir_cf_node_get_function(&block_begin->cf_node); + exec_list_make_empty(&extracted->list); + + /* Dominance and other block-related information is toast. */ + nir_metadata_preserve(extracted->impl, nir_metadata_none); + + nir_cf_node *cf_node = &block_begin->cf_node; + nir_cf_node *cf_node_end = &block_end->cf_node; + while (true) { + nir_cf_node *next = nir_cf_node_next(cf_node); + + exec_node_remove(&cf_node->node); + cf_node->parent = NULL; + exec_list_push_tail(&extracted->list, &cf_node->node); + + if (cf_node == cf_node_end) + break; + + cf_node = next; + } + + stitch_blocks(block_before, block_after); + } + + void + nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) + { + nir_block *before, *after; + ++ if (exec_list_is_empty(&cf_list->list)) ++ return; ++ + split_block_cursor(cursor, &before, &after); + + foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { + exec_node_remove(&node->node); + node->parent = before->cf_node.parent; + exec_node_insert_node_before(&after->cf_node.node, &node->node); + } + + stitch_blocks(before, + nir_cf_node_as_block(nir_cf_node_next(&before->cf_node))); + stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)), + after); + } + + void + nir_cf_delete(nir_cf_list *cf_list) + { + foreach_list_typed(nir_cf_node, node, node, &cf_list->list) { + cleanup_cf_node(node, cf_list->impl); + } + } diff --cc src/compiler/nir/nir_dominance.c index 00000000000,b345b85e8a0..d95f3968074 mode 000000,100644..100644 --- a/src/compiler/nir/nir_dominance.c +++ b/src/compiler/nir/nir_dominance.c @@@ -1,0 -1,350 +1,354 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + + /* + * Implements the algorithms for computing the dominance tree and the + * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, + * Harvey, and Kennedy. 
+ */ + + typedef struct { + nir_function_impl *impl; + bool progress; + } dom_state; + + static bool + init_block_cb(nir_block *block, void *_state) + { + dom_state *state = (dom_state *) _state; + if (block == nir_start_block(state->impl)) + block->imm_dom = block; + else + block->imm_dom = NULL; + block->num_dom_children = 0; + + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + _mesa_set_remove(block->dom_frontier, entry); + } + + return true; + } + + static nir_block * + intersect(nir_block *b1, nir_block *b2) + { + while (b1 != b2) { + /* + * Note, the comparisons here are the opposite of what the paper says + * because we index blocks from beginning -> end (i.e. reverse + * post-order) instead of post-order like they assume. + */ + while (b1->index > b2->index) + b1 = b1->imm_dom; + while (b2->index > b1->index) + b2 = b2->imm_dom; + } + + return b1; + } + + static bool + calc_dominance_cb(nir_block *block, void *_state) + { + dom_state *state = (dom_state *) _state; + if (block == nir_start_block(state->impl)) + return true; + + nir_block *new_idom = NULL; + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + if (pred->imm_dom) { + if (new_idom) + new_idom = intersect(pred, new_idom); + else + new_idom = pred; + } + } + - assert(new_idom); + if (block->imm_dom != new_idom) { + block->imm_dom = new_idom; + state->progress = true; + } + + return true; + } + + static bool + calc_dom_frontier_cb(nir_block *block, void *state) + { + (void) state; + + if (block->predecessors->entries > 1) { + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *runner = (nir_block *) entry->key; ++ ++ /* Skip unreachable predecessors */ ++ if (runner->imm_dom == NULL) ++ continue; ++ + while (runner != block->imm_dom) { + _mesa_set_add(runner->dom_frontier, block); + runner = runner->imm_dom; + } + } + } + + return true; + } + + /* + * Compute each node's children in the dominance tree from the immediate + * dominator information. We do this in three stages: + * + * 1. Calculate the number of children each node has + * 2. Allocate arrays, setting the number of children to 0 again + * 3. For each node, add itself to its parent's list of children, using + * num_dom_children as an index - at the end of this step, num_dom_children + * for each node will be the same as it was at the end of step #1. 
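
The three passes described above can be made concrete on a plain array of toy blocks, outside NIR; the toy_block type and toy_calc_dom_children function below are hypothetical and only illustrate the counting trick (it assumes imm_dom is already computed and every num_dom_children starts at zero):

   #include <stdlib.h>

   struct toy_block {
      struct toy_block *imm_dom;
      struct toy_block **dom_children;
      unsigned num_dom_children;
   };

   static void
   toy_calc_dom_children(struct toy_block *blocks, unsigned n)
   {
      for (unsigned i = 0; i < n; i++)                    /* pass 1: count */
         if (blocks[i].imm_dom)
            blocks[i].imm_dom->num_dom_children++;

      for (unsigned i = 0; i < n; i++) {                  /* pass 2: allocate */
         blocks[i].dom_children =
            calloc(blocks[i].num_dom_children, sizeof(struct toy_block *));
         blocks[i].num_dom_children = 0;  /* reuse the count as an insert cursor */
      }

      for (unsigned i = 0; i < n; i++) {                  /* pass 3: fill */
         struct toy_block *p = blocks[i].imm_dom;
         if (p)
            p->dom_children[p->num_dom_children++] = &blocks[i];
      }
   }

After pass 3 every num_dom_children is back to its pass-1 value, exactly as the comment above states.
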
+ */ + + static bool + block_count_children(nir_block *block, void *state) + { + (void) state; + + if (block->imm_dom) + block->imm_dom->num_dom_children++; + + return true; + } + + static bool + block_alloc_children(nir_block *block, void *state) + { + void *mem_ctx = state; + + block->dom_children = ralloc_array(mem_ctx, nir_block *, + block->num_dom_children); + block->num_dom_children = 0; + + return true; + } + + static bool + block_add_child(nir_block *block, void *state) + { + (void) state; + + if (block->imm_dom) + block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block; + + return true; + } + + static void + calc_dom_children(nir_function_impl* impl) + { + void *mem_ctx = ralloc_parent(impl); + + nir_foreach_block(impl, block_count_children, NULL); + nir_foreach_block(impl, block_alloc_children, mem_ctx); + nir_foreach_block(impl, block_add_child, NULL); + } + + static void + calc_dfs_indicies(nir_block *block, unsigned *index) + { + block->dom_pre_index = (*index)++; + + for (unsigned i = 0; i < block->num_dom_children; i++) + calc_dfs_indicies(block->dom_children[i], index); + + block->dom_post_index = (*index)++; + } + + void + nir_calc_dominance_impl(nir_function_impl *impl) + { + if (impl->valid_metadata & nir_metadata_dominance) + return; + + nir_metadata_require(impl, nir_metadata_block_index); + + dom_state state; + state.impl = impl; + state.progress = true; + + nir_foreach_block(impl, init_block_cb, &state); + + while (state.progress) { + state.progress = false; + nir_foreach_block(impl, calc_dominance_cb, &state); + } + + nir_foreach_block(impl, calc_dom_frontier_cb, &state); + + nir_block *start_block = nir_start_block(impl); + start_block->imm_dom = NULL; + + calc_dom_children(impl); + + unsigned dfs_index = 0; + calc_dfs_indicies(start_block, &dfs_index); + } + + void + nir_calc_dominance(nir_shader *shader) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_calc_dominance_impl(function->impl); + } + } + + /** + * Computes the least common anscestor of two blocks. If one of the blocks + * is null, the other block is returned. 
+ */ + nir_block * + nir_dominance_lca(nir_block *b1, nir_block *b2) + { + if (b1 == NULL) + return b2; + + if (b2 == NULL) + return b1; + + assert(nir_cf_node_get_function(&b1->cf_node) == + nir_cf_node_get_function(&b2->cf_node)); + + assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & + nir_metadata_dominance); + + return intersect(b1, b2); + } + + /** + * Returns true if parent dominates child + */ + bool + nir_block_dominates(nir_block *parent, nir_block *child) + { + assert(nir_cf_node_get_function(&parent->cf_node) == + nir_cf_node_get_function(&child->cf_node)); + + assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & + nir_metadata_dominance); + + return child->dom_pre_index >= parent->dom_pre_index && + child->dom_post_index <= parent->dom_post_index; + } + + static bool + dump_block_dom(nir_block *block, void *state) + { + FILE *fp = state; + if (block->imm_dom) + fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index); + return true; + } + + void + nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp) + { + fprintf(fp, "digraph doms_%s {\n", impl->function->name); + nir_foreach_block(impl, dump_block_dom, fp); + fprintf(fp, "}\n\n"); + } + + void + nir_dump_dom_tree(nir_shader *shader, FILE *fp) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_dom_tree_impl(function->impl, fp); + } + } + + static bool + dump_block_dom_frontier(nir_block *block, void *state) + { + FILE *fp = state; + + fprintf(fp, "DF(%u) = {", block->index); + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + nir_block *df = (nir_block *) entry->key; + fprintf(fp, "%u, ", df->index); + } + fprintf(fp, "}\n"); + return true; + } + + void + nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp) + { + nir_foreach_block(impl, dump_block_dom_frontier, fp); + } + + void + nir_dump_dom_frontier(nir_shader *shader, FILE *fp) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_dom_frontier_impl(function->impl, fp); + } + } + + static bool + dump_block_succs(nir_block *block, void *state) + { + FILE *fp = state; + if (block->successors[0]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index); + if (block->successors[1]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index); + return true; + } + + void + nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp) + { + fprintf(fp, "digraph cfg_%s {\n", impl->function->name); + nir_foreach_block(impl, dump_block_succs, fp); + fprintf(fp, "}\n\n"); + } + + void + nir_dump_cfg(nir_shader *shader, FILE *fp) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_cfg_impl(function->impl, fp); + } + } diff --cc src/compiler/nir/nir_gather_info.c index 00000000000,00000000000..b84915c2d2b new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_gather_info.c @@@ -1,0 -1,0 +1,109 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be 
included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "nir.h" ++ ++static void ++gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader) ++{ ++ switch (instr->intrinsic) { ++ case nir_intrinsic_discard: ++ assert(shader->stage == MESA_SHADER_FRAGMENT); ++ shader->info.fs.uses_discard = true; ++ break; ++ ++ case nir_intrinsic_load_front_face: ++ case nir_intrinsic_load_vertex_id: ++ case nir_intrinsic_load_vertex_id_zero_base: ++ case nir_intrinsic_load_base_vertex: ++ case nir_intrinsic_load_instance_id: ++ case nir_intrinsic_load_sample_id: ++ case nir_intrinsic_load_sample_pos: ++ case nir_intrinsic_load_sample_mask_in: ++ case nir_intrinsic_load_primitive_id: ++ case nir_intrinsic_load_invocation_id: ++ case nir_intrinsic_load_local_invocation_id: ++ case nir_intrinsic_load_work_group_id: ++ case nir_intrinsic_load_num_work_groups: ++ shader->info.system_values_read |= ++ (1 << nir_system_value_from_intrinsic(instr->intrinsic)); ++ break; ++ ++ case nir_intrinsic_end_primitive: ++ case nir_intrinsic_end_primitive_with_counter: ++ assert(shader->stage == MESA_SHADER_GEOMETRY); ++ shader->info.gs.uses_end_primitive = 1; ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static void ++gather_tex_info(nir_tex_instr *instr, nir_shader *shader) ++{ ++ if (instr->op == nir_texop_tg4) ++ shader->info.uses_texture_gather = true; ++} ++ ++static bool ++gather_info_block(nir_block *block, void *shader) ++{ ++ nir_foreach_instr(block, instr) { ++ switch (instr->type) { ++ case nir_instr_type_intrinsic: ++ gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader); ++ break; ++ case nir_instr_type_tex: ++ gather_tex_info(nir_instr_as_tex(instr), shader); ++ break; ++ case nir_instr_type_call: ++ assert(!"nir_shader_gather_info only works if functions are inlined"); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ return true; ++} ++ ++void ++nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) ++{ ++ shader->info.inputs_read = 0; ++ foreach_list_typed(nir_variable, var, node, &shader->inputs) ++ shader->info.inputs_read |= nir_variable_get_io_mask(var, shader->stage); ++ ++ /* TODO: Some day we may need to add stream support to NIR */ ++ shader->info.outputs_written = 0; ++ foreach_list_typed(nir_variable, var, node, &shader->outputs) ++ shader->info.outputs_written |= nir_variable_get_io_mask(var, shader->stage); ++ ++ shader->info.system_values_read = 0; ++ foreach_list_typed(nir_variable, var, node, &shader->system_values) ++ shader->info.system_values_read |= nir_variable_get_io_mask(var, shader->stage); ++ ++ nir_foreach_block(entrypoint, gather_info_block, shader); ++} diff --cc src/compiler/nir/nir_inline_functions.c index 00000000000,00000000000..3cf83279053 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_inline_functions.c @@@ -1,0 -1,0 +1,153 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated 
documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "nir.h" ++#include "nir_builder.h" ++#include "nir_control_flow.h" ++ ++struct inline_functions_state { ++ struct set *inlined; ++ nir_builder builder; ++ bool progress; ++}; ++ ++static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); ++ ++static bool ++inline_functions_block(nir_block *block, void *void_state) ++{ ++ struct inline_functions_state *state = void_state; ++ ++ nir_builder *b = &state->builder; ++ ++ /* This is tricky. We're iterating over instructions in a block but, as ++ * we go, the block and its instruction list are being split into ++ * pieces. However, this *should* be safe since foreach_safe always ++ * stashes the next thing in the iteration. That next thing will ++ * properly get moved to the next block when it gets split, and we ++ * continue iterating there. 
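
The stash-the-next-pointer pattern that makes this safe can be shown on a generic singly linked list; toy_node, toy_foreach_safe, should_remove and unlink_node below are hypothetical names, not the exec_list or nir_foreach_instr_safe implementation:

   #include <stddef.h>

   struct toy_node { struct toy_node *next; };

   /* The next element is captured before the body runs, so the body may
    * unlink or relocate the current node without derailing the walk.
    */
   #define toy_foreach_safe(pos, head)                                   \
      for (struct toy_node *pos = (head),                                \
                           *pos##_next = pos ? pos->next : NULL;         \
           pos != NULL;                                                  \
           pos = pos##_next,                                             \
           pos##_next = pos ? pos->next : NULL)

   /* Usage sketch:
    *
    *    toy_foreach_safe(n, list_head) {
    *       if (should_remove(n))
    *          unlink_node(n);   // n's successor was already stashed
    *    }
    */
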
++ */ ++ nir_foreach_instr_safe(block, instr) { ++ if (instr->type != nir_instr_type_call) ++ continue; ++ ++ state->progress = true; ++ ++ nir_call_instr *call = nir_instr_as_call(instr); ++ assert(call->callee->impl); ++ ++ inline_function_impl(call->callee->impl, state->inlined); ++ ++ nir_function_impl *callee_copy = ++ nir_function_impl_clone(call->callee->impl); ++ ++ exec_list_append(&b->impl->locals, &callee_copy->locals); ++ exec_list_append(&b->impl->registers, &callee_copy->registers); ++ ++ b->cursor = nir_before_instr(&call->instr); ++ ++ /* Add copies of all in parameters */ ++ assert(call->num_params == callee_copy->num_params); ++ for (unsigned i = 0; i < callee_copy->num_params; i++) { ++ /* Only in or inout parameters */ ++ if (call->callee->params[i].param_type == nir_parameter_out) ++ continue; ++ ++ nir_copy_deref_var(b, nir_deref_var_create(b->shader, ++ callee_copy->params[i]), ++ call->params[i]); ++ } ++ ++ /* Pluck the body out of the function and place it here */ ++ nir_cf_list body; ++ nir_cf_list_extract(&body, &callee_copy->body); ++ nir_cf_reinsert(&body, b->cursor); ++ ++ b->cursor = nir_before_instr(&call->instr); ++ ++ /* Add copies of all out parameters and the return */ ++ assert(call->num_params == callee_copy->num_params); ++ for (unsigned i = 0; i < callee_copy->num_params; i++) { ++ /* Only out or inout parameters */ ++ if (call->callee->params[i].param_type == nir_parameter_in) ++ continue; ++ ++ nir_copy_deref_var(b, call->params[i], ++ nir_deref_var_create(b->shader, ++ callee_copy->params[i])); ++ } ++ if (!glsl_type_is_void(call->callee->return_type)) { ++ nir_copy_deref_var(b, call->return_deref, ++ nir_deref_var_create(b->shader, ++ callee_copy->return_var)); ++ } ++ ++ nir_instr_remove(&call->instr); ++ } ++ ++ return true; ++} ++ ++static bool ++inline_function_impl(nir_function_impl *impl, struct set *inlined) ++{ ++ if (_mesa_set_search(inlined, impl)) ++ return false; /* Already inlined */ ++ ++ struct inline_functions_state state; ++ ++ state.inlined = inlined; ++ state.progress = false; ++ nir_builder_init(&state.builder, impl); ++ ++ nir_foreach_block(impl, inline_functions_block, &state); ++ ++ if (state.progress) { ++ /* SSA and register indices are completely messed up now */ ++ nir_index_ssa_defs(impl); ++ nir_index_local_regs(impl); ++ ++ nir_metadata_preserve(impl, nir_metadata_none); ++ } ++ ++ _mesa_set_add(inlined, impl); ++ ++ return state.progress; ++} ++ ++bool ++nir_inline_functions(nir_shader *shader) ++{ ++ struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer, ++ _mesa_key_pointer_equal); ++ bool progress = false; ++ ++ nir_foreach_function(shader, function) { ++ if (function->impl) ++ progress = inline_function_impl(function->impl, inlined) || progress; ++ } ++ ++ _mesa_set_destroy(inlined, NULL); ++ ++ return progress; ++} diff --cc src/compiler/nir/nir_instr_set.c index 00000000000,d3f939fe805..eb021326097 mode 000000,100644..100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@@ -1,0 -1,519 +1,522 @@@ + /* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + #include "nir_instr_set.h" + #include "nir_vla.h" + + #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) + + static uint32_t + hash_src(uint32_t hash, const nir_src *src) + { + assert(src->is_ssa); + hash = HASH(hash, src->ssa); + return hash; + } + + static uint32_t + hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components) + { + hash = HASH(hash, src->abs); + hash = HASH(hash, src->negate); + + for (unsigned i = 0; i < num_components; i++) + hash = HASH(hash, src->swizzle[i]); + + hash = hash_src(hash, &src->src); + return hash; + } + + static uint32_t + hash_alu(uint32_t hash, const nir_alu_instr *instr) + { + hash = HASH(hash, instr->op); + hash = HASH(hash, instr->dest.dest.ssa.num_components); + + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); + uint32_t hash0 = hash_alu_src(hash, &instr->src[0], + nir_ssa_alu_instr_src_components(instr, 0)); + uint32_t hash1 = hash_alu_src(hash, &instr->src[1], + nir_ssa_alu_instr_src_components(instr, 1)); + /* For commutative operations, we need some commutative way of + * combining the hashes. One option would be to XOR them but that + * means that anything with two identical sources will hash to 0 and + * that's common enough we probably don't want the guaranteed + * collision. Either addition or multiplication will also work. 
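
A standalone sketch of that trade-off with plain 32-bit hashes; combine_commutative is a hypothetical helper, not part of this file:

   #include <stdint.h>

   static uint32_t
   combine_commutative(uint32_t h0, uint32_t h1)
   {
      /* h0 ^ h1 would be order-independent too, but it collapses to 0
       * whenever the two source hashes are identical (e.g. fmul(x, x)),
       * which is common enough to be a guaranteed-collision problem.
       * Multiplication (or addition) stays symmetric without that pitfall.
       */
      return h0 * h1;
   }

Because the combine step is symmetric, swapping the operands of a commutative ALU op yields the same instruction hash, so CSE can still match the two forms.
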
+ */ + hash = hash0 * hash1; + } else { + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + hash = hash_alu_src(hash, &instr->src[i], + nir_ssa_alu_instr_src_components(instr, i)); + } + } + + return hash; + } + + static uint32_t + hash_load_const(uint32_t hash, const nir_load_const_instr *instr) + { + hash = HASH(hash, instr->def.num_components); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, + instr->def.num_components + * sizeof(instr->value.f[0])); + + return hash; + } + + static int + cmp_phi_src(const void *data1, const void *data2) + { + nir_phi_src *src1 = *(nir_phi_src **)data1; + nir_phi_src *src2 = *(nir_phi_src **)data2; + return src1->pred - src2->pred; + } + + static uint32_t + hash_phi(uint32_t hash, const nir_phi_instr *instr) + { + hash = HASH(hash, instr->instr.block); + + /* sort sources by predecessor, since the order shouldn't matter */ + unsigned num_preds = instr->instr.block->predecessors->entries; + NIR_VLA(nir_phi_src *, srcs, num_preds); + unsigned i = 0; + nir_foreach_phi_src(instr, src) { + srcs[i++] = src; + } + + qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src); + + for (i = 0; i < num_preds; i++) { + hash = hash_src(hash, &srcs[i]->src); + hash = HASH(hash, srcs[i]->pred); + } + + return hash; + } + + static uint32_t + hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) + { + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + hash = HASH(hash, instr->intrinsic); + + if (info->has_dest) + hash = HASH(hash, instr->dest.ssa.num_components); + + assert(info->num_variables == 0); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, + info->num_indices + * sizeof(instr->const_index[0])); + return hash; + } + + static uint32_t + hash_tex(uint32_t hash, const nir_tex_instr *instr) + { + hash = HASH(hash, instr->op); + hash = HASH(hash, instr->num_srcs); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + hash = HASH(hash, instr->src[i].src_type); + hash = hash_src(hash, &instr->src[i].src); + } + + hash = HASH(hash, instr->coord_components); + hash = HASH(hash, instr->sampler_dim); + hash = HASH(hash, instr->is_array); + hash = HASH(hash, instr->is_shadow); + hash = HASH(hash, instr->is_new_style_shadow); + hash = HASH(hash, instr->const_offset); + unsigned component = instr->component; + hash = HASH(hash, component); ++ hash = HASH(hash, instr->texture_index); ++ hash = HASH(hash, instr->texture_array_size); + hash = HASH(hash, instr->sampler_index); - hash = HASH(hash, instr->sampler_array_size); + + assert(!instr->sampler); + + return hash; + } + + /* Computes a hash of an instruction for use in a hash table. Note that this + * will only work for instructions where instr_can_rewrite() returns true, and + * it should return identical hashes for two instructions that are the same + * according nir_instrs_equal(). 
+ */ + + static uint32_t + hash_instr(const void *data) + { + const nir_instr *instr = data; + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + switch (instr->type) { + case nir_instr_type_alu: + hash = hash_alu(hash, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + hash = hash_load_const(hash, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_phi: + hash = hash_phi(hash, nir_instr_as_phi(instr)); + break; + case nir_instr_type_intrinsic: + hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + hash = hash_tex(hash, nir_instr_as_tex(instr)); + break; + default: + unreachable("Invalid instruction type"); + } + + return hash; + } + + bool + nir_srcs_equal(nir_src src1, nir_src src2) + { + if (src1.is_ssa) { + if (src2.is_ssa) { + return src1.ssa == src2.ssa; + } else { + return false; + } + } else { + if (src2.is_ssa) { + return false; + } else { + if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) + return false; + + if (src1.reg.indirect) { + if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) + return false; + } + + return src1.reg.reg == src2.reg.reg && + src1.reg.base_offset == src2.reg.base_offset; + } + } + } + + static bool + nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, + unsigned src1, unsigned src2) + { + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) + return false; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) + return false; + } + + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); + } + + /* Returns "true" if two instructions are equal. Note that this will only + * work for the subset of instructions defined by instr_can_rewrite(). Also, + * it should only return "true" for instructions that hash_instr() will return + * the same hash for (ignoring collisions, of course). 
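
The contract spelled out above, that equality must imply identical hashes over exactly the same fields, can be illustrated with a trivial standalone key type; toy_key, toy_hash and toy_equal are hypothetical:

   #include <stdbool.h>
   #include <stdint.h>

   struct toy_key { int op; int operand; };

   /* toy_hash() may only mix fields that toy_equal() compares; anything it
    * ignores must be ignored by both, otherwise two "equal" keys could land
    * in different hash-set buckets.
    */
   static uint32_t
   toy_hash(const struct toy_key *k)
   {
      return (uint32_t)k->op * 0x9e3779b1u ^ (uint32_t)k->operand;
   }

   static bool
   toy_equal(const struct toy_key *a, const struct toy_key *b)
   {
      return a->op == b->op && a->operand == b->operand;
   }
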
+ */ + + static bool + nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) + { + if (instr1->type != instr2->type) + return false; + + switch (instr1->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu1 = nir_instr_as_alu(instr1); + nir_alu_instr *alu2 = nir_instr_as_alu(instr2); + + if (alu1->op != alu2->op) + return false; + + /* TODO: We can probably acutally do something more inteligent such + * as allowing different numbers and taking a maximum or something + * here */ + if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) + return false; + + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } + } + return true; + } + case nir_instr_type_tex: { + nir_tex_instr *tex1 = nir_instr_as_tex(instr1); + nir_tex_instr *tex2 = nir_instr_as_tex(instr2); + + if (tex1->op != tex2->op) + return false; + + if (tex1->num_srcs != tex2->num_srcs) + return false; + for (unsigned i = 0; i < tex1->num_srcs; i++) { + if (tex1->src[i].src_type != tex2->src[i].src_type || + !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) { + return false; + } + } + + if (tex1->coord_components != tex2->coord_components || + tex1->sampler_dim != tex2->sampler_dim || + tex1->is_array != tex2->is_array || + tex1->is_shadow != tex2->is_shadow || + tex1->is_new_style_shadow != tex2->is_new_style_shadow || + memcmp(tex1->const_offset, tex2->const_offset, + sizeof(tex1->const_offset)) != 0 || + tex1->component != tex2->component || - tex1->sampler_index != tex2->sampler_index || - tex1->sampler_array_size != tex2->sampler_array_size) { ++ tex1->texture_index != tex2->texture_index || ++ tex1->texture_array_size != tex2->texture_array_size || ++ tex1->sampler_index != tex2->sampler_index) { + return false; + } + + /* Don't support un-lowered sampler derefs currently. 
*/ - assert(!tex1->sampler && !tex2->sampler); ++ assert(!tex1->texture && !tex1->sampler && ++ !tex2->texture && !tex2->sampler); + + return true; + } + case nir_instr_type_load_const: { + nir_load_const_instr *load1 = nir_instr_as_load_const(instr1); + nir_load_const_instr *load2 = nir_instr_as_load_const(instr2); + + if (load1->def.num_components != load2->def.num_components) + return false; + + return memcmp(load1->value.f, load2->value.f, + load1->def.num_components * sizeof(*load2->value.f)) == 0; + } + case nir_instr_type_phi: { + nir_phi_instr *phi1 = nir_instr_as_phi(instr1); + nir_phi_instr *phi2 = nir_instr_as_phi(instr2); + + if (phi1->instr.block != phi2->instr.block) + return false; + + nir_foreach_phi_src(phi1, src1) { + nir_foreach_phi_src(phi2, src2) { + if (src1->pred == src2->pred) { + if (!nir_srcs_equal(src1->src, src2->src)) + return false; + + break; + } + } + } + + return true; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1); + nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2); + const nir_intrinsic_info *info = + &nir_intrinsic_infos[intrinsic1->intrinsic]; + + if (intrinsic1->intrinsic != intrinsic2->intrinsic || + intrinsic1->num_components != intrinsic2->num_components) + return false; + + if (info->has_dest && intrinsic1->dest.ssa.num_components != + intrinsic2->dest.ssa.num_components) + return false; + + for (unsigned i = 0; i < info->num_srcs; i++) { + if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i])) + return false; + } + + assert(info->num_variables == 0); + + for (unsigned i = 0; i < info->num_indices; i++) { + if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) + return false; + } + + return true; + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; + } + + static bool + src_is_ssa(nir_src *src, void *data) + { + (void) data; + return src->is_ssa; + } + + static bool + dest_is_ssa(nir_dest *dest, void *data) + { + (void) data; + return dest->is_ssa; + } + + /* This function determines if uses of an instruction can safely be rewritten + * to use another identical instruction instead. Note that this function must + * be kept in sync with hash_instr() and nir_instrs_equal() -- only + * instructions that pass this test will be handed on to those functions, and + * conversely they must handle everything that this function returns true for. + */ + + static bool + instr_can_rewrite(nir_instr *instr) + { + /* We only handle SSA. */ + if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || + !nir_foreach_src(instr, src_is_ssa, NULL)) + return false; + + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_load_const: + case nir_instr_type_phi: + return true; + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + /* Don't support un-lowered sampler derefs currently. 
*/ - if (tex->sampler) ++ if (tex->texture || tex->sampler) + return false; + + return true; + } + case nir_instr_type_intrinsic: { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER) && + info->num_variables == 0; /* not implemented yet */ + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return false; + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; + } + + static nir_ssa_def * + nir_instr_get_dest_ssa_def(nir_instr *instr) + { + switch (instr->type) { + case nir_instr_type_alu: + assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); + return &nir_instr_as_alu(instr)->dest.dest.ssa; + case nir_instr_type_load_const: + return &nir_instr_as_load_const(instr)->def; + case nir_instr_type_phi: + assert(nir_instr_as_phi(instr)->dest.is_ssa); + return &nir_instr_as_phi(instr)->dest.ssa; + case nir_instr_type_intrinsic: + assert(nir_instr_as_intrinsic(instr)->dest.is_ssa); + return &nir_instr_as_intrinsic(instr)->dest.ssa; + case nir_instr_type_tex: + assert(nir_instr_as_tex(instr)->dest.is_ssa); + return &nir_instr_as_tex(instr)->dest.ssa; + default: + unreachable("We never ask for any of these"); + } + } + + static bool + cmp_func(const void *data1, const void *data2) + { + return nir_instrs_equal(data1, data2); + } + + struct set * + nir_instr_set_create(void *mem_ctx) + { + return _mesa_set_create(mem_ctx, hash_instr, cmp_func); + } + + void + nir_instr_set_destroy(struct set *instr_set) + { + _mesa_set_destroy(instr_set, NULL); + } + + bool + nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) + { + if (!instr_can_rewrite(instr)) + return false; + + struct set_entry *entry = _mesa_set_search(instr_set, instr); + if (entry) { + nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); + nir_ssa_def *new_def = + nir_instr_get_dest_ssa_def((nir_instr *) entry->key); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); + return true; + } + + _mesa_set_add(instr_set, instr); + return false; + } + + void + nir_instr_set_remove(struct set *instr_set, nir_instr *instr) + { + if (!instr_can_rewrite(instr)) + return; + + struct set_entry *entry = _mesa_set_search(instr_set, instr); + if (entry) + _mesa_set_remove(instr_set, entry); + } + diff --cc src/compiler/nir/nir_intrinsics.h index 00000000000,62eead4878a..3e7cf735a1b mode 000000,100644..100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@@ -1,0 -1,316 +1,367 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + /** + * This header file defines all the available intrinsics in one place. It + * expands to a list of macros of the form: + * + * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, + * num_variables, num_indices, flags) + * + * Which should correspond one-to-one with the nir_intrinsic_info structure. It + * is included in both ir.h to create the nir_intrinsic enum (with members of + * the form nir_intrinsic_(name)) and and in opcodes.c to create + * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures + * for each intrinsic. + */ + + #define ARR(...) { __VA_ARGS__ } + + + INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) + INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0) + INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) + + /* + * Interpolation of input. The interp_var_at* intrinsics are similar to the + * load_var intrinsic acting an a shader input except that they interpolate + * the input differently. The at_sample and at_offset intrinsics take an + * aditional source that is a integer sample id or a vec2 position offset + * respectively. + */ + + INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + + /* + * Ask the driver for the size of a given buffer. It takes the buffer index + * as source. + */ + INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + + /* + * a barrier is an intrinsic with no inputs/outputs but which can't be moved + * around/optimized in general + */ + #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) + + BARRIER(barrier) + BARRIER(discard) + + /* + * Memory barrier with semantics analogous to the memoryBarrier() GLSL + * intrinsic. + */ + BARRIER(memory_barrier) + + /* + * Shader clock intrinsic with semantics analogous to the clock2x32ARB() + * GLSL intrinsic. + * The latter can be used as code motion barrier, which is currently not + * feasible with NIR. + */ + INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE) + + /* + * Memory barrier with semantics analogous to the compute shader + * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(), + * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics. + */ + BARRIER(group_memory_barrier) + BARRIER(memory_barrier_atomic_counter) + BARRIER(memory_barrier_buffer) + BARRIER(memory_barrier_image) + BARRIER(memory_barrier_shared) + + /** A conditional discard, with a single boolean source. */ + INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) + + /** + * Basic Geometry Shader intrinsics. + * + * emit_vertex implements GLSL's EmitStreamVertex() built-in. 
It takes a single + * index, which is the stream ID to write to. + * + * end_primitive implements GLSL's EndPrimitive() built-in. + */ + INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0) + INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) + + /** + * Geometry Shader intrinsics with a vertex count. + * + * Alternatively, drivers may implement these intrinsics, and use + * nir_lower_gs_intrinsics() to convert from the basic intrinsics. + * + * These maintain a count of the number of vertices emitted, as an additional + * unsigned integer source. + */ + INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0) + INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0) + INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0) + + /* + * Atomic counters + * + * The *_var variants take an atomic_uint nir_variable, while the other, + * lowered, variants take a constant buffer index and register offset. + */ + + #define ATOMIC(name, flags) \ + INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \ + INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags) + + ATOMIC(inc, 0) + ATOMIC(dec, 0) + ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) + + /* + * Image load, store and atomic intrinsics. + * + * All image intrinsics take an image target passed as a nir_variable. Image + * variables contain a number of memory and layout qualifiers that influence + * the semantics of the intrinsic. + * + * All image intrinsics take a four-coordinate vector and a sample index as + * first two sources, determining the location within the image that will be + * accessed by the intrinsic. Components not applicable to the image target + * in use are undefined. Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. + */ + INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) + INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) + INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) + INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + ++/* ++ * Vulkan descriptor set intrinsic ++ * ++ * The Vulkan API uses a different binding model from GL. In the Vulkan ++ * API, all external resources are represented by a tripple: ++ * ++ * (descriptor set, binding, array index) ++ * ++ * where the array index is the only thing allowed to be indirect. The ++ * vulkan_surface_index intrinsic takes the descriptor set and binding as ++ * its first two indices and the array index as its source. The third ++ * index is a nir_variable_mode in case that's useful to the backend. ++ * ++ * The intended usage is that the shader will call vulkan_surface_index to ++ * get an index and then pass that as the buffer index ubo/ssbo calls. 
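
A rough, driver-agnostic sketch of the (descriptor set, binding, array index) triple described above; the toy types and toy_resource_index below are hypothetical and only meant to show that the set and binding are compile-time constants while the array index is the one dynamic component:

   #include <stdint.h>

   struct toy_binding {
      uint32_t surface_base;   /* first slot reserved for this binding's array */
      uint32_t array_size;
   };

   struct toy_descriptor_set {
      struct toy_binding bindings[32];
   };

   static uint32_t
   toy_resource_index(const struct toy_descriptor_set *const *sets,
                      uint32_t set, uint32_t binding, uint32_t array_index)
   {
      const struct toy_binding *b = &sets[set]->bindings[binding];
      /* out-of-bounds handling is omitted; the result is what a
       * load_ubo/load_ssbo-style access would consume as its buffer index.
       */
      return b->surface_base + array_index;
   }
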
++ */ ++INTRINSIC(vulkan_resource_index, 1, ARR(1), true, 1, 0, 3, ++ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) ++ ++/* ++ * variable atomic intrinsics ++ * ++ * All of these variable atomic memory operations read a value from memory, ++ * compute a new value using one of the operations below, write the new value ++ * to memory, and return the original value read. ++ * ++ * All operations take 1 source except CompSwap that takes 2. These sources ++ * represent: ++ * ++ * 0: The data parameter to the atomic function (i.e. the value to add ++ * in shared_atomic_add, etc). ++ * 1: For CompSwap only: the second data parameter. ++ * ++ * All operations take 1 variable deref. ++ */ ++INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, 0) ++INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, 0) ++ + /* + * SSBO atomic intrinsics + * + * All of the SSBO atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new + * value to memory, and return the original value read. + * + * All operations take 3 sources except CompSwap that takes 4. These + * sources represent: + * + * 0: The SSBO buffer index. + * 1: The offset into the SSBO buffer of the variable that the atomic + * operation will operate on. + * 2: The data parameter to the atomic function (i.e. the value to add + * in ssbo_atomic_add, etc). + * 3: For CompSwap only: the second data parameter. + */ + INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) + + /* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. 
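
The read-modify-write-return-old behaviour described above follows GLSL's atomic built-ins; ignoring atomicity itself, the semantics can be modelled sequentially (model_atomic_add and model_atomic_comp_swap are hypothetical helpers):

   static unsigned
   model_atomic_add(unsigned *mem, unsigned data)
   {
      unsigned old = *mem;
      *mem = old + data;
      return old;              /* the value read before the update */
   }

   /* Modelled on GLSL atomicCompSwap(mem, compare, data): the store only
    * happens when the current contents match 'compare', but the original
    * contents are returned either way.
    */
   static unsigned
   model_atomic_comp_swap(unsigned *mem, unsigned compare, unsigned data)
   {
      unsigned old = *mem;
      if (old == compare)
         *mem = data;
      return old;
   }
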
+ */ + INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) + INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + + #define SYSTEM_VALUE(name, components, num_indices) \ + INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + + SYSTEM_VALUE(front_face, 1, 0) + SYSTEM_VALUE(vertex_id, 1, 0) + SYSTEM_VALUE(vertex_id_zero_base, 1, 0) + SYSTEM_VALUE(base_vertex, 1, 0) + SYSTEM_VALUE(instance_id, 1, 0) + SYSTEM_VALUE(base_instance, 1, 0) + SYSTEM_VALUE(draw_id, 1, 0) + SYSTEM_VALUE(sample_id, 1, 0) + SYSTEM_VALUE(sample_pos, 2, 0) + SYSTEM_VALUE(sample_mask_in, 1, 0) + SYSTEM_VALUE(primitive_id, 1, 0) + SYSTEM_VALUE(invocation_id, 1, 0) + SYSTEM_VALUE(tess_coord, 3, 0) + SYSTEM_VALUE(tess_level_outer, 4, 0) + SYSTEM_VALUE(tess_level_inner, 2, 0) + SYSTEM_VALUE(patch_vertices_in, 1, 0) + SYSTEM_VALUE(local_invocation_id, 3, 0) + SYSTEM_VALUE(work_group_id, 3, 0) + SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ + SYSTEM_VALUE(num_work_groups, 3, 0) + SYSTEM_VALUE(helper_invocation, 1, 0) + + /* + * Load operations pull data from some piece of GPU memory. All load + * operations operate in terms of offsets into some piece of theoretical + * memory. Loads from externally visible memory (UBO and SSBO) simply take a + * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) + * take a base+offset pair where the base (const_index[0]) gives the location + * of the start of the variable being loaded and and the offset source is a + * offset into that variable. + * ++ * Uniform load operations have a second index that specifies the size of the ++ * variable being loaded. If const_index[1] == 0, then the size is unknown. ++ * + * Some load operations such as UBO/SSBO load and per_vertex loads take an + * additional source to specify which UBO/SSBO/vertex to load from. + * + * The exact address type depends on the lowering pass that generates the + * load/store intrinsics. Typically, this is vec4 units for things such as + * varying slots and float units for fragment shader inputs. UBO and SSBO + * offsets are always in bytes. + */ + + #define LOAD(name, srcs, indices, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) + -/* src[] = { offset }. const_index[] = { base } */ -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) ++/* src[] = { offset }. const_index[] = { base, size } */ ++LOAD(uniform, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { buffer_index, offset }. No const_index */ + LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { vertex, offset }. 
const_index[] = { base } */ + LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + /* src[] = { buffer_index, offset }. No const_index */ + LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { vertex, offset }. const_index[] = { base } */ + LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) + /* src[] = { offset }. const_index[] = { base } */ + LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) ++/* src[] = { offset }. const_index[] = { base, size } */ ++LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + + /* + * Stores work the same way as loads, except now the first source is the value + * to store and the second (and possibly third) source specify where to store + * the value. SSBO and shared memory stores also have a write mask as + * const_index[0]. + */ + + #define STORE(name, srcs, indices, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) + + /* src[] = { value, offset }. const_index[] = { base, write_mask } */ + STORE(output, 2, 2, 0) + /* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */ + STORE(per_vertex_output, 3, 2, 0) + /* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ + STORE(ssbo, 3, 1, 0) + /* src[] = { value, offset }. const_index[] = { base, write_mask } */ + STORE(shared, 2, 2, 0) + + LAST_INTRINSIC(store_shared) diff --cc src/compiler/nir/nir_lower_alu_to_scalar.c index 00000000000,0a27e66cf0f..37cb0221e0b mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@@ -1,0 -1,210 +1,264 @@@ + /* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + #include "nir.h" + #include "nir_builder.h" + + /** @file nir_lower_alu_to_scalar.c + * + * Replaces nir_alu_instr operations with more than one channel used in the + * arguments with individual per-channel operations. 
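
As a standalone illustration of that transform on plain C vectors (struct vec4 and add_vec4_scalarized are toy names, not NIR types): one 4-wide add turns into up to four scalar adds whose results are regrouped at the end, mirroring the per-channel loop and the final nir_vec() in this pass.

   struct vec4 { float c[4]; };

   static struct vec4
   add_vec4_scalarized(struct vec4 a, struct vec4 b, unsigned write_mask)
   {
      struct vec4 out = {{0.0f, 0.0f, 0.0f, 0.0f}};
      for (unsigned chan = 0; chan < 4; chan++) {
         if (!(write_mask & (1u << chan)))
            continue;                            /* skip unwritten channels */
         out.c[chan] = a.c[chan] + b.c[chan];    /* one scalar op per channel */
      }
      return out;                                /* regroup, like nir_vec() */
   }
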
+ */ + + static void + nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components) + { + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; + } + + static void + lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, + nir_builder *builder) + { + unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; + + nir_ssa_def *last = NULL; + for (unsigned i = 0; i < num_components; i++) { + nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op); + nir_alu_ssa_dest_init(chan, 1); + nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); + chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; + if (nir_op_infos[chan_op].num_inputs > 1) { + assert(nir_op_infos[chan_op].num_inputs == 2); + nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); + chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; + } + + nir_builder_instr_insert(builder, &chan->instr); + + if (i == 0) { + last = &chan->dest.dest.ssa; + } else { + last = nir_build_alu(builder, merge_op, + last, &chan->dest.dest.ssa, NULL, NULL); + } + } + + assert(instr->dest.write_mask == 1); + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last)); + nir_instr_remove(&instr->instr); + } + + static void + lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) + { + unsigned num_src = nir_op_infos[instr->op].num_inputs; + unsigned i, chan; + + assert(instr->dest.dest.is_ssa); + assert(instr->dest.write_mask != 0); + + b->cursor = nir_before_instr(&instr->instr); + + #define LOWER_REDUCTION(name, chan, merge) \ + case name##2: \ + case name##3: \ + case name##4: \ + lower_reduction(instr, chan, merge, b); \ + return; + + switch (instr->op) { + case nir_op_vec4: + case nir_op_vec3: + case nir_op_vec2: + /* We don't need to scalarize these ops, they're the ones generated to + * group up outputs into a value that can be SSAed. + */ + return; + ++ case nir_op_pack_half_2x16: ++ if (!b->shader->options->lower_pack_half_2x16) ++ return; ++ ++ nir_ssa_def *val = ++ nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa, ++ instr->src[0].swizzle[0]), ++ nir_channel(b, instr->src[0].src.ssa, ++ instr->src[0].swizzle[1])); ++ ++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); ++ nir_instr_remove(&instr->instr); ++ return; ++ + case nir_op_unpack_unorm_4x8: + case nir_op_unpack_snorm_4x8: + case nir_op_unpack_unorm_2x16: + case nir_op_unpack_snorm_2x16: + /* There is no scalar version of these ops, unless we were to break it + * down to bitshifts and math (which is definitely not intended). + */ + return; + - case nir_op_unpack_half_2x16: - /* We could split this into unpack_half_2x16_split_[xy], but should - * we? 
- */ ++ case nir_op_unpack_half_2x16: { ++ if (!b->shader->options->lower_unpack_half_2x16) ++ return; ++ ++ nir_ssa_def *comps[2]; ++ comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa); ++ comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa); ++ nir_ssa_def *vec = nir_vec(b, comps, 2); ++ ++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); ++ nir_instr_remove(&instr->instr); + return; ++ } ++ ++ case nir_op_pack_uvec2_to_uint: { ++ assert(b->shader->options->lower_pack_snorm_2x16 || ++ b->shader->options->lower_pack_unorm_2x16); ++ ++ nir_ssa_def *word = ++ nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); ++ nir_ssa_def *val = ++ nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), ++ nir_channel(b, word, 0)); ++ ++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); ++ nir_instr_remove(&instr->instr); ++ break; ++ } ++ ++ case nir_op_pack_uvec4_to_uint: { ++ assert(b->shader->options->lower_pack_snorm_4x8 || ++ b->shader->options->lower_pack_unorm_4x8); ++ ++ nir_ssa_def *byte = ++ nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); ++ nir_ssa_def *val = ++ nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), ++ nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), ++ nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)), ++ nir_channel(b, byte, 0))); ++ ++ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); ++ nir_instr_remove(&instr->instr); ++ break; ++ } + + case nir_op_fdph: { + nir_ssa_def *sum[4]; + for (unsigned i = 0; i < 3; i++) { + sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[i]), + nir_channel(b, instr->src[1].src.ssa, + instr->src[1].swizzle[i])); + } + sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]); + + nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]), + nir_fadd(b, sum[2], sum[3])); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + return; + } + + LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); + LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand); + LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand); + LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior); + LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior); + LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); + LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); + + default: + break; + } + + if (instr->dest.dest.ssa.num_components == 1) + return; + + unsigned num_components = instr->dest.dest.ssa.num_components; + nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL }; + + for (chan = 0; chan < 4; chan++) { + if (!(instr->dest.write_mask & (1 << chan))) + continue; + + nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op); + for (i = 0; i < num_src; i++) { + /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar + * args (input_sizes[] == 1). + */ + assert(nir_op_infos[instr->op].input_sizes[i] < 2); + unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ? 
+ 0 : chan); + + nir_alu_src_copy(&lower->src[i], &instr->src[i], lower); + for (int j = 0; j < 4; j++) + lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan]; + } + + nir_alu_ssa_dest_init(lower, 1); + lower->dest.saturate = instr->dest.saturate; + comps[chan] = &lower->dest.dest.ssa; + + nir_builder_instr_insert(b, &lower->instr); + } + + nir_ssa_def *vec = nir_vec(b, comps, num_components); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + + nir_instr_remove(&instr->instr); + } + + static bool + lower_alu_to_scalar_block(nir_block *block, void *builder) + { + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + lower_alu_instr_scalar(nir_instr_as_alu(instr), builder); + } + + return true; + } + + static void + nir_lower_alu_to_scalar_impl(nir_function_impl *impl) + { + nir_builder builder; + nir_builder_init(&builder, impl); + + nir_foreach_block(impl, lower_alu_to_scalar_block, &builder); + } + + void + nir_lower_alu_to_scalar(nir_shader *shader) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_alu_to_scalar_impl(function->impl); + } + } diff --cc src/compiler/nir/nir_lower_atomics.c index 00000000000,1a4458d4f84..b07e199d71b mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@@ -1,0 -1,166 +1,167 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
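In nir_lower_alu_to_scalar.c above, lower_reduction and the LOWER_REDUCTION table handle horizontal reductions (fdot, ball_*, bany_*, fall_equal, fany_nequal): each channel gets the per-channel op, and the results are folded into the previous partial result with the merge op. A rough standalone illustration of the expression this produces for fdot4, in plain C rather than pass code:

/* fdot4(a, b) after lowering: per-channel fmul results merged with
 * fadd, each one combined with the previous partial result the way
 * the loop in lower_reduction builds it.
 */
static float fdot4_lowered(const float a[4], const float b[4])
{
   float last = a[0] * b[0];     /* channel 0: nothing to merge yet */
   last = last + a[1] * b[1];    /* merge_op = fadd */
   last = last + a[2] * b[2];
   last = last + a[3] * b[3];
   return last;
}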
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "compiler/glsl/ir_uniform.h" + #include "nir.h" + #include "main/config.h" + #include + + typedef struct { + const struct gl_shader_program *shader_program; + nir_shader *shader; + } lower_atomic_state; + + /* + * replace atomic counter intrinsics that use a variable with intrinsics + * that directly store the buffer index and byte offset + */ + + static void + lower_instr(nir_intrinsic_instr *instr, + lower_atomic_state *state) + { + nir_intrinsic_op op; + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_read_var: + op = nir_intrinsic_atomic_counter_read; + break; + + case nir_intrinsic_atomic_counter_inc_var: + op = nir_intrinsic_atomic_counter_inc; + break; + + case nir_intrinsic_atomic_counter_dec_var: + op = nir_intrinsic_atomic_counter_dec; + break; + + default: + return; + } + + if (instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage) ++ instr->variables[0]->var->data.mode != nir_var_shader_storage && ++ instr->variables[0]->var->data.mode != nir_var_shared) + return; /* atomics passed as function arguments can't be lowered */ + + void *mem_ctx = ralloc_parent(instr); + unsigned uniform_loc = instr->variables[0]->var->data.location; + + nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); + new_instr->const_index[0] = + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; + + nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); + offset_const->value.u[0] = instr->variables[0]->var->data.offset; + + nir_instr_insert_before(&instr->instr, &offset_const->instr); + + nir_ssa_def *offset_def = &offset_const->def; + + nir_deref *tail = &instr->variables[0]->deref; + while (tail->child != NULL) { + assert(tail->child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + tail = tail->child; + + unsigned child_array_elements = tail->child != NULL ? 
+ glsl_get_aoa_size(tail->type) : 1; + + offset_const->value.u[0] += deref_array->base_offset * + child_array_elements * ATOMIC_COUNTER_SIZE; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_load_const_instr *atomic_counter_size = + nir_load_const_instr_create(mem_ctx, 1); + atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE; + nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr); + + nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul); + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + mul->dest.write_mask = 0x1; + nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul); + mul->src[1].src.is_ssa = true; + mul->src[1].src.ssa = &atomic_counter_size->def; + nir_instr_insert_before(&instr->instr, &mul->instr); + + nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd); + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + add->dest.write_mask = 0x1; + add->src[0].src.is_ssa = true; + add->src[0].src.ssa = &mul->dest.dest.ssa; + add->src[1].src.is_ssa = true; + add->src[1].src.ssa = offset_def; + nir_instr_insert_before(&instr->instr, &add->instr); + + offset_def = &add->dest.dest.ssa; + } + } + + new_instr->src[0].is_ssa = true; + new_instr->src[0].ssa = offset_def; + + if (instr->dest.is_ssa) { + nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, + instr->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(&new_instr->dest.ssa)); + } else { + nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); + } + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + nir_instr_remove(&instr->instr); + } + + static bool + lower_block(nir_block *block, void *state) + { + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_intrinsic) + lower_instr(nir_instr_as_intrinsic(instr), + (lower_atomic_state *) state); + } + + return true; + } + + void + nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program) + { + lower_atomic_state state = { + .shader = shader, + .shader_program = shader_program, + }; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_foreach_block(function->impl, lower_block, (void *) &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + } diff --cc src/compiler/nir/nir_lower_indirect_derefs.c index 00000000000,00000000000..69f2df4ba6d new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@@ -1,0 -1,0 +1,239 @@@ ++/* ++ * Copyright © 2016 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
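The deref walk in lower_instr (nir_lower_atomics.c above) turns an atomic counter deref into a byte offset: each direct array index contributes base_offset times the number of counters in the remaining dimensions times ATOMIC_COUNTER_SIZE, and each indirect index contributes the same product at run time through the imul/iadd chain. A small worked sketch of the constant part for a hypothetical "atomic_uint counters[4][3]", plain C only:

#define ATOMIC_COUNTER_SIZE 4   /* bytes per counter; 4 is used here purely for illustration */

/* Byte offset of counters[i][j]; base_offset stands in for the
 * variable's own data.offset within its buffer.
 */
static unsigned counter_byte_offset(unsigned base_offset,
                                    unsigned i, unsigned j)
{
   unsigned off = base_offset;
   off += i * 3 * ATOMIC_COUNTER_SIZE;  /* 3 counters remain per step of i */
   off += j * 1 * ATOMIC_COUNTER_SIZE;  /* innermost element */
   return off;
}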
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "nir.h" ++#include "nir_builder.h" ++ ++static void ++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, ++ nir_deref_var *deref, nir_deref *tail, ++ nir_ssa_def **dest, nir_ssa_def *src); ++ ++static void ++emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, ++ nir_deref_var *deref, nir_deref *arr_parent, ++ int start, int end, ++ nir_ssa_def **dest, nir_ssa_def *src) ++{ ++ assert(arr_parent->child && ++ arr_parent->child->deref_type == nir_deref_type_array); ++ nir_deref_array *arr = nir_deref_as_array(arr_parent->child); ++ assert(arr->deref_array_type == nir_deref_array_type_indirect); ++ assert(arr->indirect.is_ssa); ++ ++ assert(start < end); ++ if (start == end - 1) { ++ /* Base case. Just emit the load/store op */ ++ nir_deref_array direct = *arr; ++ direct.deref_array_type = nir_deref_array_type_direct; ++ direct.base_offset += start; ++ direct.indirect = NIR_SRC_INIT; ++ ++ arr_parent->child = &direct.deref; ++ emit_load_store(b, orig_instr, deref, &arr->deref, dest, src); ++ arr_parent->child = &arr->deref; ++ } else { ++ int mid = start + (end - start) / 2; ++ ++ nir_ssa_def *then_dest, *else_dest; ++ ++ nir_if *if_stmt = nir_if_create(b->shader); ++ if_stmt->condition = nir_src_for_ssa(nir_ilt(b, arr->indirect.ssa, ++ nir_imm_int(b, mid))); ++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node); ++ ++ b->cursor = nir_after_cf_list(&if_stmt->then_list); ++ emit_indirect_load_store(b, orig_instr, deref, arr_parent, ++ start, mid, &then_dest, src); ++ ++ b->cursor = nir_after_cf_list(&if_stmt->else_list); ++ emit_indirect_load_store(b, orig_instr, deref, arr_parent, ++ mid, end, &else_dest, src); ++ ++ b->cursor = nir_after_cf_node(&if_stmt->cf_node); ++ ++ if (src == NULL) { ++ /* We're a load. We need to insert a phi node */ ++ nir_phi_instr *phi = nir_phi_instr_create(b->shader); ++ nir_ssa_dest_init(&phi->instr, &phi->dest, ++ then_dest->num_components, NULL); ++ ++ nir_phi_src *src0 = ralloc(phi, nir_phi_src); ++ src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt)); ++ src0->src = nir_src_for_ssa(then_dest); ++ exec_list_push_tail(&phi->srcs, &src0->node); ++ ++ nir_phi_src *src1 = ralloc(phi, nir_phi_src); ++ src1->pred = nir_cf_node_as_block(nir_if_last_else_node(if_stmt)); ++ src1->src = nir_src_for_ssa(else_dest); ++ exec_list_push_tail(&phi->srcs, &src1->node); ++ ++ nir_builder_instr_insert(b, &phi->instr); ++ *dest = &phi->dest.ssa; ++ } ++ } ++} ++ ++static void ++emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, ++ nir_deref_var *deref, nir_deref *tail, ++ nir_ssa_def **dest, nir_ssa_def *src) ++{ ++ for (; tail->child; tail = tail->child) { ++ if (tail->child->deref_type != nir_deref_type_array) ++ continue; ++ ++ nir_deref_array *arr = nir_deref_as_array(tail->child); ++ if (arr->deref_array_type != nir_deref_array_type_indirect) ++ continue; ++ ++ int length = glsl_get_length(tail->type); ++ ++ emit_indirect_load_store(b, orig_instr, deref, tail, -arr->base_offset, ++ length - arr->base_offset, dest, src); ++ return; ++ } ++ ++ assert(tail && tail->child == NULL); ++ ++ /* We reached the end of the deref chain. 
Emit the instruction */ ++ ++ if (src == NULL) { ++ /* This is a load instruction */ ++ nir_intrinsic_instr *load = ++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); ++ load->num_components = orig_instr->num_components; ++ load->variables[0] = ++ nir_deref_as_var(nir_copy_deref(load, &deref->deref)); ++ nir_ssa_dest_init(&load->instr, &load->dest, ++ load->num_components, NULL); ++ nir_builder_instr_insert(b, &load->instr); ++ *dest = &load->dest.ssa; ++ } else { ++ /* This is a store instruction */ ++ nir_intrinsic_instr *store = ++ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); ++ store->num_components = orig_instr->num_components; ++ store->const_index[0] = orig_instr->const_index[0]; /* writemask */ ++ store->variables[0] = ++ nir_deref_as_var(nir_copy_deref(store, &deref->deref)); ++ store->src[0] = nir_src_for_ssa(src); ++ nir_builder_instr_insert(b, &store->instr); ++ } ++} ++ ++static bool ++deref_has_indirect(nir_deref_var *deref) ++{ ++ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { ++ if (tail->deref_type != nir_deref_type_array) ++ continue; ++ ++ nir_deref_array *arr = nir_deref_as_array(tail); ++ if (arr->deref_array_type == nir_deref_array_type_indirect) ++ return true; ++ } ++ ++ return false; ++} ++ ++struct lower_indirect_state { ++ nir_builder builder; ++ uint32_t mode_mask; ++ bool progress; ++}; ++ ++static bool ++lower_indirect_block(nir_block *block, void *void_state) ++{ ++ struct lower_indirect_state *state = void_state; ++ ++ nir_foreach_instr_safe(block, instr) { ++ if (instr->type != nir_instr_type_intrinsic) ++ continue; ++ ++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); ++ if (intrin->intrinsic != nir_intrinsic_load_var && ++ intrin->intrinsic != nir_intrinsic_store_var) ++ continue; ++ ++ if (!deref_has_indirect(intrin->variables[0])) ++ continue; ++ ++ /* Only lower variables whose mode is in the mask */ ++ if (!(state->mode_mask & (1 << intrin->variables[0]->var->data.mode))) ++ continue; ++ ++ state->builder.cursor = nir_before_instr(&intrin->instr); ++ ++ if (intrin->intrinsic == nir_intrinsic_load_var) { ++ nir_ssa_def *result; ++ emit_load_store(&state->builder, intrin, intrin->variables[0], ++ &intrin->variables[0]->deref, &result, NULL); ++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result)); ++ } else { ++ assert(intrin->src[0].is_ssa); ++ emit_load_store(&state->builder, intrin, intrin->variables[0], ++ &intrin->variables[0]->deref, NULL, intrin->src[0].ssa); ++ } ++ nir_instr_remove(&intrin->instr); ++ state->progress = true; ++ } ++ ++ return true; ++} ++ ++static bool ++lower_indirects_impl(nir_function_impl *impl, uint32_t mode_mask) ++{ ++ struct lower_indirect_state state; ++ ++ state.progress = false; ++ state.mode_mask = mode_mask; ++ nir_builder_init(&state.builder, impl); ++ ++ nir_foreach_block(impl, lower_indirect_block, &state); ++ ++ if (state.progress) ++ nir_metadata_preserve(impl, nir_metadata_none); ++ ++ return state.progress; ++} ++ ++/** Lowers indirect variable loads/stores to direct loads/stores. ++ * ++ * The pass works by replacing any indirect load or store with an if-ladder ++ * that does a binary search on the array index. 
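The doc comment above states the strategy: an indirect load or store becomes an if-ladder that binary-searches the array index, with a direct access at each leaf. For a load from a four-element array, the generated control flow has roughly this shape (plain C standing in for the emitted NIR; in NIR the joins are phi nodes rather than writes to a local):

static float load_arr4_indirect(const float arr[4], int i)
{
   float x;
   if (i < 2) {              /* mid = 2 for the range [0, 4) */
      if (i < 1)
         x = arr[0];         /* direct deref at the leaf */
      else
         x = arr[1];
   } else {
      if (i < 3)             /* mid = 3 for the range [2, 4) */
         x = arr[2];
      else
         x = arr[3];
   }
   return x;
}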
++ */ ++bool ++nir_lower_indirect_derefs(nir_shader *shader, uint32_t mode_mask) ++{ ++ bool progress = false; ++ ++ nir_foreach_function(shader, function) { ++ if (function->impl) ++ progress = lower_indirects_impl(function->impl, mode_mask) || progress; ++ } ++ ++ return progress; ++} diff --cc src/compiler/nir/nir_lower_io.c index 00000000000,80c5151f0ea..2c5fa16af5e mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@@ -1,0 -1,350 +1,461 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + + /* + * This lowering pass converts references to input/output variables with + * loads/stores to actual input/output intrinsics. + */ + + #include "nir.h" + #include "nir_builder.h" + + struct lower_io_state { + nir_builder builder; + void *mem_ctx; + int (*type_size)(const struct glsl_type *type); + nir_variable_mode mode; + }; + + void + nir_assign_var_locations(struct exec_list *var_list, unsigned *size, + int (*type_size)(const struct glsl_type *)) + { + unsigned location = 0; + + nir_foreach_variable(var, var_list) { + /* + * UBO's have their own address spaces, so don't count them towards the + * number of global uniforms + */ + if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) && + var->interface_type != NULL) + continue; + + var->data.driver_location = location; + location += type_size(var->type); + } + + *size = location; + } + + /** + * Returns true if we're processing a stage whose inputs are arrays indexed + * by a vertex number (such as geometry shader inputs). 
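nir_assign_var_locations above packs driver locations by walking the variable list, giving each variable the running total and then advancing the total by the variable's size as reported by the type_size callback (interface-block uniforms and SSBOs are skipped because their blocks live in their own address space). The same bookkeeping in isolation, with hypothetical names and sizes:

struct fake_var { unsigned size_in_slots; unsigned driver_location; };

static unsigned assign_locations(struct fake_var *vars, unsigned count)
{
   unsigned location = 0;
   for (unsigned i = 0; i < count; i++) {
      vars[i].driver_location = location;
      location += vars[i].size_in_slots;
   }
   return location;   /* the pass hands this back through *size */
}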
+ */ + static bool + is_per_vertex_input(struct lower_io_state *state, nir_variable *var) + { + gl_shader_stage stage = state->builder.shader->stage; + + return var->data.mode == nir_var_shader_in && !var->data.patch && + (stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || + stage == MESA_SHADER_GEOMETRY); + } + + static bool + is_per_vertex_output(struct lower_io_state *state, nir_variable *var) + { + gl_shader_stage stage = state->builder.shader->stage; + return var->data.mode == nir_var_shader_out && !var->data.patch && + stage == MESA_SHADER_TESS_CTRL; + } + + static nir_ssa_def * + get_io_offset(nir_builder *b, nir_deref_var *deref, + nir_ssa_def **vertex_index, + int (*type_size)(const struct glsl_type *)) + { + nir_deref *tail = &deref->deref; + + /* For per-vertex input arrays (i.e. geometry shader inputs), keep the + * outermost array index separate. Process the rest normally. + */ + if (vertex_index != NULL) { + tail = tail->child; + assert(tail->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(tail); + + nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1)); + } + *vertex_index = vtx; + } + + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); + + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *deref_array = nir_deref_as_array(tail); + unsigned size = type_size(tail->type); + + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, size), + nir_ssa_for_src(b, deref_array->indirect, 1)); + + offset = nir_iadd(b, offset, mul); + } + } else if (tail->deref_type == nir_deref_type_struct) { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + unsigned field_offset = 0; + for (unsigned i = 0; i < deref_struct->index; i++) { + field_offset += type_size(glsl_get_struct_field(parent_type, i)); + } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); + } + } + + return offset; + } + + static nir_intrinsic_op + load_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex) + { + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; + break; + case nir_var_shader_out: + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; + break; + case nir_var_uniform: + op = nir_intrinsic_load_uniform; + break; ++ case nir_var_shared: ++ op = nir_intrinsic_load_shared; ++ break; + default: + unreachable("Unknown variable mode"); + } + return op; + } + ++static nir_intrinsic_op ++store_op(struct lower_io_state *state, ++ nir_variable_mode mode, bool per_vertex) ++{ ++ nir_intrinsic_op op; ++ switch (mode) { ++ case nir_var_shader_in: ++ case nir_var_shader_out: ++ op = per_vertex ? 
nir_intrinsic_store_per_vertex_output : ++ nir_intrinsic_store_output; ++ break; ++ case nir_var_shared: ++ op = nir_intrinsic_store_shared; ++ break; ++ default: ++ unreachable("Unknown variable mode"); ++ } ++ return op; ++} ++ ++static nir_intrinsic_op ++atomic_op(nir_intrinsic_op opcode) ++{ ++ switch (opcode) { ++#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O; ++ OP(atomic_exchange) ++ OP(atomic_comp_swap) ++ OP(atomic_add) ++ OP(atomic_imin) ++ OP(atomic_umin) ++ OP(atomic_imax) ++ OP(atomic_umax) ++ OP(atomic_and) ++ OP(atomic_or) ++ OP(atomic_xor) ++#undef OP ++ default: ++ unreachable("Invalid atomic"); ++ } ++} ++ + static bool + nir_lower_io_block(nir_block *block, void *void_state) + { + struct lower_io_state *state = void_state; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + - if (intrin->intrinsic != nir_intrinsic_load_var && - intrin->intrinsic != nir_intrinsic_store_var) ++ switch (intrin->intrinsic) { ++ case nir_intrinsic_load_var: ++ case nir_intrinsic_store_var: ++ case nir_intrinsic_var_atomic_add: ++ case nir_intrinsic_var_atomic_imin: ++ case nir_intrinsic_var_atomic_umin: ++ case nir_intrinsic_var_atomic_imax: ++ case nir_intrinsic_var_atomic_umax: ++ case nir_intrinsic_var_atomic_and: ++ case nir_intrinsic_var_atomic_or: ++ case nir_intrinsic_var_atomic_xor: ++ case nir_intrinsic_var_atomic_exchange: ++ case nir_intrinsic_var_atomic_comp_swap: ++ /* We can lower the io for this nir instrinsic */ ++ break; ++ default: ++ /* We can't lower the io for this nir instrinsic, so skip it */ + continue; ++ } + + nir_variable_mode mode = intrin->variables[0]->var->data.mode; + + if (state->mode != nir_var_all && state->mode != mode) + continue; + + if (mode != nir_var_shader_in && + mode != nir_var_shader_out && ++ mode != nir_var_shared && + mode != nir_var_uniform) + continue; + + b->cursor = nir_before_instr(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + bool per_vertex = + is_per_vertex_input(state, intrin->variables[0]->var) || + is_per_vertex_output(state, intrin->variables[0]->var); + + nir_ssa_def *offset; + nir_ssa_def *vertex_index; + + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(state->mem_ctx, + load_op(state, mode, per_vertex)); + load->num_components = intrin->num_components; + + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; + ++ if (load->intrinsic == nir_intrinsic_load_uniform) { ++ load->const_index[1] = ++ state->type_size(intrin->variables[0]->var->type); ++ } ++ + if (per_vertex) + load->src[0] = nir_src_for_ssa(vertex_index); + + load->src[per_vertex ? 
1 : 0] = nir_src_for_ssa(offset); + + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&load->instr, &load->dest, + intrin->num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + } else { + nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx); + } + + nir_instr_insert_before(&intrin->instr, &load->instr); + nir_instr_remove(&intrin->instr); + break; + } + + case nir_intrinsic_store_var: { - assert(mode == nir_var_shader_out); ++ assert(mode == nir_var_shader_out || mode == nir_var_shared); + + nir_ssa_def *offset; + nir_ssa_def *vertex_index; + + bool per_vertex = + is_per_vertex_output(state, intrin->variables[0]->var); + + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); + - nir_intrinsic_op store_op = - per_vertex ? nir_intrinsic_store_per_vertex_output : - nir_intrinsic_store_output; - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, - store_op); ++ nir_intrinsic_instr *store = ++ nir_intrinsic_instr_create(state->mem_ctx, ++ store_op(state, mode, per_vertex)); + store->num_components = intrin->num_components; + + nir_src_copy(&store->src[0], &intrin->src[0], store); + + store->const_index[0] = + intrin->variables[0]->var->data.driver_location; + + /* Copy the writemask */ + store->const_index[1] = intrin->const_index[0]; + + if (per_vertex) + store->src[1] = nir_src_for_ssa(vertex_index); + + store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); + + nir_instr_insert_before(&intrin->instr, &store->instr); + nir_instr_remove(&intrin->instr); + break; + } + ++ case nir_intrinsic_var_atomic_add: ++ case nir_intrinsic_var_atomic_imin: ++ case nir_intrinsic_var_atomic_umin: ++ case nir_intrinsic_var_atomic_imax: ++ case nir_intrinsic_var_atomic_umax: ++ case nir_intrinsic_var_atomic_and: ++ case nir_intrinsic_var_atomic_or: ++ case nir_intrinsic_var_atomic_xor: ++ case nir_intrinsic_var_atomic_exchange: ++ case nir_intrinsic_var_atomic_comp_swap: { ++ assert(mode == nir_var_shared); ++ ++ nir_ssa_def *offset; ++ ++ offset = get_io_offset(b, intrin->variables[0], ++ NULL, state->type_size); ++ ++ nir_intrinsic_instr *atomic = ++ nir_intrinsic_instr_create(state->mem_ctx, ++ atomic_op(intrin->intrinsic)); ++ ++ atomic->src[0] = nir_src_for_ssa(offset); ++ ++ atomic->const_index[0] = ++ intrin->variables[0]->var->data.driver_location; ++ ++ nir_src_copy(&atomic->src[1], &intrin->src[0], atomic); ++ ++ if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap) ++ nir_src_copy(&atomic->src[2], &intrin->src[1], atomic); ++ ++ if (intrin->dest.is_ssa) { ++ nir_ssa_dest_init(&atomic->instr, &atomic->dest, ++ intrin->dest.ssa.num_components, NULL); ++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, ++ nir_src_for_ssa(&atomic->dest.ssa)); ++ } else { ++ nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx); ++ } ++ ++ nir_instr_insert_before(&intrin->instr, &atomic->instr); ++ nir_instr_remove(&intrin->instr); ++ break; ++ } ++ + default: + break; + } + } + + return true; + } + + static void + nir_lower_io_impl(nir_function_impl *impl, + nir_variable_mode mode, + int (*type_size)(const struct glsl_type *)) + { + struct lower_io_state state; + + nir_builder_init(&state.builder, impl); + state.mem_ctx = ralloc_parent(impl); + state.mode = mode; + state.type_size = type_size; + + nir_foreach_block(impl, nir_lower_io_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + void + nir_lower_io(nir_shader 
*shader, nir_variable_mode mode, + int (*type_size)(const struct glsl_type *)) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_io_impl(function->impl, mode, type_size); + } + } + + /** + * Return the offset soruce for a load/store intrinsic. + */ + nir_src * + nir_get_io_offset_src(nir_intrinsic_instr *instr) + { + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_output: + case nir_intrinsic_load_uniform: + return &instr->src[0]; ++ case nir_intrinsic_load_ubo: ++ case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: + return &instr->src[1]; ++ case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_per_vertex_output: + return &instr->src[2]; + default: + return NULL; + } + } + + /** + * Return the vertex index source for a load/store per_vertex intrinsic. + */ + nir_src * + nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) + { + switch (instr->intrinsic) { + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + return &instr->src[0]; + case nir_intrinsic_store_per_vertex_output: + return &instr->src[1]; + default: + return NULL; + } + } diff --cc src/compiler/nir/nir_lower_outputs_to_temporaries.c index 00000000000,71b06b81fcc..00ac09114cf mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_outputs_to_temporaries.c +++ b/src/compiler/nir/nir_lower_outputs_to_temporaries.c @@@ -1,0 -1,133 +1,136 @@@ + /* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + /* + * Implements a pass that lowers output variables to a temporary plus an + * output variable with a single copy at each exit point of the shader. + * This way the output variable is only ever written. + * + * Because valid NIR requires that output variables are never read, this + * pass is more of a helper for NIR producers and must be run before the + * shader is ever validated. 
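The pass described above splits every output into a write-only output variable plus a global temporary that takes over all of the shader's reads and writes, with a copy from temporary to output at each exit point. In plain C terms the transformation looks roughly like this (out_color, out_color_temp and compute are stand-ins, not Mesa names):

static float out_color;        /* the fresh output: written exactly once */
static float out_color_temp;   /* the old variable, now the "@out-temp" global */

static float compute(int cond) { return cond ? 2.0f : 1.0f; }

static void shader_main(int cond)
{
   out_color_temp = compute(cond);
   if (cond)
      out_color_temp *= 0.5f;         /* reads of the temp stay legal */

   out_color = out_color_temp;        /* copy emitted at the exit point */
}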
+ */ + + #include "nir.h" + + struct lower_outputs_state { + nir_shader *shader; + struct exec_list old_outputs; + }; + + static void + emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state) + { + assert(exec_list_length(&state->shader->outputs) == + exec_list_length(&state->old_outputs)); + + foreach_two_lists(out_node, &state->shader->outputs, + temp_node, &state->old_outputs) { + nir_variable *output = exec_node_data(nir_variable, out_node, node); + nir_variable *temp = exec_node_data(nir_variable, temp_node, node); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_var_create(copy, output); + copy->variables[1] = nir_deref_var_create(copy, temp); + + nir_instr_insert(cursor, ©->instr); + } + } + + static bool + emit_output_copies_block(nir_block *block, void *state) + { + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_emit_vertex) + emit_output_copies(nir_before_instr(&intrin->instr), state); + } + + return true; + } + + void -nir_lower_outputs_to_temporaries(nir_shader *shader) ++nir_lower_outputs_to_temporaries(nir_shader *shader, nir_function *entrypoint) + { + struct lower_outputs_state state; + + if (shader->stage == MESA_SHADER_TESS_CTRL) + return; + + state.shader = shader; + exec_list_move_nodes_to(&shader->outputs, &state.old_outputs); + + /* Walk over all of the outputs turn each output into a temporary and + * make a new variable for the actual output. + */ + nir_foreach_variable(var, &state.old_outputs) { + nir_variable *output = ralloc(shader, nir_variable); + memcpy(output, var, sizeof *output); + + /* The orignal is now the temporary */ + nir_variable *temp = var; + + /* Reparent the name to the new variable */ + ralloc_steal(output, output->name); + ++ /* Reparent the constant initializer (if any) */ ++ ralloc_steal(output, output->constant_initializer); ++ + /* Give the output a new name with @out-temp appended */ + temp->name = ralloc_asprintf(var, "%s@out-temp", output->name); + temp->data.mode = nir_var_global; + temp->constant_initializer = NULL; + + exec_list_push_tail(&shader->outputs, &output->node); + } + + nir_foreach_function(shader, function) { + if (function->impl == NULL) + continue; + + if (shader->stage == MESA_SHADER_GEOMETRY) { + /* For geometry shaders, we have to emit the output copies right + * before each EmitVertex call. + */ + nir_foreach_block(function->impl, emit_output_copies_block, &state); - } else if (strcmp(function->name, "main") == 0) { ++ } else if (function == entrypoint) { + /* For all other shader types, we need to do the copies right before + * the jumps to the end block. 
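emit_output_copies above depends on the two variable lists staying in lockstep: the n-th entry of shader->outputs pairs with the n-th saved old output, and one copy_var intrinsic is emitted per pair. A plain-C analogue of that paired walk, with illustrative names only:

static void copy_outputs(float *const outputs[], const float *const temps[],
                         unsigned count)
{
   for (unsigned i = 0; i < count; i++)
      *outputs[i] = *temps[i];   /* one copy per output/temp pair */
}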
+ */ + struct set_entry *block_entry; + set_foreach(function->impl->end_block->predecessors, block_entry) { + struct nir_block *block = (void *)block_entry->key; + emit_output_copies(nir_after_block_before_jump(block), &state); + } + } + + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + exec_list_append(&shader->globals, &state.old_outputs); + } diff --cc src/compiler/nir/nir_lower_returns.c index 00000000000,00000000000..91bb2f7dfeb new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_lower_returns.c @@@ -1,0 -1,0 +1,246 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "nir.h" ++#include "nir_builder.h" ++#include "nir_control_flow.h" ++ ++struct lower_returns_state { ++ nir_builder builder; ++ struct exec_list *cf_list; ++ nir_loop *loop; ++ nir_variable *return_flag; ++}; ++ ++static bool lower_returns_in_cf_list(struct exec_list *cf_list, ++ struct lower_returns_state *state); ++ ++static void ++predicate_following(nir_cf_node *node, struct lower_returns_state *state) ++{ ++ nir_builder *b = &state->builder; ++ b->cursor = nir_after_cf_node_and_phis(node); ++ ++ if (nir_cursors_equal(b->cursor, nir_after_cf_list(state->cf_list))) ++ return; /* Nothing to predicate */ ++ ++ assert(state->return_flag); ++ ++ nir_if *if_stmt = nir_if_create(b->shader); ++ if_stmt->condition = nir_src_for_ssa(nir_load_var(b, state->return_flag)); ++ nir_cf_node_insert(b->cursor, &if_stmt->cf_node); ++ ++ if (state->loop) { ++ /* If we're inside of a loop, then all we need to do is insert a ++ * conditional break. ++ */ ++ nir_jump_instr *brk = ++ nir_jump_instr_create(state->builder.shader, nir_jump_break); ++ nir_instr_insert(nir_before_cf_list(&if_stmt->then_list), &brk->instr); ++ } else { ++ /* Otherwise, we need to actually move everything into the else case ++ * of the if statement. 
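predicate_following, described above, is the core of nir_lower_returns outside of loops: the return itself becomes "set the return flag", and everything that used to follow it is moved into the else branch of an if on that flag. A before/after sketch in plain C, with do_rest standing in for whatever followed the return:

#include <stdbool.h>

static void do_rest(void) { /* code that followed the return */ }

/* before lowering */
static void f_before(bool early)
{
   if (early)
      return;
   do_rest();
}

/* after lowering: return_flag is the local the pass creates, defaulted
 * to false, and the trailing code sits in the else of an if on it.
 */
static void f_after(bool early)
{
   bool return_flag = false;
   if (early)
      return_flag = true;

   if (return_flag) {
      /* nothing left to do: the "return" already happened */
   } else {
      do_rest();               /* moved here by predicate_following() */
   }
}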
++ */ ++ nir_cf_list list; ++ nir_cf_extract(&list, nir_after_cf_node(&if_stmt->cf_node), ++ nir_after_cf_list(state->cf_list)); ++ assert(!exec_list_is_empty(&list.list)); ++ nir_cf_reinsert(&list, nir_before_cf_list(&if_stmt->else_list)); ++ } ++} ++ ++static bool ++lower_returns_in_loop(nir_loop *loop, struct lower_returns_state *state) ++{ ++ nir_loop *parent = state->loop; ++ state->loop = loop; ++ bool progress = lower_returns_in_cf_list(&loop->body, state); ++ state->loop = parent; ++ ++ /* If the recursive call made progress, then there were returns inside ++ * of the loop. These would have been lowered to breaks with the return ++ * flag set to true. We need to predicate everything following the loop ++ * on the return flag. ++ */ ++ if (progress) ++ predicate_following(&loop->cf_node, state); ++ ++ return progress; ++} ++ ++static bool ++lower_returns_in_if(nir_if *if_stmt, struct lower_returns_state *state) ++{ ++ bool progress; ++ ++ progress = lower_returns_in_cf_list(&if_stmt->then_list, state); ++ progress = lower_returns_in_cf_list(&if_stmt->else_list, state) || progress; ++ ++ /* If either of the recursive calls made progress, then there were ++ * returns inside of the body of the if. If we're in a loop, then these ++ * were lowered to breaks which automatically skip to the end of the ++ * loop so we don't have to do anything. If we're not in a loop, then ++ * all we know is that the return flag is set appropreately and that the ++ * recursive calls ensured that nothing gets executed *inside* the if ++ * after a return. In order to ensure nothing outside gets executed ++ * after a return, we need to predicate everything following on the ++ * return flag. ++ */ ++ if (progress && !state->loop) ++ predicate_following(&if_stmt->cf_node, state); ++ ++ return progress; ++} ++ ++static bool ++lower_returns_in_block(nir_block *block, struct lower_returns_state *state) ++{ ++ if (block->predecessors->entries == 0 && ++ block != nir_start_block(state->builder.impl)) { ++ /* This block is unreachable. Delete it and everything after it. */ ++ nir_cf_list list; ++ nir_cf_extract(&list, nir_before_cf_node(&block->cf_node), ++ nir_after_cf_list(state->cf_list)); ++ ++ if (exec_list_is_empty(&list.list)) { ++ /* There's nothing here, which also means there's nothing in this ++ * block so we have nothing to do. ++ */ ++ return false; ++ } else { ++ nir_cf_delete(&list); ++ return true; ++ } ++ } ++ ++ nir_instr *last_instr = nir_block_last_instr(block); ++ if (last_instr == NULL) ++ return false; ++ ++ if (last_instr->type != nir_instr_type_jump) ++ return false; ++ ++ nir_jump_instr *jump = nir_instr_as_jump(last_instr); ++ if (jump->type != nir_jump_return) ++ return false; ++ ++ nir_instr_remove(&jump->instr); ++ ++ nir_builder *b = &state->builder; ++ b->cursor = nir_after_block(block); ++ ++ /* Set the return flag */ ++ if (state->return_flag == NULL) { ++ state->return_flag = ++ nir_local_variable_create(b->impl, glsl_bool_type(), "return"); ++ ++ /* Set a default value of false */ ++ state->return_flag->constant_initializer = ++ rzalloc(state->return_flag, nir_constant); ++ } ++ nir_store_var(b, state->return_flag, nir_imm_int(b, NIR_TRUE), 1); ++ ++ if (state->loop) { ++ /* We're in a loop; we need to break out of it. 
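Inside a loop the pass takes the cheaper route the comments above describe: the return sets the flag and turns into a break, and it is the code after the loop that gets predicated on the flag. Roughly, in plain C (loop_body and after_the_loop are stand-ins):

#include <stdbool.h>

static void loop_body(int i)     { (void)i; }
static void after_the_loop(void) { }

static void g_after(int n, const bool *bail)
{
   bool return_flag = false;

   for (int i = 0; i < n; i++) {
      if (bail[i]) {            /* was: return; */
         return_flag = true;
         break;                 /* lowered to a break out of the loop */
      }
      loop_body(i);
   }

   if (!return_flag)            /* predicate_following() on the loop node */
      after_the_loop();
}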
*/ ++ nir_jump(b, nir_jump_break); ++ } else { ++ /* Not in a loop; we'll deal with predicating later*/ ++ assert(nir_cf_node_next(&block->cf_node) == NULL); ++ } ++ ++ return true; ++} ++ ++static bool ++lower_returns_in_cf_list(struct exec_list *cf_list, ++ struct lower_returns_state *state) ++{ ++ bool progress = false; ++ ++ struct exec_list *parent_list = state->cf_list; ++ state->cf_list = cf_list; ++ ++ /* We iterate over the list backwards because any given lower call may ++ * take everything following the given CF node and predicate it. In ++ * order to avoid recursion/iteration problems, we want everything after ++ * a given node to already be lowered before this happens. ++ */ ++ foreach_list_typed_reverse_safe(nir_cf_node, node, node, cf_list) { ++ switch (node->type) { ++ case nir_cf_node_block: ++ if (lower_returns_in_block(nir_cf_node_as_block(node), state)) ++ progress = true; ++ break; ++ ++ case nir_cf_node_if: ++ if (lower_returns_in_if(nir_cf_node_as_if(node), state)) ++ progress = true; ++ break; ++ ++ case nir_cf_node_loop: ++ if (lower_returns_in_loop(nir_cf_node_as_loop(node), state)) ++ progress = true; ++ break; ++ ++ default: ++ unreachable("Invalid inner CF node type"); ++ } ++ } ++ ++ state->cf_list = parent_list; ++ ++ return progress; ++} ++ ++bool ++nir_lower_returns_impl(nir_function_impl *impl) ++{ ++ struct lower_returns_state state; ++ ++ state.cf_list = &impl->body; ++ state.loop = NULL; ++ state.return_flag = NULL; ++ nir_builder_init(&state.builder, impl); ++ ++ bool progress = lower_returns_in_cf_list(&impl->body, &state); ++ ++ if (progress) { ++ nir_metadata_preserve(impl, nir_metadata_none); ++ nir_repair_ssa_impl(impl); ++ } ++ ++ return progress; ++} ++ ++bool ++nir_lower_returns(nir_shader *shader) ++{ ++ bool progress = false; ++ ++ nir_foreach_function(shader, function) { ++ if (function->impl) ++ progress = nir_lower_returns_impl(function->impl) || progress; ++ } ++ ++ return progress; ++} diff --cc src/compiler/nir/nir_lower_samplers.c index 00000000000,96e82914014..29654136aee mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_samplers.c +++ b/src/compiler/nir/nir_lower_samplers.c @@@ -1,0 -1,187 +1,198 @@@ + /* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
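One small but deliberate idiom in nir_lower_returns above, and in the other per-function drivers in these files, is writing "progress = lower(...) || progress" rather than "progress || lower(...)": with the operands swapped, C's short-circuiting "||" would stop invoking the lowering once any function had reported progress. A tiny demonstration:

#include <stdbool.h>

static bool do_lower(int *calls) { (*calls)++; return true; }

static void demo(void)
{
   int calls = 0;
   bool progress = false;
   for (int i = 0; i < 3; i++)
      progress = do_lower(&calls) || progress;   /* calls == 3 */

   calls = 0;
   progress = false;
   for (int i = 0; i < 3; i++)
      progress = progress || do_lower(&calls);   /* calls == 1: rest skipped */
   (void)progress;
}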
+ */ + + #include "nir.h" + #include "nir_builder.h" + #include "program/hash_table.h" + #include "compiler/glsl/ir_uniform.h" + + #include "main/compiler.h" + #include "main/mtypes.h" + #include "program/prog_parameter.h" + #include "program/program.h" + + /* Calculate the sampler index based on array indicies and also + * calculate the base uniform location for struct members. + */ + static void + calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr, + unsigned *array_elements, nir_ssa_def **indirect, + nir_builder *b, unsigned *location) + { + if (tail->child == NULL) + return; + + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + instr->sampler_index += deref_array->base_offset * *array_elements; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, *array_elements), + nir_ssa_for_src(b, deref_array->indirect, 1)); + + nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, + NIR_SRC_INIT); + + if (*indirect) { + *indirect = nir_iadd(b, *indirect, mul); + } else { + *indirect = mul; + } + } + + *array_elements *= glsl_get_length(tail->type); + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child); + *location += glsl_get_record_location_offset(tail->type, deref_struct->index); + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + break; + } + + default: + unreachable("Invalid deref type"); + break; + } + } + + static void + lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, nir_builder *builder) + { + if (instr->sampler == NULL) + return; + ++ /* GLSL only has combined textures/samplers */ ++ assert(instr->texture == NULL); ++ + instr->sampler_index = 0; + unsigned location = instr->sampler->var->data.location; + unsigned array_elements = 1; + nir_ssa_def *indirect = NULL; + + builder->cursor = nir_before_instr(&instr->instr); + calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements, + &indirect, builder, &location); + + if (indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); ++ instr->num_srcs + 2); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. 
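calc_sampler_offsets above flattens an array-of-arrays sampler deref into a single sampler index: each direct array index contributes its value times the number of samplers in the remaining dimensions, and a non-constant index contributes the same product at run time through the imul/iadd chain it builds. For a hypothetical "sampler2D s[4][3]" accessed as s[i][j], the constant part of that arithmetic is just:

/* Flat index of s[i][j] for sampler2D s[4][3]; base_index is where the
 * variable's samplers start.  Illustrative arithmetic only.
 */
static unsigned flat_sampler_index(unsigned base_index,
                                   unsigned i, unsigned j)
{
   return base_index + i * 3 + j;   /* 3 samplers per step of i */
}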
+ */ ++ instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset; ++ instr->num_srcs++; ++ nir_instr_rewrite_src(&instr->instr, ++ &instr->src[instr->num_srcs - 1].src, ++ nir_src_for_ssa(indirect)); ++ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + - instr->sampler_array_size = array_elements; ++ instr->texture_array_size = array_elements; + } + + if (location > shader_program->NumUniformStorage - 1 || + !shader_program->UniformStorage[location].opaque[stage].active) { + assert(!"cannot return a sampler"); + return; + } + + instr->sampler_index += + shader_program->UniformStorage[location].opaque[stage].index; + + instr->sampler = NULL; ++ ++ instr->texture_index = instr->sampler_index; + } + + typedef struct { + nir_builder builder; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; + } lower_state; + + static bool + lower_block_cb(nir_block *block, void *_state) + { + lower_state *state = (lower_state *) _state; + + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + lower_sampler(tex_instr, state->shader_program, state->stage, + &state->builder); + } + } + + return true; + } + + static void + lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) + { + lower_state state; + + nir_builder_init(&state.builder, impl); + state.shader_program = shader_program; + state.stage = stage; + + nir_foreach_block(impl, lower_block_cb, &state); + } + + void + nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program) + { + nir_foreach_function(shader, function) { + if (function->impl) + lower_impl(function->impl, shader_program, shader->stage); + } + } diff --cc src/compiler/nir/nir_lower_system_values.c index 00000000000,2bd787d3574..79f6bedc990 mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@@ -1,0 -1,98 +1,166 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + #include "nir_builder.h" + + struct lower_system_values_state { + nir_builder builder; + bool progress; + }; + + static bool + convert_block(nir_block *block, void *void_state) + { + struct lower_system_values_state *state = void_state; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr); + + if (load_var->intrinsic != nir_intrinsic_load_var) + continue; + + nir_variable *var = load_var->variables[0]->var; + if (var->data.mode != nir_var_system_value) + continue; + + b->cursor = nir_after_instr(&load_var->instr); + - nir_intrinsic_op sysval_op = - nir_intrinsic_from_system_value(var->data.location); - nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); ++ nir_ssa_def *sysval; ++ switch (var->data.location) { ++ case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: { ++ /* From the GLSL man page for gl_GlobalInvocationID: ++ * ++ * "The value of gl_GlobalInvocationID is equal to ++ * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" ++ */ ++ ++ nir_const_value local_size; ++ local_size.u[0] = b->shader->info.cs.local_size[0]; ++ local_size.u[1] = b->shader->info.cs.local_size[1]; ++ local_size.u[2] = b->shader->info.cs.local_size[2]; ++ ++ nir_ssa_def *group_id = ++ nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0); ++ nir_ssa_def *local_id = ++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); ++ ++ sysval = nir_iadd(b, nir_imul(b, group_id, ++ nir_build_imm(b, 3, local_size)), ++ local_id); ++ break; ++ } ++ ++ case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: { ++ /* From the GLSL man page for gl_LocalInvocationIndex: ++ * ++ * ?The value of gl_LocalInvocationIndex is equal to ++ * gl_LocalInvocationID.z * gl_WorkGroupSize.x * ++ * gl_WorkGroupSize.y + gl_LocalInvocationID.y * ++ * gl_WorkGroupSize.x + gl_LocalInvocationID.x" ++ */ ++ nir_ssa_def *local_id = ++ nir_load_system_value(b, nir_intrinsic_load_local_invocation_id, 0); ++ ++ unsigned stride_y = b->shader->info.cs.local_size[0]; ++ unsigned stride_z = b->shader->info.cs.local_size[0] * ++ b->shader->info.cs.local_size[1]; ++ ++ sysval = nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 2), ++ nir_imm_int(b, stride_z)), ++ nir_iadd(b, nir_imul(b, nir_channel(b, local_id, 1), ++ nir_imm_int(b, stride_y)), ++ nir_channel(b, local_id, 0))); ++ break; ++ } ++ ++ case SYSTEM_VALUE_VERTEX_ID: ++ if (b->shader->options->vertex_id_zero_based) { ++ sysval = nir_iadd(b, ++ nir_load_system_value(b, nir_intrinsic_load_vertex_id_zero_base, 0), ++ nir_load_system_value(b, nir_intrinsic_load_base_vertex, 0)); ++ } else { ++ sysval = nir_load_system_value(b, nir_intrinsic_load_vertex_id, 0); ++ } ++ break; ++ ++ case SYSTEM_VALUE_INSTANCE_INDEX: ++ sysval = nir_iadd(b, ++ nir_load_system_value(b, nir_intrinsic_load_instance_id, 0), ++ nir_load_system_value(b, nir_intrinsic_load_base_instance, 0)); ++ break; ++ ++ default: { ++ nir_intrinsic_op sysval_op = ++ nir_intrinsic_from_system_value(var->data.location); ++ sysval = nir_load_system_value(b, sysval_op, 0); ++ break; ++ } /* default */ ++ } + + nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); + nir_instr_remove(&load_var->instr); + + state->progress = true; + } + + return true; + } + + static bool + convert_impl(nir_function_impl *impl) + { + struct lower_system_values_state state; + + state.progress = 
false; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, convert_block, &state); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + return state.progress; + } + + bool + nir_lower_system_values(nir_shader *shader) + { + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = convert_impl(function->impl) || progress; + } + + exec_list_make_empty(&shader->system_values); + + return progress; + } diff --cc src/compiler/nir/nir_lower_vars_to_ssa.c index 00000000000,75d31ff60af..e1f368d2f2b mode 000000,100644..100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@@ -1,0 -1,973 +1,751 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + + #include "nir.h" + #include "nir_builder.h" ++#include "nir_phi_builder.h" + #include "nir_vla.h" + + + struct deref_node { + struct deref_node *parent; + const struct glsl_type *type; + + bool lower_to_ssa; + + /* Only valid for things that end up in the direct list. + * Note that multiple nir_deref_vars may correspond to this node, but they + * will all be equivalent, so any is as good as the other. + */ + nir_deref_var *deref; + struct exec_node direct_derefs_link; + + struct set *loads; + struct set *stores; + struct set *copies; + - nir_ssa_def **def_stack; - nir_ssa_def **def_stack_tail; ++ struct nir_phi_builder_value *pb_value; + + struct deref_node *wildcard; + struct deref_node *indirect; + struct deref_node *children[0]; + }; + + struct lower_variables_state { + nir_shader *shader; + void *dead_ctx; + nir_function_impl *impl; + + /* A hash table mapping variables to deref_node data */ + struct hash_table *deref_var_nodes; + + /* A hash table mapping fully-qualified direct dereferences, i.e. + * dereferences with no indirect or wildcard array dereferences, to + * deref_node data. + * + * At the moment, we only lower loads, stores, and copies that can be + * trivially lowered to loads and stores, i.e. copies with no indirects + * and no wildcards. If a part of a variable that is being loaded from + * and/or stored into is also involved in a copy operation with + * wildcards, then we lower that copy operation to loads and stores, but + * otherwise we leave copies with wildcards alone. 
Since the only derefs + * used in these loads, stores, and trivial copies are ones with no + * wildcards and no indirects, these are precisely the derefs that we + * can actually consider lowering. + */ + struct exec_list direct_deref_nodes; + + /* Controls whether get_deref_node will add variables to the + * direct_deref_nodes table. This is turned on when we are initially + * scanning for load/store instructions. It is then turned off so we + * don't accidentally change the direct_deref_nodes table while we're + * iterating throug it. + */ + bool add_to_direct_deref_nodes; + - /* A hash table mapping phi nodes to deref_state data */ - struct hash_table *phi_table; ++ struct nir_phi_builder *phi_builder; + }; + + static struct deref_node * + deref_node_create(struct deref_node *parent, + const struct glsl_type *type, nir_shader *shader) + { + size_t size = sizeof(struct deref_node) + + glsl_get_length(type) * sizeof(struct deref_node *); + + struct deref_node *node = rzalloc_size(shader, size); + node->type = type; + node->parent = parent; + node->deref = NULL; + exec_node_init(&node->direct_derefs_link); + + return node; + } + + /* Returns the deref node associated with the given variable. This will be + * the root of the tree representing all of the derefs of the given variable. + */ + static struct deref_node * + get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state) + { + struct deref_node *node; + + struct hash_entry *var_entry = + _mesa_hash_table_search(state->deref_var_nodes, var); + + if (var_entry) { + return var_entry->data; + } else { + node = deref_node_create(NULL, var->type, state->dead_ctx); + _mesa_hash_table_insert(state->deref_var_nodes, var, node); + return node; + } + } + + /* Gets the deref_node for the given deref chain and creates it if it + * doesn't yet exist. If the deref is fully-qualified and direct and + * state->add_to_direct_deref_nodes is true, it will be added to the hash + * table of of fully-qualified direct derefs. + */ + static struct deref_node * + get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) + { + bool is_direct = true; + + /* Start at the base of the chain. */ + struct deref_node *node = get_deref_node_for_var(deref->var, state); + assert(deref->deref.type == node->type); + + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + switch (tail->deref_type) { + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + assert(deref_struct->index < glsl_get_length(node->type)); + + if (node->children[deref_struct->index] == NULL) + node->children[deref_struct->index] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[deref_struct->index]; + break; + } + + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail); + + switch (arr->deref_array_type) { + case nir_deref_array_type_direct: + /* This is possible if a loop unrolls and generates an + * out-of-bounds offset. We need to handle this at least + * somewhat gracefully. 
+ */ + if (arr->base_offset >= glsl_get_length(node->type)) + return NULL; + + if (node->children[arr->base_offset] == NULL) + node->children[arr->base_offset] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[arr->base_offset]; + break; + + case nir_deref_array_type_indirect: + if (node->indirect == NULL) + node->indirect = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->indirect; + is_direct = false; + break; + + case nir_deref_array_type_wildcard: + if (node->wildcard == NULL) + node->wildcard = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->wildcard; + is_direct = false; + break; + + default: + unreachable("Invalid array deref type"); + } + break; + } + default: + unreachable("Invalid deref type"); + } + } + + assert(node); + + /* Only insert if it isn't already in the list. */ + if (is_direct && state->add_to_direct_deref_nodes && + node->direct_derefs_link.next == NULL) { + node->deref = deref; + assert(deref->var != NULL); + exec_list_push_tail(&state->direct_deref_nodes, + &node->direct_derefs_link); + } + + return node; + } + + /* \sa foreach_deref_node_match */ + static bool + foreach_deref_node_worker(struct deref_node *node, nir_deref *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) + { + if (deref->child == NULL) { + return cb(node, state); + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (node->children[arr->base_offset] && + !foreach_deref_node_worker(node->children[arr->base_offset], + deref->child, cb, state)) + return false; + + if (node->wildcard && + !foreach_deref_node_worker(node->wildcard, + deref->child, cb, state)) + return false; + + return true; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + return foreach_deref_node_worker(node->children[str->index], + deref->child, cb, state); + } + + default: + unreachable("Invalid deref child type"); + } + } + } + + /* Walks over every "matching" deref_node and calls the callback. A node + * is considered to "match" if either refers to that deref or matches up t + * a wildcard. In other words, the following would match a[6].foo[3].bar: + * + * a[6].foo[3].bar + * a[*].foo[3].bar + * a[6].foo[*].bar + * a[*].foo[*].bar + * + * The given deref must be a full-length and fully qualified (no wildcards + * or indirects) deref chain. + */ + static bool + foreach_deref_node_match(nir_deref_var *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) + { + nir_deref_var var_deref = *deref; + var_deref.deref.child = NULL; + struct deref_node *node = get_deref_node(&var_deref, state); + + if (node == NULL) + return false; + + return foreach_deref_node_worker(node, &deref->deref, cb, state); + } + + /* \sa deref_may_be_aliased */ + static bool + deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, + struct lower_variables_state *state) + { + if (deref->child == NULL) { + return false; + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + + /* If there is an indirect at this level, we're aliased. 
*/ + if (node->indirect) + return true; + + assert(arr->deref_array_type == nir_deref_array_type_direct); + + if (node->children[arr->base_offset] && + deref_may_be_aliased_node(node->children[arr->base_offset], + deref->child, state)) + return true; + + if (node->wildcard && + deref_may_be_aliased_node(node->wildcard, deref->child, state)) + return true; + + return false; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + if (node->children[str->index]) { + return deref_may_be_aliased_node(node->children[str->index], + deref->child, state); + } else { + return false; + } + } + + default: + unreachable("Invalid nir_deref child type"); + } + } + } + + /* Returns true if there are no indirects that can ever touch this deref. + * + * For example, if the given deref is a[6].foo, then any uses of a[i].foo + * would cause this to return false, but a[i].bar would not affect it + * because it's a different structure member. A var_copy involving of + * a[*].bar also doesn't affect it because that can be lowered to entirely + * direct load/stores. + * + * We only support asking this question about fully-qualified derefs. + * Obviously, it's pointless to ask this about indirects, but we also + * rule-out wildcards. Handling Wildcard dereferences would involve + * checking each array index to make sure that there aren't any indirect + * references. + */ + static bool + deref_may_be_aliased(nir_deref_var *deref, + struct lower_variables_state *state) + { + return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state), + &deref->deref, state); + } + + static void + register_load_instr(nir_intrinsic_instr *load_instr, + struct lower_variables_state *state) + { + struct deref_node *node = get_deref_node(load_instr->variables[0], state); + if (node == NULL) + return; + + if (node->loads == NULL) + node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->loads, load_instr); + } + + static void + register_store_instr(nir_intrinsic_instr *store_instr, + struct lower_variables_state *state) + { + struct deref_node *node = get_deref_node(store_instr->variables[0], state); + if (node == NULL) + return; + + if (node->stores == NULL) + node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->stores, store_instr); + } + + static void + register_copy_instr(nir_intrinsic_instr *copy_instr, + struct lower_variables_state *state) + { + for (unsigned idx = 0; idx < 2; idx++) { + struct deref_node *node = + get_deref_node(copy_instr->variables[idx], state); + + if (node == NULL) + continue; + + if (node->copies == NULL) + node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->copies, copy_instr); + } + } + + /* Registers all variable uses in the given block. 
*/ + static bool + register_variable_uses_block(nir_block *block, void *void_state) + { + struct lower_variables_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + register_load_instr(intrin, state); + break; + + case nir_intrinsic_store_var: + register_store_instr(intrin, state); + break; + + case nir_intrinsic_copy_var: + register_copy_instr(intrin, state); + break; + + default: + continue; + } + } + + return true; + } + + /* Walks over all of the copy instructions to or from the given deref_node + * and lowers them to load/store intrinsics. + */ + static bool + lower_copies_to_load_store(struct deref_node *node, + struct lower_variables_state *state) + { + if (!node->copies) + return true; + + struct set_entry *copy_entry; + set_foreach(node->copies, copy_entry) { + nir_intrinsic_instr *copy = (void *)copy_entry->key; + + nir_lower_var_copy_instr(copy, state->shader); + + for (unsigned i = 0; i < 2; ++i) { + struct deref_node *arg_node = + get_deref_node(copy->variables[i], state); + + /* Only bother removing copy entries for other nodes */ + if (arg_node == NULL || arg_node == node) + continue; + + struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy); + assert(arg_entry); + _mesa_set_remove(node->copies, arg_entry); + } + + nir_instr_remove(©->instr); + } + + node->copies = NULL; + + return true; + } + -/** Pushes an SSA def onto the def stack for the given node - * - * Each node is potentially associated with a stack of SSA definitions. - * This stack is used for determining what SSA definition reaches a given - * point in the program for variable renaming. The stack is always kept in - * dominance-order with at most one SSA def per block. If the SSA - * definition on the top of the stack is in the same block as the one being - * pushed, the top element is replaced. - */ -static void -def_stack_push(struct deref_node *node, nir_ssa_def *def, - struct lower_variables_state *state) -{ - if (node->def_stack == NULL) { - node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *, - state->impl->num_blocks); - node->def_stack_tail = node->def_stack - 1; - } - - if (node->def_stack_tail >= node->def_stack) { - nir_ssa_def *top_def = *node->def_stack_tail; - - if (def->parent_instr->block == top_def->parent_instr->block) { - /* They're in the same block, just replace the top */ - *node->def_stack_tail = def; - return; - } - } - - *(++node->def_stack_tail) = def; -} - -/* Pop the top of the def stack if it's in the given block */ -static void -def_stack_pop_if_in_block(struct deref_node *node, nir_block *block) -{ - /* If we're popping, then we have presumably pushed at some time in the - * past so this should exist. - */ - assert(node->def_stack != NULL); - - /* The stack is already empty. Do nothing. */ - if (node->def_stack_tail < node->def_stack) - return; - - nir_ssa_def *def = *node->def_stack_tail; - if (def->parent_instr->block == block) - node->def_stack_tail--; -} - -/** Retrieves the SSA definition on the top of the stack for the given - * node, if one exists. If the stack is empty, then we return the constant - * initializer (if it exists) or an SSA undef. - */ -static nir_ssa_def * -get_ssa_def_for_block(struct deref_node *node, nir_block *block, - struct lower_variables_state *state) -{ - /* If we have something on the stack, go ahead and return it. 
We're - * assuming that the top of the stack dominates the given block. - */ - if (node->def_stack && node->def_stack_tail >= node->def_stack) - return *node->def_stack_tail; - - /* If we got here then we don't have a definition that dominates the - * given block. This means that we need to add an undef and use that. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - glsl_get_vector_elements(node->type)); - nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); - def_stack_push(node, &undef->def, state); - return &undef->def; -} - -/* Given a block and one of its predecessors, this function fills in the - * souces of the phi nodes to take SSA defs from the given predecessor. - * This function must be called exactly once per block/predecessor pair. - */ -static void -add_phi_sources(nir_block *block, nir_block *pred, - struct lower_variables_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - if (!entry) - continue; - - struct deref_node *node = entry->data; - - nir_phi_src *src = ralloc(phi, nir_phi_src); - src->pred = pred; - src->src.parent_instr = &phi->instr; - src->src.is_ssa = true; - src->src.ssa = get_ssa_def_for_block(node, pred, state); - - list_addtail(&src->src.use_link, &src->src.ssa->uses); - - exec_list_push_tail(&phi->srcs, &src->node); - } -} - + /* Performs variable renaming by doing a DFS of the dominance tree + * + * This algorithm is very similar to the one outlined in "Efficiently + * Computing Static Single Assignment Form and the Control Dependence + * Graph" by Cytron et. al. The primary difference is that we only put one + * SSA def on the stack per block. + */ + static bool + rename_variables_block(nir_block *block, struct lower_variables_state *state) + { + nir_builder b; + nir_builder_init(&b, state->impl); + + nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_push(node, &phi->dest.ssa, state); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. 
- */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); - continue; - } - - if (!node->lower_to_ssa) - continue; - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src.is_ssa = true; - mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; ++ if (instr->type != nir_instr_type_intrinsic) ++ continue; + - assert(intrin->dest.is_ssa); ++ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); ++ switch (intrin->intrinsic) { ++ case nir_intrinsic_load_var: { ++ struct deref_node *node = ++ get_deref_node(intrin->variables[0], state); ++ ++ if (node == NULL) { ++ /* If we hit this path then we are referencing an invalid ++ * value. Most likely, we unrolled something and are ++ * reading past the end of some array. In any case, this ++ * should result in an undefined value. ++ */ ++ nir_ssa_undef_instr *undef = ++ nir_ssa_undef_instr_create(state->shader, ++ intrin->num_components); + - nir_instr_insert_before(&intrin->instr, &mov->instr); ++ nir_instr_insert_before(&intrin->instr, &undef->instr); + nir_instr_remove(&intrin->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; ++ nir_src_for_ssa(&undef->def)); ++ continue; + } + - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); ++ if (!node->lower_to_ssa) ++ continue; + - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. */ - nir_instr_remove(&intrin->instr); - continue; - } ++ nir_alu_instr *mov = nir_alu_instr_create(state->shader, ++ nir_op_imov); ++ mov->src[0].src = nir_src_for_ssa( ++ nir_phi_builder_value_get_block_def(node->pb_value, block)); ++ for (unsigned i = intrin->num_components; i < 4; i++) ++ mov->src[0].swizzle[i] = 0; + - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); - - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); - - if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; - - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. 
- */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (intrin->const_index[0] & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); - } - } - new_def = nir_vec(&b, srcs, intrin->num_components); - } - - assert(new_def->num_components == intrin->num_components); ++ assert(intrin->dest.is_ssa); + - def_stack_push(node, new_def, state); ++ mov->dest.write_mask = (1 << intrin->num_components) - 1; ++ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, ++ intrin->num_components, NULL); + - /* We'll wait to remove the instruction until the next pass - * where we pop the node we just pushed back off the stack. - */ - break; - } ++ nir_instr_insert_before(&intrin->instr, &mov->instr); ++ nir_instr_remove(&intrin->instr); + - default: - break; - } ++ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, ++ nir_src_for_ssa(&mov->dest.dest.ssa)); ++ break; + } - } - - if (block->successors[0]) - add_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - add_phi_sources(block->successors[1], block, state); - - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - - /* Now we iterate over the instructions and pop off any SSA defs that we - * pushed in the first loop. - */ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; + - def_stack_pop_if_in_block(node, block); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); ++ case nir_intrinsic_store_var: { ++ struct deref_node *node = ++ get_deref_node(intrin->variables[0], state); + - if (intrin->intrinsic != nir_intrinsic_store_var) - continue; - - struct deref_node *node = get_deref_node(intrin->variables[0], state); - if (!node) ++ if (node == NULL) { ++ /* Probably an out-of-bounds array store. That should be a ++ * no-op. */ ++ nir_instr_remove(&intrin->instr); + continue; ++ } + + if (!node->lower_to_ssa) + continue; + - def_stack_pop_if_in_block(node, block); - nir_instr_remove(&intrin->instr); - } - } - - return true; -} - -/* Inserts phi nodes for all variables marked lower_to_ssa - * - * This is the same algorithm as presented in "Efficiently Computing Static - * Single Assignment Form and the Control Dependence Graph" by Cytron et. - * al. - */ -static void -insert_phi_nodes(struct lower_variables_state *state) -{ - NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks); - NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks); - - /* - * Since the work flags already prevent us from inserting a node that has - * ever been inserted into W, we don't need to use a set to represent W. - * Also, since no block can ever be inserted into W more than once, we know - * that the maximum size of W is the number of basic blocks in the - * function. So all we need to handle W is an array and a pointer to the - * next element to be inserted and the next element to be removed. 
- */ - NIR_VLA(nir_block *, W, state->impl->num_blocks); - - unsigned w_start, w_end; - unsigned iter_count = 0; - - foreach_list_typed(struct deref_node, node, direct_derefs_link, - &state->direct_deref_nodes) { - if (node->stores == NULL) - continue; ++ assert(intrin->num_components == ++ glsl_get_vector_elements(node->type)); + - if (!node->lower_to_ssa) - continue; ++ assert(intrin->src[0].is_ssa); + - w_start = w_end = 0; - iter_count++; ++ nir_ssa_def *new_def; ++ b.cursor = nir_before_instr(&intrin->instr); + - struct set_entry *store_entry; - set_foreach(node->stores, store_entry) { - nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key; - if (work[store->instr.block->index] < iter_count) - W[w_end++] = store->instr.block; - work[store->instr.block->index] = iter_count; - } - - while (w_start != w_end) { - nir_block *cur = W[w_start++]; - struct set_entry *dom_entry; - set_foreach(cur->dom_frontier, dom_entry) { - nir_block *next = (nir_block *) dom_entry->key; - - /* - * If there's more than one return statement, then the end block - * can be a join point for some definitions. However, there are - * no instructions in the end block, so nothing would use those - * phi nodes. Of course, we couldn't place those phi nodes - * anyways due to the restriction of having no instructions in the - * end block... ++ if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { ++ /* Whole variable store - just copy the source. Note that ++ * intrin->num_components and intrin->src[0].ssa->num_components ++ * may differ. + */ - if (next == state->impl->end_block) - continue; - - if (has_already[next->index] < iter_count) { - nir_phi_instr *phi = nir_phi_instr_create(state->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_block(next, &phi->instr); ++ unsigned swiz[4]; ++ for (unsigned i = 0; i < 4; i++) ++ swiz[i] = i < intrin->num_components ? i : 0; + - _mesa_hash_table_insert(state->phi_table, phi, node); - - has_already[next->index] = iter_count; - if (work[next->index] < iter_count) { - work[next->index] = iter_count; - W[w_end++] = next; ++ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, ++ intrin->num_components, false); ++ } else { ++ nir_ssa_def *old_def = ++ nir_phi_builder_value_get_block_def(node->pb_value, block); ++ /* For writemasked store_var intrinsics, we combine the newly ++ * written values with the existing contents of unwritten ++ * channels, creating a new SSA value for the whole vector. ++ */ ++ nir_ssa_def *srcs[4]; ++ for (unsigned i = 0; i < intrin->num_components; i++) { ++ if (intrin->const_index[0] & (1 << i)) { ++ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); ++ } else { ++ srcs[i] = nir_channel(&b, old_def, i); + } + } ++ new_def = nir_vec(&b, srcs, intrin->num_components); + } ++ ++ assert(new_def->num_components == intrin->num_components); ++ ++ nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); ++ nir_instr_remove(&intrin->instr); ++ break; ++ } ++ ++ default: ++ break; + } + } -} + ++ for (unsigned i = 0; i < block->num_dom_children; ++i) ++ rename_variables_block(block->dom_children[i], state); ++ ++ return true; ++} + + /** Implements a pass to lower variable uses to SSA values + * + * This path walks the list of instructions and tries to lower as many + * local variable load/store operations to SSA defs and uses as it can. 
+ * The process involves four passes: + * + * 1) Iterate over all of the instructions and mark where each local + * variable deref is used in a load, store, or copy. While we're at + * it, we keep track of all of the fully-qualified (no wildcards) and + * fully-direct references we see and store them in the + * direct_deref_nodes hash table. + * + * 2) Walk over the the list of fully-qualified direct derefs generated in + * the previous pass. For each deref, we determine if it can ever be + * aliased, i.e. if there is an indirect reference anywhere that may + * refer to it. If it cannot be aliased, we mark it for lowering to an + * SSA value. At this point, we lower any var_copy instructions that + * use the given deref to load/store operations and, if the deref has a + * constant initializer, we go ahead and add a load_const value at the + * beginning of the function with the initialized value. + * + * 3) Walk over the list of derefs we plan to lower to SSA values and + * insert phi nodes as needed. + * + * 4) Perform "variable renaming" by replacing the load/store instructions + * with SSA definitions and SSA uses. + */ + static bool + nir_lower_vars_to_ssa_impl(nir_function_impl *impl) + { + struct lower_variables_state state; + + state.shader = impl->function->shader; + state.dead_ctx = ralloc_context(state.shader); + state.impl = impl; + + state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + exec_list_make_empty(&state.direct_deref_nodes); - state.phi_table = _mesa_hash_table_create(state.dead_ctx, - _mesa_hash_pointer, - _mesa_key_pointer_equal); + + /* Build the initial deref structures and direct_deref_nodes table */ + state.add_to_direct_deref_nodes = true; + nir_foreach_block(impl, register_variable_uses_block, &state); + + bool progress = false; + + nir_metadata_require(impl, nir_metadata_block_index); + + /* We're about to iterate through direct_deref_nodes. Don't modify it. */ + state.add_to_direct_deref_nodes = false; + + foreach_list_typed_safe(struct deref_node, node, direct_derefs_link, + &state.direct_deref_nodes) { + nir_deref_var *deref = node->deref; + + if (deref->var->data.mode != nir_var_local) { + exec_node_remove(&node->direct_derefs_link); + continue; + } + + if (deref_may_be_aliased(deref, &state)) { + exec_node_remove(&node->direct_derefs_link); + continue; + } + + node->lower_to_ssa = true; + progress = true; + - if (deref->var->constant_initializer) { - nir_load_const_instr *load = - nir_deref_get_const_initializer_load(state.shader, deref); - nir_ssa_def_init(&load->instr, &load->def, - glsl_get_vector_elements(node->type), NULL); - nir_instr_insert_before_cf_list(&impl->body, &load->instr); - def_stack_push(node, &load->def, &state); - } - + foreach_deref_node_match(deref, lower_copies_to_load_store, &state); + } + + if (!progress) + return false; + + nir_metadata_require(impl, nir_metadata_dominance); + + /* We may have lowered some copy instructions to load/store + * instructions. The uses from the copy instructions hav already been + * removed but we need to rescan to ensure that the uses from the newly + * added load/store instructions are registered. We need this + * information for phi node insertion below. 
+ */ + nir_foreach_block(impl, register_variable_uses_block, &state); + - insert_phi_nodes(&state); ++ state.phi_builder = nir_phi_builder_create(state.impl); ++ ++ NIR_VLA(BITSET_WORD, store_blocks, BITSET_WORDS(state.impl->num_blocks)); ++ foreach_list_typed(struct deref_node, node, direct_derefs_link, ++ &state.direct_deref_nodes) { ++ if (!node->lower_to_ssa) ++ continue; ++ ++ memset(store_blocks, 0, ++ BITSET_WORDS(state.impl->num_blocks) * sizeof(*store_blocks)); ++ ++ if (node->stores) { ++ struct set_entry *store_entry; ++ set_foreach(node->stores, store_entry) { ++ nir_intrinsic_instr *store = ++ (nir_intrinsic_instr *)store_entry->key; ++ BITSET_SET(store_blocks, store->instr.block->index); ++ } ++ } ++ ++ if (node->deref->var->constant_initializer) ++ BITSET_SET(store_blocks, 0); ++ ++ node->pb_value = ++ nir_phi_builder_add_value(state.phi_builder, ++ glsl_get_vector_elements(node->type), ++ store_blocks); ++ ++ if (node->deref->var->constant_initializer) { ++ nir_load_const_instr *load = ++ nir_deref_get_const_initializer_load(state.shader, node->deref); ++ nir_instr_insert_before_cf_list(&impl->body, &load->instr); ++ nir_phi_builder_value_set_block_def(node->pb_value, ++ nir_start_block(impl), &load->def); ++ } ++ } ++ + rename_variables_block(nir_start_block(impl), &state); + ++ nir_phi_builder_finish(state.phi_builder); ++ + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + ralloc_free(state.dead_ctx); + + return progress; + } + + void + nir_lower_vars_to_ssa(nir_shader *shader) + { + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_vars_to_ssa_impl(function->impl); + } + } diff --cc src/compiler/nir/nir_opcodes.py index 00000000000,e79810c1991..0eff89783dd mode 000000,100644..100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@@ -1,0 -1,668 +1,702 @@@ + #! /usr/bin/env python + # + # Copyright (C) 2014 Connor Abbott + # + # Permission is hereby granted, free of charge, to any person obtaining a + # copy of this software and associated documentation files (the "Software"), + # to deal in the Software without restriction, including without limitation + # the rights to use, copy, modify, merge, publish, distribute, sublicense, + # and/or sell copies of the Software, and to permit persons to whom the + # Software is furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice (including the next + # paragraph) shall be included in all copies or substantial portions of the + # Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + # Authors: + # Connor Abbott (cwabbott0@gmail.com) + + + # Class that represents all the information we have about the opcode + # NOTE: this must be kept in sync with nir_op_info + + class Opcode(object): + """Class that represents all the information we have about the opcode + NOTE: this must be kept in sync with nir_op_info + """ + def __init__(self, name, output_size, output_type, input_sizes, + input_types, algebraic_properties, const_expr): + """Parameters: + + - name is the name of the opcode (prepend nir_op_ for the enum name) + - all types are strings that get nir_type_ prepended to them + - input_types is a list of types + - algebraic_properties is a space-seperated string, where nir_op_is_ is + prepended before each entry + - const_expr is an expression or series of statements that computes the + constant value of the opcode given the constant values of its inputs. + + Constant expressions are formed from the variables src0, src1, ..., + src(N-1), where N is the number of arguments. The output of the + expression should be stored in the dst variable. Per-component input + and output variables will be scalars and non-per-component input and + output variables will be a struct with fields named x, y, z, and w + all of the correct type. Input and output variables can be assumed + to already be of the correct type and need no conversion. In + particular, the conversion from the C bool type to/from NIR_TRUE and + NIR_FALSE happens automatically. + + For per-component instructions, the entire expression will be + executed once for each component. For non-per-component + instructions, the expression is expected to store the correct values + in dst.x, dst.y, etc. If "dst" does not exist anywhere in the + constant expression, an assignment to dst will happen automatically + and the result will be equivalent to "dst = " for + per-component instructions and "dst.x = dst.y = ... = " + for non-per-component instructions. 
+ """ + assert isinstance(name, str) + assert isinstance(output_size, int) + assert isinstance(output_type, str) + assert isinstance(input_sizes, list) + assert isinstance(input_sizes[0], int) + assert isinstance(input_types, list) + assert isinstance(input_types[0], str) + assert isinstance(algebraic_properties, str) + assert isinstance(const_expr, str) + assert len(input_sizes) == len(input_types) + assert 0 <= output_size <= 4 + for size in input_sizes: + assert 0 <= size <= 4 + if output_size != 0: + assert size != 0 + self.name = name + self.num_inputs = len(input_sizes) + self.output_size = output_size + self.output_type = output_type + self.input_sizes = input_sizes + self.input_types = input_types + self.algebraic_properties = algebraic_properties + self.const_expr = const_expr + + # helper variables for strings + tfloat = "float" + tint = "int" + tbool = "bool" + tuint = "uint" + + commutative = "commutative " + associative = "associative " + + # global dictionary of opcodes + opcodes = {} + + def opcode(name, output_size, output_type, input_sizes, input_types, + algebraic_properties, const_expr): + assert name not in opcodes + opcodes[name] = Opcode(name, output_size, output_type, input_sizes, + input_types, algebraic_properties, const_expr) + -def unop_convert(name, in_type, out_type, const_expr): ++def unop_convert(name, out_type, in_type, const_expr): + opcode(name, 0, out_type, [0], [in_type], "", const_expr) + + def unop(name, ty, const_expr): + opcode(name, 0, ty, [0], [ty], "", const_expr) + + def unop_horiz(name, output_size, output_type, input_size, input_type, + const_expr): + opcode(name, output_size, output_type, [input_size], [input_type], "", + const_expr) + + def unop_reduce(name, output_size, output_type, input_type, prereduce_expr, + reduce_expr, final_expr): + def prereduce(src): + return "(" + prereduce_expr.format(src=src) + ")" + def final(src): + return final_expr.format(src="(" + src + ")") + def reduce_(src0, src1): + return reduce_expr.format(src0=src0, src1=src1) + src0 = prereduce("src0.x") + src1 = prereduce("src0.y") + src2 = prereduce("src0.z") + src3 = prereduce("src0.w") + unop_horiz(name + "2", output_size, output_type, 2, input_type, + final(reduce_(src0, src1))) + unop_horiz(name + "3", output_size, output_type, 3, input_type, + final(reduce_(reduce_(src0, src1), src2))) + unop_horiz(name + "4", output_size, output_type, 4, input_type, + final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) + + + # These two move instructions differ in what modifiers they support and what + # the negate modifier means. Otherwise, they are identical. + unop("fmov", tfloat, "src0") + unop("imov", tint, "src0") + + unop("ineg", tint, "-src0") + unop("fneg", tfloat, "-src0") + unop("inot", tint, "~src0") # invert every bit of the integer + unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f") + unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") + unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") + unop("iabs", tint, "(src0 < 0) ? -src0 : src0") + unop("fabs", tfloat, "fabsf(src0)") + unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") + unop("frcp", tfloat, "1.0f / src0") + unop("frsq", tfloat, "1.0f / sqrtf(src0)") + unop("fsqrt", tfloat, "sqrtf(src0)") + unop("fexp2", tfloat, "exp2f(src0)") + unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. 
-unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion -unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. ++unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion. ++unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion ++unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion. + # Float-to-boolean conversion -unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") ++unop_convert("f2b", tbool, tfloat, "src0 != 0.0f") + # Boolean-to-float conversion -unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") ++unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f") + # Int-to-boolean conversion -unop_convert("i2b", tint, tbool, "src0 != 0") -unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. ++unop_convert("i2b", tbool, tint, "src0 != 0") ++unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion ++unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion. + + # Unary floating-point rounding operations. + + + unop("ftrunc", tfloat, "truncf(src0)") + unop("fceil", tfloat, "ceilf(src0)") + unop("ffloor", tfloat, "floorf(src0)") + unop("ffract", tfloat, "src0 - floorf(src0)") + unop("fround_even", tfloat, "_mesa_roundevenf(src0)") + ++unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))") + + # Trigonometric operations. + + + unop("fsin", tfloat, "sinf(src0)") + unop("fcos", tfloat, "cosf(src0)") + + + # Partial derivatives. + + + unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0. + unop("fddy", tfloat, "0.0f") + unop("fddx_fine", tfloat, "0.0f") + unop("fddy_fine", tfloat, "0.0f") + unop("fddx_coarse", tfloat, "0.0f") + unop("fddy_coarse", tfloat, "0.0f") + + + # Floating point pack and unpack operations. 
+ + def pack_2x16(fmt): + unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ + dst.x = (uint32_t) pack_fmt_1x16(src0.x); + dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; + """.replace("fmt", fmt)) + + def pack_4x8(fmt): + unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ + dst.x = (uint32_t) pack_fmt_1x8(src0.x); + dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; + dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; + dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24; + """.replace("fmt", fmt)) + + def unpack_2x16(fmt): + unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ + dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); + dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); + """.replace("fmt", fmt)) + + def unpack_4x8(fmt): + unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ + dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); + dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); + dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); + dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24)); + """.replace("fmt", fmt)) + + + pack_2x16("snorm") + pack_4x8("snorm") + pack_2x16("unorm") + pack_4x8("unorm") + pack_2x16("half") + unpack_2x16("snorm") + unpack_4x8("snorm") + unpack_2x16("unorm") + unpack_4x8("unorm") + unpack_2x16("half") + ++unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ ++dst = (src0.x & 0xffff) | (src0.y >> 16); ++""") ++ ++unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ ++dst = (src0.x << 0) | ++ (src0.y << 8) | ++ (src0.z << 16) | ++ (src0.w << 24); ++""") + + # Lowered floating point unpacking operations. + + + unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, + "unpack_half_1x16((uint16_t)(src0.x & 0xffff))") + unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, + "unpack_half_1x16((uint16_t)(src0.x >> 16))") + + + # Bit operations, part of ARB_gpu_shader5. + + + unop("bitfield_reverse", tuint, """ + /* we're not winning any awards for speed here, but that's ok */ + dst = 0; + for (unsigned bit = 0; bit < 32; bit++) + dst |= ((src0 >> bit) & 1) << (31 - bit); + """) + unop("bit_count", tuint, """ + dst = 0; + for (unsigned bit = 0; bit < 32; bit++) { + if ((src0 >> bit) & 1) + dst++; + } + """) + -unop_convert("ufind_msb", tuint, tint, """ ++unop_convert("ufind_msb", tint, tuint, """ + dst = -1; + for (int bit = 31; bit > 0; bit--) { + if ((src0 >> bit) & 1) { + dst = bit; + break; + } + } + """) + + unop("ifind_msb", tint, """ + dst = -1; + for (int bit = 31; bit >= 0; bit--) { + /* If src0 < 0, we're looking for the first 0 bit. + * if src0 >= 0, we're looking for the first 1 bit. 
+ */ + if ((((src0 >> bit) & 1) && (src0 >= 0)) || + (!((src0 >> bit) & 1) && (src0 < 0))) { + dst = bit; + break; + } + } + """) + + unop("find_lsb", tint, """ + dst = -1; + for (unsigned bit = 0; bit < 32; bit++) { + if ((src0 >> bit) & 1) { + dst = bit; + break; + } + } + """) + + + for i in xrange(1, 5): + for j in xrange(1, 5): + unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f") + + def binop_convert(name, out_type, in_type, alg_props, const_expr): + opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr) + + def binop(name, ty, alg_props, const_expr): + binop_convert(name, ty, ty, alg_props, const_expr) + + def binop_compare(name, ty, alg_props, const_expr): + binop_convert(name, tbool, ty, alg_props, const_expr) + + def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size, + src2_type, const_expr): + opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type], + "", const_expr) + + def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, + reduce_expr, final_expr): + def final(src): + return final_expr.format(src= "(" + src + ")") + def reduce_(src0, src1): + return reduce_expr.format(src0=src0, src1=src1) + def prereduce(src0, src1): + return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")" + src0 = prereduce("src0.x", "src1.x") + src1 = prereduce("src0.y", "src1.y") + src2 = prereduce("src0.z", "src1.z") + src3 = prereduce("src0.w", "src1.w") + opcode(name + "2", output_size, output_type, + [2, 2], [src_type, src_type], commutative, + final(reduce_(src0, src1))) + opcode(name + "3", output_size, output_type, + [3, 3], [src_type, src_type], commutative, + final(reduce_(reduce_(src0, src1), src2))) + opcode(name + "4", output_size, output_type, + [4, 4], [src_type, src_type], commutative, + final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) + + binop("fadd", tfloat, commutative + associative, "src0 + src1") + binop("iadd", tint, commutative + associative, "src0 + src1") + binop("fsub", tfloat, "", "src0 - src1") + binop("isub", tint, "", "src0 - src1") + + binop("fmul", tfloat, commutative + associative, "src0 * src1") + # low 32-bits of signed/unsigned integer multiply + binop("imul", tint, commutative + associative, "src0 * src1") + # high 32-bits of signed integer multiply + binop("imul_high", tint, commutative, + "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") + # high 32-bits of unsigned integer multiply + binop("umul_high", tuint, commutative, + "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") + + binop("fdiv", tfloat, "", "src0 / src1") + binop("idiv", tint, "", "src0 / src1") + binop("udiv", tuint, "", "src0 / src1") + + # returns a boolean representing the carry resulting from the addition of + # the two unsigned arguments. + + binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0") + + # returns a boolean representing the borrow resulting from the subtraction + # of the two unsigned arguments. + + binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") + -binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") + binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") + ++# For signed integers, there are several different possible definitions of ++# "modulus" or "remainder". We follow the conventions used by LLVM and ++# SPIR-V. The irem opcode implements the standard C/C++ signed "%" ++# operation while the imod opcode implements the more mathematical ++# "modulus" operation. 
For details on the difference, see ++# ++# http://mathforum.org/library/drmath/view/52343.html ++ ++binop("irem", tint, "", "src1 == 0 ? 0 : src0 % src1") ++binop("imod", tint, "", ++ "src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ?" ++ " src0 % src1 : src0 % src1 + src1)") ++binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") ++binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)") ++ + # + # Comparisons + # + + + # these integer-aware comparisons return a boolean (0 or ~0) + + binop_compare("flt", tfloat, "", "src0 < src1") + binop_compare("fge", tfloat, "", "src0 >= src1") + binop_compare("feq", tfloat, commutative, "src0 == src1") + binop_compare("fne", tfloat, commutative, "src0 != src1") + binop_compare("ilt", tint, "", "src0 < src1") + binop_compare("ige", tint, "", "src0 >= src1") + binop_compare("ieq", tint, commutative, "src0 == src1") + binop_compare("ine", tint, commutative, "src0 != src1") + binop_compare("ult", tuint, "", "src0 < src1") + binop_compare("uge", tuint, "", "src0 >= src1") + + # integer-aware GLSL-style comparisons that compare floats and ints + + binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}", + "{src0} && {src1}", "{src}") + binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}", + "{src0} || {src1}", "{src}") + binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}", + "{src0} && {src1}", "{src}") + binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", + "{src0} || {src1}", "{src}") + + # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0 + + binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", + "{src0} && {src1}", "{src} ? 1.0f : 0.0f") + binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", + "{src0} || {src1}", "{src} ? 1.0f : 0.0f") + + # These comparisons for integer-less hardware return 1.0 and 0.0 for true + # and false respectively + + binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than + binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal + binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal + binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal + + + binop("ishl", tint, "", "src0 << src1") + binop("ishr", tint, "", "src0 >> src1") + binop("ushr", tuint, "", "src0 >> src1") + + # bitwise logic operators + # + # These are also used as boolean and, or, xor for hardware supporting + # integers. + + + binop("iand", tuint, commutative + associative, "src0 & src1") + binop("ior", tuint, commutative + associative, "src0 | src1") + binop("ixor", tuint, commutative + associative, "src0 ^ src1") + + + # floating point logic operators + # + # These use (src != 0.0) for testing the truth of the input, and output 1.0 + # for true and 0.0 for false + + binop("fand", tfloat, commutative, + "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") + binop("for", tfloat, commutative, + "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") + binop("fxor", tfloat, commutative, + "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 
1.0f : 0.0f") + + binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", + "{src}") + + binop_reduce("fdot_replicated", 4, tfloat, tfloat, + "{src0} * {src1}", "{src0} + {src1}", "{src}") + + opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "", + "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") + opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", + "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") + + binop("fmin", tfloat, "", "fminf(src0, src1)") + binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") + binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1") + binop("fmax", tfloat, "", "fmaxf(src0, src1)") + binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") + binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") + + # Saturated vector add for 4 8bit ints. + binop("usadd_4x8", tint, commutative + associative, """ + dst = 0; + for (int i = 0; i < 32; i += 8) { + dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; + } + """) + + # Saturated vector subtract for 4 8bit ints. + binop("ussub_4x8", tint, "", """ + dst = 0; + for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + if (src0_chan > src1_chan) + dst |= (src0_chan - src1_chan) << i; + } + """) + + # vector min for 4 8bit ints. + binop("umin_4x8", tint, commutative + associative, """ + dst = 0; + for (int i = 0; i < 32; i += 8) { + dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; + } + """) + + # vector max for 4 8bit ints. + binop("umax_4x8", tint, commutative + associative, """ + dst = 0; + for (int i = 0; i < 32; i += 8) { + dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; + } + """) + + # unorm multiply: (a * b) / 255. + binop("umul_unorm_4x8", tint, commutative + associative, """ + dst = 0; + for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + dst |= ((src0_chan * src1_chan) / 255) << i; + } + """) + + binop("fpow", tfloat, "", "powf(src0, src1)") + + binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, + "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") + + # bfm implements the behavior of the first operation of the SM5 "bfi" assembly + # and that of the "bfi1" i965 instruction. That is, it has undefined behavior + # if either of its arguments are 32. + binop_convert("bfm", tuint, tint, "", """ + int bits = src0, offset = src1; + if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) + dst = 0; /* undefined */ + else + dst = ((1u << bits) - 1) << offset; + """) + + opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """ + dst = ldexpf(src0, src1); + /* flush denormals to zero. */ + if (!isnormal(dst)) + dst = copysignf(0.0f, src0); + """) + + # Combines the first component of each input to make a 2-component vector. 
+ + binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ + dst.x = src0.x; + dst.y = src1.x; + """) + ++# Byte extraction ++binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") ++binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))") ++ ++# Word extraction ++binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") ++binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))") ++ ++ + def triop(name, ty, const_expr): + opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) + def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): + opcode(name, output_size, tuint, + [src1_size, src2_size, src3_size], + [tuint, tuint, tuint], "", const_expr) + + triop("ffma", tfloat, "src0 * src1 + src2") + + triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") + + # Conditional Select + # + # A vector conditional select instruction (like ?:, but operating per- + # component on vectors). There are two versions, one for floating point + # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). + + + triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") + opcode("bcsel", 0, tuint, [0, 0, 0], + [tbool, tuint, tuint], "", "src0 ? src1 : src2") + + # SM5 bfi assembly + triop("bfi", tuint, """ + unsigned mask = src0, insert = src1, base = src2; + if (mask == 0) { + dst = base; + } else { + unsigned tmp = mask; + while (!(tmp & 1)) { + tmp >>= 1; + insert <<= 1; + } + dst = (base & ~mask) | (insert & mask); + } + """) + + # SM5 ubfe/ibfe assembly + opcode("ubfe", 0, tuint, + [0, 0, 0], [tuint, tint, tint], "", """ + unsigned base = src0; + int offset = src1, bits = src2; + if (bits == 0) { + dst = 0; + } else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ + } else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); + } else { + dst = base >> offset; + } + """) + opcode("ibfe", 0, tint, + [0, 0, 0], [tint, tint, tint], "", """ + int base = src0; + int offset = src1, bits = src2; + if (bits == 0) { + dst = 0; + } else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ + } else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); + } else { + dst = base >> offset; + } + """) + + # GLSL bitfieldExtract() + opcode("ubitfield_extract", 0, tuint, + [0, 0, 0], [tuint, tint, tint], "", """ + unsigned base = src0; + int offset = src1, bits = src2; + if (bits == 0) { + dst = 0; + } else if (bits < 0 || offset < 0 || offset + bits > 32) { + dst = 0; /* undefined per the spec */ + } else { + dst = (base >> offset) & ((1ull << bits) - 1); + } + """) + opcode("ibitfield_extract", 0, tint, + [0, 0, 0], [tint, tint, tint], "", """ + int base = src0; + int offset = src1, bits = src2; + if (bits == 0) { + dst = 0; + } else if (offset < 0 || bits < 0 || offset + bits > 32) { + dst = 0; + } else { + dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */ + } + """) + + # Combines the first component of each input to make a 3-component vector. 
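The vec2 opcode just above and the vec3/vec4 opcodes below never look past the first component of any source. A tiny sketch of that gather, for illustration only (vec_gather is a made-up name, not a NIR helper):

    def vec_gather(*sources):
        # Each destination component is the x (first) component of the
        # corresponding source; any extra source components are ignored.
        return tuple(src[0] for src in sources)

    assert vec_gather((1.0, 9.0), (2.0,), (3.0, 7.0)) == (1.0, 2.0, 3.0)  # vec3-like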
+ + triop_horiz("vec3", 3, 1, 1, 1, """ + dst.x = src0.x; + dst.y = src1.x; + dst.z = src2.x; + """) + + def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, + src4_size, const_expr): + opcode(name, output_size, tuint, + [src1_size, src2_size, src3_size, src4_size], + [tuint, tuint, tuint, tuint], + "", const_expr) + + opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], + [tuint, tuint, tint, tint], "", """ + unsigned base = src0, insert = src1; + int offset = src2, bits = src3; + if (bits == 0) { + dst = 0; + } else if (offset < 0 || bits < 0 || bits + offset > 32) { + dst = 0; + } else { + unsigned mask = ((1ull << bits) - 1) << offset; + dst = (base & ~mask) | ((insert << bits) & mask); + } + """) + + quadop_horiz("vec4", 4, 1, 1, 1, 1, """ + dst.x = src0.x; + dst.y = src1.x; + dst.z = src2.x; + dst.w = src3.x; + """) + + diff --cc src/compiler/nir/nir_opt_algebraic.py index 00000000000,7745b76f7ce..f4bfd3a921a mode 000000,100644..100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@@ -1,0 -1,285 +1,354 @@@ + #! /usr/bin/env python ++# -*- encoding: utf-8 -*- + # + # Copyright (C) 2014 Intel Corporation + # + # Permission is hereby granted, free of charge, to any person obtaining a + # copy of this software and associated documentation files (the "Software"), + # to deal in the Software without restriction, including without limitation + # the rights to use, copy, modify, merge, publish, distribute, sublicense, + # and/or sell copies of the Software, and to permit persons to whom the + # Software is furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice (including the next + # paragraph) shall be included in all copies or substantial portions of the + # Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + # Authors: + # Jason Ekstrand (jason@jlekstrand.net) + + import nir_algebraic + + # Convenience variables + a = 'a' + b = 'b' + c = 'c' + d = 'd' + + # Written in the form (, ) where is an expression + # and is either an expression or a value. An expression is + # defined as a tuple of the form (, , , , ) + # where each source is either an expression or a value. A value can be + # either a numeric constant or a string representing a variable name. + # + # Variable names are specified as "[#]name[@type]" where "#" inicates that + # the given variable will only match constants and the type indicates that + # the given variable will only match values from ALU instructions with the + # given output type. + # + # For constants, you have to be careful to make sure that it is the right + # type because python is unaware of the source and destination types of the + # opcodes. 
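To make the (search, replace) tuple format described above concrete, here is a toy matcher and rewriter. It is only a sketch of the idea, not nir_algebraic's implementation: constants are matched by exact value, and the "#" markers, "@type" annotations, and optional condition strings are ignored.

    def match(pattern, expr, env):
        if isinstance(pattern, str):
            # A variable: bind it on first use, require consistency afterwards.
            return env.setdefault(pattern, expr) == expr
        if not isinstance(pattern, tuple):
            # A literal constant in the pattern must match exactly.
            return pattern == expr
        if not isinstance(expr, tuple) or len(expr) != len(pattern):
            return False
        if pattern[0] != expr[0]:
            # Opcodes must agree.
            return False
        return all(match(p, e, env) for p, e in zip(pattern[1:], expr[1:]))

    def substitute(replace, env):
        if isinstance(replace, str):
            return env[replace]
        if not isinstance(replace, tuple):
            return replace
        return (replace[0],) + tuple(substitute(r, env) for r in replace[1:])

    def rewrite(rule, expr):
        search, replace = rule
        env = {}
        return substitute(replace, env) if match(search, expr, env) else expr

    # (('fneg', ('fneg', a)), a): a double negation folds to the original value.
    rule = (('fneg', ('fneg', 'a')), 'a')
    expr = ('fneg', ('fneg', ('fadd', 'x', 'y')))
    assert rewrite(rule, expr) == ('fadd', 'x', 'y')

Entries in the list below that carry a third element, such as (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), additionally gate the rewrite on a condition checked against the backend's compiler options.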
+ + optimizations = [ + (('fneg', ('fneg', a)), a), + (('ineg', ('ineg', a)), a), + (('fabs', ('fabs', a)), ('fabs', a)), + (('fabs', ('fneg', a)), ('fabs', a)), + (('iabs', ('iabs', a)), ('iabs', a)), + (('iabs', ('ineg', a)), ('iabs', a)), + (('fadd', a, 0.0), a), + (('iadd', a, 0), a), + (('usadd_4x8', a, 0), a), + (('usadd_4x8', a, ~0), ~0), + (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), + (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), + (('fadd', ('fneg', a), a), 0.0), + (('iadd', ('ineg', a), a), 0), + (('iadd', ('ineg', a), ('iadd', a, b)), b), + (('iadd', a, ('iadd', ('ineg', a), b)), b), + (('fadd', ('fneg', a), ('fadd', a, b)), b), + (('fadd', a, ('fadd', ('fneg', a), b)), b), + (('fmul', a, 0.0), 0.0), + (('imul', a, 0), 0), + (('umul_unorm_4x8', a, 0), 0), + (('umul_unorm_4x8', a, ~0), a), + (('fmul', a, 1.0), a), + (('imul', a, 1), a), + (('fmul', a, -1.0), ('fneg', a)), + (('imul', a, -1), ('ineg', a)), ++ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), + (('ffma', 0.0, a, b), b), + (('ffma', a, 0.0, b), b), + (('ffma', a, b, 0.0), ('fmul', a, b)), + (('ffma', a, 1.0, b), ('fadd', a, b)), + (('ffma', 1.0, a, b), ('fadd', a, b)), + (('flrp', a, b, 0.0), a), + (('flrp', a, b, 1.0), b), + (('flrp', a, a, b), a), + (('flrp', 0.0, a, b), ('fmul', a, b)), + (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), + (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), + (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), + (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), + (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), + (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), + # Comparison simplifications + (('inot', ('flt', a, b)), ('fge', a, b)), + (('inot', ('fge', a, b)), ('flt', a, b)), + (('inot', ('feq', a, b)), ('fne', a, b)), + (('inot', ('fne', a, b)), ('feq', a, b)), + (('inot', ('ilt', a, b)), ('ige', a, b)), + (('inot', ('ige', a, b)), ('ilt', a, b)), + (('inot', ('ieq', a, b)), ('ine', a, b)), + (('inot', ('ine', a, b)), ('ieq', a, b)), + (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), + (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), + (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), + (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), + (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), + (('fmin', a, a), a), + (('fmax', a, a), a), + (('imin', a, a), a), + (('imax', a, a), a), + (('umin', a, a), a), + (('umax', a, a), a), + (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), + (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), + (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), + (('fsat', ('fsat', a)), ('fsat', a)), + (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), + (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), + (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), + (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), + (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), + (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), + (('sne', a, b), ('b2f', 
('fne', a, b)), 'options->lower_scmp'), + (('fne', ('fneg', a), a), ('fne', a, 0.0)), + (('feq', ('fneg', a), a), ('feq', a, 0.0)), + # Emulating booleans + (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), + (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), + (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), + (('iand', 'a@bool', 1.0), ('b2f', a)), + (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + # Comparison with the same args. Note that these are not done for + # the float versions because NaN always returns false on float + # inequalities. + (('ilt', a, a), False), + (('ige', a, a), True), + (('ieq', a, a), True), + (('ine', a, a), False), + (('ult', a, a), False), + (('uge', a, a), True), + # Logical and bit operations + (('fand', a, 0.0), 0.0), + (('iand', a, a), a), + (('iand', a, ~0), a), + (('iand', a, 0), 0), + (('ior', a, a), a), + (('ior', a, 0), a), + (('fxor', a, a), 0.0), + (('ixor', a, a), 0), + (('inot', ('inot', a)), a), + # DeMorgan's Laws + (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))), + (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))), + # Shift optimizations + (('ishl', 0, a), 0), + (('ishl', a, 0), a), + (('ishr', 0, a), 0), + (('ishr', a, 0), a), + (('ushr', 0, a), 0), + (('ushr', a, 0), a), + # Exponential/logarithmic identities + (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a + (('flog2', ('fexp2', a)), a), # lg2(2^a) = a + (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) + (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b + (('fpow', a, 1.0), a), + (('fpow', a, 2.0), ('fmul', a, a)), + (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), + (('fpow', 2.0, a), ('fexp2', a)), + (('fpow', ('fpow', a, 2.2), 0.454545), a), + (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), + (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + # Division and reciprocal + (('fdiv', 1.0, a), ('frcp', a)), + (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), + (('frcp', ('frcp', a)), a), + (('frcp', ('fsqrt', a)), ('frsq', a)), + (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), + (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), + # Boolean simplifications + (('ieq', 'a@bool', True), a), + (('ine', 'a@bool', True), ('inot', a)), + (('ine', 'a@bool', False), a), + (('ieq', 'a@bool', False), ('inot', 'a')), + (('bcsel', a, True, False), ('ine', a, 0)), + (('bcsel', a, False, True), ('ieq', a, 0)), + (('bcsel', True, b, c), b), + (('bcsel', False, b, c), c), + # The result of this should be hit by constant propagation and, in the + # next round of opt_algebraic, get picked up by one of the above two. 
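To make the comment above concrete: once the rule just below fires on a non-boolean constant condition, constant propagation and the next opt_algebraic round finish the job. A hand-worked trace, assuming the constant happens to be 5:

    bcsel(5, b, c)
      -> bcsel(ine(5, 0), b, c)    # the rule below: '#a' matches the constant 5
      -> bcsel(True, b, c)         # constant folding evaluates ine(5, 0)
      -> b                         # picked up by ('bcsel', True, b, c) -> b above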
+ (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)), + + (('bcsel', a, b, b), b), + (('fcsel', a, b, b), b), + + # Conversions + (('i2b', ('b2i', a)), a), + (('f2i', ('ftrunc', a)), ('f2i', a)), + (('f2u', ('ftrunc', a)), ('f2u', a)), + + # Subtracts + (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), + (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('ussub_4x8', a, 0), a), + (('ussub_4x8', a, ~0), 0), + (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), + (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), + (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), + (('ineg', a), ('isub', 0, a), 'options->lower_negate'), + (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), + (('iadd', a, ('isub', 0, b)), ('isub', a, b)), + (('fabs', ('fsub', 0.0, a)), ('fabs', a)), + (('iabs', ('isub', 0, a)), ('iabs', a)), + + # Misc. lowering + (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), ++ (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'), + (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), + (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), ++ (('ldexp', 'x', 'exp'), ++ ('fmul', 'x', ('ishl', ('imin', ('imax', ('iadd', 'exp', 0x7f), 0), 0xff), 23))), + + (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), + ('bcsel', ('ilt', 31, 'bits'), 'insert', + ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), + 'options->lower_bitfield_insert'), + + (('ibitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ilt', 31, 'bits'), 'value', + ('ibfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), + + (('ubitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ult', 31, 'bits'), 'value', + ('ubfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), ++ ++ (('extract_ibyte', a, b), ++ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8), ++ 'options->lower_extract_byte'), ++ ++ (('extract_ubyte', a, b), ++ ('iand', ('ushr', a, ('imul', b, 8)), 0xff), ++ 'options->lower_extract_byte'), ++ ++ (('extract_iword', a, b), ++ ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16), ++ 'options->lower_extract_word'), ++ ++ (('extract_uword', a, b), ++ ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), ++ 'options->lower_extract_word'), ++ ++ (('pack_unorm_2x16', 'v'), ++ ('pack_uvec2_to_uint', ++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), ++ 'options->lower_pack_unorm_2x16'), ++ ++ (('pack_unorm_4x8', 'v'), ++ ('pack_uvec4_to_uint', ++ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), ++ 'options->lower_pack_unorm_4x8'), ++ ++ (('pack_snorm_2x16', 'v'), ++ ('pack_uvec2_to_uint', ++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), ++ 'options->lower_pack_snorm_2x16'), ++ ++ (('pack_snorm_4x8', 'v'), ++ ('pack_uvec4_to_uint', ++ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), ++ 'options->lower_pack_snorm_4x8'), ++ ++ (('unpack_unorm_2x16', 'v'), ++ ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0), ++ ('extract_uword', 'v', 1), 0, 0)), ++ 65535.0), ++ 'options->lower_unpack_unorm_2x16'), ++ ++ (('unpack_unorm_4x8', 'v'), ++ ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0), ++ ('extract_ubyte', 'v', 1), ++ ('extract_ubyte', 'v', 2), ++ ('extract_ubyte', 'v', 3))), ++ 255.0), ++ 'options->lower_unpack_unorm_4x8'), ++ ++ (('unpack_snorm_2x16', 'v'), ++ ('fmin', 1.0, ('fmax', -1.0, 
('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0), ++ ('extract_iword', 'v', 1), 0, 0)), ++ 32767.0))), ++ 'options->lower_unpack_snorm_2x16'), ++ ++ (('unpack_snorm_4x8', 'v'), ++ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0), ++ ('extract_ibyte', 'v', 1), ++ ('extract_ibyte', 'v', 2), ++ ('extract_ibyte', 'v', 3))), ++ 127.0))), ++ 'options->lower_unpack_snorm_4x8'), + ] + + # Add optimizations to handle the case where the result of a ternary is + # compared to a constant. This way we can take things like + # + # (a ? 0 : 1) > 0 + # + # and turn it into + # + # a ? (0 > 0) : (1 > 0) + # + # which constant folding will eat for lunch. The resulting ternary will + # further get cleaned up by the boolean reductions above and we will be + # left with just the original variable "a". + for op in ['flt', 'fge', 'feq', 'fne', + 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']: + optimizations += [ + ((op, ('bcsel', 'a', '#b', '#c'), '#d'), + ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))), + ((op, '#d', ('bcsel', a, '#b', '#c')), + ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), + ] + + # This section contains "late" optimizations that should be run after the + # regular optimizations have finished. Optimizations should go here if + # they help code generation but do not necessarily produce code that is + # more easily optimizable. + late_optimizations = [ + (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), + (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), + (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), + (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), + (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), + (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), + (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), + (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), + ] + + print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() + print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", + late_optimizations).render() diff --cc src/compiler/nir/nir_phi_builder.c index 00000000000,00000000000..5429083e5c8 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_phi_builder.c @@@ -1,0 -1,0 +1,254 @@@ ++/* ++ * Copyright © 2016 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++#include "nir_phi_builder.h" ++#include "nir/nir_vla.h" ++ ++struct nir_phi_builder { ++ nir_shader *shader; ++ nir_function_impl *impl; ++ ++ /* Copied from the impl for easy access */ ++ unsigned num_blocks; ++ ++ /* Array of all blocks indexed by block->index. */ ++ nir_block **blocks; ++ ++ /* Hold on to the values so we can easily iterate over them. */ ++ struct exec_list values; ++ ++ /* Worklist for phi adding */ ++ unsigned iter_count; ++ unsigned *work; ++ nir_block **W; ++}; ++ ++#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1) ++ ++struct nir_phi_builder_value { ++ struct exec_node node; ++ ++ struct nir_phi_builder *builder; ++ ++ /* Needed so we can create phis and undefs */ ++ unsigned num_components; ++ ++ /* The list of phi nodes associated with this value. Phi nodes are not ++ * added directly. Instead, they are created, the instr->block pointer ++ * set, and then added to this list. Later, in phi_builder_finish, we ++ * set up their sources and add them to the top of their respective ++ * blocks. ++ */ ++ struct exec_list phis; ++ ++ /* Array of SSA defs, indexed by block. If a phi needs to be inserted ++ * in a given block, it will have the magic value NEEDS_PHI. ++ */ ++ nir_ssa_def *defs[0]; ++}; ++ ++static bool ++fill_block_array(nir_block *block, void *void_data) ++{ ++ nir_block **blocks = void_data; ++ blocks[block->index] = block; ++ return true; ++} ++ ++struct nir_phi_builder * ++nir_phi_builder_create(nir_function_impl *impl) ++{ ++ struct nir_phi_builder *pb = ralloc(NULL, struct nir_phi_builder); ++ ++ pb->shader = impl->function->shader; ++ pb->impl = impl; ++ ++ assert(impl->valid_metadata & (nir_metadata_block_index | ++ nir_metadata_dominance)); ++ ++ pb->num_blocks = impl->num_blocks; ++ pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks); ++ nir_foreach_block(impl, fill_block_array, pb->blocks); ++ ++ exec_list_make_empty(&pb->values); ++ ++ pb->iter_count = 0; ++ pb->work = rzalloc_array(pb, unsigned, pb->num_blocks); ++ pb->W = ralloc_array(pb, nir_block *, pb->num_blocks); ++ ++ return pb; ++} ++ ++struct nir_phi_builder_value * ++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, ++ const BITSET_WORD *defs) ++{ ++ struct nir_phi_builder_value *val; ++ unsigned i, w_start = 0, w_end = 0; ++ ++ val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks); ++ val->builder = pb; ++ val->num_components = num_components; ++ exec_list_make_empty(&val->phis); ++ exec_list_push_tail(&pb->values, &val->node); ++ ++ pb->iter_count++; ++ ++ BITSET_WORD tmp; ++ BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) { ++ if (pb->work[i] < pb->iter_count) ++ pb->W[w_end++] = pb->blocks[i]; ++ pb->work[i] = pb->iter_count; ++ } ++ ++ while (w_start != w_end) { ++ nir_block *cur = pb->W[w_start++]; ++ struct set_entry *dom_entry; ++ set_foreach(cur->dom_frontier, dom_entry) { ++ nir_block *next = (nir_block *) dom_entry->key; ++ ++ /* ++ * If there's more than one return statement, then the end block ++ * can be a join point for some definitions. However, there are ++ * no instructions in the end block, so nothing would use those ++ * phi nodes. Of course, we couldn't place those phi nodes ++ * anyways due to the restriction of having no instructions in the ++ * end block... 
++ */ ++ if (next == pb->impl->end_block) ++ continue; ++ ++ if (val->defs[next->index] == NULL) { ++ val->defs[next->index] = NEEDS_PHI; ++ ++ if (pb->work[next->index] < pb->iter_count) { ++ pb->work[next->index] = pb->iter_count; ++ pb->W[w_end++] = next; ++ } ++ } ++ } ++ } ++ ++ return val; ++} ++ ++void ++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, ++ nir_block *block, nir_ssa_def *def) ++{ ++ val->defs[block->index] = def; ++} ++ ++nir_ssa_def * ++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, ++ nir_block *block) ++{ ++ if (val->defs[block->index] == NULL) { ++ if (block->imm_dom) { ++ /* Grab it from our immediate dominator. We'll stash it here for ++ * easy access later. ++ */ ++ val->defs[block->index] = ++ nir_phi_builder_value_get_block_def(val, block->imm_dom); ++ return val->defs[block->index]; ++ } else { ++ /* No immediate dominator means that this block is either the ++ * start block or unreachable. In either case, the value is ++ * undefined so we need an SSA undef. ++ */ ++ nir_ssa_undef_instr *undef = ++ nir_ssa_undef_instr_create(val->builder->shader, ++ val->num_components); ++ nir_instr_insert(nir_before_cf_list(&val->builder->impl->body), ++ &undef->instr); ++ val->defs[block->index] = &undef->def; ++ return &undef->def; ++ } ++ } else if (val->defs[block->index] == NEEDS_PHI) { ++ /* If we need a phi instruction, go ahead and create one but don't ++ * add it to the program yet. Later, we'll go through and set up phi ++ * sources and add the instructions will be added at that time. ++ */ ++ nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader); ++ nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL); ++ phi->instr.block = block; ++ exec_list_push_tail(&val->phis, &phi->instr.node); ++ val->defs[block->index] = &phi->dest.ssa; ++ return &phi->dest.ssa; ++ } else { ++ return val->defs[block->index]; ++ } ++} ++ ++static int ++compare_blocks(const void *_a, const void *_b) ++{ ++ nir_block * const * a = _a; ++ nir_block * const * b = _b; ++ ++ return (*a)->index - (*b)->index; ++} ++ ++void ++nir_phi_builder_finish(struct nir_phi_builder *pb) ++{ ++ const unsigned num_blocks = pb->num_blocks; ++ NIR_VLA(nir_block *, preds, num_blocks); ++ ++ foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) { ++ /* We can't iterate over the list of phis normally because we are ++ * removing them as we go and, in some cases, adding new phis as we ++ * build the source lists of others. ++ */ ++ while (!exec_list_is_empty(&val->phis)) { ++ struct exec_node *head = exec_list_get_head(&val->phis); ++ nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node); ++ assert(phi->instr.type == nir_instr_type_phi); ++ ++ exec_node_remove(&phi->instr.node); ++ ++ /* Construct an array of predecessors. We sort it to ensure ++ * determinism in the phi insertion algorithm. ++ * ++ * XXX: Calling qsort this many times seems expensive. 
++ */ ++ int num_preds = 0; ++ struct set_entry *entry; ++ set_foreach(phi->instr.block->predecessors, entry) ++ preds[num_preds++] = (nir_block *)entry->key; ++ qsort(preds, num_preds, sizeof(*preds), compare_blocks); ++ ++ for (unsigned i = 0; i < num_preds; i++) { ++ nir_phi_src *src = ralloc(phi, nir_phi_src); ++ src->pred = preds[i]; ++ src->src = nir_src_for_ssa( ++ nir_phi_builder_value_get_block_def(val, preds[i])); ++ exec_list_push_tail(&phi->srcs, &src->node); ++ } ++ ++ nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr); ++ } ++ } ++ ++ ralloc_free(pb); ++} diff --cc src/compiler/nir/nir_phi_builder.h index 00000000000,00000000000..50251bf1ba3 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_phi_builder.h @@@ -1,0 -1,0 +1,84 @@@ ++/* ++ * Copyright © 2016 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#pragma once ++ ++#include "nir.h" ++ ++struct nir_phi_builder; ++struct nir_phi_builder_value; ++ ++/* Create a new phi builder. ++ * ++ * While this is fairly cheap, it does allocate some memory and walk the list ++ * of blocks so it's recommended that you only call it once and use it to ++ * build phis for several values. ++ */ ++struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl); ++ ++/* Register a value with the builder. ++ * ++ * The 'defs' parameter specifies a bitset of blocks in which the given value ++ * is defined. This is used to determine where to place the phi nodes. ++ */ ++struct nir_phi_builder_value * ++nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, ++ const BITSET_WORD *defs); ++ ++/* Register a definition for the given value and block. ++ * ++ * It is safe to call this function as many times as you wish for any given ++ * block/value pair. However, it always replaces whatever was there ++ * previously even if that definition is from a phi node. The phi builder ++ * always uses the latest information it has, so you must be careful about the ++ * order in which you register definitions. The final value at the end of the ++ * block must be the last value registered. ++ */ ++void ++nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, ++ nir_block *block, nir_ssa_def *def); ++ ++/* Get the definition for the given value in the given block. ++ * ++ * This definition will always be the latest definition known for the given ++ * block. 
If no definition is immediately available, it will crawl up the ++ * dominance tree and insert phi nodes as needed until it finds one. In the ++ * case that no suitable definition is found, it will return the result of a ++ * nir_ssa_undef_instr with the correct number of components. ++ * ++ * Because this function only uses the latest available information for any ++ * given block, you must have already finished registering definitions for any ++ * blocks that dominate the current block in order to get the correct result. ++ */ ++nir_ssa_def * ++nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, ++ nir_block *block); ++ ++/* Finish building phi nodes and free the builder. ++ * ++ * This function does far more than just free memory. Prior to calling ++ * nir_phi_builder_finish, no phi nodes have actually been inserted in the ++ * program. This function is what finishes setting up phi node sources and ++ * adds the phi nodes to the program. ++ */ ++void nir_phi_builder_finish(struct nir_phi_builder *pb); diff --cc src/compiler/nir/nir_print.c index 00000000000,48ecb48a620..f36b91de6e0 mode 000000,100644..100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@@ -1,0 -1,1069 +1,1089 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + #include "compiler/shader_enums.h" + #include + #include + + static void + print_tabs(unsigned num_tabs, FILE *fp) + { + for (unsigned i = 0; i < num_tabs; i++) + fprintf(fp, "\t"); + } + + typedef struct { + FILE *fp; + nir_shader *shader; + /** map from nir_variable -> printable name */ + struct hash_table *ht; + + /** set of names used so far for nir_variables */ + struct set *syms; + + /* an index used to make new non-conflicting names */ + unsigned index; + } print_state; + + static void + print_register(nir_register *reg, print_state *state) + { + FILE *fp = state->fp; + if (reg->name != NULL) + fprintf(fp, "/* %s */ ", reg->name); + if (reg->is_global) + fprintf(fp, "gr%u", reg->index); + else + fprintf(fp, "r%u", reg->index); + } + + static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" }; + + static void + print_register_decl(nir_register *reg, print_state *state) + { + FILE *fp = state->fp; + fprintf(fp, "decl_reg %s ", sizes[reg->num_components]); + if (reg->is_packed) + fprintf(fp, "(packed) "); + print_register(reg, state); + if (reg->num_array_elems != 0) + fprintf(fp, "[%u]", reg->num_array_elems); + fprintf(fp, "\n"); + } + + static void + print_ssa_def(nir_ssa_def *def, print_state *state) + { + FILE *fp = state->fp; + if (def->name != NULL) + fprintf(fp, "/* %s */ ", def->name); + fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index); + } + + static void + print_ssa_use(nir_ssa_def *def, print_state *state) + { + FILE *fp = state->fp; + if (def->name != NULL) + fprintf(fp, "/* %s */ ", def->name); + fprintf(fp, "ssa_%u", def->index); + } + + static void print_src(nir_src *src, print_state *state); + + static void + print_reg_src(nir_reg_src *src, print_state *state) + { + FILE *fp = state->fp; + print_register(src->reg, state); + if (src->reg->num_array_elems != 0) { + fprintf(fp, "[%u", src->base_offset); + if (src->indirect != NULL) { + fprintf(fp, " + "); + print_src(src->indirect, state); + } + fprintf(fp, "]"); + } + } + + static void + print_reg_dest(nir_reg_dest *dest, print_state *state) + { + FILE *fp = state->fp; + print_register(dest->reg, state); + if (dest->reg->num_array_elems != 0) { + fprintf(fp, "[%u", dest->base_offset); + if (dest->indirect != NULL) { + fprintf(fp, " + "); + print_src(dest->indirect, state); + } + fprintf(fp, "]"); + } + } + + static void + print_src(nir_src *src, print_state *state) + { + if (src->is_ssa) + print_ssa_use(src->ssa, state); + else + print_reg_src(&src->reg, state); + } + + static void + print_dest(nir_dest *dest, print_state *state) + { + if (dest->is_ssa) + print_ssa_def(&dest->ssa, state); + else + print_reg_dest(&dest->reg, state); + } + + static void + print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state) + { + FILE *fp = state->fp; + + if (instr->src[src].negate) + fprintf(fp, "-"); + if (instr->src[src].abs) + fprintf(fp, "abs("); + + print_src(&instr->src[src].src, state); + + bool print_swizzle = false; + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + if (instr->src[src].swizzle[i] != i) { + print_swizzle = true; + break; + } + } + + if (print_swizzle) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); + } + } + + if (instr->src[src].abs) + fprintf(fp, ")"); + } + + static void + 
print_alu_dest(nir_alu_dest *dest, print_state *state) + { + FILE *fp = state->fp; + /* we're going to print the saturate modifier later, after the opcode */ + + print_dest(&dest->dest, state); + + if (!dest->dest.is_ssa && + dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) + if ((dest->write_mask >> i) & 1) + fprintf(fp, "%c", "xyzw"[i]); + } + } + + static void + print_alu_instr(nir_alu_instr *instr, print_state *state) + { + FILE *fp = state->fp; + + print_alu_dest(&instr->dest, state); + + fprintf(fp, " = %s", nir_op_infos[instr->op].name); + if (instr->dest.saturate) + fprintf(fp, ".sat"); + fprintf(fp, " "); + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_alu_src(instr, i, state); + } + } + ++static const char * ++get_var_name(nir_variable *var, print_state *state) ++{ ++ if (state->ht == NULL) ++ return var->name; ++ ++ assert(state->syms); ++ ++ struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); ++ if (entry) ++ return entry->data; ++ ++ char *name; ++ if (var->name == NULL) { ++ name = ralloc_asprintf(state->syms, "@%u", state->index++); ++ } else { ++ struct set_entry *set_entry = _mesa_set_search(state->syms, var->name); ++ if (set_entry != NULL) { ++ /* we have a collision with another name, append an @ + a unique ++ * index */ ++ name = ralloc_asprintf(state->syms, "%s@%u", var->name, ++ state->index++); ++ } else { ++ /* Mark this one as seen */ ++ _mesa_set_add(state->syms, var->name); ++ name = var->name; ++ } ++ } ++ ++ _mesa_hash_table_insert(state->ht, var, name); ++ ++ return name; ++} ++ + static void + print_constant(nir_constant *c, const struct glsl_type *type, print_state *state) + { + FILE *fp = state->fp; + unsigned total_elems = glsl_get_components(type); + unsigned i; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (i = 0; i < total_elems; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "0x%08x", c->value.u[i]); + } + break; + + case GLSL_TYPE_FLOAT: + for (i = 0; i < total_elems; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%f", c->value.f[i]); + } + break; + + case GLSL_TYPE_STRUCT: + for (i = 0; i < c->num_elements; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "{ "); + print_constant(c->elements[i], glsl_get_struct_field(type, i), state); + fprintf(fp, " }"); + } + break; + + case GLSL_TYPE_ARRAY: + for (i = 0; i < c->num_elements; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "{ "); + print_constant(c->elements[i], glsl_get_array_element(type), state); + fprintf(fp, " }"); + } + break; + + default: + unreachable("not reached"); + } + } + + static void + print_var_decl(nir_variable *var, print_state *state) + { + FILE *fp = state->fp; + + fprintf(fp, "decl_var "); + + const char *const cent = (var->data.centroid) ? "centroid " : ""; + const char *const samp = (var->data.sample) ? "sample " : ""; + const char *const patch = (var->data.patch) ? "patch " : ""; + const char *const inv = (var->data.invariant) ? 
"invariant " : ""; + const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage", "system " }; ++ "uniform ", "shader_storage ", "shared ", ++ "system "}; + + fprintf(fp, "%s%s%s%s%s%s ", + cent, samp, patch, inv, mode[var->data.mode], + glsl_interp_qualifier_name(var->data.interpolation)); + + glsl_print_type(var->type, fp); + - struct set_entry *entry = NULL; - if (state->syms) - entry = _mesa_set_search(state->syms, var->name); - - char *name; - - if (entry != NULL) { - /* we have a collision with another name, append an @ + a unique index */ - name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); - } else { - name = var->name; - } - - fprintf(fp, " %s", name); ++ fprintf(fp, " %s", get_var_name(var, state)); + + if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage) { + const char *loc = NULL; + char buf[4]; + + switch (state->shader->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == nir_var_shader_in) + loc = gl_vert_attrib_name(var->data.location); + else if (var->data.mode == nir_var_shader_out) + loc = gl_varying_slot_name(var->data.location); + break; + case MESA_SHADER_GEOMETRY: + if ((var->data.mode == nir_var_shader_in) || + (var->data.mode == nir_var_shader_out)) + loc = gl_varying_slot_name(var->data.location); + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == nir_var_shader_in) + loc = gl_varying_slot_name(var->data.location); + else if (var->data.mode == nir_var_shader_out) + loc = gl_frag_result_name(var->data.location); + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_COMPUTE: + default: + /* TODO */ + break; + } + + if (!loc) { + snprintf(buf, sizeof(buf), "%u", var->data.location); + loc = buf; + } + + fprintf(fp, " (%s, %u)", loc, var->data.driver_location); + } + + if (var->constant_initializer) { + fprintf(fp, " = { "); + print_constant(var->constant_initializer, var->type, state); + fprintf(fp, " }"); + } + + fprintf(fp, "\n"); - - if (state->syms) { - _mesa_set_add(state->syms, name); - _mesa_hash_table_insert(state->ht, var, name); - } + } + + static void + print_var(nir_variable *var, print_state *state) + { + FILE *fp = state->fp; - const char *name; - if (state->ht) { - struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); - - assert(entry != NULL); - name = entry->data; - } else { - name = var->name; - } - - fprintf(fp, "%s", name); ++ fprintf(fp, "%s", get_var_name(var, state)); + } + + static void + print_deref_var(nir_deref_var *deref, print_state *state) + { + print_var(deref->var, state); + } + + static void + print_deref_array(nir_deref_array *deref, print_state *state) + { + FILE *fp = state->fp; + fprintf(fp, "["); + switch (deref->deref_array_type) { + case nir_deref_array_type_direct: + fprintf(fp, "%u", deref->base_offset); + break; + case nir_deref_array_type_indirect: + if (deref->base_offset != 0) + fprintf(fp, "%u + ", deref->base_offset); + print_src(&deref->indirect, state); + break; + case nir_deref_array_type_wildcard: + fprintf(fp, "*"); + break; + } + fprintf(fp, "]"); + } + + static void + print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type, + print_state *state) + { + FILE *fp = state->fp; + fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index)); + } + + static void + print_deref(nir_deref_var *deref, print_state *state) + { + nir_deref *tail = 
&deref->deref; + nir_deref *pretail = NULL; + while (tail != NULL) { + switch (tail->deref_type) { + case nir_deref_type_var: + assert(pretail == NULL); + assert(tail == &deref->deref); + print_deref_var(deref, state); + break; + + case nir_deref_type_array: + assert(pretail != NULL); + print_deref_array(nir_deref_as_array(tail), state); + break; + + case nir_deref_type_struct: + assert(pretail != NULL); + print_deref_struct(nir_deref_as_struct(tail), + pretail->type, state); + break; + + default: + unreachable("Invalid deref type"); + } + + pretail = tail; + tail = pretail->child; + } + } + + static void + print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) + { + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + FILE *fp = state->fp; + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + print_dest(&instr->dest, state); + fprintf(fp, " = "); + } + + fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name); + + for (unsigned i = 0; i < num_srcs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_src(&instr->src[i], state); + } + + fprintf(fp, ") ("); + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_deref(instr->variables[i], state); + } + + fprintf(fp, ") ("); + + unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices; + + for (unsigned i = 0; i < num_indices; i++) { + if (i != 0) + fprintf(fp, ", "); + + fprintf(fp, "%d", instr->const_index[i]); + } + + fprintf(fp, ")"); + + if (!state->shader) + return; + + struct exec_list *var_list = NULL; + + switch (instr->intrinsic) { + case nir_intrinsic_load_uniform: + var_list = &state->shader->uniforms; + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + var_list = &state->shader->inputs; + break; + case nir_intrinsic_load_output: + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + var_list = &state->shader->outputs; + break; + default: + return; + } + + nir_foreach_variable(var, var_list) { + if ((var->data.driver_location == instr->const_index[0]) && + var->name) { + fprintf(fp, "\t/* %s */", var->name); + break; + } + } + } + + static void + print_tex_instr(nir_tex_instr *instr, print_state *state) + { + FILE *fp = state->fp; + + print_dest(&instr->dest, state); + + fprintf(fp, " = "); + + switch (instr->op) { + case nir_texop_tex: + fprintf(fp, "tex "); + break; + case nir_texop_txb: + fprintf(fp, "txb "); + break; + case nir_texop_txl: + fprintf(fp, "txl "); + break; + case nir_texop_txd: + fprintf(fp, "txd "); + break; + case nir_texop_txf: + fprintf(fp, "txf "); + break; + case nir_texop_txf_ms: + fprintf(fp, "txf_ms "); + break; + case nir_texop_txs: + fprintf(fp, "txs "); + break; + case nir_texop_lod: + fprintf(fp, "lod "); + break; + case nir_texop_tg4: + fprintf(fp, "tg4 "); + break; + case nir_texop_query_levels: + fprintf(fp, "query_levels "); + break; + case nir_texop_texture_samples: + fprintf(fp, "texture_samples "); + break; + case nir_texop_samples_identical: + fprintf(fp, "samples_identical "); + break; + default: + unreachable("Invalid texture operation"); + break; + } + + for (unsigned i = 0; i < instr->num_srcs; i++) { + print_src(&instr->src[i].src, state); + + fprintf(fp, " "); + + switch(instr->src[i].src_type) { + case nir_tex_src_coord: + fprintf(fp, "(coord)"); + break; + case nir_tex_src_projector: + fprintf(fp, "(projector)"); + break; + case 
nir_tex_src_comparitor: + fprintf(fp, "(comparitor)"); + break; + case nir_tex_src_offset: + fprintf(fp, "(offset)"); + break; + case nir_tex_src_bias: + fprintf(fp, "(bias)"); + break; + case nir_tex_src_lod: + fprintf(fp, "(lod)"); + break; + case nir_tex_src_ms_index: + fprintf(fp, "(ms_index)"); + break; + case nir_tex_src_ddx: + fprintf(fp, "(ddx)"); + break; + case nir_tex_src_ddy: + fprintf(fp, "(ddy)"); + break; ++ case nir_tex_src_texture_offset: ++ fprintf(fp, "(texture_offset)"); ++ break; + case nir_tex_src_sampler_offset: + fprintf(fp, "(sampler_offset)"); + break; + + default: + unreachable("Invalid texture source type"); + break; + } + + fprintf(fp, ", "); + } + + bool has_nonzero_offset = false; + for (unsigned i = 0; i < 4; i++) { + if (instr->const_offset[i] != 0) { + has_nonzero_offset = true; + break; + } + } + + if (has_nonzero_offset) { + fprintf(fp, "[%i %i %i %i] (offset), ", + instr->const_offset[0], instr->const_offset[1], + instr->const_offset[2], instr->const_offset[3]); + } + + if (instr->op == nir_texop_tg4) { + fprintf(fp, "%u (gather_component), ", instr->component); + } + ++ if (instr->texture) { ++ assert(instr->sampler); ++ fprintf(fp, " (texture)"); ++ } + if (instr->sampler) { + print_deref(instr->sampler, state); ++ fprintf(fp, " (sampler)"); + } else { - fprintf(fp, "%u", instr->sampler_index); ++ assert(instr->texture == NULL); ++ fprintf(fp, "%u (texture) %u (sampler)", ++ instr->texture_index, instr->sampler_index); + } - - fprintf(fp, " (sampler)"); + } + + static void + print_call_instr(nir_call_instr *instr, print_state *state) + { + FILE *fp = state->fp; + + fprintf(fp, "call %s ", instr->callee->name); + + for (unsigned i = 0; i < instr->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_deref(instr->params[i], state); + } + + if (instr->return_deref != NULL) { + if (instr->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + print_deref(instr->return_deref, state); + } + } + + static void + print_load_const_instr(nir_load_const_instr *instr, print_state *state) + { + FILE *fp = state->fp; + + print_ssa_def(&instr->def, state); + + fprintf(fp, " = load_const ("); + + for (unsigned i = 0; i < instr->def.num_components; i++) { + if (i != 0) + fprintf(fp, ", "); + + /* + * we don't really know the type of the constant (if it will be used as a + * float or an int), so just print the raw constant in hex for fidelity + * and then print the float in a comment for readability. 
+ */ + + fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); + } + + fprintf(fp, ")"); + } + + static void + print_jump_instr(nir_jump_instr *instr, print_state *state) + { + FILE *fp = state->fp; + + switch (instr->type) { + case nir_jump_break: + fprintf(fp, "break"); + break; + + case nir_jump_continue: + fprintf(fp, "continue"); + break; + + case nir_jump_return: + fprintf(fp, "return"); + break; + } + } + + static void + print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state) + { + FILE *fp = state->fp; + print_ssa_def(&instr->def, state); + fprintf(fp, " = undefined"); + } + + static void + print_phi_instr(nir_phi_instr *instr, print_state *state) + { + FILE *fp = state->fp; + print_dest(&instr->dest, state); + fprintf(fp, " = phi "); + nir_foreach_phi_src(instr, src) { + if (&src->node != exec_list_get_head(&instr->srcs)) + fprintf(fp, ", "); + + fprintf(fp, "block_%u: ", src->pred->index); + print_src(&src->src, state); + } + } + + static void + print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state) + { + FILE *fp = state->fp; + nir_foreach_parallel_copy_entry(instr, entry) { + if (&entry->node != exec_list_get_head(&instr->entries)) + fprintf(fp, "; "); + + print_dest(&entry->dest, state); + fprintf(fp, " = "); + print_src(&entry->src, state); + } + } + + static void + print_instr(const nir_instr *instr, print_state *state, unsigned tabs) + { + FILE *fp = state->fp; + print_tabs(tabs, fp); + + switch (instr->type) { + case nir_instr_type_alu: + print_alu_instr(nir_instr_as_alu(instr), state); + break; + + case nir_instr_type_call: + print_call_instr(nir_instr_as_call(instr), state); + break; + + case nir_instr_type_intrinsic: + print_intrinsic_instr(nir_instr_as_intrinsic(instr), state); + break; + + case nir_instr_type_tex: + print_tex_instr(nir_instr_as_tex(instr), state); + break; + + case nir_instr_type_load_const: + print_load_const_instr(nir_instr_as_load_const(instr), state); + break; + + case nir_instr_type_jump: + print_jump_instr(nir_instr_as_jump(instr), state); + break; + + case nir_instr_type_ssa_undef: + print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); + break; + + case nir_instr_type_phi: + print_phi_instr(nir_instr_as_phi(instr), state); + break; + + case nir_instr_type_parallel_copy: + print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state); + break; + + default: + unreachable("Invalid instruction type"); + break; + } + } + + static int + compare_block_index(const void *p1, const void *p2) + { + const nir_block *block1 = *((const nir_block **) p1); + const nir_block *block2 = *((const nir_block **) p2); + + return (int) block1->index - (int) block2->index; + } + + static void print_cf_node(nir_cf_node *node, print_state *state, + unsigned tabs); + + static void + print_block(nir_block *block, print_state *state, unsigned tabs) + { + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "block block_%u:\n", block->index); + + /* sort the predecessors by index so we consistently print the same thing */ + + nir_block **preds = + malloc(block->predecessors->entries * sizeof(nir_block *)); + + struct set_entry *entry; + unsigned i = 0; + set_foreach(block->predecessors, entry) { + preds[i++] = (nir_block *) entry->key; + } + + qsort(preds, block->predecessors->entries, sizeof(nir_block *), + compare_block_index); + + print_tabs(tabs, fp); + fprintf(fp, "/* preds: "); + for (unsigned i = 0; i < block->predecessors->entries; i++) { + fprintf(fp, "block_%u ", preds[i]->index); + } + 
fprintf(fp, "*/\n"); + + free(preds); + + nir_foreach_instr(block, instr) { + print_instr(instr, state, tabs); + fprintf(fp, "\n"); + } + + print_tabs(tabs, fp); + fprintf(fp, "/* succs: "); + for (unsigned i = 0; i < 2; i++) + if (block->successors[i]) { + fprintf(fp, "block_%u ", block->successors[i]->index); + } + fprintf(fp, "*/\n"); + } + + static void + print_if(nir_if *if_stmt, print_state *state, unsigned tabs) + { + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "if "); + print_src(&if_stmt->condition, state); + fprintf(fp, " {\n"); + foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "} else {\n"); + foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "}\n"); + } + + static void + print_loop(nir_loop *loop, print_state *state, unsigned tabs) + { + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "loop {\n"); + foreach_list_typed(nir_cf_node, node, node, &loop->body) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "}\n"); + } + + static void + print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs) + { + switch (node->type) { + case nir_cf_node_block: + print_block(nir_cf_node_as_block(node), state, tabs); + break; + + case nir_cf_node_if: + print_if(nir_cf_node_as_if(node), state, tabs); + break; + + case nir_cf_node_loop: + print_loop(nir_cf_node_as_loop(node), state, tabs); + break; + + default: + unreachable("Invalid CFG node type"); + } + } + + static void + print_function_impl(nir_function_impl *impl, print_state *state) + { + FILE *fp = state->fp; + + fprintf(fp, "\nimpl %s ", impl->function->name); + + for (unsigned i = 0; i < impl->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_var(impl->params[i], state); + } + + if (impl->return_var != NULL) { + if (impl->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + print_var(impl->return_var, state); + } + + fprintf(fp, "{\n"); + + nir_foreach_variable(var, &impl->locals) { + fprintf(fp, "\t"); + print_var_decl(var, state); + } + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + fprintf(fp, "\t"); + print_register_decl(reg, state); + } + + nir_index_blocks(impl); + + foreach_list_typed(nir_cf_node, node, node, &impl->body) { + print_cf_node(node, state, 1); + } + + fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index); + } + + static void + print_function(nir_function *function, print_state *state) + { + FILE *fp = state->fp; + + fprintf(fp, "decl_function %s ", function->name); + + for (unsigned i = 0; i < function->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + switch (function->params[i].param_type) { + case nir_parameter_in: + fprintf(fp, "in "); + break; + case nir_parameter_out: + fprintf(fp, "out "); + break; + case nir_parameter_inout: + fprintf(fp, "inout "); + break; + default: + unreachable("Invalid parameter type"); + } + + glsl_print_type(function->params[i].type, fp); + } + + if (function->return_type != NULL) { + if (function->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + glsl_print_type(function->return_type, fp); + } + + fprintf(fp, "\n"); + + if (function->impl != NULL) { + print_function_impl(function->impl, state); + return; + } + } + + static void + init_print_state(print_state *state, nir_shader *shader, FILE *fp) + { + state->fp = fp; + 
state->shader = shader; + state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->syms = _mesa_set_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + state->index = 0; + } + + static void + destroy_print_state(print_state *state) + { + _mesa_hash_table_destroy(state->ht, NULL); + _mesa_set_destroy(state->syms, NULL); + } + + void + nir_print_shader(nir_shader *shader, FILE *fp) + { + print_state state; + init_print_state(&state, shader, fp); + + fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); + + if (shader->info.name) + fprintf(fp, "name: %s\n", shader->info.name); + + if (shader->info.label) + fprintf(fp, "label: %s\n", shader->info.label); + + fprintf(fp, "inputs: %u\n", shader->num_inputs); + fprintf(fp, "outputs: %u\n", shader->num_outputs); + fprintf(fp, "uniforms: %u\n", shader->num_uniforms); ++ fprintf(fp, "shared: %u\n", shader->num_shared); + + nir_foreach_variable(var, &shader->uniforms) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->inputs) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->outputs) { + print_var_decl(var, &state); + } + ++ nir_foreach_variable(var, &shader->shared) { ++ print_var_decl(var, &state); ++ } ++ + nir_foreach_variable(var, &shader->globals) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->system_values) { + print_var_decl(var, &state); + } + + foreach_list_typed(nir_register, reg, node, &shader->registers) { + print_register_decl(reg, &state); + } + + foreach_list_typed(nir_function, func, node, &shader->functions) { + print_function(func, &state); + } + + destroy_print_state(&state); + } + + void + nir_print_instr(const nir_instr *instr, FILE *fp) + { + print_state state = { + .fp = fp, + }; + print_instr(instr, &state, 0); + + } diff --cc src/compiler/nir/nir_remove_dead_variables.c index 00000000000,db754e56b1c..792c5d4aae6 mode 000000,100644..100644 --- a/src/compiler/nir/nir_remove_dead_variables.c +++ b/src/compiler/nir/nir_remove_dead_variables.c @@@ -1,0 -1,141 +1,156 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + + static void + add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live) + { + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + nir_variable *var = instr->variables[i]->var; + _mesa_set_add(live, var); + } + } + + static void + add_var_use_call(nir_call_instr *instr, struct set *live) + { + if (instr->return_deref != NULL) { + nir_variable *var = instr->return_deref->var; + _mesa_set_add(live, var); + } + + for (unsigned i = 0; i < instr->num_params; i++) { + nir_variable *var = instr->params[i]->var; + _mesa_set_add(live, var); + } + } + + static void + add_var_use_tex(nir_tex_instr *instr, struct set *live) + { + if (instr->sampler != NULL) { + nir_variable *var = instr->sampler->var; + _mesa_set_add(live, var); + } + } + + static bool + add_var_use_block(nir_block *block, void *state) + { + struct set *live = state; + + nir_foreach_instr(block, instr) { + switch(instr->type) { + case nir_instr_type_intrinsic: + add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live); + break; + + case nir_instr_type_call: + add_var_use_call(nir_instr_as_call(instr), live); + break; + + case nir_instr_type_tex: + add_var_use_tex(nir_instr_as_tex(instr), live); + break; + + default: + break; + } + } + + return true; + } + + static void + add_var_use_shader(nir_shader *shader, struct set *live) + { + nir_foreach_function(shader, function) { + if (function->impl) { + nir_foreach_block(function->impl, add_var_use_block, live); + } + } + } + + static bool + remove_dead_vars(struct exec_list *var_list, struct set *live) + { + bool progress = false; + + foreach_list_typed_safe(nir_variable, var, node, var_list) { + struct set_entry *entry = _mesa_set_search(live, var); + if (entry == NULL) { + exec_node_remove(&var->node); + ralloc_free(var); + progress = true; + } + } + + return progress; + } + + bool -nir_remove_dead_variables(nir_shader *shader) ++nir_remove_dead_variables(nir_shader *shader, nir_variable_mode mode) + { + bool progress = false; + struct set *live = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + add_var_use_shader(shader, live); + - progress = remove_dead_vars(&shader->globals, live) || progress; ++ if (mode == nir_var_uniform || mode == nir_var_all) ++ progress = remove_dead_vars(&shader->uniforms, live) || progress; + - nir_foreach_function(shader, function) { - if (function->impl) { - if (remove_dead_vars(&function->impl->locals, live)) { - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_ssa_defs); - progress = true; ++ if (mode == nir_var_shader_in || mode == nir_var_all) ++ progress = remove_dead_vars(&shader->inputs, live) || progress; ++ ++ if (mode == nir_var_shader_out || mode == nir_var_all) ++ progress = remove_dead_vars(&shader->outputs, live) || progress; ++ ++ if (mode == nir_var_global || mode == nir_var_all) ++ progress = remove_dead_vars(&shader->globals, live) || progress; ++ ++ if (mode == nir_var_system_value || mode == nir_var_all) ++ progress = remove_dead_vars(&shader->system_values, live) || progress; ++ ++ if (mode == nir_var_local || mode == nir_var_all) { ++ nir_foreach_function(shader, function) { ++ if (function->impl) { ++ if (remove_dead_vars(&function->impl->locals, live)) { ++ nir_metadata_preserve(function->impl, nir_metadata_block_index | ++ nir_metadata_dominance | ++ nir_metadata_live_ssa_defs); 
++ progress = true; ++ } + } + } + } + + _mesa_set_destroy(live, NULL); + return progress; + } diff --cc src/compiler/nir/nir_repair_ssa.c index 00000000000,00000000000..3ab4f0f6db7 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/nir_repair_ssa.c @@@ -1,0 -1,0 +1,157 @@@ ++/* ++ * Copyright © 2016 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include "nir.h" ++#include "nir_phi_builder.h" ++ ++struct repair_ssa_state { ++ nir_function_impl *impl; ++ ++ BITSET_WORD *def_set; ++ struct nir_phi_builder *phi_builder; ++ ++ bool progress; ++}; ++ ++/* Get ready to build a phi and return the builder */ ++static struct nir_phi_builder * ++prep_build_phi(struct repair_ssa_state *state) ++{ ++ const unsigned num_words = BITSET_WORDS(state->impl->num_blocks); ++ ++ /* We create the phi builder on-demand. */ ++ if (state->phi_builder == NULL) { ++ state->phi_builder = nir_phi_builder_create(state->impl); ++ state->def_set = ralloc_array(NULL, BITSET_WORD, num_words); ++ } ++ ++ /* We're going to build a phi. That's progress. 
*/ ++ state->progress = true; ++ ++ /* Set the defs set to empty */ ++ memset(state->def_set, 0, num_words * sizeof(*state->def_set)); ++ ++ return state->phi_builder; ++} ++ ++static nir_block * ++get_src_block(nir_src *src) ++{ ++ if (src->parent_instr->type == nir_instr_type_phi) { ++ return exec_node_data(nir_phi_src, src, src)->pred; ++ } else { ++ return src->parent_instr->block; ++ } ++} ++ ++static bool ++repair_ssa_def(nir_ssa_def *def, void *void_state) ++{ ++ struct repair_ssa_state *state = void_state; ++ ++ bool is_valid = true; ++ nir_foreach_use(def, src) { ++ if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) { ++ is_valid = false; ++ break; ++ } ++ } ++ ++ if (is_valid) ++ return true; ++ ++ struct nir_phi_builder *pb = prep_build_phi(state); ++ ++ BITSET_SET(state->def_set, def->parent_instr->block->index); ++ ++ struct nir_phi_builder_value *val = ++ nir_phi_builder_add_value(pb, def->num_components, state->def_set); ++ ++ nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def); ++ ++ nir_foreach_use_safe(def, src) { ++ nir_block *src_block = get_src_block(src); ++ if (!nir_block_dominates(def->parent_instr->block, src_block)) { ++ nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa( ++ nir_phi_builder_value_get_block_def(val, src_block))); ++ } ++ } ++ ++ return true; ++} ++ ++static bool ++repair_ssa_block(nir_block *block, void *state) ++{ ++ nir_foreach_instr_safe(block, instr) { ++ nir_foreach_ssa_def(instr, repair_ssa_def, state); ++ } ++ ++ return true; ++} ++ ++bool ++nir_repair_ssa_impl(nir_function_impl *impl) ++{ ++ struct repair_ssa_state state; ++ ++ state.impl = impl; ++ state.phi_builder = NULL; ++ state.progress = false; ++ ++ nir_metadata_require(impl, nir_metadata_block_index | ++ nir_metadata_dominance); ++ ++ nir_foreach_block(impl, repair_ssa_block, &state); ++ ++ if (state.progress) ++ nir_metadata_preserve(impl, nir_metadata_block_index | ++ nir_metadata_dominance); ++ ++ if (state.phi_builder) { ++ nir_phi_builder_finish(state.phi_builder); ++ ralloc_free(state.def_set); ++ } ++ ++ return state.progress; ++} ++ ++/** This pass can be used to repair SSA form in a shader. ++ * ++ * Sometimes a transformation (such as return lowering) will have to make ++ * changes to a shader which, while still correct, break some of NIR's SSA ++ * invariants. This pass will insert ssa_undefs and phi nodes as needed to ++ * get the shader back into SSA that the validator will like. 
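As the comment above suggests, the typical client is a lowering pass that rewrites control flow; a hypothetical use (my_lower_returns_impl is a placeholder pass, not part of this patch) would be:

/* Sketch: after a pass such as return lowering mutates 'impl', let the
 * repair pass insert the phis/undefs needed wherever an SSA def no longer
 * dominates all of its uses. */
if (my_lower_returns_impl(impl))
   nir_repair_ssa_impl(impl);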
++ */ ++bool ++nir_repair_ssa(nir_shader *shader) ++{ ++ bool progress = false; ++ ++ nir_foreach_function(shader, function) { ++ if (function->impl) ++ progress = nir_repair_ssa_impl(function->impl) || progress; ++ } ++ ++ return progress; ++} diff --cc src/compiler/nir/nir_sweep.c index 00000000000,0710bdba7c7..5c62154ec7f mode 000000,100644..100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@@ -1,0 -1,173 +1,174 @@@ + /* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + #include "nir.h" + + /** + * \file nir_sweep.c + * + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated + * memory - anything still connected to the program will be kept, and any dead memory + * we dropped on the floor will be freed. + * + * The expectation is that drivers should call this when finished compiling the shader + * (after any optimization, lowering, and so on). However, it's also fine to call it + * earlier, and even many times, trading CPU cycles for memory savings. 
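To make the ownership model described above concrete, a hypothetical end-of-compile sequence (the run_* helpers and "s" are placeholders, not code from this patch) would be:

/* Sketch: optimization and lowering may orphan instructions and other
 * ralloc allocations; sweeping afterwards frees whatever is no longer
 * reachable from the nir_shader. */
run_optimization_passes(s);   /* hypothetical */
run_lowering_passes(s);       /* hypothetical */
nir_sweep(s);                 /* reclaim dead ralloc memory */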
+ */ + + #define steal_list(mem_ctx, type, list) \ + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } + + static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); + + static bool + sweep_src_indirect(nir_src *src, void *nir) + { + if (!src->is_ssa && src->reg.indirect) + ralloc_steal(nir, src->reg.indirect); + + return true; + } + + static bool + sweep_dest_indirect(nir_dest *dest, void *nir) + { + if (!dest->is_ssa && dest->reg.indirect) + ralloc_steal(nir, dest->reg.indirect); + + return true; + } + + static void + sweep_block(nir_shader *nir, nir_block *block) + { + ralloc_steal(nir, block); + + nir_foreach_instr(block, instr) { + ralloc_steal(nir, instr); + + nir_foreach_src(instr, sweep_src_indirect, nir); + nir_foreach_dest(instr, sweep_dest_indirect, nir); + } + } + + static void + sweep_if(nir_shader *nir, nir_if *iff) + { + ralloc_steal(nir, iff); + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { + sweep_cf_node(nir, cf_node); + } + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { + sweep_cf_node(nir, cf_node); + } + } + + static void + sweep_loop(nir_shader *nir, nir_loop *loop) + { + ralloc_steal(nir, loop); + + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + sweep_cf_node(nir, cf_node); + } + } + + static void + sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) + { + switch (cf_node->type) { + case nir_cf_node_block: + sweep_block(nir, nir_cf_node_as_block(cf_node)); + break; + case nir_cf_node_if: + sweep_if(nir, nir_cf_node_as_if(cf_node)); + break; + case nir_cf_node_loop: + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); + break; + default: + unreachable("Invalid CF node type"); + } + } + + static void + sweep_impl(nir_shader *nir, nir_function_impl *impl) + { + ralloc_steal(nir, impl); + + ralloc_steal(nir, impl->params); + ralloc_steal(nir, impl->return_var); + steal_list(nir, nir_variable, &impl->locals); + steal_list(nir, nir_register, &impl->registers); + + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { + sweep_cf_node(nir, cf_node); + } + + sweep_block(nir, impl->end_block); + + /* Wipe out all the metadata, if any. */ + nir_metadata_preserve(impl, nir_metadata_none); + } + + static void + sweep_function(nir_shader *nir, nir_function *f) + { + ralloc_steal(nir, f); + ralloc_steal(nir, f->params); + + if (f->impl) + sweep_impl(nir, f->impl); + } + + void + nir_sweep(nir_shader *nir) + { + void *rubbish = ralloc_context(NULL); + + /* First, move ownership of all the memory to a temporary context; assume dead. */ + ralloc_adopt(rubbish, nir); + + ralloc_steal(nir, (char *)nir->info.name); + if (nir->info.label) + ralloc_steal(nir, (char *)nir->info.label); + + /* Variables and registers are not dead. Steal them back. */ + steal_list(nir, nir_variable, &nir->uniforms); + steal_list(nir, nir_variable, &nir->inputs); + steal_list(nir, nir_variable, &nir->outputs); ++ steal_list(nir, nir_variable, &nir->shared); + steal_list(nir, nir_variable, &nir->globals); + steal_list(nir, nir_variable, &nir->system_values); + steal_list(nir, nir_register, &nir->registers); + + /* Recurse into functions, stealing their contents back. */ + foreach_list_typed(nir_function, func, node, &nir->functions) { + sweep_function(nir, func); + } + + /* Free everything we didn't steal back. 
*/ + ralloc_free(rubbish); + } diff --cc src/compiler/nir/nir_validate.c index 00000000000,e4db68db3c0..1a943d76314 mode 000000,100644..100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@@ -1,0 -1,1071 +1,1076 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir.h" + #include + + /* + * This file checks for invalid IR indicating a bug somewhere in the compiler. + */ + + /* Since this file is just a pile of asserts, don't bother compiling it if + * we're not building a debug build. + */ + #ifdef DEBUG + + /* + * Per-register validation state. + */ + + typedef struct { + /* + * equivalent to the uses and defs in nir_register, but built up by the + * validator. At the end, we verify that the sets have the same entries. + */ + struct set *uses, *if_uses, *defs; + nir_function_impl *where_defined; /* NULL for global registers */ + } reg_validate_state; + + typedef struct { + /* + * equivalent to the uses in nir_ssa_def, but built up by the validator. + * At the end, we verify that the sets have the same entries. 
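Given that this file is only compiled in debug builds, the conventional way to use it is to bracket individual passes with validation calls; a hypothetical wrapper (my_pass and "s" are placeholders, not code from this patch) might be:

/* Sketch: validate before and after a pass so a broken invariant is
 * caught next to the pass that introduced it; in non-debug builds
 * nir_validate_shader() is expected to be a no-op. */
nir_validate_shader(s);   /* IR must be valid going in */
my_pass(s);               /* hypothetical transformation */
nir_validate_shader(s);   /* catch anything the pass broke */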
+ */ + struct set *uses, *if_uses; + nir_function_impl *where_defined; + } ssa_def_validate_state; + + typedef struct { + /* map of register -> validation state (struct above) */ + struct hash_table *regs; + + /* the current shader being validated */ + nir_shader *shader; + + /* the current instruction being validated */ + nir_instr *instr; + + /* the current basic block being validated */ + nir_block *block; + + /* the current if statement being validated */ + nir_if *if_stmt; + + /* the current loop being visited */ + nir_loop *loop; + + /* the parent of the current cf node being visited */ + nir_cf_node *parent_node; + + /* the current function implementation being validated */ + nir_function_impl *impl; + + /* map of SSA value -> function implementation where it is defined */ + struct hash_table *ssa_defs; + + /* bitset of ssa definitions we have found; used to check uniqueness */ + BITSET_WORD *ssa_defs_found; + + /* bitset of registers we have currently found; used to check uniqueness */ + BITSET_WORD *regs_found; + + /* map of local variable -> function implementation where it is defined */ + struct hash_table *var_defs; + } validate_state; + + static void validate_src(nir_src *src, validate_state *state); + + static void + validate_reg_src(nir_src *src, validate_state *state) + { + assert(src->reg.reg != NULL); + + struct hash_entry *entry; + entry = _mesa_hash_table_search(state->regs, src->reg.reg); + assert(entry); + + reg_validate_state *reg_state = (reg_validate_state *) entry->data; + + if (state->instr) { + _mesa_set_add(reg_state->uses, src); + } else { + assert(state->if_stmt); + _mesa_set_add(reg_state->if_uses, src); + } + + if (!src->reg.reg->is_global) { + assert(reg_state->where_defined == state->impl && + "using a register declared in a different function"); + } + + assert((src->reg.reg->num_array_elems == 0 || + src->reg.base_offset < src->reg.reg->num_array_elems) && + "definitely out-of-bounds array access"); + + if (src->reg.indirect) { + assert(src->reg.reg->num_array_elems != 0); + assert((src->reg.indirect->is_ssa || + src->reg.indirect->reg.indirect == NULL) && + "only one level of indirection allowed"); + validate_src(src->reg.indirect, state); + } + } + + static void + validate_ssa_src(nir_src *src, validate_state *state) + { + assert(src->ssa != NULL); + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa); + + assert(entry); + + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + assert(def_state->where_defined == state->impl && + "using an SSA value defined in a different function"); + + if (state->instr) { + _mesa_set_add(def_state->uses, src); + } else { + assert(state->if_stmt); + _mesa_set_add(def_state->if_uses, src); + } + + /* TODO validate that the use is dominated by the definition */ + } + + static void + validate_src(nir_src *src, validate_state *state) + { + if (state->instr) + assert(src->parent_instr == state->instr); + else + assert(src->parent_if == state->if_stmt); + + if (src->is_ssa) + validate_ssa_src(src, state); + else + validate_reg_src(src, state); + } + + static void + validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) + { + nir_alu_src *src = &instr->src[index]; + + unsigned num_components; + if (src->src.is_ssa) + num_components = src->src.ssa->num_components; + else { + if (src->src.reg.reg->is_packed) + num_components = 4; /* can't check anything */ + else + num_components = src->src.reg.reg->num_components; + } + for (unsigned i = 0; i < 4; i++) 
{ + assert(src->swizzle[i] < 4); + + if (nir_alu_instr_channel_used(instr, index, i)) + assert(src->swizzle[i] < num_components); + } + + validate_src(&src->src, state); + } + + static void + validate_reg_dest(nir_reg_dest *dest, validate_state *state) + { + assert(dest->reg != NULL); + + assert(dest->parent_instr == state->instr); + + struct hash_entry *entry2; + entry2 = _mesa_hash_table_search(state->regs, dest->reg); + + assert(entry2); + + reg_validate_state *reg_state = (reg_validate_state *) entry2->data; + _mesa_set_add(reg_state->defs, dest); + + if (!dest->reg->is_global) { + assert(reg_state->where_defined == state->impl && + "writing to a register declared in a different function"); + } + + assert((dest->reg->num_array_elems == 0 || + dest->base_offset < dest->reg->num_array_elems) && + "definitely out-of-bounds array access"); + + if (dest->indirect) { + assert(dest->reg->num_array_elems != 0); + assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) && + "only one level of indirection allowed"); + validate_src(dest->indirect, state); + } + } + + static void + validate_ssa_def(nir_ssa_def *def, validate_state *state) + { + assert(def->index < state->impl->ssa_alloc); + assert(!BITSET_TEST(state->ssa_defs_found, def->index)); + BITSET_SET(state->ssa_defs_found, def->index); + + assert(def->parent_instr == state->instr); + + assert(def->num_components <= 4); + + list_validate(&def->uses); + list_validate(&def->if_uses); + + ssa_def_validate_state *def_state = ralloc(state->ssa_defs, + ssa_def_validate_state); + def_state->where_defined = state->impl; + def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + _mesa_hash_table_insert(state->ssa_defs, def, def_state); + } + + static void + validate_dest(nir_dest *dest, validate_state *state) + { + if (dest->is_ssa) + validate_ssa_def(&dest->ssa, state); + else + validate_reg_dest(&dest->reg, state); + } + + static void + validate_alu_dest(nir_alu_dest *dest, validate_state *state) + { + unsigned dest_size = + dest->dest.is_ssa ? 
dest->dest.ssa.num_components + : dest->dest.reg.reg->num_components; + bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed; + /* + * validate that the instruction doesn't write to components not in the + * register/SSA value + */ + assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1))); + + /* validate that saturate is only ever used on instructions with + * destinations of type float + */ + nir_alu_instr *alu = nir_instr_as_alu(state->instr); + assert(nir_op_infos[alu->op].output_type == nir_type_float || + !dest->saturate); + + validate_dest(&dest->dest, state); + } + + static void + validate_alu_instr(nir_alu_instr *instr, validate_state *state) + { + assert(instr->op < nir_num_opcodes); + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + validate_alu_src(instr, i, state); + } + + validate_alu_dest(&instr->dest, state); + } + + static void + validate_deref_chain(nir_deref *deref, validate_state *state) + { + assert(deref->child == NULL || ralloc_parent(deref->child) == deref); + + nir_deref *parent = NULL; + while (deref != NULL) { + switch (deref->deref_type) { + case nir_deref_type_array: + assert(deref->type == glsl_get_array_element(parent->type)); + if (nir_deref_as_array(deref)->deref_array_type == + nir_deref_array_type_indirect) + validate_src(&nir_deref_as_array(deref)->indirect, state); + break; + + case nir_deref_type_struct: + assert(deref->type == + glsl_get_struct_field(parent->type, + nir_deref_as_struct(deref)->index)); + break; + + case nir_deref_type_var: + break; + + default: + assert(!"Invalid deref type"); + break; + } + + parent = deref; + deref = deref->child; + } + } + + static void + validate_var_use(nir_variable *var, validate_state *state) + { + if (var->data.mode == nir_var_local) { + struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); + + assert(entry); + assert((nir_function_impl *) entry->data == state->impl); + } + } + + static void + validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) + { + assert(deref != NULL); + assert(ralloc_parent(deref) == parent_mem_ctx); + assert(deref->deref.type == deref->var->type); + + validate_var_use(deref->var, state); + + validate_deref_chain(&deref->deref, state); + } + + static void + validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) + { + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) { + unsigned components_read = + nir_intrinsic_infos[instr->intrinsic].src_components[i]; + if (components_read == 0) + components_read = instr->num_components; + + assert(components_read > 0); + + if (instr->src[i].is_ssa) { + assert(components_read <= instr->src[i].ssa->num_components); + } else if (!instr->src[i].reg.reg->is_packed) { + assert(components_read <= instr->src[i].reg.reg->num_components); + } + + validate_src(&instr->src[i], state); + } + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + validate_deref_var(instr, instr->variables[i], state); + } + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + unsigned components_written = + nir_intrinsic_infos[instr->intrinsic].dest_components; + if (components_written == 0) + components_written = instr->num_components; + + assert(components_written > 0); + + if (instr->dest.is_ssa) { + assert(components_written <= instr->dest.ssa.num_components); + } else if (!instr->dest.reg.reg->is_packed) { + 
assert(components_written <= instr->dest.reg.reg->num_components); + } + + validate_dest(&instr->dest, state); + } + + switch (instr->intrinsic) { + case nir_intrinsic_load_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type) || + (instr->variables[0]->var->data.mode == nir_var_uniform && + glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); + assert(instr->num_components == glsl_get_vector_elements(type)); + break; + } + case nir_intrinsic_store_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type) || + (instr->variables[0]->var->data.mode == nir_var_uniform && + glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); + assert(instr->num_components == glsl_get_vector_elements(type)); + assert(instr->variables[0]->var->data.mode != nir_var_shader_in && + instr->variables[0]->var->data.mode != nir_var_uniform && + instr->variables[0]->var->data.mode != nir_var_shader_storage); + assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0); + break; + } + case nir_intrinsic_copy_var: + assert(nir_deref_tail(&instr->variables[0]->deref)->type == + nir_deref_tail(&instr->variables[1]->deref)->type); + assert(instr->variables[0]->var->data.mode != nir_var_shader_in && + instr->variables[0]->var->data.mode != nir_var_uniform && + instr->variables[0]->var->data.mode != nir_var_shader_storage); + break; + default: + break; + } + } + + static void + validate_tex_instr(nir_tex_instr *instr, validate_state *state) + { + bool src_type_seen[nir_num_tex_src_types]; + for (unsigned i = 0; i < nir_num_tex_src_types; i++) + src_type_seen[i] = false; + + for (unsigned i = 0; i < instr->num_srcs; i++) { + assert(!src_type_seen[instr->src[i].src_type]); + src_type_seen[instr->src[i].src_type] = true; + validate_src(&instr->src[i].src, state); + } + + if (instr->sampler != NULL) + validate_deref_var(instr, instr->sampler, state); + + validate_dest(&instr->dest, state); + } + + static void + validate_call_instr(nir_call_instr *instr, validate_state *state) + { - if (instr->return_deref == NULL) ++ if (instr->return_deref == NULL) { + assert(glsl_type_is_void(instr->callee->return_type)); - else ++ } else { + assert(instr->return_deref->deref.type == instr->callee->return_type); ++ validate_deref_var(instr, instr->return_deref, state); ++ } + + assert(instr->num_params == instr->callee->num_params); + + for (unsigned i = 0; i < instr->num_params; i++) { + assert(instr->callee->params[i].type == instr->params[i]->deref.type); + validate_deref_var(instr, instr->params[i], state); + } - - validate_deref_var(instr, instr->return_deref, state); + } + + static void + validate_load_const_instr(nir_load_const_instr *instr, validate_state *state) + { + validate_ssa_def(&instr->def, state); + } + + static void + validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state) + { + validate_ssa_def(&instr->def, state); + } + + static void + validate_phi_instr(nir_phi_instr *instr, validate_state *state) + { + /* + * don't validate the sources until we get to them from their predecessor + * basic blocks, to avoid validating an SSA use before its definition. 
+ */ + + validate_dest(&instr->dest, state); + + exec_list_validate(&instr->srcs); + assert(exec_list_length(&instr->srcs) == + state->block->predecessors->entries); + } + + static void + validate_instr(nir_instr *instr, validate_state *state) + { + assert(instr->block == state->block); + + state->instr = instr; + + switch (instr->type) { + case nir_instr_type_alu: + validate_alu_instr(nir_instr_as_alu(instr), state); + break; + + case nir_instr_type_call: + validate_call_instr(nir_instr_as_call(instr), state); + break; + + case nir_instr_type_intrinsic: + validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state); + break; + + case nir_instr_type_tex: + validate_tex_instr(nir_instr_as_tex(instr), state); + break; + + case nir_instr_type_load_const: + validate_load_const_instr(nir_instr_as_load_const(instr), state); + break; + + case nir_instr_type_phi: + validate_phi_instr(nir_instr_as_phi(instr), state); + break; + + case nir_instr_type_ssa_undef: + validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); + break; + + case nir_instr_type_jump: + break; + + default: + assert(!"Invalid ALU instruction type"); + break; + } + + state->instr = NULL; + } + + static void + validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state) + { + state->instr = &instr->instr; + + assert(instr->dest.is_ssa); + + exec_list_validate(&instr->srcs); + nir_foreach_phi_src(instr, src) { + if (src->pred == pred) { + assert(src->src.is_ssa); + assert(src->src.ssa->num_components == + instr->dest.ssa.num_components); + + validate_src(&src->src, state); + state->instr = NULL; + return; + } + } + + abort(); + } + + static void + validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state) + { + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + + validate_phi_src(nir_instr_as_phi(instr), block, state); + } + } + + static void validate_cf_node(nir_cf_node *node, validate_state *state); + + static void + validate_block(nir_block *block, validate_state *state) + { + assert(block->cf_node.parent == state->parent_node); + + state->block = block; + + exec_list_validate(&block->instr_list); + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_phi) { + assert(instr == nir_block_first_instr(block) || + nir_instr_prev(instr)->type == nir_instr_type_phi); + } + + if (instr->type == nir_instr_type_jump) { + assert(instr == nir_block_last_instr(block)); + } + + validate_instr(instr, state); + } + + assert(block->successors[0] != NULL); + assert(block->successors[0] != block->successors[1]); + + for (unsigned i = 0; i < 2; i++) { + if (block->successors[i] != NULL) { + struct set_entry *entry = + _mesa_set_search(block->successors[i]->predecessors, block); + assert(entry); + + validate_phi_srcs(block, block->successors[i], state); + } + } + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + const nir_block *pred = entry->key; + assert(pred->successors[0] == block || + pred->successors[1] == block); + } + + if (!exec_list_is_empty(&block->instr_list) && + nir_block_last_instr(block)->type == nir_instr_type_jump) { + assert(block->successors[1] == NULL); + nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); + switch (jump->type) { + case nir_jump_break: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node)); + assert(block->successors[0] == after); + break; + } + + case nir_jump_continue: { + nir_block *first = + 
nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + assert(block->successors[0] == first); + break; + } + + case nir_jump_return: + assert(block->successors[0] == state->impl->end_block); + break; + + default: + unreachable("bad jump type"); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next == NULL) { + switch (state->parent_node->type) { + case nir_cf_node_loop: { + nir_block *first = + nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + assert(block->successors[0] == first); + /* due to the hack for infinite loops, block->successors[1] may + * point to the block after the loop. + */ + break; + } + + case nir_cf_node_if: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(state->parent_node)); + assert(block->successors[0] == after); + assert(block->successors[1] == NULL); + break; + } + + case nir_cf_node_function: + assert(block->successors[0] == state->impl->end_block); + assert(block->successors[1] == NULL); + break; + + default: + unreachable("unknown control flow node type"); + } + } else { + if (next->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(next); + assert(&block->successors[0]->cf_node == + nir_if_first_then_node(if_stmt)); + assert(&block->successors[1]->cf_node == + nir_if_first_else_node(if_stmt)); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(next); + assert(&block->successors[0]->cf_node == + nir_loop_first_cf_node(loop)); + assert(block->successors[1] == NULL); + } + } + } + } + + static void + validate_if(nir_if *if_stmt, validate_state *state) + { + state->if_stmt = if_stmt; + + assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev)); + nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + assert(prev_node->type == nir_cf_node_block); + + assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next)); + nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node); + assert(next_node->type == nir_cf_node_block); + + validate_src(&if_stmt->condition, state); + + assert(!exec_list_is_empty(&if_stmt->then_list)); + assert(!exec_list_is_empty(&if_stmt->else_list)); + + nir_cf_node *old_parent = state->parent_node; + state->parent_node = &if_stmt->cf_node; + + exec_list_validate(&if_stmt->then_list); + foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) { + validate_cf_node(cf_node, state); + } + + exec_list_validate(&if_stmt->else_list); + foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) { + validate_cf_node(cf_node, state); + } + + state->parent_node = old_parent; + state->if_stmt = NULL; + } + + static void + validate_loop(nir_loop *loop, validate_state *state) + { + assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev)); + nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node); + assert(prev_node->type == nir_cf_node_block); + + assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next)); + nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node); + assert(next_node->type == nir_cf_node_block); + + assert(!exec_list_is_empty(&loop->body)); + + nir_cf_node *old_parent = state->parent_node; + state->parent_node = &loop->cf_node; + nir_loop *old_loop = state->loop; + state->loop = loop; + + exec_list_validate(&loop->body); + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + validate_cf_node(cf_node, state); + } + + state->parent_node = old_parent; + state->loop = old_loop; + } + + static void + validate_cf_node(nir_cf_node *node, validate_state *state) + { + 
assert(node->parent == state->parent_node); + + switch (node->type) { + case nir_cf_node_block: + validate_block(nir_cf_node_as_block(node), state); + break; + + case nir_cf_node_if: + validate_if(nir_cf_node_as_if(node), state); + break; + + case nir_cf_node_loop: + validate_loop(nir_cf_node_as_loop(node), state); + break; + + default: + unreachable("Invalid CF node type"); + } + } + + static void + prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) + { + assert(reg->is_global == is_global); + + if (is_global) + assert(reg->index < state->shader->reg_alloc); + else + assert(reg->index < state->impl->reg_alloc); + assert(!BITSET_TEST(state->regs_found, reg->index)); + BITSET_SET(state->regs_found, reg->index); + + list_validate(®->uses); + list_validate(®->defs); + list_validate(®->if_uses); + + reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); + reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + reg_state->where_defined = is_global ? NULL : state->impl; + + _mesa_hash_table_insert(state->regs, reg, reg_state); + } + + static void + postvalidate_reg_decl(nir_register *reg, validate_state *state) + { + struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg); + + reg_validate_state *reg_state = (reg_validate_state *) entry->data; + + nir_foreach_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->uses, src); + assert(entry); + _mesa_set_remove(reg_state->uses, entry); + } + + if (reg_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(reg_state->uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + nir_foreach_if_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src); + assert(entry); + _mesa_set_remove(reg_state->if_uses, entry); + } + + if (reg_state->if_uses->entries != 0) { + printf("extra entries in register if_uses:\n"); + struct set_entry *entry; + set_foreach(reg_state->if_uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + nir_foreach_def(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->defs, src); + assert(entry); + _mesa_set_remove(reg_state->defs, entry); + } + + if (reg_state->defs->entries != 0) { + printf("extra entries in register defs:\n"); + struct set_entry *entry; + set_foreach(reg_state->defs, entry) + printf("%p\n", entry->key); + + abort(); + } + } + + static void + validate_var_decl(nir_variable *var, bool is_global, validate_state *state) + { + assert(is_global != (var->data.mode == nir_var_local)); + + /* + * TODO validate some things ir_validate.cpp does (requires more GLSL type + * support) + */ + + if (!is_global) { + _mesa_hash_table_insert(state->var_defs, var, state->impl); + } + } + + static bool + postvalidate_ssa_def(nir_ssa_def *def, void *void_state) + { + validate_state *state = void_state; + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + nir_foreach_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->uses, src); + assert(entry); + _mesa_set_remove(def_state->uses, entry); + } + + if (def_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct 
set_entry *entry; + set_foreach(def_state->uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + nir_foreach_if_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->if_uses, src); + assert(entry); + _mesa_set_remove(def_state->if_uses, entry); + } + + if (def_state->if_uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->if_uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + return true; + } + + static bool + postvalidate_ssa_defs_block(nir_block *block, void *state) + { + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, postvalidate_ssa_def, state); + + return true; + } + + static void + validate_function_impl(nir_function_impl *impl, validate_state *state) + { + assert(impl->function->impl == impl); + assert(impl->cf_node.parent == NULL); + + assert(impl->num_params == impl->function->num_params); + for (unsigned i = 0; i < impl->num_params; i++) + assert(impl->params[i]->type == impl->function->params[i].type); + + if (glsl_type_is_void(impl->function->return_type)) + assert(impl->return_var == NULL); + else + assert(impl->return_var->type == impl->function->return_type); + + assert(exec_list_is_empty(&impl->end_block->instr_list)); + assert(impl->end_block->successors[0] == NULL); + assert(impl->end_block->successors[1] == NULL); + + state->impl = impl; + state->parent_node = &impl->cf_node; + + exec_list_validate(&impl->locals); + nir_foreach_variable(var, &impl->locals) { + validate_var_decl(var, false, state); + } + + state->regs_found = realloc(state->regs_found, + BITSET_WORDS(impl->reg_alloc) * + sizeof(BITSET_WORD)); + memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&impl->registers); + foreach_list_typed(nir_register, reg, node, &impl->registers) { + prevalidate_reg_decl(reg, false, state); + } + + state->ssa_defs_found = realloc(state->ssa_defs_found, + BITSET_WORDS(impl->ssa_alloc) * + sizeof(BITSET_WORD)); + memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&impl->body); + foreach_list_typed(nir_cf_node, node, node, &impl->body) { + validate_cf_node(node, state); + } + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + postvalidate_reg_decl(reg, state); + } + + nir_foreach_block(impl, postvalidate_ssa_defs_block, state); + } + + static void + validate_function(nir_function *func, validate_state *state) + { + if (func->impl != NULL) { + assert(func->impl->function == func); + validate_function_impl(func->impl, state); + } + } + + static void + init_validate_state(validate_state *state) + { + state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->ssa_defs_found = NULL; + state->regs_found = NULL; + state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->loop = NULL; + } + + static void + destroy_validate_state(validate_state *state) + { + _mesa_hash_table_destroy(state->regs, NULL); + _mesa_hash_table_destroy(state->ssa_defs, NULL); + free(state->ssa_defs_found); + free(state->regs_found); + _mesa_hash_table_destroy(state->var_defs, NULL); + } + + void + nir_validate_shader(nir_shader *shader) + { + validate_state state; + init_validate_state(&state); + + state.shader = shader; + + exec_list_validate(&shader->uniforms); + 
nir_foreach_variable(var, &shader->uniforms) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->inputs); + nir_foreach_variable(var, &shader->inputs) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->outputs); + nir_foreach_variable(var, &shader->outputs) { + validate_var_decl(var, true, &state); + } + ++ exec_list_validate(&shader->shared); ++ nir_foreach_variable(var, &shader->shared) { ++ validate_var_decl(var, true, &state); ++ } ++ + exec_list_validate(&shader->globals); + nir_foreach_variable(var, &shader->globals) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->system_values); + nir_foreach_variable(var, &shader->system_values) { + validate_var_decl(var, true, &state); + } + + state.regs_found = realloc(state.regs_found, + BITSET_WORDS(shader->reg_alloc) * + sizeof(BITSET_WORD)); + memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&shader->registers); + foreach_list_typed(nir_register, reg, node, &shader->registers) { + prevalidate_reg_decl(reg, true, &state); + } + + exec_list_validate(&shader->functions); + foreach_list_typed(nir_function, func, node, &shader->functions) { + validate_function(func, &state); + } + + foreach_list_typed(nir_register, reg, node, &shader->registers) { + postvalidate_reg_decl(reg, &state); + } + + destroy_validate_state(&state); + } + + #endif /* NDEBUG */ diff --cc src/compiler/nir/spirv/GLSL.std.450.h index 00000000000,00000000000..d1c9b5c1d44 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/GLSL.std.450.h @@@ -1,0 -1,0 +1,127 @@@ ++/* ++** Copyright (c) 2014-2015 The Khronos Group Inc. ++** ++** Permission is hereby granted, free of charge, to any person obtaining a copy ++** of this software and/or associated documentation files (the "Materials"), ++** to deal in the Materials without restriction, including without limitation ++** the rights to use, copy, modify, merge, publish, distribute, sublicense, ++** and/or sell copies of the Materials, and to permit persons to whom the ++** Materials are furnished to do so, subject to the following conditions: ++** ++** The above copyright notice and this permission notice shall be included in ++** all copies or substantial portions of the Materials. ++** ++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND ++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ ++** ++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS ++** IN THE MATERIALS. 
++*/ ++ ++#ifndef GLSLstd450_H ++#define GLSLstd450_H ++ ++const int GLSLstd450Version = 99; ++const int GLSLstd450Revision = 3; ++ ++enum GLSLstd450 { ++ GLSLstd450Bad = 0, // Don't use ++ ++ GLSLstd450Round = 1, ++ GLSLstd450RoundEven = 2, ++ GLSLstd450Trunc = 3, ++ GLSLstd450FAbs = 4, ++ GLSLstd450SAbs = 5, ++ GLSLstd450FSign = 6, ++ GLSLstd450SSign = 7, ++ GLSLstd450Floor = 8, ++ GLSLstd450Ceil = 9, ++ GLSLstd450Fract = 10, ++ ++ GLSLstd450Radians = 11, ++ GLSLstd450Degrees = 12, ++ GLSLstd450Sin = 13, ++ GLSLstd450Cos = 14, ++ GLSLstd450Tan = 15, ++ GLSLstd450Asin = 16, ++ GLSLstd450Acos = 17, ++ GLSLstd450Atan = 18, ++ GLSLstd450Sinh = 19, ++ GLSLstd450Cosh = 20, ++ GLSLstd450Tanh = 21, ++ GLSLstd450Asinh = 22, ++ GLSLstd450Acosh = 23, ++ GLSLstd450Atanh = 24, ++ GLSLstd450Atan2 = 25, ++ ++ GLSLstd450Pow = 26, ++ GLSLstd450Exp = 27, ++ GLSLstd450Log = 28, ++ GLSLstd450Exp2 = 29, ++ GLSLstd450Log2 = 30, ++ GLSLstd450Sqrt = 31, ++ GLSLstd450InverseSqrt = 32, ++ ++ GLSLstd450Determinant = 33, ++ GLSLstd450MatrixInverse = 34, ++ ++ GLSLstd450Modf = 35, // second operand needs an OpVariable to write to ++ GLSLstd450ModfStruct = 36, // no OpVariable operand ++ GLSLstd450FMin = 37, ++ GLSLstd450UMin = 38, ++ GLSLstd450SMin = 39, ++ GLSLstd450FMax = 40, ++ GLSLstd450UMax = 41, ++ GLSLstd450SMax = 42, ++ GLSLstd450FClamp = 43, ++ GLSLstd450UClamp = 44, ++ GLSLstd450SClamp = 45, ++ GLSLstd450FMix = 46, ++ GLSLstd450IMix = 47, ++ GLSLstd450Step = 48, ++ GLSLstd450SmoothStep = 49, ++ ++ GLSLstd450Fma = 50, ++ GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to ++ GLSLstd450FrexpStruct = 52, // no OpVariable operand ++ GLSLstd450Ldexp = 53, ++ ++ GLSLstd450PackSnorm4x8 = 54, ++ GLSLstd450PackUnorm4x8 = 55, ++ GLSLstd450PackSnorm2x16 = 56, ++ GLSLstd450PackUnorm2x16 = 57, ++ GLSLstd450PackHalf2x16 = 58, ++ GLSLstd450PackDouble2x32 = 59, ++ GLSLstd450UnpackSnorm2x16 = 60, ++ GLSLstd450UnpackUnorm2x16 = 61, ++ GLSLstd450UnpackHalf2x16 = 62, ++ GLSLstd450UnpackSnorm4x8 = 63, ++ GLSLstd450UnpackUnorm4x8 = 64, ++ GLSLstd450UnpackDouble2x32 = 65, ++ ++ GLSLstd450Length = 66, ++ GLSLstd450Distance = 67, ++ GLSLstd450Cross = 68, ++ GLSLstd450Normalize = 69, ++ GLSLstd450FaceForward = 70, ++ GLSLstd450Reflect = 71, ++ GLSLstd450Refract = 72, ++ ++ GLSLstd450FindILsb = 73, ++ GLSLstd450FindSMsb = 74, ++ GLSLstd450FindUMsb = 75, ++ ++ GLSLstd450InterpolateAtCentroid = 76, ++ GLSLstd450InterpolateAtSample = 77, ++ GLSLstd450InterpolateAtOffset = 78, ++ ++ GLSLstd450Count ++}; ++ ++#endif // #ifndef GLSLstd450_H diff --cc src/compiler/nir/spirv/nir_spirv.h index 00000000000,00000000000..500f2cb94df new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/nir_spirv.h @@@ -1,0 -1,0 +1,54 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. 
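The enumerants above appear in a SPIR-V module as the literal operand of OpExtInst against an imported "GLSL.std.450" instruction set; a hypothetical consumer-side dispatch (ext_opcode stands in for that literal and is not from this patch) could look like:

/* Sketch: route a GLSL.std.450 extended opcode to a handler. */
switch ((enum GLSLstd450)ext_opcode) {
case GLSLstd450Floor:        /* component-wise floor */   break;
case GLSLstd450Fma:          /* fused multiply-add */     break;
case GLSLstd450InverseSqrt:  /* 1.0 / sqrt(x) */          break;
default:
   assert(ext_opcode < GLSLstd450Count);   /* unhandled or invalid */
   break;
}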
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Jason Ekstrand (jason@jlekstrand.net) ++ * ++ */ ++ ++#pragma once ++ ++#ifndef _NIR_SPIRV_H_ ++#define _NIR_SPIRV_H_ ++ ++#include "nir/nir.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct nir_spirv_specialization { ++ uint32_t id; ++ uint32_t data; ++}; ++ ++nir_function *spirv_to_nir(const uint32_t *words, size_t word_count, ++ struct nir_spirv_specialization *specializations, ++ unsigned num_specializations, ++ gl_shader_stage stage, const char *entry_point_name, ++ const nir_shader_compiler_options *options); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _NIR_SPIRV_H_ */ diff --cc src/compiler/nir/spirv/spirv.h index 00000000000,00000000000..63bcb2f88dd new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/spirv.h @@@ -1,0 -1,0 +1,870 @@@ ++/* ++** Copyright (c) 2014-2015 The Khronos Group Inc. ++** ++** Permission is hereby granted, free of charge, to any person obtaining a copy ++** of this software and/or associated documentation files (the "Materials"), ++** to deal in the Materials without restriction, including without limitation ++** the rights to use, copy, modify, merge, publish, distribute, sublicense, ++** and/or sell copies of the Materials, and to permit persons to whom the ++** Materials are furnished to do so, subject to the following conditions: ++** ++** The above copyright notice and this permission notice shall be included in ++** all copies or substantial portions of the Materials. ++** ++** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS ++** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND ++** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ ++** ++** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS ++** IN THE MATERIALS. ++*/ ++ ++/* ++** This header is automatically generated by the same tool that creates ++** the Binary Section of the SPIR-V specification. ++*/ ++ ++/* ++** Enumeration tokens for SPIR-V, in various styles: ++** C, C++, C++11, JSON, Lua, Python ++** ++** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ++** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL ++** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL ++** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL ++** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] ++** ++** Some tokens act like mask values, which can be OR'd together, ++** while others are mutually exclusive. 
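The prototype above is the public surface of the SPIR-V front end at this point; a hypothetical caller (spirv_words, spirv_word_count, and options are placeholders, and this sketch is not taken from the patch) would look roughly like:

/* Sketch: translate a SPIR-V binary to NIR and fetch the shader that owns
 * the returned entry-point function; no specialization constants given. */
nir_function *entry =
   spirv_to_nir(spirv_words, spirv_word_count,
                NULL, 0,
                MESA_SHADER_FRAGMENT, "main",
                options);
nir_shader *s = entry->shader;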
The mask-like ones have ++** "Mask" in their name, and a parallel enum that has the shift ++** amount (1 << x) for each corresponding enumerant. ++*/ ++ ++#ifndef spirv_H ++#define spirv_H ++ ++typedef unsigned int SpvId; ++ ++#define SPV_VERSION 0x10000 ++#define SPV_REVISION 2 ++ ++static const unsigned int SpvMagicNumber = 0x07230203; ++static const unsigned int SpvVersion = 0x00010000; ++static const unsigned int SpvRevision = 2; ++static const unsigned int SpvOpCodeMask = 0xffff; ++static const unsigned int SpvWordCountShift = 16; ++ ++typedef enum SpvSourceLanguage_ { ++ SpvSourceLanguageUnknown = 0, ++ SpvSourceLanguageESSL = 1, ++ SpvSourceLanguageGLSL = 2, ++ SpvSourceLanguageOpenCL_C = 3, ++ SpvSourceLanguageOpenCL_CPP = 4, ++} SpvSourceLanguage; ++ ++typedef enum SpvExecutionModel_ { ++ SpvExecutionModelVertex = 0, ++ SpvExecutionModelTessellationControl = 1, ++ SpvExecutionModelTessellationEvaluation = 2, ++ SpvExecutionModelGeometry = 3, ++ SpvExecutionModelFragment = 4, ++ SpvExecutionModelGLCompute = 5, ++ SpvExecutionModelKernel = 6, ++} SpvExecutionModel; ++ ++typedef enum SpvAddressingModel_ { ++ SpvAddressingModelLogical = 0, ++ SpvAddressingModelPhysical32 = 1, ++ SpvAddressingModelPhysical64 = 2, ++} SpvAddressingModel; ++ ++typedef enum SpvMemoryModel_ { ++ SpvMemoryModelSimple = 0, ++ SpvMemoryModelGLSL450 = 1, ++ SpvMemoryModelOpenCL = 2, ++} SpvMemoryModel; ++ ++typedef enum SpvExecutionMode_ { ++ SpvExecutionModeInvocations = 0, ++ SpvExecutionModeSpacingEqual = 1, ++ SpvExecutionModeSpacingFractionalEven = 2, ++ SpvExecutionModeSpacingFractionalOdd = 3, ++ SpvExecutionModeVertexOrderCw = 4, ++ SpvExecutionModeVertexOrderCcw = 5, ++ SpvExecutionModePixelCenterInteger = 6, ++ SpvExecutionModeOriginUpperLeft = 7, ++ SpvExecutionModeOriginLowerLeft = 8, ++ SpvExecutionModeEarlyFragmentTests = 9, ++ SpvExecutionModePointMode = 10, ++ SpvExecutionModeXfb = 11, ++ SpvExecutionModeDepthReplacing = 12, ++ SpvExecutionModeDepthGreater = 14, ++ SpvExecutionModeDepthLess = 15, ++ SpvExecutionModeDepthUnchanged = 16, ++ SpvExecutionModeLocalSize = 17, ++ SpvExecutionModeLocalSizeHint = 18, ++ SpvExecutionModeInputPoints = 19, ++ SpvExecutionModeInputLines = 20, ++ SpvExecutionModeInputLinesAdjacency = 21, ++ SpvExecutionModeTriangles = 22, ++ SpvExecutionModeInputTrianglesAdjacency = 23, ++ SpvExecutionModeQuads = 24, ++ SpvExecutionModeIsolines = 25, ++ SpvExecutionModeOutputVertices = 26, ++ SpvExecutionModeOutputPoints = 27, ++ SpvExecutionModeOutputLineStrip = 28, ++ SpvExecutionModeOutputTriangleStrip = 29, ++ SpvExecutionModeVecTypeHint = 30, ++ SpvExecutionModeContractionOff = 31, ++} SpvExecutionMode; ++ ++typedef enum SpvStorageClass_ { ++ SpvStorageClassUniformConstant = 0, ++ SpvStorageClassInput = 1, ++ SpvStorageClassUniform = 2, ++ SpvStorageClassOutput = 3, ++ SpvStorageClassWorkgroup = 4, ++ SpvStorageClassCrossWorkgroup = 5, ++ SpvStorageClassPrivate = 6, ++ SpvStorageClassFunction = 7, ++ SpvStorageClassGeneric = 8, ++ SpvStorageClassPushConstant = 9, ++ SpvStorageClassAtomicCounter = 10, ++ SpvStorageClassImage = 11, ++} SpvStorageClass; ++ ++typedef enum SpvDim_ { ++ SpvDim1D = 0, ++ SpvDim2D = 1, ++ SpvDim3D = 2, ++ SpvDimCube = 3, ++ SpvDimRect = 4, ++ SpvDimBuffer = 5, ++ SpvDimSubpassData = 6, ++} SpvDim; ++ ++typedef enum SpvSamplerAddressingMode_ { ++ SpvSamplerAddressingModeNone = 0, ++ SpvSamplerAddressingModeClampToEdge = 1, ++ SpvSamplerAddressingModeClamp = 2, ++ SpvSamplerAddressingModeRepeat = 3, ++ SpvSamplerAddressingModeRepeatMirrored = 4, 
++} SpvSamplerAddressingMode; ++ ++typedef enum SpvSamplerFilterMode_ { ++ SpvSamplerFilterModeNearest = 0, ++ SpvSamplerFilterModeLinear = 1, ++} SpvSamplerFilterMode; ++ ++typedef enum SpvImageFormat_ { ++ SpvImageFormatUnknown = 0, ++ SpvImageFormatRgba32f = 1, ++ SpvImageFormatRgba16f = 2, ++ SpvImageFormatR32f = 3, ++ SpvImageFormatRgba8 = 4, ++ SpvImageFormatRgba8Snorm = 5, ++ SpvImageFormatRg32f = 6, ++ SpvImageFormatRg16f = 7, ++ SpvImageFormatR11fG11fB10f = 8, ++ SpvImageFormatR16f = 9, ++ SpvImageFormatRgba16 = 10, ++ SpvImageFormatRgb10A2 = 11, ++ SpvImageFormatRg16 = 12, ++ SpvImageFormatRg8 = 13, ++ SpvImageFormatR16 = 14, ++ SpvImageFormatR8 = 15, ++ SpvImageFormatRgba16Snorm = 16, ++ SpvImageFormatRg16Snorm = 17, ++ SpvImageFormatRg8Snorm = 18, ++ SpvImageFormatR16Snorm = 19, ++ SpvImageFormatR8Snorm = 20, ++ SpvImageFormatRgba32i = 21, ++ SpvImageFormatRgba16i = 22, ++ SpvImageFormatRgba8i = 23, ++ SpvImageFormatR32i = 24, ++ SpvImageFormatRg32i = 25, ++ SpvImageFormatRg16i = 26, ++ SpvImageFormatRg8i = 27, ++ SpvImageFormatR16i = 28, ++ SpvImageFormatR8i = 29, ++ SpvImageFormatRgba32ui = 30, ++ SpvImageFormatRgba16ui = 31, ++ SpvImageFormatRgba8ui = 32, ++ SpvImageFormatR32ui = 33, ++ SpvImageFormatRgb10a2ui = 34, ++ SpvImageFormatRg32ui = 35, ++ SpvImageFormatRg16ui = 36, ++ SpvImageFormatRg8ui = 37, ++ SpvImageFormatR16ui = 38, ++ SpvImageFormatR8ui = 39, ++} SpvImageFormat; ++ ++typedef enum SpvImageChannelOrder_ { ++ SpvImageChannelOrderR = 0, ++ SpvImageChannelOrderA = 1, ++ SpvImageChannelOrderRG = 2, ++ SpvImageChannelOrderRA = 3, ++ SpvImageChannelOrderRGB = 4, ++ SpvImageChannelOrderRGBA = 5, ++ SpvImageChannelOrderBGRA = 6, ++ SpvImageChannelOrderARGB = 7, ++ SpvImageChannelOrderIntensity = 8, ++ SpvImageChannelOrderLuminance = 9, ++ SpvImageChannelOrderRx = 10, ++ SpvImageChannelOrderRGx = 11, ++ SpvImageChannelOrderRGBx = 12, ++ SpvImageChannelOrderDepth = 13, ++ SpvImageChannelOrderDepthStencil = 14, ++ SpvImageChannelOrdersRGB = 15, ++ SpvImageChannelOrdersRGBx = 16, ++ SpvImageChannelOrdersRGBA = 17, ++ SpvImageChannelOrdersBGRA = 18, ++} SpvImageChannelOrder; ++ ++typedef enum SpvImageChannelDataType_ { ++ SpvImageChannelDataTypeSnormInt8 = 0, ++ SpvImageChannelDataTypeSnormInt16 = 1, ++ SpvImageChannelDataTypeUnormInt8 = 2, ++ SpvImageChannelDataTypeUnormInt16 = 3, ++ SpvImageChannelDataTypeUnormShort565 = 4, ++ SpvImageChannelDataTypeUnormShort555 = 5, ++ SpvImageChannelDataTypeUnormInt101010 = 6, ++ SpvImageChannelDataTypeSignedInt8 = 7, ++ SpvImageChannelDataTypeSignedInt16 = 8, ++ SpvImageChannelDataTypeSignedInt32 = 9, ++ SpvImageChannelDataTypeUnsignedInt8 = 10, ++ SpvImageChannelDataTypeUnsignedInt16 = 11, ++ SpvImageChannelDataTypeUnsignedInt32 = 12, ++ SpvImageChannelDataTypeHalfFloat = 13, ++ SpvImageChannelDataTypeFloat = 14, ++ SpvImageChannelDataTypeUnormInt24 = 15, ++ SpvImageChannelDataTypeUnormInt101010_2 = 16, ++} SpvImageChannelDataType; ++ ++typedef enum SpvImageOperandsShift_ { ++ SpvImageOperandsBiasShift = 0, ++ SpvImageOperandsLodShift = 1, ++ SpvImageOperandsGradShift = 2, ++ SpvImageOperandsConstOffsetShift = 3, ++ SpvImageOperandsOffsetShift = 4, ++ SpvImageOperandsConstOffsetsShift = 5, ++ SpvImageOperandsSampleShift = 6, ++ SpvImageOperandsMinLodShift = 7, ++} SpvImageOperandsShift; ++ ++typedef enum SpvImageOperandsMask_ { ++ SpvImageOperandsMaskNone = 0, ++ SpvImageOperandsBiasMask = 0x00000001, ++ SpvImageOperandsLodMask = 0x00000002, ++ SpvImageOperandsGradMask = 0x00000004, ++ SpvImageOperandsConstOffsetMask = 0x00000008, 
++ SpvImageOperandsOffsetMask = 0x00000010, ++ SpvImageOperandsConstOffsetsMask = 0x00000020, ++ SpvImageOperandsSampleMask = 0x00000040, ++ SpvImageOperandsMinLodMask = 0x00000080, ++} SpvImageOperandsMask; ++ ++typedef enum SpvFPFastMathModeShift_ { ++ SpvFPFastMathModeNotNaNShift = 0, ++ SpvFPFastMathModeNotInfShift = 1, ++ SpvFPFastMathModeNSZShift = 2, ++ SpvFPFastMathModeAllowRecipShift = 3, ++ SpvFPFastMathModeFastShift = 4, ++} SpvFPFastMathModeShift; ++ ++typedef enum SpvFPFastMathModeMask_ { ++ SpvFPFastMathModeMaskNone = 0, ++ SpvFPFastMathModeNotNaNMask = 0x00000001, ++ SpvFPFastMathModeNotInfMask = 0x00000002, ++ SpvFPFastMathModeNSZMask = 0x00000004, ++ SpvFPFastMathModeAllowRecipMask = 0x00000008, ++ SpvFPFastMathModeFastMask = 0x00000010, ++} SpvFPFastMathModeMask; ++ ++typedef enum SpvFPRoundingMode_ { ++ SpvFPRoundingModeRTE = 0, ++ SpvFPRoundingModeRTZ = 1, ++ SpvFPRoundingModeRTP = 2, ++ SpvFPRoundingModeRTN = 3, ++} SpvFPRoundingMode; ++ ++typedef enum SpvLinkageType_ { ++ SpvLinkageTypeExport = 0, ++ SpvLinkageTypeImport = 1, ++} SpvLinkageType; ++ ++typedef enum SpvAccessQualifier_ { ++ SpvAccessQualifierReadOnly = 0, ++ SpvAccessQualifierWriteOnly = 1, ++ SpvAccessQualifierReadWrite = 2, ++} SpvAccessQualifier; ++ ++typedef enum SpvFunctionParameterAttribute_ { ++ SpvFunctionParameterAttributeZext = 0, ++ SpvFunctionParameterAttributeSext = 1, ++ SpvFunctionParameterAttributeByVal = 2, ++ SpvFunctionParameterAttributeSret = 3, ++ SpvFunctionParameterAttributeNoAlias = 4, ++ SpvFunctionParameterAttributeNoCapture = 5, ++ SpvFunctionParameterAttributeNoWrite = 6, ++ SpvFunctionParameterAttributeNoReadWrite = 7, ++} SpvFunctionParameterAttribute; ++ ++typedef enum SpvDecoration_ { ++ SpvDecorationRelaxedPrecision = 0, ++ SpvDecorationSpecId = 1, ++ SpvDecorationBlock = 2, ++ SpvDecorationBufferBlock = 3, ++ SpvDecorationRowMajor = 4, ++ SpvDecorationColMajor = 5, ++ SpvDecorationArrayStride = 6, ++ SpvDecorationMatrixStride = 7, ++ SpvDecorationGLSLShared = 8, ++ SpvDecorationGLSLPacked = 9, ++ SpvDecorationCPacked = 10, ++ SpvDecorationBuiltIn = 11, ++ SpvDecorationNoPerspective = 13, ++ SpvDecorationFlat = 14, ++ SpvDecorationPatch = 15, ++ SpvDecorationCentroid = 16, ++ SpvDecorationSample = 17, ++ SpvDecorationInvariant = 18, ++ SpvDecorationRestrict = 19, ++ SpvDecorationAliased = 20, ++ SpvDecorationVolatile = 21, ++ SpvDecorationConstant = 22, ++ SpvDecorationCoherent = 23, ++ SpvDecorationNonWritable = 24, ++ SpvDecorationNonReadable = 25, ++ SpvDecorationUniform = 26, ++ SpvDecorationSaturatedConversion = 28, ++ SpvDecorationStream = 29, ++ SpvDecorationLocation = 30, ++ SpvDecorationComponent = 31, ++ SpvDecorationIndex = 32, ++ SpvDecorationBinding = 33, ++ SpvDecorationDescriptorSet = 34, ++ SpvDecorationOffset = 35, ++ SpvDecorationXfbBuffer = 36, ++ SpvDecorationXfbStride = 37, ++ SpvDecorationFuncParamAttr = 38, ++ SpvDecorationFPRoundingMode = 39, ++ SpvDecorationFPFastMathMode = 40, ++ SpvDecorationLinkageAttributes = 41, ++ SpvDecorationNoContraction = 42, ++ SpvDecorationInputAttachmentIndex = 43, ++ SpvDecorationAlignment = 44, ++} SpvDecoration; ++ ++typedef enum SpvBuiltIn_ { ++ SpvBuiltInPosition = 0, ++ SpvBuiltInPointSize = 1, ++ SpvBuiltInClipDistance = 3, ++ SpvBuiltInCullDistance = 4, ++ SpvBuiltInVertexId = 5, ++ SpvBuiltInInstanceId = 6, ++ SpvBuiltInPrimitiveId = 7, ++ SpvBuiltInInvocationId = 8, ++ SpvBuiltInLayer = 9, ++ SpvBuiltInViewportIndex = 10, ++ SpvBuiltInTessLevelOuter = 11, ++ SpvBuiltInTessLevelInner = 12, ++ 
SpvBuiltInTessCoord = 13, ++ SpvBuiltInPatchVertices = 14, ++ SpvBuiltInFragCoord = 15, ++ SpvBuiltInPointCoord = 16, ++ SpvBuiltInFrontFacing = 17, ++ SpvBuiltInSampleId = 18, ++ SpvBuiltInSamplePosition = 19, ++ SpvBuiltInSampleMask = 20, ++ SpvBuiltInFragDepth = 22, ++ SpvBuiltInHelperInvocation = 23, ++ SpvBuiltInNumWorkgroups = 24, ++ SpvBuiltInWorkgroupSize = 25, ++ SpvBuiltInWorkgroupId = 26, ++ SpvBuiltInLocalInvocationId = 27, ++ SpvBuiltInGlobalInvocationId = 28, ++ SpvBuiltInLocalInvocationIndex = 29, ++ SpvBuiltInWorkDim = 30, ++ SpvBuiltInGlobalSize = 31, ++ SpvBuiltInEnqueuedWorkgroupSize = 32, ++ SpvBuiltInGlobalOffset = 33, ++ SpvBuiltInGlobalLinearId = 34, ++ SpvBuiltInSubgroupSize = 36, ++ SpvBuiltInSubgroupMaxSize = 37, ++ SpvBuiltInNumSubgroups = 38, ++ SpvBuiltInNumEnqueuedSubgroups = 39, ++ SpvBuiltInSubgroupId = 40, ++ SpvBuiltInSubgroupLocalInvocationId = 41, ++ SpvBuiltInVertexIndex = 42, ++ SpvBuiltInInstanceIndex = 43, ++} SpvBuiltIn; ++ ++typedef enum SpvSelectionControlShift_ { ++ SpvSelectionControlFlattenShift = 0, ++ SpvSelectionControlDontFlattenShift = 1, ++} SpvSelectionControlShift; ++ ++typedef enum SpvSelectionControlMask_ { ++ SpvSelectionControlMaskNone = 0, ++ SpvSelectionControlFlattenMask = 0x00000001, ++ SpvSelectionControlDontFlattenMask = 0x00000002, ++} SpvSelectionControlMask; ++ ++typedef enum SpvLoopControlShift_ { ++ SpvLoopControlUnrollShift = 0, ++ SpvLoopControlDontUnrollShift = 1, ++} SpvLoopControlShift; ++ ++typedef enum SpvLoopControlMask_ { ++ SpvLoopControlMaskNone = 0, ++ SpvLoopControlUnrollMask = 0x00000001, ++ SpvLoopControlDontUnrollMask = 0x00000002, ++} SpvLoopControlMask; ++ ++typedef enum SpvFunctionControlShift_ { ++ SpvFunctionControlInlineShift = 0, ++ SpvFunctionControlDontInlineShift = 1, ++ SpvFunctionControlPureShift = 2, ++ SpvFunctionControlConstShift = 3, ++} SpvFunctionControlShift; ++ ++typedef enum SpvFunctionControlMask_ { ++ SpvFunctionControlMaskNone = 0, ++ SpvFunctionControlInlineMask = 0x00000001, ++ SpvFunctionControlDontInlineMask = 0x00000002, ++ SpvFunctionControlPureMask = 0x00000004, ++ SpvFunctionControlConstMask = 0x00000008, ++} SpvFunctionControlMask; ++ ++typedef enum SpvMemorySemanticsShift_ { ++ SpvMemorySemanticsAcquireShift = 1, ++ SpvMemorySemanticsReleaseShift = 2, ++ SpvMemorySemanticsAcquireReleaseShift = 3, ++ SpvMemorySemanticsSequentiallyConsistentShift = 4, ++ SpvMemorySemanticsUniformMemoryShift = 6, ++ SpvMemorySemanticsSubgroupMemoryShift = 7, ++ SpvMemorySemanticsWorkgroupMemoryShift = 8, ++ SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, ++ SpvMemorySemanticsAtomicCounterMemoryShift = 10, ++ SpvMemorySemanticsImageMemoryShift = 11, ++} SpvMemorySemanticsShift; ++ ++typedef enum SpvMemorySemanticsMask_ { ++ SpvMemorySemanticsMaskNone = 0, ++ SpvMemorySemanticsAcquireMask = 0x00000002, ++ SpvMemorySemanticsReleaseMask = 0x00000004, ++ SpvMemorySemanticsAcquireReleaseMask = 0x00000008, ++ SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, ++ SpvMemorySemanticsUniformMemoryMask = 0x00000040, ++ SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, ++ SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, ++ SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, ++ SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, ++ SpvMemorySemanticsImageMemoryMask = 0x00000800, ++} SpvMemorySemanticsMask; ++ ++typedef enum SpvMemoryAccessShift_ { ++ SpvMemoryAccessVolatileShift = 0, ++ SpvMemoryAccessAlignedShift = 1, ++ SpvMemoryAccessNontemporalShift = 2, ++} SpvMemoryAccessShift; 
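++/* Note: each *Mask enum in this header pairs with the *Shift enum directly above it; every mask bit is simply (1u << the corresponding shift), e.g. (1u << SpvMemoryAccessAlignedShift) == SpvMemoryAccessAlignedMask (0x00000002). */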
++ ++typedef enum SpvMemoryAccessMask_ { ++ SpvMemoryAccessMaskNone = 0, ++ SpvMemoryAccessVolatileMask = 0x00000001, ++ SpvMemoryAccessAlignedMask = 0x00000002, ++ SpvMemoryAccessNontemporalMask = 0x00000004, ++} SpvMemoryAccessMask; ++ ++typedef enum SpvScope_ { ++ SpvScopeCrossDevice = 0, ++ SpvScopeDevice = 1, ++ SpvScopeWorkgroup = 2, ++ SpvScopeSubgroup = 3, ++ SpvScopeInvocation = 4, ++} SpvScope; ++ ++typedef enum SpvGroupOperation_ { ++ SpvGroupOperationReduce = 0, ++ SpvGroupOperationInclusiveScan = 1, ++ SpvGroupOperationExclusiveScan = 2, ++} SpvGroupOperation; ++ ++typedef enum SpvKernelEnqueueFlags_ { ++ SpvKernelEnqueueFlagsNoWait = 0, ++ SpvKernelEnqueueFlagsWaitKernel = 1, ++ SpvKernelEnqueueFlagsWaitWorkGroup = 2, ++} SpvKernelEnqueueFlags; ++ ++typedef enum SpvKernelProfilingInfoShift_ { ++ SpvKernelProfilingInfoCmdExecTimeShift = 0, ++} SpvKernelProfilingInfoShift; ++ ++typedef enum SpvKernelProfilingInfoMask_ { ++ SpvKernelProfilingInfoMaskNone = 0, ++ SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, ++} SpvKernelProfilingInfoMask; ++ ++typedef enum SpvCapability_ { ++ SpvCapabilityMatrix = 0, ++ SpvCapabilityShader = 1, ++ SpvCapabilityGeometry = 2, ++ SpvCapabilityTessellation = 3, ++ SpvCapabilityAddresses = 4, ++ SpvCapabilityLinkage = 5, ++ SpvCapabilityKernel = 6, ++ SpvCapabilityVector16 = 7, ++ SpvCapabilityFloat16Buffer = 8, ++ SpvCapabilityFloat16 = 9, ++ SpvCapabilityFloat64 = 10, ++ SpvCapabilityInt64 = 11, ++ SpvCapabilityInt64Atomics = 12, ++ SpvCapabilityImageBasic = 13, ++ SpvCapabilityImageReadWrite = 14, ++ SpvCapabilityImageMipmap = 15, ++ SpvCapabilityPipes = 17, ++ SpvCapabilityGroups = 18, ++ SpvCapabilityDeviceEnqueue = 19, ++ SpvCapabilityLiteralSampler = 20, ++ SpvCapabilityAtomicStorage = 21, ++ SpvCapabilityInt16 = 22, ++ SpvCapabilityTessellationPointSize = 23, ++ SpvCapabilityGeometryPointSize = 24, ++ SpvCapabilityImageGatherExtended = 25, ++ SpvCapabilityStorageImageMultisample = 27, ++ SpvCapabilityUniformBufferArrayDynamicIndexing = 28, ++ SpvCapabilitySampledImageArrayDynamicIndexing = 29, ++ SpvCapabilityStorageBufferArrayDynamicIndexing = 30, ++ SpvCapabilityStorageImageArrayDynamicIndexing = 31, ++ SpvCapabilityClipDistance = 32, ++ SpvCapabilityCullDistance = 33, ++ SpvCapabilityImageCubeArray = 34, ++ SpvCapabilitySampleRateShading = 35, ++ SpvCapabilityImageRect = 36, ++ SpvCapabilitySampledRect = 37, ++ SpvCapabilityGenericPointer = 38, ++ SpvCapabilityInt8 = 39, ++ SpvCapabilityInputAttachment = 40, ++ SpvCapabilitySparseResidency = 41, ++ SpvCapabilityMinLod = 42, ++ SpvCapabilitySampled1D = 43, ++ SpvCapabilityImage1D = 44, ++ SpvCapabilitySampledCubeArray = 45, ++ SpvCapabilitySampledBuffer = 46, ++ SpvCapabilityImageBuffer = 47, ++ SpvCapabilityImageMSArray = 48, ++ SpvCapabilityStorageImageExtendedFormats = 49, ++ SpvCapabilityImageQuery = 50, ++ SpvCapabilityDerivativeControl = 51, ++ SpvCapabilityInterpolationFunction = 52, ++ SpvCapabilityTransformFeedback = 53, ++ SpvCapabilityGeometryStreams = 54, ++ SpvCapabilityStorageImageReadWithoutFormat = 55, ++ SpvCapabilityStorageImageWriteWithoutFormat = 56, ++ SpvCapabilityMultiViewport = 57, ++} SpvCapability; ++ ++typedef enum SpvOp_ { ++ SpvOpNop = 0, ++ SpvOpUndef = 1, ++ SpvOpSourceContinued = 2, ++ SpvOpSource = 3, ++ SpvOpSourceExtension = 4, ++ SpvOpName = 5, ++ SpvOpMemberName = 6, ++ SpvOpString = 7, ++ SpvOpLine = 8, ++ SpvOpExtension = 10, ++ SpvOpExtInstImport = 11, ++ SpvOpExtInst = 12, ++ SpvOpMemoryModel = 14, ++ SpvOpEntryPoint = 15, ++ SpvOpExecutionMode 
= 16, ++ SpvOpCapability = 17, ++ SpvOpTypeVoid = 19, ++ SpvOpTypeBool = 20, ++ SpvOpTypeInt = 21, ++ SpvOpTypeFloat = 22, ++ SpvOpTypeVector = 23, ++ SpvOpTypeMatrix = 24, ++ SpvOpTypeImage = 25, ++ SpvOpTypeSampler = 26, ++ SpvOpTypeSampledImage = 27, ++ SpvOpTypeArray = 28, ++ SpvOpTypeRuntimeArray = 29, ++ SpvOpTypeStruct = 30, ++ SpvOpTypeOpaque = 31, ++ SpvOpTypePointer = 32, ++ SpvOpTypeFunction = 33, ++ SpvOpTypeEvent = 34, ++ SpvOpTypeDeviceEvent = 35, ++ SpvOpTypeReserveId = 36, ++ SpvOpTypeQueue = 37, ++ SpvOpTypePipe = 38, ++ SpvOpTypeForwardPointer = 39, ++ SpvOpConstantTrue = 41, ++ SpvOpConstantFalse = 42, ++ SpvOpConstant = 43, ++ SpvOpConstantComposite = 44, ++ SpvOpConstantSampler = 45, ++ SpvOpConstantNull = 46, ++ SpvOpSpecConstantTrue = 48, ++ SpvOpSpecConstantFalse = 49, ++ SpvOpSpecConstant = 50, ++ SpvOpSpecConstantComposite = 51, ++ SpvOpSpecConstantOp = 52, ++ SpvOpFunction = 54, ++ SpvOpFunctionParameter = 55, ++ SpvOpFunctionEnd = 56, ++ SpvOpFunctionCall = 57, ++ SpvOpVariable = 59, ++ SpvOpImageTexelPointer = 60, ++ SpvOpLoad = 61, ++ SpvOpStore = 62, ++ SpvOpCopyMemory = 63, ++ SpvOpCopyMemorySized = 64, ++ SpvOpAccessChain = 65, ++ SpvOpInBoundsAccessChain = 66, ++ SpvOpPtrAccessChain = 67, ++ SpvOpArrayLength = 68, ++ SpvOpGenericPtrMemSemantics = 69, ++ SpvOpInBoundsPtrAccessChain = 70, ++ SpvOpDecorate = 71, ++ SpvOpMemberDecorate = 72, ++ SpvOpDecorationGroup = 73, ++ SpvOpGroupDecorate = 74, ++ SpvOpGroupMemberDecorate = 75, ++ SpvOpVectorExtractDynamic = 77, ++ SpvOpVectorInsertDynamic = 78, ++ SpvOpVectorShuffle = 79, ++ SpvOpCompositeConstruct = 80, ++ SpvOpCompositeExtract = 81, ++ SpvOpCompositeInsert = 82, ++ SpvOpCopyObject = 83, ++ SpvOpTranspose = 84, ++ SpvOpSampledImage = 86, ++ SpvOpImageSampleImplicitLod = 87, ++ SpvOpImageSampleExplicitLod = 88, ++ SpvOpImageSampleDrefImplicitLod = 89, ++ SpvOpImageSampleDrefExplicitLod = 90, ++ SpvOpImageSampleProjImplicitLod = 91, ++ SpvOpImageSampleProjExplicitLod = 92, ++ SpvOpImageSampleProjDrefImplicitLod = 93, ++ SpvOpImageSampleProjDrefExplicitLod = 94, ++ SpvOpImageFetch = 95, ++ SpvOpImageGather = 96, ++ SpvOpImageDrefGather = 97, ++ SpvOpImageRead = 98, ++ SpvOpImageWrite = 99, ++ SpvOpImage = 100, ++ SpvOpImageQueryFormat = 101, ++ SpvOpImageQueryOrder = 102, ++ SpvOpImageQuerySizeLod = 103, ++ SpvOpImageQuerySize = 104, ++ SpvOpImageQueryLod = 105, ++ SpvOpImageQueryLevels = 106, ++ SpvOpImageQuerySamples = 107, ++ SpvOpConvertFToU = 109, ++ SpvOpConvertFToS = 110, ++ SpvOpConvertSToF = 111, ++ SpvOpConvertUToF = 112, ++ SpvOpUConvert = 113, ++ SpvOpSConvert = 114, ++ SpvOpFConvert = 115, ++ SpvOpQuantizeToF16 = 116, ++ SpvOpConvertPtrToU = 117, ++ SpvOpSatConvertSToU = 118, ++ SpvOpSatConvertUToS = 119, ++ SpvOpConvertUToPtr = 120, ++ SpvOpPtrCastToGeneric = 121, ++ SpvOpGenericCastToPtr = 122, ++ SpvOpGenericCastToPtrExplicit = 123, ++ SpvOpBitcast = 124, ++ SpvOpSNegate = 126, ++ SpvOpFNegate = 127, ++ SpvOpIAdd = 128, ++ SpvOpFAdd = 129, ++ SpvOpISub = 130, ++ SpvOpFSub = 131, ++ SpvOpIMul = 132, ++ SpvOpFMul = 133, ++ SpvOpUDiv = 134, ++ SpvOpSDiv = 135, ++ SpvOpFDiv = 136, ++ SpvOpUMod = 137, ++ SpvOpSRem = 138, ++ SpvOpSMod = 139, ++ SpvOpFRem = 140, ++ SpvOpFMod = 141, ++ SpvOpVectorTimesScalar = 142, ++ SpvOpMatrixTimesScalar = 143, ++ SpvOpVectorTimesMatrix = 144, ++ SpvOpMatrixTimesVector = 145, ++ SpvOpMatrixTimesMatrix = 146, ++ SpvOpOuterProduct = 147, ++ SpvOpDot = 148, ++ SpvOpIAddCarry = 149, ++ SpvOpISubBorrow = 150, ++ SpvOpUMulExtended = 151, ++ SpvOpSMulExtended = 152, ++ 
SpvOpAny = 154, ++ SpvOpAll = 155, ++ SpvOpIsNan = 156, ++ SpvOpIsInf = 157, ++ SpvOpIsFinite = 158, ++ SpvOpIsNormal = 159, ++ SpvOpSignBitSet = 160, ++ SpvOpLessOrGreater = 161, ++ SpvOpOrdered = 162, ++ SpvOpUnordered = 163, ++ SpvOpLogicalEqual = 164, ++ SpvOpLogicalNotEqual = 165, ++ SpvOpLogicalOr = 166, ++ SpvOpLogicalAnd = 167, ++ SpvOpLogicalNot = 168, ++ SpvOpSelect = 169, ++ SpvOpIEqual = 170, ++ SpvOpINotEqual = 171, ++ SpvOpUGreaterThan = 172, ++ SpvOpSGreaterThan = 173, ++ SpvOpUGreaterThanEqual = 174, ++ SpvOpSGreaterThanEqual = 175, ++ SpvOpULessThan = 176, ++ SpvOpSLessThan = 177, ++ SpvOpULessThanEqual = 178, ++ SpvOpSLessThanEqual = 179, ++ SpvOpFOrdEqual = 180, ++ SpvOpFUnordEqual = 181, ++ SpvOpFOrdNotEqual = 182, ++ SpvOpFUnordNotEqual = 183, ++ SpvOpFOrdLessThan = 184, ++ SpvOpFUnordLessThan = 185, ++ SpvOpFOrdGreaterThan = 186, ++ SpvOpFUnordGreaterThan = 187, ++ SpvOpFOrdLessThanEqual = 188, ++ SpvOpFUnordLessThanEqual = 189, ++ SpvOpFOrdGreaterThanEqual = 190, ++ SpvOpFUnordGreaterThanEqual = 191, ++ SpvOpShiftRightLogical = 194, ++ SpvOpShiftRightArithmetic = 195, ++ SpvOpShiftLeftLogical = 196, ++ SpvOpBitwiseOr = 197, ++ SpvOpBitwiseXor = 198, ++ SpvOpBitwiseAnd = 199, ++ SpvOpNot = 200, ++ SpvOpBitFieldInsert = 201, ++ SpvOpBitFieldSExtract = 202, ++ SpvOpBitFieldUExtract = 203, ++ SpvOpBitReverse = 204, ++ SpvOpBitCount = 205, ++ SpvOpDPdx = 207, ++ SpvOpDPdy = 208, ++ SpvOpFwidth = 209, ++ SpvOpDPdxFine = 210, ++ SpvOpDPdyFine = 211, ++ SpvOpFwidthFine = 212, ++ SpvOpDPdxCoarse = 213, ++ SpvOpDPdyCoarse = 214, ++ SpvOpFwidthCoarse = 215, ++ SpvOpEmitVertex = 218, ++ SpvOpEndPrimitive = 219, ++ SpvOpEmitStreamVertex = 220, ++ SpvOpEndStreamPrimitive = 221, ++ SpvOpControlBarrier = 224, ++ SpvOpMemoryBarrier = 225, ++ SpvOpAtomicLoad = 227, ++ SpvOpAtomicStore = 228, ++ SpvOpAtomicExchange = 229, ++ SpvOpAtomicCompareExchange = 230, ++ SpvOpAtomicCompareExchangeWeak = 231, ++ SpvOpAtomicIIncrement = 232, ++ SpvOpAtomicIDecrement = 233, ++ SpvOpAtomicIAdd = 234, ++ SpvOpAtomicISub = 235, ++ SpvOpAtomicSMin = 236, ++ SpvOpAtomicUMin = 237, ++ SpvOpAtomicSMax = 238, ++ SpvOpAtomicUMax = 239, ++ SpvOpAtomicAnd = 240, ++ SpvOpAtomicOr = 241, ++ SpvOpAtomicXor = 242, ++ SpvOpPhi = 245, ++ SpvOpLoopMerge = 246, ++ SpvOpSelectionMerge = 247, ++ SpvOpLabel = 248, ++ SpvOpBranch = 249, ++ SpvOpBranchConditional = 250, ++ SpvOpSwitch = 251, ++ SpvOpKill = 252, ++ SpvOpReturn = 253, ++ SpvOpReturnValue = 254, ++ SpvOpUnreachable = 255, ++ SpvOpLifetimeStart = 256, ++ SpvOpLifetimeStop = 257, ++ SpvOpGroupAsyncCopy = 259, ++ SpvOpGroupWaitEvents = 260, ++ SpvOpGroupAll = 261, ++ SpvOpGroupAny = 262, ++ SpvOpGroupBroadcast = 263, ++ SpvOpGroupIAdd = 264, ++ SpvOpGroupFAdd = 265, ++ SpvOpGroupFMin = 266, ++ SpvOpGroupUMin = 267, ++ SpvOpGroupSMin = 268, ++ SpvOpGroupFMax = 269, ++ SpvOpGroupUMax = 270, ++ SpvOpGroupSMax = 271, ++ SpvOpReadPipe = 274, ++ SpvOpWritePipe = 275, ++ SpvOpReservedReadPipe = 276, ++ SpvOpReservedWritePipe = 277, ++ SpvOpReserveReadPipePackets = 278, ++ SpvOpReserveWritePipePackets = 279, ++ SpvOpCommitReadPipe = 280, ++ SpvOpCommitWritePipe = 281, ++ SpvOpIsValidReserveId = 282, ++ SpvOpGetNumPipePackets = 283, ++ SpvOpGetMaxPipePackets = 284, ++ SpvOpGroupReserveReadPipePackets = 285, ++ SpvOpGroupReserveWritePipePackets = 286, ++ SpvOpGroupCommitReadPipe = 287, ++ SpvOpGroupCommitWritePipe = 288, ++ SpvOpEnqueueMarker = 291, ++ SpvOpEnqueueKernel = 292, ++ SpvOpGetKernelNDrangeSubGroupCount = 293, ++ SpvOpGetKernelNDrangeMaxSubGroupSize = 294, ++ 
SpvOpGetKernelWorkGroupSize = 295, ++ SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, ++ SpvOpRetainEvent = 297, ++ SpvOpReleaseEvent = 298, ++ SpvOpCreateUserEvent = 299, ++ SpvOpIsValidEvent = 300, ++ SpvOpSetUserEventStatus = 301, ++ SpvOpCaptureEventProfilingInfo = 302, ++ SpvOpGetDefaultQueue = 303, ++ SpvOpBuildNDRange = 304, ++ SpvOpImageSparseSampleImplicitLod = 305, ++ SpvOpImageSparseSampleExplicitLod = 306, ++ SpvOpImageSparseSampleDrefImplicitLod = 307, ++ SpvOpImageSparseSampleDrefExplicitLod = 308, ++ SpvOpImageSparseSampleProjImplicitLod = 309, ++ SpvOpImageSparseSampleProjExplicitLod = 310, ++ SpvOpImageSparseSampleProjDrefImplicitLod = 311, ++ SpvOpImageSparseSampleProjDrefExplicitLod = 312, ++ SpvOpImageSparseFetch = 313, ++ SpvOpImageSparseGather = 314, ++ SpvOpImageSparseDrefGather = 315, ++ SpvOpImageSparseTexelsResident = 316, ++ SpvOpNoLine = 317, ++ SpvOpAtomicFlagTestAndSet = 318, ++ SpvOpAtomicFlagClear = 319, ++} SpvOp; ++ ++#endif // #ifndef spirv_H ++ diff --cc src/compiler/nir/spirv/spirv_to_nir.c index 00000000000,00000000000..c002457ce12 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@@ -1,0 -1,0 +1,2654 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ * ++ * Authors: ++ * Jason Ekstrand (jason@jlekstrand.net) ++ * ++ */ ++ ++#include "vtn_private.h" ++#include "nir/nir_vla.h" ++#include "nir/nir_control_flow.h" ++#include "nir/nir_constant_expressions.h" ++ ++static struct vtn_ssa_value * ++vtn_undef_ssa_value(struct vtn_builder *b, const struct glsl_type *type) ++{ ++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); ++ val->type = type; ++ ++ if (glsl_type_is_vector_or_scalar(type)) { ++ unsigned num_components = glsl_get_vector_elements(val->type); ++ nir_ssa_undef_instr *undef = ++ nir_ssa_undef_instr_create(b->shader, num_components); ++ ++ nir_instr_insert_before_cf_list(&b->impl->body, &undef->instr); ++ val->def = &undef->def; ++ } else { ++ unsigned elems = glsl_get_length(val->type); ++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); ++ if (glsl_type_is_matrix(type)) { ++ const struct glsl_type *elem_type = ++ glsl_vector_type(glsl_get_base_type(type), ++ glsl_get_vector_elements(type)); ++ ++ for (unsigned i = 0; i < elems; i++) ++ val->elems[i] = vtn_undef_ssa_value(b, elem_type); ++ } else if (glsl_type_is_array(type)) { ++ const struct glsl_type *elem_type = glsl_get_array_element(type); ++ for (unsigned i = 0; i < elems; i++) ++ val->elems[i] = vtn_undef_ssa_value(b, elem_type); ++ } else { ++ for (unsigned i = 0; i < elems; i++) { ++ const struct glsl_type *elem_type = glsl_get_struct_field(type, i); ++ val->elems[i] = vtn_undef_ssa_value(b, elem_type); ++ } ++ } ++ } ++ ++ return val; ++} ++ ++static struct vtn_ssa_value * ++vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, ++ const struct glsl_type *type) ++{ ++ struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); ++ ++ if (entry) ++ return entry->data; ++ ++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); ++ val->type = type; ++ ++ switch (glsl_get_base_type(type)) { ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ if (glsl_type_is_vector_or_scalar(type)) { ++ unsigned num_components = glsl_get_vector_elements(val->type); ++ nir_load_const_instr *load = ++ nir_load_const_instr_create(b->shader, num_components); ++ ++ for (unsigned i = 0; i < num_components; i++) ++ load->value.u[i] = constant->value.u[i]; ++ ++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); ++ val->def = &load->def; ++ } else { ++ assert(glsl_type_is_matrix(type)); ++ unsigned rows = glsl_get_vector_elements(val->type); ++ unsigned columns = glsl_get_matrix_columns(val->type); ++ val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); ++ ++ for (unsigned i = 0; i < columns; i++) { ++ struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); ++ col_val->type = glsl_get_column_type(val->type); ++ nir_load_const_instr *load = ++ nir_load_const_instr_create(b->shader, rows); ++ ++ for (unsigned j = 0; j < rows; j++) ++ load->value.u[j] = constant->value.u[rows * i + j]; ++ ++ nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); ++ col_val->def = &load->def; ++ ++ val->elems[i] = col_val; ++ } ++ } ++ break; ++ ++ case GLSL_TYPE_ARRAY: { ++ unsigned elems = glsl_get_length(val->type); ++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); ++ const struct glsl_type *elem_type = glsl_get_array_element(val->type); ++ for (unsigned i = 0; i < elems; i++) ++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], ++ elem_type); ++ break; ++ } ++ ++ case GLSL_TYPE_STRUCT: { ++ unsigned elems = 
glsl_get_length(val->type); ++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); ++ for (unsigned i = 0; i < elems; i++) { ++ const struct glsl_type *elem_type = ++ glsl_get_struct_field(val->type, i); ++ val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], ++ elem_type); ++ } ++ break; ++ } ++ ++ default: ++ unreachable("bad constant type"); ++ } ++ ++ return val; ++} ++ ++struct vtn_ssa_value * ++vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) ++{ ++ struct vtn_value *val = vtn_untyped_value(b, value_id); ++ switch (val->value_type) { ++ case vtn_value_type_undef: ++ return vtn_undef_ssa_value(b, val->type->type); ++ ++ case vtn_value_type_constant: ++ return vtn_const_ssa_value(b, val->constant, val->const_type); ++ ++ case vtn_value_type_ssa: ++ return val->ssa; ++ ++ case vtn_value_type_access_chain: ++ /* This is needed for function parameters */ ++ return vtn_variable_load(b, val->access_chain); ++ ++ default: ++ unreachable("Invalid type for an SSA value"); ++ } ++} ++ ++static char * ++vtn_string_literal(struct vtn_builder *b, const uint32_t *words, ++ unsigned word_count, unsigned *words_used) ++{ ++ char *dup = ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); ++ if (words_used) { ++ /* Amount of space taken by the string (including the null) */ ++ unsigned len = strlen(dup) + 1; ++ *words_used = DIV_ROUND_UP(len, sizeof(*words)); ++ } ++ return dup; ++} ++ ++const uint32_t * ++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, ++ const uint32_t *end, vtn_instruction_handler handler) ++{ ++ b->file = NULL; ++ b->line = -1; ++ b->col = -1; ++ ++ const uint32_t *w = start; ++ while (w < end) { ++ SpvOp opcode = w[0] & SpvOpCodeMask; ++ unsigned count = w[0] >> SpvWordCountShift; ++ assert(count >= 1 && w + count <= end); ++ ++ switch (opcode) { ++ case SpvOpNop: ++ break; /* Do nothing */ ++ ++ case SpvOpLine: ++ b->file = vtn_value(b, w[1], vtn_value_type_string)->str; ++ b->line = w[2]; ++ b->col = w[3]; ++ break; ++ ++ case SpvOpNoLine: ++ b->file = NULL; ++ b->line = -1; ++ b->col = -1; ++ break; ++ ++ default: ++ if (!handler(b, opcode, w, count)) ++ return w; ++ break; ++ } ++ ++ w += count; ++ } ++ assert(w == end); ++ return w; ++} ++ ++static void ++vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpExtInstImport: { ++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); ++ if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { ++ val->ext_handler = vtn_handle_glsl450_instruction; ++ } else { ++ assert(!"Unsupported extension"); ++ } ++ break; ++ } ++ ++ case SpvOpExtInst: { ++ struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); ++ bool handled = val->ext_handler(b, w[4], w, count); ++ (void)handled; ++ assert(handled); ++ break; ++ } ++ ++ default: ++ unreachable("Unhandled opcode"); ++ } ++} ++ ++static void ++_foreach_decoration_helper(struct vtn_builder *b, ++ struct vtn_value *base_value, ++ int parent_member, ++ struct vtn_value *value, ++ vtn_decoration_foreach_cb cb, void *data) ++{ ++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { ++ int member; ++ if (dec->scope == VTN_DEC_DECORATION) { ++ member = parent_member; ++ } else if (dec->scope >= VTN_DEC_STRUCT_MEMBER0) { ++ assert(parent_member == -1); ++ member = dec->scope - VTN_DEC_STRUCT_MEMBER0; ++ } else { ++ /* Not a decoration */ ++ continue; ++ } ++ ++ if (dec->group) { ++ 
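++ /* A group decoration: dec->group points at an OpDecorationGroup value, so recurse and report each of the group's own decorations against base_value as well. */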
assert(dec->group->value_type == vtn_value_type_decoration_group); ++ _foreach_decoration_helper(b, base_value, member, dec->group, ++ cb, data); ++ } else { ++ cb(b, base_value, member, dec, data); ++ } ++ } ++} ++ ++/** Iterates (recursively if needed) over all of the decorations on a value ++ * ++ * This function iterates over all of the decorations applied to a given ++ * value. If it encounters a decoration group, it recurses into the group ++ * and iterates over all of those decorations as well. ++ */ ++void ++vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, ++ vtn_decoration_foreach_cb cb, void *data) ++{ ++ _foreach_decoration_helper(b, value, -1, value, cb, data); ++} ++ ++void ++vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, ++ vtn_execution_mode_foreach_cb cb, void *data) ++{ ++ for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { ++ if (dec->scope != VTN_DEC_EXECUTION_MODE) ++ continue; ++ ++ assert(dec->group == NULL); ++ cb(b, value, dec, data); ++ } ++} ++ ++static void ++vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ const uint32_t *w_end = w + count; ++ const uint32_t target = w[1]; ++ w += 2; ++ ++ switch (opcode) { ++ case SpvOpDecorationGroup: ++ vtn_push_value(b, target, vtn_value_type_decoration_group); ++ break; ++ ++ case SpvOpDecorate: ++ case SpvOpMemberDecorate: ++ case SpvOpExecutionMode: { ++ struct vtn_value *val = &b->values[target]; ++ ++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); ++ switch (opcode) { ++ case SpvOpDecorate: ++ dec->scope = VTN_DEC_DECORATION; ++ break; ++ case SpvOpMemberDecorate: ++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); ++ break; ++ case SpvOpExecutionMode: ++ dec->scope = VTN_DEC_EXECUTION_MODE; ++ break; ++ default: ++ unreachable("Invalid decoration opcode"); ++ } ++ dec->decoration = *(w++); ++ dec->literals = w; ++ ++ /* Link into the list */ ++ dec->next = val->decoration; ++ val->decoration = dec; ++ break; ++ } ++ ++ case SpvOpGroupMemberDecorate: ++ case SpvOpGroupDecorate: { ++ struct vtn_value *group = ++ vtn_value(b, target, vtn_value_type_decoration_group); ++ ++ for (; w < w_end; w++) { ++ struct vtn_value *val = vtn_untyped_value(b, *w); ++ struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); ++ ++ dec->group = group; ++ if (opcode == SpvOpGroupDecorate) { ++ dec->scope = VTN_DEC_DECORATION; ++ } else { ++ dec->scope = VTN_DEC_STRUCT_MEMBER0 + *(w++); ++ } ++ ++ /* Link into the list */ ++ dec->next = val->decoration; ++ val->decoration = dec; ++ } ++ break; ++ } ++ ++ default: ++ unreachable("Unhandled opcode"); ++ } ++} ++ ++struct member_decoration_ctx { ++ struct glsl_struct_field *fields; ++ struct vtn_type *type; ++}; ++ ++/* does a shallow copy of a vtn_type */ ++ ++static struct vtn_type * ++vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) ++{ ++ struct vtn_type *dest = ralloc(b, struct vtn_type); ++ dest->type = src->type; ++ dest->is_builtin = src->is_builtin; ++ if (src->is_builtin) ++ dest->builtin = src->builtin; ++ ++ if (!glsl_type_is_scalar(src->type)) { ++ switch (glsl_get_base_type(src->type)) { ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ case GLSL_TYPE_ARRAY: ++ dest->row_major = src->row_major; ++ dest->stride = src->stride; ++ dest->array_element = src->array_element; ++ break; ++ ++ case GLSL_TYPE_STRUCT: { ++ unsigned elems = 
glsl_get_length(src->type); ++ ++ dest->members = ralloc_array(b, struct vtn_type *, elems); ++ memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); ++ ++ dest->offsets = ralloc_array(b, unsigned, elems); ++ memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); ++ break; ++ } ++ ++ default: ++ unreachable("unhandled type"); ++ } ++ } ++ ++ return dest; ++} ++ ++static struct vtn_type * ++mutable_matrix_member(struct vtn_builder *b, struct vtn_type *type, int member) ++{ ++ type->members[member] = vtn_type_copy(b, type->members[member]); ++ type = type->members[member]; ++ ++ /* We may have an array of matrices.... Oh, joy! */ ++ while (glsl_type_is_array(type->type)) { ++ type->array_element = vtn_type_copy(b, type->array_element); ++ type = type->array_element; ++ } ++ ++ assert(glsl_type_is_matrix(type->type)); ++ ++ return type; ++} ++ ++static void ++struct_member_decoration_cb(struct vtn_builder *b, ++ struct vtn_value *val, int member, ++ const struct vtn_decoration *dec, void *void_ctx) ++{ ++ struct member_decoration_ctx *ctx = void_ctx; ++ ++ if (member < 0) ++ return; ++ ++ switch (dec->decoration) { ++ case SpvDecorationRelaxedPrecision: ++ break; /* FIXME: Do nothing with this for now. */ ++ case SpvDecorationNoPerspective: ++ ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; ++ break; ++ case SpvDecorationFlat: ++ ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; ++ break; ++ case SpvDecorationCentroid: ++ ctx->fields[member].centroid = true; ++ break; ++ case SpvDecorationSample: ++ ctx->fields[member].sample = true; ++ break; ++ case SpvDecorationLocation: ++ ctx->fields[member].location = dec->literals[0]; ++ break; ++ case SpvDecorationBuiltIn: ++ ctx->type->members[member] = vtn_type_copy(b, ctx->type->members[member]); ++ ctx->type->members[member]->is_builtin = true; ++ ctx->type->members[member]->builtin = dec->literals[0]; ++ ctx->type->builtin_block = true; ++ break; ++ case SpvDecorationOffset: ++ ctx->type->offsets[member] = dec->literals[0]; ++ break; ++ case SpvDecorationMatrixStride: ++ mutable_matrix_member(b, ctx->type, member)->stride = dec->literals[0]; ++ break; ++ case SpvDecorationColMajor: ++ break; /* Nothing to do here. Column-major is the default. 
*/ ++ case SpvDecorationRowMajor: ++ mutable_matrix_member(b, ctx->type, member)->row_major = true; ++ break; ++ default: ++ unreachable("Unhandled member decoration"); ++ } ++} ++ ++static void ++type_decoration_cb(struct vtn_builder *b, ++ struct vtn_value *val, int member, ++ const struct vtn_decoration *dec, void *ctx) ++{ ++ struct vtn_type *type = val->type; ++ ++ if (member != -1) ++ return; ++ ++ switch (dec->decoration) { ++ case SpvDecorationArrayStride: ++ type->stride = dec->literals[0]; ++ break; ++ case SpvDecorationBlock: ++ type->block = true; ++ break; ++ case SpvDecorationBufferBlock: ++ type->buffer_block = true; ++ break; ++ case SpvDecorationGLSLShared: ++ case SpvDecorationGLSLPacked: ++ /* Ignore these, since we get explicit offsets anyways */ ++ break; ++ ++ case SpvDecorationStream: ++ assert(dec->literals[0] == 0); ++ break; ++ ++ default: ++ unreachable("Unhandled type decoration"); ++ } ++} ++ ++static unsigned ++translate_image_format(SpvImageFormat format) ++{ ++ switch (format) { ++ case SpvImageFormatUnknown: return 0; /* GL_NONE */ ++ case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ ++ case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ ++ case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ ++ case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ ++ case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ ++ case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ ++ case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ ++ case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ ++ case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ ++ case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ ++ case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ ++ case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ ++ case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ ++ case SpvImageFormatR16: return 0x822A; /* GL_R16 */ ++ case SpvImageFormatR8: return 0x8229; /* GL_R8 */ ++ case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ ++ case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ ++ case SpvImageFormatRg8Snorm: return 0x8F95; /* GL_RG8_SNORM */ ++ case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ ++ case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ ++ case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ ++ case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ ++ case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ ++ case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ ++ case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ ++ case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ ++ case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ ++ case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ ++ case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ ++ case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ ++ case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ ++ case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ ++ case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ ++ case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ ++ case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ ++ case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ ++ case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ ++ case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */ ++ case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ ++ default: ++ assert(!"Invalid image format"); ++ return 0; ++ } ++} ++ ++static 
void ++vtn_handle_type(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); ++ ++ val->type = rzalloc(b, struct vtn_type); ++ val->type->is_builtin = false; ++ val->type->val = val; ++ ++ switch (opcode) { ++ case SpvOpTypeVoid: ++ val->type->type = glsl_void_type(); ++ break; ++ case SpvOpTypeBool: ++ val->type->type = glsl_bool_type(); ++ break; ++ case SpvOpTypeInt: ++ val->type->type = glsl_int_type(); ++ break; ++ case SpvOpTypeFloat: ++ val->type->type = glsl_float_type(); ++ break; ++ ++ case SpvOpTypeVector: { ++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; ++ unsigned elems = w[3]; ++ ++ assert(glsl_type_is_scalar(base->type)); ++ val->type->type = glsl_vector_type(glsl_get_base_type(base->type), elems); ++ ++ /* Vectors implicitly have sizeof(base_type) stride. For now, this ++ * is always 4 bytes. This will have to change if we want to start ++ * supporting doubles or half-floats. ++ */ ++ val->type->stride = 4; ++ val->type->array_element = base; ++ break; ++ } ++ ++ case SpvOpTypeMatrix: { ++ struct vtn_type *base = vtn_value(b, w[2], vtn_value_type_type)->type; ++ unsigned columns = w[3]; ++ ++ assert(glsl_type_is_vector(base->type)); ++ val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), ++ glsl_get_vector_elements(base->type), ++ columns); ++ assert(!glsl_type_is_error(val->type->type)); ++ val->type->array_element = base; ++ val->type->row_major = false; ++ val->type->stride = 0; ++ break; ++ } ++ ++ case SpvOpTypeRuntimeArray: ++ case SpvOpTypeArray: { ++ struct vtn_type *array_element = ++ vtn_value(b, w[2], vtn_value_type_type)->type; ++ ++ unsigned length; ++ if (opcode == SpvOpTypeRuntimeArray) { ++ /* A length of 0 is used to denote unsized arrays */ ++ length = 0; ++ } else { ++ length = ++ vtn_value(b, w[3], vtn_value_type_constant)->constant->value.u[0]; ++ } ++ ++ val->type->type = glsl_array_type(array_element->type, length); ++ val->type->array_element = array_element; ++ val->type->stride = 0; ++ break; ++ } ++ ++ case SpvOpTypeStruct: { ++ unsigned num_fields = count - 2; ++ val->type->members = ralloc_array(b, struct vtn_type *, num_fields); ++ val->type->offsets = ralloc_array(b, unsigned, num_fields); ++ ++ NIR_VLA(struct glsl_struct_field, fields, count); ++ for (unsigned i = 0; i < num_fields; i++) { ++ val->type->members[i] = ++ vtn_value(b, w[i + 2], vtn_value_type_type)->type; ++ fields[i] = (struct glsl_struct_field) { ++ .type = val->type->members[i]->type, ++ .name = ralloc_asprintf(b, "field%d", i), ++ .location = -1, ++ }; ++ } ++ ++ struct member_decoration_ctx ctx = { ++ .fields = fields, ++ .type = val->type ++ }; ++ ++ vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); ++ ++ const char *name = val->name ? val->name : "struct"; ++ ++ val->type->type = glsl_struct_type(fields, num_fields, name); ++ break; ++ } ++ ++ case SpvOpTypeFunction: { ++ const struct glsl_type *return_type = ++ vtn_value(b, w[2], vtn_value_type_type)->type->type; ++ NIR_VLA(struct glsl_function_param, params, count - 3); ++ for (unsigned i = 0; i < count - 3; i++) { ++ params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; ++ ++ /* FIXME: */ ++ params[i].in = true; ++ params[i].out = true; ++ } ++ val->type->type = glsl_function_type(return_type, params, count - 3); ++ break; ++ } ++ ++ case SpvOpTypePointer: ++ /* FIXME: For now, we'll just do the really lame thing and return ++ * the same type. 
The validator should ensure that the proper number ++ * of dereferences happen ++ */ ++ val->type = vtn_value(b, w[3], vtn_value_type_type)->type; ++ break; ++ ++ case SpvOpTypeImage: { ++ const struct glsl_type *sampled_type = ++ vtn_value(b, w[2], vtn_value_type_type)->type->type; ++ ++ assert(glsl_type_is_vector_or_scalar(sampled_type)); ++ ++ enum glsl_sampler_dim dim; ++ switch ((SpvDim)w[3]) { ++ case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; ++ case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; ++ case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; ++ case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; ++ case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; ++ case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; ++ default: ++ unreachable("Invalid SPIR-V Sampler dimension"); ++ } ++ ++ bool is_shadow = w[4]; ++ bool is_array = w[5]; ++ bool multisampled = w[6]; ++ unsigned sampled = w[7]; ++ SpvImageFormat format = w[8]; ++ ++ if (count > 9) ++ val->type->access_qualifier = w[9]; ++ else ++ val->type->access_qualifier = SpvAccessQualifierReadWrite; ++ ++ assert(!multisampled && "FIXME: Handle multi-sampled textures"); ++ ++ val->type->image_format = translate_image_format(format); ++ ++ if (sampled == 1) { ++ val->type->type = glsl_sampler_type(dim, is_shadow, is_array, ++ glsl_get_base_type(sampled_type)); ++ } else if (sampled == 2) { ++ assert(format); ++ assert(!is_shadow); ++ val->type->type = glsl_image_type(dim, is_array, ++ glsl_get_base_type(sampled_type)); ++ } else { ++ assert(!"We need to know if the image will be sampled"); ++ } ++ break; ++ } ++ ++ case SpvOpTypeSampledImage: ++ val->type = vtn_value(b, w[2], vtn_value_type_type)->type; ++ break; ++ ++ case SpvOpTypeSampler: ++ /* The actual sampler type here doesn't really matter. It gets ++ * thrown away the moment you combine it with an image. What really ++ * matters is that it's a sampler type as opposed to an integer type ++ * so the backend knows what to do. ++ * ++ * TODO: Eventually we should consider adding a "bare sampler" type ++ * to glsl_types. ++ */ ++ val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, ++ GLSL_TYPE_FLOAT); ++ break; ++ ++ case SpvOpTypeOpaque: ++ case SpvOpTypeEvent: ++ case SpvOpTypeDeviceEvent: ++ case SpvOpTypeReserveId: ++ case SpvOpTypeQueue: ++ case SpvOpTypePipe: ++ default: ++ unreachable("Unhandled opcode"); ++ } ++ ++ vtn_foreach_decoration(b, val, type_decoration_cb, NULL); ++} ++ ++static nir_constant * ++vtn_null_constant(struct vtn_builder *b, const struct glsl_type *type) ++{ ++ nir_constant *c = rzalloc(b, nir_constant); ++ ++ switch (glsl_get_base_type(type)) { ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ /* Nothing to do here. 
It's already initialized to zero */ ++ break; ++ ++ case GLSL_TYPE_ARRAY: ++ assert(glsl_get_length(type) > 0); ++ c->num_elements = glsl_get_length(type); ++ c->elements = ralloc_array(b, nir_constant *, c->num_elements); ++ ++ c->elements[0] = vtn_null_constant(b, glsl_get_array_element(type)); ++ for (unsigned i = 1; i < c->num_elements; i++) ++ c->elements[i] = c->elements[0]; ++ break; ++ ++ case GLSL_TYPE_STRUCT: ++ c->num_elements = glsl_get_length(type); ++ c->elements = ralloc_array(b, nir_constant *, c->num_elements); ++ ++ for (unsigned i = 0; i < c->num_elements; i++) { ++ c->elements[i] = vtn_null_constant(b, glsl_get_struct_field(type, i)); ++ } ++ break; ++ ++ default: ++ unreachable("Invalid type for null constant"); ++ } ++ ++ return c; ++} ++ ++static void ++spec_constant_deocoration_cb(struct vtn_builder *b, struct vtn_value *v, ++ int member, const struct vtn_decoration *dec, ++ void *data) ++{ ++ assert(member == -1); ++ if (dec->decoration != SpvDecorationSpecId) ++ return; ++ ++ uint32_t *const_value = data; ++ ++ for (unsigned i = 0; i < b->num_specializations; i++) { ++ if (b->specializations[i].id == dec->literals[0]) { ++ *const_value = b->specializations[i].data; ++ return; ++ } ++ } ++} ++ ++static uint32_t ++get_specialization(struct vtn_builder *b, struct vtn_value *val, ++ uint32_t const_value) ++{ ++ vtn_foreach_decoration(b, val, spec_constant_deocoration_cb, &const_value); ++ return const_value; ++} ++ ++static void ++vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); ++ val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ val->constant = rzalloc(b, nir_constant); ++ switch (opcode) { ++ case SpvOpConstantTrue: ++ assert(val->const_type == glsl_bool_type()); ++ val->constant->value.u[0] = NIR_TRUE; ++ break; ++ case SpvOpConstantFalse: ++ assert(val->const_type == glsl_bool_type()); ++ val->constant->value.u[0] = NIR_FALSE; ++ break; ++ ++ case SpvOpSpecConstantTrue: ++ case SpvOpSpecConstantFalse: { ++ assert(val->const_type == glsl_bool_type()); ++ uint32_t int_val = ++ get_specialization(b, val, (opcode == SpvOpSpecConstantTrue)); ++ val->constant->value.u[0] = int_val ? 
NIR_TRUE : NIR_FALSE; ++ break; ++ } ++ ++ case SpvOpConstant: ++ assert(glsl_type_is_scalar(val->const_type)); ++ val->constant->value.u[0] = w[3]; ++ break; ++ case SpvOpSpecConstant: ++ assert(glsl_type_is_scalar(val->const_type)); ++ val->constant->value.u[0] = get_specialization(b, val, w[3]); ++ break; ++ case SpvOpSpecConstantComposite: ++ case SpvOpConstantComposite: { ++ unsigned elem_count = count - 3; ++ nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); ++ for (unsigned i = 0; i < elem_count; i++) ++ elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; ++ ++ switch (glsl_get_base_type(val->const_type)) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ if (glsl_type_is_matrix(val->const_type)) { ++ unsigned rows = glsl_get_vector_elements(val->const_type); ++ assert(glsl_get_matrix_columns(val->const_type) == elem_count); ++ for (unsigned i = 0; i < elem_count; i++) ++ for (unsigned j = 0; j < rows; j++) ++ val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; ++ } else { ++ assert(glsl_type_is_vector(val->const_type)); ++ assert(glsl_get_vector_elements(val->const_type) == elem_count); ++ for (unsigned i = 0; i < elem_count; i++) ++ val->constant->value.u[i] = elems[i]->value.u[0]; ++ } ++ ralloc_free(elems); ++ break; ++ ++ case GLSL_TYPE_STRUCT: ++ case GLSL_TYPE_ARRAY: ++ ralloc_steal(val->constant, elems); ++ val->constant->num_elements = elem_count; ++ val->constant->elements = elems; ++ break; ++ ++ default: ++ unreachable("Unsupported type for constants"); ++ } ++ break; ++ } ++ ++ case SpvOpSpecConstantOp: { ++ SpvOp opcode = get_specialization(b, val, w[3]); ++ switch (opcode) { ++ case SpvOpVectorShuffle: { ++ struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant); ++ struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant); ++ unsigned len0 = glsl_get_vector_elements(v0->const_type); ++ unsigned len1 = glsl_get_vector_elements(v1->const_type); ++ ++ uint32_t u[8]; ++ for (unsigned i = 0; i < len0; i++) ++ u[i] = v0->constant->value.u[i]; ++ for (unsigned i = 0; i < len1; i++) ++ u[len0 + i] = v1->constant->value.u[i]; ++ ++ for (unsigned i = 0; i < count - 6; i++) { ++ uint32_t comp = w[i + 6]; ++ if (comp == (uint32_t)-1) { ++ val->constant->value.u[i] = 0xdeadbeef; ++ } else { ++ val->constant->value.u[i] = u[comp]; ++ } ++ } ++ return; ++ } ++ ++ case SpvOpCompositeExtract: ++ case SpvOpCompositeInsert: { ++ struct vtn_value *comp; ++ unsigned deref_start; ++ struct nir_constant **c; ++ if (opcode == SpvOpCompositeExtract) { ++ comp = vtn_value(b, w[4], vtn_value_type_constant); ++ deref_start = 5; ++ c = &comp->constant; ++ } else { ++ comp = vtn_value(b, w[5], vtn_value_type_constant); ++ deref_start = 6; ++ val->constant = nir_constant_clone(comp->constant, ++ (nir_variable *)b); ++ c = &val->constant; ++ } ++ ++ int elem = -1; ++ const struct glsl_type *type = comp->const_type; ++ for (unsigned i = deref_start; i < count; i++) { ++ switch (glsl_get_base_type(type)) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ /* If we hit this granularity, we're picking off an element */ ++ if (elem < 0) ++ elem = 0; ++ ++ if (glsl_type_is_matrix(type)) { ++ elem += w[i] * glsl_get_vector_elements(type); ++ type = glsl_get_column_type(type); ++ } else { ++ assert(glsl_type_is_vector(type)); ++ elem += w[i]; ++ type = glsl_scalar_type(glsl_get_base_type(type)); ++ } ++ continue; ++ ++ case GLSL_TYPE_ARRAY: ++ c 
= &(*c)->elements[w[i]]; ++ type = glsl_get_array_element(type); ++ continue; ++ ++ case GLSL_TYPE_STRUCT: ++ c = &(*c)->elements[w[i]]; ++ type = glsl_get_struct_field(type, w[i]); ++ continue; ++ ++ default: ++ unreachable("Invalid constant type"); ++ } ++ } ++ ++ if (opcode == SpvOpCompositeExtract) { ++ if (elem == -1) { ++ val->constant = *c; ++ } else { ++ unsigned num_components = glsl_get_vector_elements(type); ++ for (unsigned i = 0; i < num_components; i++) ++ val->constant->value.u[i] = (*c)->value.u[elem + i]; ++ } ++ } else { ++ struct vtn_value *insert = ++ vtn_value(b, w[4], vtn_value_type_constant); ++ assert(insert->const_type == type); ++ if (elem == -1) { ++ *c = insert->constant; ++ } else { ++ unsigned num_components = glsl_get_vector_elements(type); ++ for (unsigned i = 0; i < num_components; i++) ++ (*c)->value.u[elem + i] = insert->constant->value.u[i]; ++ } ++ } ++ return; ++ } ++ ++ default: { ++ bool swap; ++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); ++ ++ unsigned num_components = glsl_get_vector_elements(val->const_type); ++ ++ nir_const_value src[3]; ++ assert(count <= 7); ++ for (unsigned i = 0; i < count - 4; i++) { ++ nir_constant *c = ++ vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; ++ ++ unsigned j = swap ? 1 - i : i; ++ for (unsigned k = 0; k < num_components; k++) ++ src[j].u[k] = c->value.u[k]; ++ } ++ ++ nir_const_value res = nir_eval_const_opcode(op, num_components, src); ++ ++ for (unsigned k = 0; k < num_components; k++) ++ val->constant->value.u[k] = res.u[k]; ++ ++ return; ++ } /* default */ ++ } ++ } ++ ++ case SpvOpConstantNull: ++ val->constant = vtn_null_constant(b, val->const_type); ++ break; ++ ++ case SpvOpConstantSampler: ++ assert(!"OpConstantSampler requires Kernel Capability"); ++ break; ++ ++ default: ++ unreachable("Unhandled opcode"); ++ } ++} ++ ++static void ++vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct nir_function *callee = ++ vtn_value(b, w[3], vtn_value_type_function)->func->impl->function; ++ ++ nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee); ++ for (unsigned i = 0; i < call->num_params; i++) { ++ unsigned arg_id = w[4 + i]; ++ struct vtn_value *arg = vtn_untyped_value(b, arg_id); ++ if (arg->value_type == vtn_value_type_access_chain) { ++ nir_deref_var *d = vtn_access_chain_to_deref(b, arg->access_chain); ++ call->params[i] = nir_deref_as_var(nir_copy_deref(call, &d->deref)); ++ } else { ++ struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id); ++ ++ /* Make a temporary to store the argument in */ ++ nir_variable *tmp = ++ nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp"); ++ call->params[i] = nir_deref_var_create(call, tmp); ++ ++ vtn_local_store(b, arg_ssa, call->params[i]); ++ } ++ } ++ ++ nir_variable *out_tmp = NULL; ++ if (!glsl_type_is_void(callee->return_type)) { ++ out_tmp = nir_local_variable_create(b->impl, callee->return_type, ++ "out_tmp"); ++ call->return_deref = nir_deref_var_create(call, out_tmp); ++ } ++ ++ nir_builder_instr_insert(&b->nb, &call->instr); ++ ++ if (glsl_type_is_void(callee->return_type)) { ++ vtn_push_value(b, w[2], vtn_value_type_undef); ++ } else { ++ struct vtn_value *retval = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ retval->ssa = vtn_local_load(b, call->return_deref); ++ } ++} ++ ++struct vtn_ssa_value * ++vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) ++{ ++ struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); ++ 
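++ /* Build a vtn_ssa_value tree that mirrors the GLSL type: vectors and scalars carry a single NIR def (filled in by the caller), while matrices, arrays and structs get one element per column/element/field, created recursively below. */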
val->type = type; ++ ++ if (!glsl_type_is_vector_or_scalar(type)) { ++ unsigned elems = glsl_get_length(type); ++ val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); ++ for (unsigned i = 0; i < elems; i++) { ++ const struct glsl_type *child_type; ++ ++ switch (glsl_get_base_type(type)) { ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ child_type = glsl_get_column_type(type); ++ break; ++ case GLSL_TYPE_ARRAY: ++ child_type = glsl_get_array_element(type); ++ break; ++ case GLSL_TYPE_STRUCT: ++ child_type = glsl_get_struct_field(type, i); ++ break; ++ default: ++ unreachable("unknown base type"); ++ } ++ ++ val->elems[i] = vtn_create_ssa_value(b, child_type); ++ } ++ } ++ ++ return val; ++} ++ ++static nir_tex_src ++vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) ++{ ++ nir_tex_src src; ++ src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); ++ src.src_type = type; ++ return src; ++} ++ ++static void ++vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ if (opcode == SpvOpSampledImage) { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_sampled_image); ++ val->sampled_image = ralloc(b, struct vtn_sampled_image); ++ val->sampled_image->image = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ val->sampled_image->sampler = ++ vtn_value(b, w[4], vtn_value_type_access_chain)->access_chain; ++ return; ++ } else if (opcode == SpvOpImage) { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_access_chain); ++ struct vtn_value *src_val = vtn_untyped_value(b, w[3]); ++ if (src_val->value_type == vtn_value_type_sampled_image) { ++ val->access_chain = src_val->sampled_image->image; ++ } else { ++ assert(src_val->value_type == vtn_value_type_access_chain); ++ val->access_chain = src_val->access_chain; ++ } ++ return; ++ } ++ ++ struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ ++ struct vtn_sampled_image sampled; ++ struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); ++ if (sampled_val->value_type == vtn_value_type_sampled_image) { ++ sampled = *sampled_val->sampled_image; ++ } else { ++ assert(sampled_val->value_type == vtn_value_type_access_chain); ++ sampled.image = NULL; ++ sampled.sampler = sampled_val->access_chain; ++ } ++ ++ nir_tex_src srcs[8]; /* 8 should be enough */ ++ nir_tex_src *p = srcs; ++ ++ unsigned idx = 4; ++ ++ bool has_coord = false; ++ switch (opcode) { ++ case SpvOpImageSampleImplicitLod: ++ case SpvOpImageSampleExplicitLod: ++ case SpvOpImageSampleDrefImplicitLod: ++ case SpvOpImageSampleDrefExplicitLod: ++ case SpvOpImageSampleProjImplicitLod: ++ case SpvOpImageSampleProjExplicitLod: ++ case SpvOpImageSampleProjDrefImplicitLod: ++ case SpvOpImageSampleProjDrefExplicitLod: ++ case SpvOpImageFetch: ++ case SpvOpImageGather: ++ case SpvOpImageDrefGather: ++ case SpvOpImageQueryLod: { ++ /* All these types have the coordinate as their first real argument */ ++ struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); ++ has_coord = true; ++ p->src = nir_src_for_ssa(coord->def); ++ p->src_type = nir_tex_src_coord; ++ p++; ++ break; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* These all have an explicit depth value as their next source */ ++ switch (opcode) { ++ case SpvOpImageSampleDrefImplicitLod: ++ case SpvOpImageSampleDrefExplicitLod: ++ case 
SpvOpImageSampleProjDrefImplicitLod: ++ case SpvOpImageSampleProjDrefExplicitLod: ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); ++ break; ++ default: ++ break; ++ } ++ ++ /* For OpImageQuerySizeLod, we always have an LOD */ ++ if (opcode == SpvOpImageQuerySizeLod) ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); ++ ++ /* Figure out the base texture operation */ ++ nir_texop texop; ++ switch (opcode) { ++ case SpvOpImageSampleImplicitLod: ++ case SpvOpImageSampleDrefImplicitLod: ++ case SpvOpImageSampleProjImplicitLod: ++ case SpvOpImageSampleProjDrefImplicitLod: ++ texop = nir_texop_tex; ++ break; ++ ++ case SpvOpImageSampleExplicitLod: ++ case SpvOpImageSampleDrefExplicitLod: ++ case SpvOpImageSampleProjExplicitLod: ++ case SpvOpImageSampleProjDrefExplicitLod: ++ texop = nir_texop_txl; ++ break; ++ ++ case SpvOpImageFetch: ++ texop = nir_texop_txf; ++ break; ++ ++ case SpvOpImageGather: ++ case SpvOpImageDrefGather: ++ texop = nir_texop_tg4; ++ break; ++ ++ case SpvOpImageQuerySizeLod: ++ case SpvOpImageQuerySize: ++ texop = nir_texop_txs; ++ break; ++ ++ case SpvOpImageQueryLod: ++ texop = nir_texop_lod; ++ break; ++ ++ case SpvOpImageQueryLevels: ++ texop = nir_texop_query_levels; ++ break; ++ ++ case SpvOpImageQuerySamples: ++ default: ++ unreachable("Unhandled opcode"); ++ } ++ ++ /* Now we need to handle some number of optional arguments */ ++ if (idx < count) { ++ uint32_t operands = w[idx++]; ++ ++ if (operands & SpvImageOperandsBiasMask) { ++ assert(texop == nir_texop_tex); ++ texop = nir_texop_txb; ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); ++ } ++ ++ if (operands & SpvImageOperandsLodMask) { ++ assert(texop == nir_texop_txl || texop == nir_texop_txf || ++ texop == nir_texop_txs); ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); ++ } ++ ++ if (operands & SpvImageOperandsGradMask) { ++ assert(texop == nir_texop_tex); ++ texop = nir_texop_txd; ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); ++ } ++ ++ if (operands & SpvImageOperandsOffsetMask || ++ operands & SpvImageOperandsConstOffsetMask) ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); ++ ++ if (operands & SpvImageOperandsConstOffsetsMask) ++ assert(!"Constant offsets to texture gather not yet implemented"); ++ ++ if (operands & SpvImageOperandsSampleMask) { ++ assert(texop == nir_texop_txf); ++ texop = nir_texop_txf_ms; ++ (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); ++ } ++ } ++ /* We should have now consumed exactly all of the arguments */ ++ assert(idx == count); ++ ++ nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); ++ instr->op = texop; ++ ++ memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); ++ ++ const struct glsl_type *image_type; ++ if (sampled.image) { ++ image_type = sampled.image->var->var->interface_type; ++ } else { ++ image_type = sampled.sampler->var->var->interface_type; ++ } ++ ++ instr->sampler_dim = glsl_get_sampler_dim(image_type); ++ instr->is_array = glsl_sampler_type_is_array(image_type); ++ instr->is_shadow = glsl_sampler_type_is_shadow(image_type); ++ instr->is_new_style_shadow = instr->is_shadow; ++ ++ if (has_coord) { ++ switch (instr->sampler_dim) { ++ case GLSL_SAMPLER_DIM_1D: ++ case GLSL_SAMPLER_DIM_BUF: ++ instr->coord_components = 1; ++ break; ++ case GLSL_SAMPLER_DIM_2D: ++ case GLSL_SAMPLER_DIM_RECT: ++ instr->coord_components = 2; ++ break; ++ case GLSL_SAMPLER_DIM_3D: ++ case GLSL_SAMPLER_DIM_CUBE: ++ case GLSL_SAMPLER_DIM_MS: ++ 
instr->coord_components = 3; ++ break; ++ default: ++ unreachable("Invalid sampler type"); ++ } ++ ++ if (instr->is_array) ++ instr->coord_components++; ++ } else { ++ instr->coord_components = 0; ++ } ++ ++ switch (glsl_get_sampler_result_type(image_type)) { ++ case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; ++ case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; ++ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; ++ case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; ++ default: ++ unreachable("Invalid base type for sampler result"); ++ } ++ ++ nir_deref_var *sampler = vtn_access_chain_to_deref(b, sampled.sampler); ++ instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); ++ if (sampled.image) { ++ nir_deref_var *image = vtn_access_chain_to_deref(b, sampled.image); ++ instr->texture = nir_deref_as_var(nir_copy_deref(instr, &image->deref)); ++ } else { ++ instr->texture = NULL; ++ } ++ ++ nir_ssa_dest_init(&instr->instr, &instr->dest, ++ nir_tex_instr_dest_size(instr), NULL); ++ ++ assert(glsl_get_vector_elements(ret_type->type) == ++ nir_tex_instr_dest_size(instr)); ++ ++ val->ssa = vtn_create_ssa_value(b, ret_type->type); ++ val->ssa->def = &instr->dest.ssa; ++ ++ nir_builder_instr_insert(&b->nb, &instr->instr); ++} ++ ++static nir_ssa_def * ++get_image_coord(struct vtn_builder *b, uint32_t value) ++{ ++ struct vtn_ssa_value *coord = vtn_ssa_value(b, value); ++ ++ /* The image_load_store intrinsics assume a 4-dim coordinate */ ++ unsigned dim = glsl_get_vector_elements(coord->type); ++ unsigned swizzle[4]; ++ for (unsigned i = 0; i < 4; i++) ++ swizzle[i] = MIN2(i, dim - 1); ++ ++ return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); ++} ++ ++static void ++vtn_handle_image(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ /* Just get this one out of the way */ ++ if (opcode == SpvOpImageTexelPointer) { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_image_pointer); ++ val->image = ralloc(b, struct vtn_image_pointer); ++ ++ val->image->image = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ val->image->coord = get_image_coord(b, w[4]); ++ val->image->sample = vtn_ssa_value(b, w[5])->def; ++ return; ++ } ++ ++ struct vtn_image_pointer image; ++ ++ switch (opcode) { ++ case SpvOpAtomicExchange: ++ case SpvOpAtomicCompareExchange: ++ case SpvOpAtomicCompareExchangeWeak: ++ case SpvOpAtomicIIncrement: ++ case SpvOpAtomicIDecrement: ++ case SpvOpAtomicIAdd: ++ case SpvOpAtomicISub: ++ case SpvOpAtomicSMin: ++ case SpvOpAtomicUMin: ++ case SpvOpAtomicSMax: ++ case SpvOpAtomicUMax: ++ case SpvOpAtomicAnd: ++ case SpvOpAtomicOr: ++ case SpvOpAtomicXor: ++ image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; ++ break; ++ ++ case SpvOpImageQuerySize: ++ image.image = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ image.coord = NULL; ++ image.sample = NULL; ++ break; ++ ++ case SpvOpImageRead: ++ image.image = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ image.coord = get_image_coord(b, w[4]); ++ ++ if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { ++ assert(w[5] == SpvImageOperandsSampleMask); ++ image.sample = vtn_ssa_value(b, w[6])->def; ++ } else { ++ image.sample = nir_ssa_undef(&b->nb, 1); ++ } ++ break; ++ ++ case SpvOpImageWrite: ++ image.image = ++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; ++ image.coord = get_image_coord(b, w[2]); ++ ++ /* texel = w[3] */ ++ ++ if 
(count > 4 && (w[4] & SpvImageOperandsSampleMask)) { ++ assert(w[4] == SpvImageOperandsSampleMask); ++ image.sample = vtn_ssa_value(b, w[5])->def; ++ } else { ++ image.sample = nir_ssa_undef(&b->nb, 1); ++ } ++ break; ++ ++ default: ++ unreachable("Invalid image opcode"); ++ } ++ ++ nir_intrinsic_op op; ++ switch (opcode) { ++#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; ++ OP(ImageQuerySize, size) ++ OP(ImageRead, load) ++ OP(ImageWrite, store) ++ OP(AtomicExchange, atomic_exchange) ++ OP(AtomicCompareExchange, atomic_comp_swap) ++ OP(AtomicIIncrement, atomic_add) ++ OP(AtomicIDecrement, atomic_add) ++ OP(AtomicIAdd, atomic_add) ++ OP(AtomicISub, atomic_add) ++ OP(AtomicSMin, atomic_min) ++ OP(AtomicUMin, atomic_min) ++ OP(AtomicSMax, atomic_max) ++ OP(AtomicUMax, atomic_max) ++ OP(AtomicAnd, atomic_and) ++ OP(AtomicOr, atomic_or) ++ OP(AtomicXor, atomic_xor) ++#undef OP ++ default: ++ unreachable("Invalid image opcode"); ++ } ++ ++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); ++ ++ nir_deref_var *image_deref = vtn_access_chain_to_deref(b, image.image); ++ intrin->variables[0] = ++ nir_deref_as_var(nir_copy_deref(&intrin->instr, &image_deref->deref)); ++ ++ /* ImageQuerySize doesn't take any extra parameters */ ++ if (opcode != SpvOpImageQuerySize) { ++ /* The image coordinate is always 4 components but we may not have that ++ * many. Swizzle to compensate. ++ */ ++ unsigned swiz[4]; ++ for (unsigned i = 0; i < 4; i++) ++ swiz[i] = i < image.coord->num_components ? i : 0; ++ intrin->src[0] = nir_src_for_ssa(nir_swizzle(&b->nb, image.coord, ++ swiz, 4, false)); ++ intrin->src[1] = nir_src_for_ssa(image.sample); ++ } ++ ++ switch (opcode) { ++ case SpvOpImageQuerySize: ++ case SpvOpImageRead: ++ break; ++ case SpvOpImageWrite: ++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); ++ break; ++ case SpvOpAtomicIIncrement: ++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); ++ break; ++ case SpvOpAtomicIDecrement: ++ intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); ++ break; ++ ++ case SpvOpAtomicExchange: ++ case SpvOpAtomicIAdd: ++ case SpvOpAtomicSMin: ++ case SpvOpAtomicUMin: ++ case SpvOpAtomicSMax: ++ case SpvOpAtomicUMax: ++ case SpvOpAtomicAnd: ++ case SpvOpAtomicOr: ++ case SpvOpAtomicXor: ++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); ++ break; ++ ++ case SpvOpAtomicCompareExchange: ++ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); ++ intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); ++ break; ++ ++ case SpvOpAtomicISub: ++ intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); ++ break; ++ ++ default: ++ unreachable("Invalid image opcode"); ++ } ++ ++ if (opcode != SpvOpImageWrite) { ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL); ++ ++ nir_builder_instr_insert(&b->nb, &intrin->instr); ++ ++ /* The image intrinsics always return 4 channels but we may not want ++ * that many. Emit a mov to trim it down. 
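++       * The identity swizzle below, combined with the smaller component
++       * count taken from the destination type, is what does the trimming.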
++ */ ++ unsigned swiz[4] = {0, 1, 2, 3}; ++ val->ssa = vtn_create_ssa_value(b, type->type); ++ val->ssa->def = nir_swizzle(&b->nb, &intrin->dest.ssa, swiz, ++ glsl_get_vector_elements(type->type), false); ++ } else { ++ nir_builder_instr_insert(&b->nb, &intrin->instr); ++ } ++} ++ ++static nir_intrinsic_op ++get_ssbo_nir_atomic_op(SpvOp opcode) ++{ ++ switch (opcode) { ++#define OP(S, N) case SpvOp##S: return nir_intrinsic_ssbo_##N; ++ OP(AtomicExchange, atomic_exchange) ++ OP(AtomicCompareExchange, atomic_comp_swap) ++ OP(AtomicIIncrement, atomic_add) ++ OP(AtomicIDecrement, atomic_add) ++ OP(AtomicIAdd, atomic_add) ++ OP(AtomicISub, atomic_add) ++ OP(AtomicSMin, atomic_imin) ++ OP(AtomicUMin, atomic_umin) ++ OP(AtomicSMax, atomic_imax) ++ OP(AtomicUMax, atomic_umax) ++ OP(AtomicAnd, atomic_and) ++ OP(AtomicOr, atomic_or) ++ OP(AtomicXor, atomic_xor) ++#undef OP ++ default: ++ unreachable("Invalid SSBO atomic"); ++ } ++} ++ ++static nir_intrinsic_op ++get_shared_nir_atomic_op(SpvOp opcode) ++{ ++ switch (opcode) { ++#define OP(S, N) case SpvOp##S: return nir_intrinsic_var_##N; ++ OP(AtomicExchange, atomic_exchange) ++ OP(AtomicCompareExchange, atomic_comp_swap) ++ OP(AtomicIIncrement, atomic_add) ++ OP(AtomicIDecrement, atomic_add) ++ OP(AtomicIAdd, atomic_add) ++ OP(AtomicISub, atomic_add) ++ OP(AtomicSMin, atomic_imin) ++ OP(AtomicUMin, atomic_umin) ++ OP(AtomicSMax, atomic_imax) ++ OP(AtomicUMax, atomic_umax) ++ OP(AtomicAnd, atomic_and) ++ OP(AtomicOr, atomic_or) ++ OP(AtomicXor, atomic_xor) ++#undef OP ++ default: ++ unreachable("Invalid shared atomic"); ++ } ++} ++ ++static void ++fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, nir_src *src) ++{ ++ switch (opcode) { ++ case SpvOpAtomicIIncrement: ++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); ++ break; ++ ++ case SpvOpAtomicIDecrement: ++ src[0] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); ++ break; ++ ++ case SpvOpAtomicISub: ++ src[0] = ++ nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); ++ break; ++ ++ case SpvOpAtomicCompareExchange: ++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); ++ src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); ++ break; ++ /* Fall through */ ++ ++ case SpvOpAtomicExchange: ++ case SpvOpAtomicIAdd: ++ case SpvOpAtomicSMin: ++ case SpvOpAtomicUMin: ++ case SpvOpAtomicSMax: ++ case SpvOpAtomicUMax: ++ case SpvOpAtomicAnd: ++ case SpvOpAtomicOr: ++ case SpvOpAtomicXor: ++ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); ++ break; ++ ++ default: ++ unreachable("Invalid SPIR-V atomic"); ++ } ++} ++ ++static void ++vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct vtn_access_chain *chain = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ nir_intrinsic_instr *atomic; ++ ++ /* ++ SpvScope scope = w[4]; ++ SpvMemorySemanticsMask semantics = w[5]; ++ */ ++ ++ if (chain->var->mode == vtn_variable_mode_workgroup) { ++ nir_deref *deref = &vtn_access_chain_to_deref(b, chain)->deref; ++ nir_intrinsic_op op = get_shared_nir_atomic_op(opcode); ++ atomic = nir_intrinsic_instr_create(b->nb.shader, op); ++ atomic->variables[0] = nir_deref_as_var(nir_copy_deref(atomic, deref)); ++ fill_common_atomic_sources(b, opcode, w, &atomic->src[0]); ++ } else { ++ assert(chain->var->mode == vtn_variable_mode_ssbo); ++ struct vtn_type *type; ++ nir_ssa_def *offset, *index; ++ offset = vtn_access_chain_to_offset(b, chain, &index, &type, NULL, false); ++ ++ 
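++      /* SSBO atomics address memory as a (block index, byte offset) pair
++       * rather than through a variable dereference, so the intrinsic built
++       * below takes the index and offset as its first two sources, e.g. for
++       * SpvOpAtomicIAdd:
++       *
++       *    ssbo_atomic_add(block_index, byte_offset, data)
++       *
++       * The data sources are filled in starting at src[2] by
++       * fill_common_atomic_sources().
++       */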
nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode); ++ ++ atomic = nir_intrinsic_instr_create(b->nb.shader, op); ++ atomic->src[0] = nir_src_for_ssa(index); ++ atomic->src[1] = nir_src_for_ssa(offset); ++ fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); ++ } ++ ++ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); ++ ++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = rzalloc(b, struct vtn_ssa_value); ++ val->ssa->def = &atomic->dest.ssa; ++ val->ssa->type = type->type; ++ ++ nir_builder_instr_insert(&b->nb, &atomic->instr); ++} ++ ++static nir_alu_instr * ++create_vec(nir_shader *shader, unsigned num_components) ++{ ++ nir_op op; ++ switch (num_components) { ++ case 1: op = nir_op_fmov; break; ++ case 2: op = nir_op_vec2; break; ++ case 3: op = nir_op_vec3; break; ++ case 4: op = nir_op_vec4; break; ++ default: unreachable("bad vector size"); ++ } ++ ++ nir_alu_instr *vec = nir_alu_instr_create(shader, op); ++ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); ++ vec->dest.write_mask = (1 << num_components) - 1; ++ ++ return vec; ++} ++ ++struct vtn_ssa_value * ++vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) ++{ ++ if (src->transposed) ++ return src->transposed; ++ ++ struct vtn_ssa_value *dest = ++ vtn_create_ssa_value(b, glsl_transposed_type(src->type)); ++ ++ for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { ++ nir_alu_instr *vec = create_vec(b->shader, ++ glsl_get_matrix_columns(src->type)); ++ if (glsl_type_is_vector_or_scalar(src->type)) { ++ vec->src[0].src = nir_src_for_ssa(src->def); ++ vec->src[0].swizzle[0] = i; ++ } else { ++ for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { ++ vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); ++ vec->src[j].swizzle[0] = i; ++ } ++ } ++ nir_builder_instr_insert(&b->nb, &vec->instr); ++ dest->elems[i]->def = &vec->dest.dest.ssa; ++ } ++ ++ dest->transposed = src; ++ ++ return dest; ++} ++ ++nir_ssa_def * ++vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) ++{ ++ unsigned swiz[4] = { index }; ++ return nir_swizzle(&b->nb, src, swiz, 1, true); ++} ++ ++nir_ssa_def * ++vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, ++ unsigned index) ++{ ++ nir_alu_instr *vec = create_vec(b->shader, src->num_components); ++ ++ for (unsigned i = 0; i < src->num_components; i++) { ++ if (i == index) { ++ vec->src[i].src = nir_src_for_ssa(insert); ++ } else { ++ vec->src[i].src = nir_src_for_ssa(src); ++ vec->src[i].swizzle[0] = i; ++ } ++ } ++ ++ nir_builder_instr_insert(&b->nb, &vec->instr); ++ ++ return &vec->dest.dest.ssa; ++} ++ ++nir_ssa_def * ++vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, ++ nir_ssa_def *index) ++{ ++ nir_ssa_def *dest = vtn_vector_extract(b, src, 0); ++ for (unsigned i = 1; i < src->num_components; i++) ++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), ++ vtn_vector_extract(b, src, i), dest); ++ ++ return dest; ++} ++ ++nir_ssa_def * ++vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, ++ nir_ssa_def *insert, nir_ssa_def *index) ++{ ++ nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); ++ for (unsigned i = 1; i < src->num_components; i++) ++ dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), ++ vtn_vector_insert(b, src, insert, i), dest); ++ ++ return dest; ++} ++ ++static nir_ssa_def 
* ++vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, ++ nir_ssa_def *src0, nir_ssa_def *src1, ++ const uint32_t *indices) ++{ ++ nir_alu_instr *vec = create_vec(b->shader, num_components); ++ ++ nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); ++ nir_builder_instr_insert(&b->nb, &undef->instr); ++ ++ for (unsigned i = 0; i < num_components; i++) { ++ uint32_t index = indices[i]; ++ if (index == 0xffffffff) { ++ vec->src[i].src = nir_src_for_ssa(&undef->def); ++ } else if (index < src0->num_components) { ++ vec->src[i].src = nir_src_for_ssa(src0); ++ vec->src[i].swizzle[0] = index; ++ } else { ++ vec->src[i].src = nir_src_for_ssa(src1); ++ vec->src[i].swizzle[0] = index - src0->num_components; ++ } ++ } ++ ++ nir_builder_instr_insert(&b->nb, &vec->instr); ++ ++ return &vec->dest.dest.ssa; ++} ++ ++/* ++ * Concatentates a number of vectors/scalars together to produce a vector ++ */ ++static nir_ssa_def * ++vtn_vector_construct(struct vtn_builder *b, unsigned num_components, ++ unsigned num_srcs, nir_ssa_def **srcs) ++{ ++ nir_alu_instr *vec = create_vec(b->shader, num_components); ++ ++ unsigned dest_idx = 0; ++ for (unsigned i = 0; i < num_srcs; i++) { ++ nir_ssa_def *src = srcs[i]; ++ for (unsigned j = 0; j < src->num_components; j++) { ++ vec->src[dest_idx].src = nir_src_for_ssa(src); ++ vec->src[dest_idx].swizzle[0] = j; ++ dest_idx++; ++ } ++ } ++ ++ nir_builder_instr_insert(&b->nb, &vec->instr); ++ ++ return &vec->dest.dest.ssa; ++} ++ ++static struct vtn_ssa_value * ++vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) ++{ ++ struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); ++ dest->type = src->type; ++ ++ if (glsl_type_is_vector_or_scalar(src->type)) { ++ dest->def = src->def; ++ } else { ++ unsigned elems = glsl_get_length(src->type); ++ ++ dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); ++ for (unsigned i = 0; i < elems; i++) ++ dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); ++ } ++ ++ return dest; ++} ++ ++static struct vtn_ssa_value * ++vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, ++ struct vtn_ssa_value *insert, const uint32_t *indices, ++ unsigned num_indices) ++{ ++ struct vtn_ssa_value *dest = vtn_composite_copy(b, src); ++ ++ struct vtn_ssa_value *cur = dest; ++ unsigned i; ++ for (i = 0; i < num_indices - 1; i++) { ++ cur = cur->elems[indices[i]]; ++ } ++ ++ if (glsl_type_is_vector_or_scalar(cur->type)) { ++ /* According to the SPIR-V spec, OpCompositeInsert may work down to ++ * the component granularity. In that case, the last index will be ++ * the index to insert the scalar into the vector. ++ */ ++ ++ cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); ++ } else { ++ cur->elems[indices[i]] = insert; ++ } ++ ++ return dest; ++} ++ ++static struct vtn_ssa_value * ++vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, ++ const uint32_t *indices, unsigned num_indices) ++{ ++ struct vtn_ssa_value *cur = src; ++ for (unsigned i = 0; i < num_indices; i++) { ++ if (glsl_type_is_vector_or_scalar(cur->type)) { ++ assert(i == num_indices - 1); ++ /* According to the SPIR-V spec, OpCompositeExtract may work down to ++ * the component granularity. The last index will be the index of the ++ * vector to extract. 
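++          * For example, indices (2, 1) into an array of vec4s first select
++          * elems[2] and then extract component 1 from that vector.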
++ */ ++ ++ struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); ++ ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); ++ ret->def = vtn_vector_extract(b, cur->def, indices[i]); ++ return ret; ++ } else { ++ cur = cur->elems[indices[i]]; ++ } ++ } ++ ++ return cur; ++} ++ ++static void ++vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ const struct glsl_type *type = ++ vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ val->ssa = vtn_create_ssa_value(b, type); ++ ++ switch (opcode) { ++ case SpvOpVectorExtractDynamic: ++ val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, ++ vtn_ssa_value(b, w[4])->def); ++ break; ++ ++ case SpvOpVectorInsertDynamic: ++ val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, ++ vtn_ssa_value(b, w[4])->def, ++ vtn_ssa_value(b, w[5])->def); ++ break; ++ ++ case SpvOpVectorShuffle: ++ val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), ++ vtn_ssa_value(b, w[3])->def, ++ vtn_ssa_value(b, w[4])->def, ++ w + 5); ++ break; ++ ++ case SpvOpCompositeConstruct: { ++ unsigned elems = count - 3; ++ if (glsl_type_is_vector_or_scalar(type)) { ++ nir_ssa_def *srcs[4]; ++ for (unsigned i = 0; i < elems; i++) ++ srcs[i] = vtn_ssa_value(b, w[3 + i])->def; ++ val->ssa->def = ++ vtn_vector_construct(b, glsl_get_vector_elements(type), ++ elems, srcs); ++ } else { ++ val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); ++ for (unsigned i = 0; i < elems; i++) ++ val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); ++ } ++ break; ++ } ++ case SpvOpCompositeExtract: ++ val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), ++ w + 4, count - 4); ++ break; ++ ++ case SpvOpCompositeInsert: ++ val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), ++ vtn_ssa_value(b, w[3]), ++ w + 5, count - 5); ++ break; ++ ++ case SpvOpCopyObject: ++ val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); ++ break; ++ ++ default: ++ unreachable("unknown composite operation"); ++ } ++} ++ ++static void ++vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ nir_intrinsic_op intrinsic_op; ++ switch (opcode) { ++ case SpvOpEmitVertex: ++ case SpvOpEmitStreamVertex: ++ intrinsic_op = nir_intrinsic_emit_vertex; ++ break; ++ case SpvOpEndPrimitive: ++ case SpvOpEndStreamPrimitive: ++ intrinsic_op = nir_intrinsic_end_primitive; ++ break; ++ case SpvOpMemoryBarrier: ++ intrinsic_op = nir_intrinsic_memory_barrier; ++ break; ++ case SpvOpControlBarrier: ++ intrinsic_op = nir_intrinsic_barrier; ++ break; ++ default: ++ unreachable("unknown barrier instruction"); ++ } ++ ++ nir_intrinsic_instr *intrin = ++ nir_intrinsic_instr_create(b->shader, intrinsic_op); ++ ++ if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) ++ intrin->const_index[0] = w[1]; ++ ++ nir_builder_instr_insert(&b->nb, &intrin->instr); ++} ++ ++static unsigned ++gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) ++{ ++ switch (mode) { ++ case SpvExecutionModeInputPoints: ++ case SpvExecutionModeOutputPoints: ++ return 0; /* GL_POINTS */ ++ case SpvExecutionModeInputLines: ++ return 1; /* GL_LINES */ ++ case SpvExecutionModeInputLinesAdjacency: ++ return 0x000A; /* GL_LINE_STRIP_ADJACENCY_ARB */ ++ case SpvExecutionModeTriangles: ++ return 4; /* GL_TRIANGLES */ ++ case SpvExecutionModeInputTrianglesAdjacency: ++ return 0x000C; /* 
GL_TRIANGLES_ADJACENCY_ARB */ ++ case SpvExecutionModeQuads: ++ return 7; /* GL_QUADS */ ++ case SpvExecutionModeIsolines: ++ return 0x8E7A; /* GL_ISOLINES */ ++ case SpvExecutionModeOutputLineStrip: ++ return 3; /* GL_LINE_STRIP */ ++ case SpvExecutionModeOutputTriangleStrip: ++ return 5; /* GL_TRIANGLE_STRIP */ ++ default: ++ assert(!"Invalid primitive type"); ++ return 4; ++ } ++} ++ ++static unsigned ++vertices_in_from_spv_execution_mode(SpvExecutionMode mode) ++{ ++ switch (mode) { ++ case SpvExecutionModeInputPoints: ++ return 1; ++ case SpvExecutionModeInputLines: ++ return 2; ++ case SpvExecutionModeInputLinesAdjacency: ++ return 4; ++ case SpvExecutionModeTriangles: ++ return 3; ++ case SpvExecutionModeInputTrianglesAdjacency: ++ return 6; ++ default: ++ assert(!"Invalid GS input mode"); ++ return 0; ++ } ++} ++ ++static gl_shader_stage ++stage_for_execution_model(SpvExecutionModel model) ++{ ++ switch (model) { ++ case SpvExecutionModelVertex: ++ return MESA_SHADER_VERTEX; ++ case SpvExecutionModelTessellationControl: ++ return MESA_SHADER_TESS_CTRL; ++ case SpvExecutionModelTessellationEvaluation: ++ return MESA_SHADER_TESS_EVAL; ++ case SpvExecutionModelGeometry: ++ return MESA_SHADER_GEOMETRY; ++ case SpvExecutionModelFragment: ++ return MESA_SHADER_FRAGMENT; ++ case SpvExecutionModelGLCompute: ++ return MESA_SHADER_COMPUTE; ++ default: ++ unreachable("Unsupported execution model"); ++ } ++} ++ ++static bool ++vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpSource: ++ case SpvOpSourceExtension: ++ case SpvOpSourceContinued: ++ case SpvOpExtension: ++ /* Unhandled, but these are for debug so that's ok. */ ++ break; ++ ++ case SpvOpCapability: ++ switch ((SpvCapability)w[1]) { ++ case SpvCapabilityMatrix: ++ case SpvCapabilityShader: ++ case SpvCapabilityGeometry: ++ break; ++ default: ++ assert(!"Unsupported capability"); ++ } ++ break; ++ ++ case SpvOpExtInstImport: ++ vtn_handle_extension(b, opcode, w, count); ++ break; ++ ++ case SpvOpMemoryModel: ++ assert(w[1] == SpvAddressingModelLogical); ++ assert(w[2] == SpvMemoryModelGLSL450); ++ break; ++ ++ case SpvOpEntryPoint: { ++ struct vtn_value *entry_point = &b->values[w[2]]; ++ /* Let this be a name label regardless */ ++ unsigned name_words; ++ entry_point->name = vtn_string_literal(b, &w[3], count - 3, &name_words); ++ ++ if (strcmp(entry_point->name, b->entry_point_name) != 0 || ++ stage_for_execution_model(w[1]) != b->entry_point_stage) ++ break; ++ ++ assert(b->entry_point == NULL); ++ b->entry_point = entry_point; ++ break; ++ } ++ ++ case SpvOpString: ++ vtn_push_value(b, w[1], vtn_value_type_string)->str = ++ vtn_string_literal(b, &w[2], count - 2, NULL); ++ break; ++ ++ case SpvOpName: ++ b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2, NULL); ++ break; ++ ++ case SpvOpMemberName: ++ /* TODO */ ++ break; ++ ++ case SpvOpExecutionMode: ++ case SpvOpDecorationGroup: ++ case SpvOpDecorate: ++ case SpvOpMemberDecorate: ++ case SpvOpGroupDecorate: ++ case SpvOpGroupMemberDecorate: ++ vtn_handle_decoration(b, opcode, w, count); ++ break; ++ ++ default: ++ return false; /* End of preamble */ ++ } ++ ++ return true; ++} ++ ++static void ++vtn_handle_execution_mode(struct vtn_builder *b, struct vtn_value *entry_point, ++ const struct vtn_decoration *mode, void *data) ++{ ++ assert(b->entry_point == entry_point); ++ ++ switch(mode->exec_mode) { ++ case SpvExecutionModeOriginUpperLeft: ++ case 
SpvExecutionModeOriginLowerLeft: ++ b->origin_upper_left = ++ (mode->exec_mode == SpvExecutionModeOriginUpperLeft); ++ break; ++ ++ case SpvExecutionModeEarlyFragmentTests: ++ assert(b->shader->stage == MESA_SHADER_FRAGMENT); ++ b->shader->info.fs.early_fragment_tests = true; ++ break; ++ ++ case SpvExecutionModeInvocations: ++ assert(b->shader->stage == MESA_SHADER_GEOMETRY); ++ b->shader->info.gs.invocations = MAX2(1, mode->literals[0]); ++ break; ++ ++ case SpvExecutionModeDepthReplacing: ++ assert(b->shader->stage == MESA_SHADER_FRAGMENT); ++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; ++ break; ++ case SpvExecutionModeDepthGreater: ++ assert(b->shader->stage == MESA_SHADER_FRAGMENT); ++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; ++ break; ++ case SpvExecutionModeDepthLess: ++ assert(b->shader->stage == MESA_SHADER_FRAGMENT); ++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; ++ break; ++ case SpvExecutionModeDepthUnchanged: ++ assert(b->shader->stage == MESA_SHADER_FRAGMENT); ++ b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; ++ break; ++ ++ case SpvExecutionModeLocalSize: ++ assert(b->shader->stage == MESA_SHADER_COMPUTE); ++ b->shader->info.cs.local_size[0] = mode->literals[0]; ++ b->shader->info.cs.local_size[1] = mode->literals[1]; ++ b->shader->info.cs.local_size[2] = mode->literals[2]; ++ break; ++ case SpvExecutionModeLocalSizeHint: ++ break; /* Nothing do do with this */ ++ ++ case SpvExecutionModeOutputVertices: ++ assert(b->shader->stage == MESA_SHADER_GEOMETRY); ++ b->shader->info.gs.vertices_out = mode->literals[0]; ++ break; ++ ++ case SpvExecutionModeInputPoints: ++ case SpvExecutionModeInputLines: ++ case SpvExecutionModeInputLinesAdjacency: ++ case SpvExecutionModeTriangles: ++ case SpvExecutionModeInputTrianglesAdjacency: ++ case SpvExecutionModeQuads: ++ case SpvExecutionModeIsolines: ++ if (b->shader->stage == MESA_SHADER_GEOMETRY) { ++ b->shader->info.gs.vertices_in = ++ vertices_in_from_spv_execution_mode(mode->exec_mode); ++ } else { ++ assert(!"Tesselation shaders not yet supported"); ++ } ++ break; ++ ++ case SpvExecutionModeOutputPoints: ++ case SpvExecutionModeOutputLineStrip: ++ case SpvExecutionModeOutputTriangleStrip: ++ assert(b->shader->stage == MESA_SHADER_GEOMETRY); ++ b->shader->info.gs.output_primitive = ++ gl_primitive_from_spv_execution_mode(mode->exec_mode); ++ break; ++ ++ case SpvExecutionModeSpacingEqual: ++ case SpvExecutionModeSpacingFractionalEven: ++ case SpvExecutionModeSpacingFractionalOdd: ++ case SpvExecutionModeVertexOrderCw: ++ case SpvExecutionModeVertexOrderCcw: ++ case SpvExecutionModePointMode: ++ assert(!"TODO: Add tessellation metadata"); ++ break; ++ ++ case SpvExecutionModePixelCenterInteger: ++ case SpvExecutionModeXfb: ++ assert(!"Unhandled execution mode"); ++ break; ++ ++ case SpvExecutionModeVecTypeHint: ++ case SpvExecutionModeContractionOff: ++ break; /* OpenCL */ ++ } ++} ++ ++static bool ++vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpSource: ++ case SpvOpSourceContinued: ++ case SpvOpSourceExtension: ++ case SpvOpExtension: ++ case SpvOpCapability: ++ case SpvOpExtInstImport: ++ case SpvOpMemoryModel: ++ case SpvOpEntryPoint: ++ case SpvOpExecutionMode: ++ case SpvOpString: ++ case SpvOpName: ++ case SpvOpMemberName: ++ case SpvOpDecorationGroup: ++ case SpvOpDecorate: ++ case SpvOpMemberDecorate: ++ case SpvOpGroupDecorate: ++ case 
SpvOpGroupMemberDecorate: ++ assert(!"Invalid opcode types and variables section"); ++ break; ++ ++ case SpvOpTypeVoid: ++ case SpvOpTypeBool: ++ case SpvOpTypeInt: ++ case SpvOpTypeFloat: ++ case SpvOpTypeVector: ++ case SpvOpTypeMatrix: ++ case SpvOpTypeImage: ++ case SpvOpTypeSampler: ++ case SpvOpTypeSampledImage: ++ case SpvOpTypeArray: ++ case SpvOpTypeRuntimeArray: ++ case SpvOpTypeStruct: ++ case SpvOpTypeOpaque: ++ case SpvOpTypePointer: ++ case SpvOpTypeFunction: ++ case SpvOpTypeEvent: ++ case SpvOpTypeDeviceEvent: ++ case SpvOpTypeReserveId: ++ case SpvOpTypeQueue: ++ case SpvOpTypePipe: ++ vtn_handle_type(b, opcode, w, count); ++ break; ++ ++ case SpvOpConstantTrue: ++ case SpvOpConstantFalse: ++ case SpvOpConstant: ++ case SpvOpConstantComposite: ++ case SpvOpConstantSampler: ++ case SpvOpConstantNull: ++ case SpvOpSpecConstantTrue: ++ case SpvOpSpecConstantFalse: ++ case SpvOpSpecConstant: ++ case SpvOpSpecConstantComposite: ++ case SpvOpSpecConstantOp: ++ vtn_handle_constant(b, opcode, w, count); ++ break; ++ ++ case SpvOpVariable: ++ vtn_handle_variables(b, opcode, w, count); ++ break; ++ ++ default: ++ return false; /* End of preamble */ ++ } ++ ++ return true; ++} ++ ++static bool ++vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpLabel: ++ break; ++ ++ case SpvOpLoopMerge: ++ case SpvOpSelectionMerge: ++ /* This is handled by cfg pre-pass and walk_blocks */ ++ break; ++ ++ case SpvOpUndef: { ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef); ++ val->type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ break; ++ } ++ ++ case SpvOpExtInst: ++ vtn_handle_extension(b, opcode, w, count); ++ break; ++ ++ case SpvOpVariable: ++ case SpvOpLoad: ++ case SpvOpStore: ++ case SpvOpCopyMemory: ++ case SpvOpCopyMemorySized: ++ case SpvOpAccessChain: ++ case SpvOpInBoundsAccessChain: ++ case SpvOpArrayLength: ++ vtn_handle_variables(b, opcode, w, count); ++ break; ++ ++ case SpvOpFunctionCall: ++ vtn_handle_function_call(b, opcode, w, count); ++ break; ++ ++ case SpvOpSampledImage: ++ case SpvOpImage: ++ case SpvOpImageSampleImplicitLod: ++ case SpvOpImageSampleExplicitLod: ++ case SpvOpImageSampleDrefImplicitLod: ++ case SpvOpImageSampleDrefExplicitLod: ++ case SpvOpImageSampleProjImplicitLod: ++ case SpvOpImageSampleProjExplicitLod: ++ case SpvOpImageSampleProjDrefImplicitLod: ++ case SpvOpImageSampleProjDrefExplicitLod: ++ case SpvOpImageFetch: ++ case SpvOpImageGather: ++ case SpvOpImageDrefGather: ++ case SpvOpImageQuerySizeLod: ++ case SpvOpImageQueryLod: ++ case SpvOpImageQueryLevels: ++ case SpvOpImageQuerySamples: ++ vtn_handle_texture(b, opcode, w, count); ++ break; ++ ++ case SpvOpImageRead: ++ case SpvOpImageWrite: ++ case SpvOpImageTexelPointer: ++ vtn_handle_image(b, opcode, w, count); ++ break; ++ ++ case SpvOpImageQuerySize: { ++ struct vtn_access_chain *image = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ if (glsl_type_is_image(image->var->var->interface_type)) { ++ vtn_handle_image(b, opcode, w, count); ++ } else { ++ vtn_handle_texture(b, opcode, w, count); ++ } ++ break; ++ } ++ ++ case SpvOpAtomicExchange: ++ case SpvOpAtomicCompareExchange: ++ case SpvOpAtomicCompareExchangeWeak: ++ case SpvOpAtomicIIncrement: ++ case SpvOpAtomicIDecrement: ++ case SpvOpAtomicIAdd: ++ case SpvOpAtomicISub: ++ case SpvOpAtomicSMin: ++ case SpvOpAtomicUMin: ++ case SpvOpAtomicSMax: ++ case SpvOpAtomicUMax: ++ case SpvOpAtomicAnd: ++ case 
SpvOpAtomicOr: ++ case SpvOpAtomicXor: { ++ struct vtn_value *pointer = vtn_untyped_value(b, w[3]); ++ if (pointer->value_type == vtn_value_type_image_pointer) { ++ vtn_handle_image(b, opcode, w, count); ++ } else { ++ assert(pointer->value_type == vtn_value_type_access_chain); ++ vtn_handle_ssbo_or_shared_atomic(b, opcode, w, count); ++ } ++ break; ++ } ++ ++ case SpvOpSNegate: ++ case SpvOpFNegate: ++ case SpvOpNot: ++ case SpvOpAny: ++ case SpvOpAll: ++ case SpvOpConvertFToU: ++ case SpvOpConvertFToS: ++ case SpvOpConvertSToF: ++ case SpvOpConvertUToF: ++ case SpvOpUConvert: ++ case SpvOpSConvert: ++ case SpvOpFConvert: ++ case SpvOpQuantizeToF16: ++ case SpvOpConvertPtrToU: ++ case SpvOpConvertUToPtr: ++ case SpvOpPtrCastToGeneric: ++ case SpvOpGenericCastToPtr: ++ case SpvOpBitcast: ++ case SpvOpIsNan: ++ case SpvOpIsInf: ++ case SpvOpIsFinite: ++ case SpvOpIsNormal: ++ case SpvOpSignBitSet: ++ case SpvOpLessOrGreater: ++ case SpvOpOrdered: ++ case SpvOpUnordered: ++ case SpvOpIAdd: ++ case SpvOpFAdd: ++ case SpvOpISub: ++ case SpvOpFSub: ++ case SpvOpIMul: ++ case SpvOpFMul: ++ case SpvOpUDiv: ++ case SpvOpSDiv: ++ case SpvOpFDiv: ++ case SpvOpUMod: ++ case SpvOpSRem: ++ case SpvOpSMod: ++ case SpvOpFRem: ++ case SpvOpFMod: ++ case SpvOpVectorTimesScalar: ++ case SpvOpDot: ++ case SpvOpIAddCarry: ++ case SpvOpISubBorrow: ++ case SpvOpUMulExtended: ++ case SpvOpSMulExtended: ++ case SpvOpShiftRightLogical: ++ case SpvOpShiftRightArithmetic: ++ case SpvOpShiftLeftLogical: ++ case SpvOpLogicalEqual: ++ case SpvOpLogicalNotEqual: ++ case SpvOpLogicalOr: ++ case SpvOpLogicalAnd: ++ case SpvOpLogicalNot: ++ case SpvOpBitwiseOr: ++ case SpvOpBitwiseXor: ++ case SpvOpBitwiseAnd: ++ case SpvOpSelect: ++ case SpvOpIEqual: ++ case SpvOpFOrdEqual: ++ case SpvOpFUnordEqual: ++ case SpvOpINotEqual: ++ case SpvOpFOrdNotEqual: ++ case SpvOpFUnordNotEqual: ++ case SpvOpULessThan: ++ case SpvOpSLessThan: ++ case SpvOpFOrdLessThan: ++ case SpvOpFUnordLessThan: ++ case SpvOpUGreaterThan: ++ case SpvOpSGreaterThan: ++ case SpvOpFOrdGreaterThan: ++ case SpvOpFUnordGreaterThan: ++ case SpvOpULessThanEqual: ++ case SpvOpSLessThanEqual: ++ case SpvOpFOrdLessThanEqual: ++ case SpvOpFUnordLessThanEqual: ++ case SpvOpUGreaterThanEqual: ++ case SpvOpSGreaterThanEqual: ++ case SpvOpFOrdGreaterThanEqual: ++ case SpvOpFUnordGreaterThanEqual: ++ case SpvOpDPdx: ++ case SpvOpDPdy: ++ case SpvOpFwidth: ++ case SpvOpDPdxFine: ++ case SpvOpDPdyFine: ++ case SpvOpFwidthFine: ++ case SpvOpDPdxCoarse: ++ case SpvOpDPdyCoarse: ++ case SpvOpFwidthCoarse: ++ case SpvOpBitFieldInsert: ++ case SpvOpBitFieldSExtract: ++ case SpvOpBitFieldUExtract: ++ case SpvOpBitReverse: ++ case SpvOpBitCount: ++ case SpvOpTranspose: ++ case SpvOpOuterProduct: ++ case SpvOpMatrixTimesScalar: ++ case SpvOpVectorTimesMatrix: ++ case SpvOpMatrixTimesVector: ++ case SpvOpMatrixTimesMatrix: ++ vtn_handle_alu(b, opcode, w, count); ++ break; ++ ++ case SpvOpVectorExtractDynamic: ++ case SpvOpVectorInsertDynamic: ++ case SpvOpVectorShuffle: ++ case SpvOpCompositeConstruct: ++ case SpvOpCompositeExtract: ++ case SpvOpCompositeInsert: ++ case SpvOpCopyObject: ++ vtn_handle_composite(b, opcode, w, count); ++ break; ++ ++ case SpvOpEmitVertex: ++ case SpvOpEndPrimitive: ++ case SpvOpEmitStreamVertex: ++ case SpvOpEndStreamPrimitive: ++ case SpvOpControlBarrier: ++ case SpvOpMemoryBarrier: ++ vtn_handle_barrier(b, opcode, w, count); ++ break; ++ ++ default: ++ unreachable("Unhandled opcode"); ++ } ++ ++ return true; ++} ++ ++nir_function * 
++spirv_to_nir(const uint32_t *words, size_t word_count, ++ struct nir_spirv_specialization *spec, unsigned num_spec, ++ gl_shader_stage stage, const char *entry_point_name, ++ const nir_shader_compiler_options *options) ++{ ++ const uint32_t *word_end = words + word_count; ++ ++ /* Handle the SPIR-V header (first 4 dwords) */ ++ assert(word_count > 5); ++ ++ assert(words[0] == SpvMagicNumber); ++ assert(words[1] >= 0x10000); ++ /* words[2] == generator magic */ ++ unsigned value_id_bound = words[3]; ++ assert(words[4] == 0); ++ ++ words+= 5; ++ ++ /* Initialize the stn_builder object */ ++ struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); ++ b->value_id_bound = value_id_bound; ++ b->values = rzalloc_array(b, struct vtn_value, value_id_bound); ++ exec_list_make_empty(&b->functions); ++ b->entry_point_stage = stage; ++ b->entry_point_name = entry_point_name; ++ ++ /* Handle all the preamble instructions */ ++ words = vtn_foreach_instruction(b, words, word_end, ++ vtn_handle_preamble_instruction); ++ ++ if (b->entry_point == NULL) { ++ assert(!"Entry point not found"); ++ ralloc_free(b); ++ return NULL; ++ } ++ ++ b->shader = nir_shader_create(NULL, stage, options); ++ ++ /* Parse execution modes */ ++ vtn_foreach_execution_mode(b, b->entry_point, ++ vtn_handle_execution_mode, NULL); ++ ++ b->specializations = spec; ++ b->num_specializations = num_spec; ++ ++ /* Handle all variable, type, and constant instructions */ ++ words = vtn_foreach_instruction(b, words, word_end, ++ vtn_handle_variable_or_type_instruction); ++ ++ vtn_build_cfg(b, words, word_end); ++ ++ foreach_list_typed(struct vtn_function, func, node, &b->functions) { ++ b->impl = func->impl; ++ b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, ++ _mesa_key_pointer_equal); ++ ++ vtn_function_emit(b, func, vtn_handle_body_instruction); ++ } ++ ++ assert(b->entry_point->value_type == vtn_value_type_function); ++ nir_function *entry_point = b->entry_point->func->impl->function; ++ assert(entry_point); ++ ++ ralloc_free(b); ++ ++ return entry_point; ++} diff --cc src/compiler/nir/spirv/vtn_alu.c index 00000000000,00000000000..d866da7445e new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_alu.c @@@ -1,0 -1,0 +1,448 @@@ ++/* ++ * Copyright © 2016 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++#include "vtn_private.h" ++ ++/* ++ * Normally, column vectors in SPIR-V correspond to a single NIR SSA ++ * definition. But for matrix multiplies, we want to do one routine for ++ * multiplying a matrix by a matrix and then pretend that vectors are matrices ++ * with one column. So we "wrap" these things, and unwrap the result before we ++ * send it off. ++ */ ++ ++static struct vtn_ssa_value * ++wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) ++{ ++ if (val == NULL) ++ return NULL; ++ ++ if (glsl_type_is_matrix(val->type)) ++ return val; ++ ++ struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); ++ dest->type = val->type; ++ dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); ++ dest->elems[0] = val; ++ ++ return dest; ++} ++ ++static struct vtn_ssa_value * ++unwrap_matrix(struct vtn_ssa_value *val) ++{ ++ if (glsl_type_is_matrix(val->type)) ++ return val; ++ ++ return val->elems[0]; ++} ++ ++static struct vtn_ssa_value * ++matrix_multiply(struct vtn_builder *b, ++ struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) ++{ ++ ++ struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); ++ struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); ++ struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); ++ struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); ++ ++ unsigned src0_rows = glsl_get_vector_elements(src0->type); ++ unsigned src0_columns = glsl_get_matrix_columns(src0->type); ++ unsigned src1_columns = glsl_get_matrix_columns(src1->type); ++ ++ const struct glsl_type *dest_type; ++ if (src1_columns > 1) { ++ dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), ++ src0_rows, src1_columns); ++ } else { ++ dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); ++ } ++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); ++ ++ dest = wrap_matrix(b, dest); ++ ++ bool transpose_result = false; ++ if (src0_transpose && src1_transpose) { ++ /* transpose(A) * transpose(B) = transpose(B * A) */ ++ src1 = src0_transpose; ++ src0 = src1_transpose; ++ src0_transpose = NULL; ++ src1_transpose = NULL; ++ transpose_result = true; ++ } ++ ++ if (src0_transpose && !src1_transpose && ++ glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { ++ /* We already have the rows of src0 and the columns of src1 available, ++ * so we can just take the dot product of each row with each column to ++ * get the result. ++ */ ++ ++ for (unsigned i = 0; i < src1_columns; i++) { ++ nir_ssa_def *vec_src[4]; ++ for (unsigned j = 0; j < src0_rows; j++) { ++ vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, ++ src1->elems[i]->def); ++ } ++ dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); ++ } ++ } else { ++ /* We don't handle the case where src1 is transposed but not src0, since ++ * the general case only uses individual components of src1 so the ++ * optimizer should chew through the transpose we emitted for src1. 
++ */ ++ ++ for (unsigned i = 0; i < src1_columns; i++) { ++ /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ ++ dest->elems[i]->def = ++ nir_fmul(&b->nb, src0->elems[0]->def, ++ nir_channel(&b->nb, src1->elems[i]->def, 0)); ++ for (unsigned j = 1; j < src0_columns; j++) { ++ dest->elems[i]->def = ++ nir_fadd(&b->nb, dest->elems[i]->def, ++ nir_fmul(&b->nb, src0->elems[j]->def, ++ nir_channel(&b->nb, src1->elems[i]->def, j))); ++ } ++ } ++ } ++ ++ dest = unwrap_matrix(dest); ++ ++ if (transpose_result) ++ dest = vtn_ssa_transpose(b, dest); ++ ++ return dest; ++} ++ ++static struct vtn_ssa_value * ++mat_times_scalar(struct vtn_builder *b, ++ struct vtn_ssa_value *mat, ++ nir_ssa_def *scalar) ++{ ++ struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); ++ for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { ++ if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) ++ dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); ++ else ++ dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); ++ } ++ ++ return dest; ++} ++ ++static void ++vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, ++ struct vtn_value *dest, ++ struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) ++{ ++ switch (opcode) { ++ case SpvOpFNegate: { ++ dest->ssa = vtn_create_ssa_value(b, src0->type); ++ unsigned cols = glsl_get_matrix_columns(src0->type); ++ for (unsigned i = 0; i < cols; i++) ++ dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); ++ break; ++ } ++ ++ case SpvOpFAdd: { ++ dest->ssa = vtn_create_ssa_value(b, src0->type); ++ unsigned cols = glsl_get_matrix_columns(src0->type); ++ for (unsigned i = 0; i < cols; i++) ++ dest->ssa->elems[i]->def = ++ nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); ++ break; ++ } ++ ++ case SpvOpFSub: { ++ dest->ssa = vtn_create_ssa_value(b, src0->type); ++ unsigned cols = glsl_get_matrix_columns(src0->type); ++ for (unsigned i = 0; i < cols; i++) ++ dest->ssa->elems[i]->def = ++ nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); ++ break; ++ } ++ ++ case SpvOpTranspose: ++ dest->ssa = vtn_ssa_transpose(b, src0); ++ break; ++ ++ case SpvOpMatrixTimesScalar: ++ if (src0->transposed) { ++ dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, ++ src1->def)); ++ } else { ++ dest->ssa = mat_times_scalar(b, src0, src1->def); ++ } ++ break; ++ ++ case SpvOpVectorTimesMatrix: ++ case SpvOpMatrixTimesVector: ++ case SpvOpMatrixTimesMatrix: ++ if (opcode == SpvOpVectorTimesMatrix) { ++ dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); ++ } else { ++ dest->ssa = matrix_multiply(b, src0, src1); ++ } ++ break; ++ ++ default: unreachable("unknown matrix opcode"); ++ } ++} ++ ++nir_op ++vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap) ++{ ++ /* Indicates that the first two arguments should be swapped. This is ++ * used for implementing greater-than and less-than-or-equal. 
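++    * For example, "a > b" is emitted as "b < a": SpvOpUGreaterThan maps to
++    * nir_op_ult with *swap set to true.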
++ */ ++ *swap = false; ++ ++ switch (opcode) { ++ case SpvOpSNegate: return nir_op_ineg; ++ case SpvOpFNegate: return nir_op_fneg; ++ case SpvOpNot: return nir_op_inot; ++ case SpvOpIAdd: return nir_op_iadd; ++ case SpvOpFAdd: return nir_op_fadd; ++ case SpvOpISub: return nir_op_isub; ++ case SpvOpFSub: return nir_op_fsub; ++ case SpvOpIMul: return nir_op_imul; ++ case SpvOpFMul: return nir_op_fmul; ++ case SpvOpUDiv: return nir_op_udiv; ++ case SpvOpSDiv: return nir_op_idiv; ++ case SpvOpFDiv: return nir_op_fdiv; ++ case SpvOpUMod: return nir_op_umod; ++ case SpvOpSMod: return nir_op_imod; ++ case SpvOpFMod: return nir_op_fmod; ++ case SpvOpSRem: return nir_op_irem; ++ case SpvOpFRem: return nir_op_frem; ++ ++ case SpvOpShiftRightLogical: return nir_op_ushr; ++ case SpvOpShiftRightArithmetic: return nir_op_ishr; ++ case SpvOpShiftLeftLogical: return nir_op_ishl; ++ case SpvOpLogicalOr: return nir_op_ior; ++ case SpvOpLogicalEqual: return nir_op_ieq; ++ case SpvOpLogicalNotEqual: return nir_op_ine; ++ case SpvOpLogicalAnd: return nir_op_iand; ++ case SpvOpLogicalNot: return nir_op_inot; ++ case SpvOpBitwiseOr: return nir_op_ior; ++ case SpvOpBitwiseXor: return nir_op_ixor; ++ case SpvOpBitwiseAnd: return nir_op_iand; ++ case SpvOpSelect: return nir_op_bcsel; ++ case SpvOpIEqual: return nir_op_ieq; ++ ++ case SpvOpBitFieldInsert: return nir_op_bitfield_insert; ++ case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; ++ case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; ++ case SpvOpBitReverse: return nir_op_bitfield_reverse; ++ case SpvOpBitCount: return nir_op_bit_count; ++ ++ /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ ++ case SpvOpFOrdEqual: return nir_op_feq; ++ case SpvOpFUnordEqual: return nir_op_feq; ++ case SpvOpINotEqual: return nir_op_ine; ++ case SpvOpFOrdNotEqual: return nir_op_fne; ++ case SpvOpFUnordNotEqual: return nir_op_fne; ++ case SpvOpULessThan: return nir_op_ult; ++ case SpvOpSLessThan: return nir_op_ilt; ++ case SpvOpFOrdLessThan: return nir_op_flt; ++ case SpvOpFUnordLessThan: return nir_op_flt; ++ case SpvOpUGreaterThan: *swap = true; return nir_op_ult; ++ case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; ++ case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt; ++ case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt; ++ case SpvOpULessThanEqual: *swap = true; return nir_op_uge; ++ case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; ++ case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge; ++ case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge; ++ case SpvOpUGreaterThanEqual: return nir_op_uge; ++ case SpvOpSGreaterThanEqual: return nir_op_ige; ++ case SpvOpFOrdGreaterThanEqual: return nir_op_fge; ++ case SpvOpFUnordGreaterThanEqual: return nir_op_fge; ++ ++ /* Conversions: */ ++ case SpvOpConvertFToU: return nir_op_f2u; ++ case SpvOpConvertFToS: return nir_op_f2i; ++ case SpvOpConvertSToF: return nir_op_i2f; ++ case SpvOpConvertUToF: return nir_op_u2f; ++ case SpvOpBitcast: return nir_op_imov; ++ case SpvOpUConvert: ++ case SpvOpQuantizeToF16: return nir_op_fquantize2f16; ++ /* TODO: NIR is 32-bit only; these are no-ops. 
*/ ++ case SpvOpSConvert: return nir_op_imov; ++ case SpvOpFConvert: return nir_op_fmov; ++ ++ /* Derivatives: */ ++ case SpvOpDPdx: return nir_op_fddx; ++ case SpvOpDPdy: return nir_op_fddy; ++ case SpvOpDPdxFine: return nir_op_fddx_fine; ++ case SpvOpDPdyFine: return nir_op_fddy_fine; ++ case SpvOpDPdxCoarse: return nir_op_fddx_coarse; ++ case SpvOpDPdyCoarse: return nir_op_fddy_coarse; ++ ++ default: ++ unreachable("No NIR equivalent"); ++ } ++} ++ ++void ++vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ const struct glsl_type *type = ++ vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ ++ /* Collect the various SSA sources */ ++ const unsigned num_inputs = count - 3; ++ struct vtn_ssa_value *vtn_src[4] = { NULL, }; ++ for (unsigned i = 0; i < num_inputs; i++) ++ vtn_src[i] = vtn_ssa_value(b, w[i + 3]); ++ ++ if (glsl_type_is_matrix(vtn_src[0]->type) || ++ (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { ++ vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]); ++ return; ++ } ++ ++ val->ssa = vtn_create_ssa_value(b, type); ++ nir_ssa_def *src[4] = { NULL, }; ++ for (unsigned i = 0; i < num_inputs; i++) { ++ assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); ++ src[i] = vtn_src[i]->def; ++ } ++ ++ switch (opcode) { ++ case SpvOpAny: ++ if (src[0]->num_components == 1) { ++ val->ssa->def = nir_imov(&b->nb, src[0]); ++ } else { ++ nir_op op; ++ switch (src[0]->num_components) { ++ case 2: op = nir_op_bany_inequal2; break; ++ case 3: op = nir_op_bany_inequal3; break; ++ case 4: op = nir_op_bany_inequal4; break; ++ } ++ val->ssa->def = nir_build_alu(&b->nb, op, src[0], ++ nir_imm_int(&b->nb, NIR_FALSE), ++ NULL, NULL); ++ } ++ return; ++ ++ case SpvOpAll: ++ if (src[0]->num_components == 1) { ++ val->ssa->def = nir_imov(&b->nb, src[0]); ++ } else { ++ nir_op op; ++ switch (src[0]->num_components) { ++ case 2: op = nir_op_ball_iequal2; break; ++ case 3: op = nir_op_ball_iequal3; break; ++ case 4: op = nir_op_ball_iequal4; break; ++ } ++ val->ssa->def = nir_build_alu(&b->nb, op, src[0], ++ nir_imm_int(&b->nb, NIR_TRUE), ++ NULL, NULL); ++ } ++ return; ++ ++ case SpvOpOuterProduct: { ++ for (unsigned i = 0; i < src[1]->num_components; i++) { ++ val->ssa->elems[i]->def = ++ nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); ++ } ++ return; ++ } ++ ++ case SpvOpDot: ++ val->ssa->def = nir_fdot(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpIAddCarry: ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); ++ val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpISubBorrow: ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); ++ val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpUMulExtended: ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); ++ val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpSMulExtended: ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); ++ val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpFwidth: ++ val->ssa->def = nir_fadd(&b->nb, ++ nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), ++ nir_fabs(&b->nb, nir_fddx(&b->nb, 
src[1]))); ++ return; ++ case SpvOpFwidthFine: ++ val->ssa->def = nir_fadd(&b->nb, ++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), ++ nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); ++ return; ++ case SpvOpFwidthCoarse: ++ val->ssa->def = nir_fadd(&b->nb, ++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), ++ nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); ++ return; ++ ++ case SpvOpVectorTimesScalar: ++ /* The builder will take care of splatting for us. */ ++ val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); ++ return; ++ ++ case SpvOpIsNan: ++ val->ssa->def = nir_fne(&b->nb, src[0], src[0]); ++ return; ++ ++ case SpvOpIsInf: ++ val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]), ++ nir_imm_float(&b->nb, INFINITY)); ++ return; ++ ++ default: { ++ bool swap; ++ nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); ++ ++ if (swap) { ++ nir_ssa_def *tmp = src[0]; ++ src[0] = src[1]; ++ src[1] = tmp; ++ } ++ ++ val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); ++ return; ++ } /* default */ ++ } ++} diff --cc src/compiler/nir/spirv/vtn_cfg.c index 00000000000,00000000000..041408b1cfb new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_cfg.c @@@ -1,0 -1,0 +1,768 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++#include "vtn_private.h" ++#include "nir/nir_vla.h" ++ ++static bool ++vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpFunction: { ++ assert(b->func == NULL); ++ b->func = rzalloc(b, struct vtn_function); ++ ++ list_inithead(&b->func->body); ++ b->func->control = w[3]; ++ ++ const struct glsl_type *result_type = ++ vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); ++ val->func = b->func; ++ ++ const struct glsl_type *func_type = ++ vtn_value(b, w[4], vtn_value_type_type)->type->type; ++ ++ assert(glsl_get_function_return_type(func_type) == result_type); ++ ++ nir_function *func = ++ nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); ++ ++ func->num_params = glsl_get_length(func_type); ++ func->params = ralloc_array(b->shader, nir_parameter, func->num_params); ++ for (unsigned i = 0; i < func->num_params; i++) { ++ const struct glsl_function_param *param = ++ glsl_get_function_param(func_type, i); ++ func->params[i].type = param->type; ++ if (param->in) { ++ if (param->out) { ++ func->params[i].param_type = nir_parameter_inout; ++ } else { ++ func->params[i].param_type = nir_parameter_in; ++ } ++ } else { ++ if (param->out) { ++ func->params[i].param_type = nir_parameter_out; ++ } else { ++ assert(!"Parameter is neither in nor out"); ++ } ++ } ++ } ++ ++ func->return_type = glsl_get_function_return_type(func_type); ++ ++ b->func->impl = nir_function_impl_create(func); ++ if (!glsl_type_is_void(func->return_type)) { ++ b->func->impl->return_var = ++ nir_local_variable_create(b->func->impl, func->return_type, "ret"); ++ } ++ ++ b->func_param_idx = 0; ++ break; ++ } ++ ++ case SpvOpFunctionEnd: ++ b->func->end = w; ++ b->func = NULL; ++ break; ++ ++ case SpvOpFunctionParameter: { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_access_chain); ++ ++ assert(b->func_param_idx < b->func->impl->num_params); ++ unsigned idx = b->func_param_idx++; ++ ++ nir_variable *param = ++ nir_local_variable_create(b->func->impl, ++ b->func->impl->function->params[idx].type, ++ val->name); ++ b->func->impl->params[idx] = param; ++ ++ struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable); ++ vtn_var->mode = vtn_variable_mode_param; ++ vtn_var->type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ vtn_var->var = param; ++ vtn_var->chain.var = vtn_var; ++ vtn_var->chain.length = 0; ++ ++ val->access_chain = &vtn_var->chain; ++ break; ++ } ++ ++ case SpvOpLabel: { ++ assert(b->block == NULL); ++ b->block = rzalloc(b, struct vtn_block); ++ b->block->node.type = vtn_cf_node_type_block; ++ b->block->label = w; ++ vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; ++ ++ if (b->func->start_block == NULL) { ++ /* This is the first block encountered for this function. In this ++ * case, we set the start block and add it to the list of ++ * implemented functions that we'll walk later. 
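++          * Functions with no blocks (declarations without a body) never get
++          * a start block and are therefore skipped by that later walk.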
++ */ ++ b->func->start_block = b->block; ++ exec_list_push_tail(&b->functions, &b->func->node); ++ } ++ break; ++ } ++ ++ case SpvOpSelectionMerge: ++ case SpvOpLoopMerge: ++ assert(b->block && b->block->merge == NULL); ++ b->block->merge = w; ++ break; ++ ++ case SpvOpBranch: ++ case SpvOpBranchConditional: ++ case SpvOpSwitch: ++ case SpvOpKill: ++ case SpvOpReturn: ++ case SpvOpReturnValue: ++ case SpvOpUnreachable: ++ assert(b->block && b->block->branch == NULL); ++ b->block->branch = w; ++ b->block = NULL; ++ break; ++ ++ default: ++ /* Continue on as per normal */ ++ return true; ++ } ++ ++ return true; ++} ++ ++static void ++vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch, ++ struct vtn_block *break_block, ++ uint32_t block_id, uint32_t val, bool is_default) ++{ ++ struct vtn_block *case_block = ++ vtn_value(b, block_id, vtn_value_type_block)->block; ++ ++ /* Don't create dummy cases that just break */ ++ if (case_block == break_block) ++ return; ++ ++ if (case_block->switch_case == NULL) { ++ struct vtn_case *c = ralloc(b, struct vtn_case); ++ ++ list_inithead(&c->body); ++ c->start_block = case_block; ++ c->fallthrough = NULL; ++ nir_array_init(&c->values, b); ++ c->is_default = false; ++ c->visited = false; ++ ++ list_addtail(&c->link, &swtch->cases); ++ ++ case_block->switch_case = c; ++ } ++ ++ if (is_default) { ++ case_block->switch_case->is_default = true; ++ } else { ++ nir_array_add(&case_block->switch_case->values, uint32_t, val); ++ } ++} ++ ++/* This function performs a depth-first search of the cases and puts them ++ * in fall-through order. ++ */ ++static void ++vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse) ++{ ++ if (cse->visited) ++ return; ++ ++ cse->visited = true; ++ ++ list_del(&cse->link); ++ ++ if (cse->fallthrough) { ++ vtn_order_case(swtch, cse->fallthrough); ++ ++ /* If we have a fall-through, place this case right before the case it ++ * falls through to. This ensures that fallthroughs come one after ++ * the other. These two can never get separated because that would ++ * imply something else falling through to the same case. Also, this ++ * can't break ordering because the DFS ensures that this case is ++ * visited before anything that falls through to it. 
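++       * For example, if case A falls through to B and B falls through to C,
++       * the final order is A, B, C no matter which order the cases appeared
++       * in the OpSwitch operands.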
++ */ ++ list_addtail(&cse->link, &cse->fallthrough->link); ++ } else { ++ list_add(&cse->link, &swtch->cases); ++ } ++} ++ ++static enum vtn_branch_type ++vtn_get_branch_type(struct vtn_block *block, ++ struct vtn_case *swcase, struct vtn_block *switch_break, ++ struct vtn_block *loop_break, struct vtn_block *loop_cont) ++{ ++ if (block->switch_case) { ++ /* This branch is actually a fallthrough */ ++ assert(swcase->fallthrough == NULL || ++ swcase->fallthrough == block->switch_case); ++ swcase->fallthrough = block->switch_case; ++ return vtn_branch_type_switch_fallthrough; ++ } else if (block == switch_break) { ++ return vtn_branch_type_switch_break; ++ } else if (block == loop_break) { ++ return vtn_branch_type_loop_break; ++ } else if (block == loop_cont) { ++ return vtn_branch_type_loop_continue; ++ } else { ++ return vtn_branch_type_none; ++ } ++} ++ ++static void ++vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, ++ struct vtn_block *start, struct vtn_case *switch_case, ++ struct vtn_block *switch_break, ++ struct vtn_block *loop_break, struct vtn_block *loop_cont, ++ struct vtn_block *end) ++{ ++ struct vtn_block *block = start; ++ while (block != end) { ++ if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge && ++ !block->loop) { ++ struct vtn_loop *loop = ralloc(b, struct vtn_loop); ++ ++ loop->node.type = vtn_cf_node_type_loop; ++ list_inithead(&loop->body); ++ list_inithead(&loop->cont_body); ++ loop->control = block->merge[3]; ++ ++ list_addtail(&loop->node.link, cf_list); ++ block->loop = loop; ++ ++ struct vtn_block *new_loop_break = ++ vtn_value(b, block->merge[1], vtn_value_type_block)->block; ++ struct vtn_block *new_loop_cont = ++ vtn_value(b, block->merge[2], vtn_value_type_block)->block; ++ ++ /* Note: This recursive call will start with the current block as ++ * its start block. If we weren't careful, we would get here ++ * again and end up in infinite recursion. This is why we set ++ * block->loop above and check for it before creating one. This ++ * way, we only create the loop once and the second call that ++ * tries to handle this loop goes to the cases below and gets ++ * handled as a regular block. ++ * ++ * Note: When we make the recursive walk calls, we pass NULL for ++ * the switch break since you have to break out of the loop first. ++ * We do, however, still pass the current switch case because it's ++ * possible that the merge block for the loop is the start of ++ * another case. 
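++          * The second walk below handles the continue construct separately:
++          * it starts at the continue target and ends at the loop header, so
++          * the back edge never needs to be represented explicitly.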
++          */
++         vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
++                             new_loop_break, new_loop_cont, NULL );
++         vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
++                             new_loop_break, NULL, block);
++
++         block = new_loop_break;
++         continue;
++      }
++
++      assert(block->node.link.next == NULL);
++      list_addtail(&block->node.link, cf_list);
++
++      switch (*block->branch & SpvOpCodeMask) {
++      case SpvOpBranch: {
++         struct vtn_block *branch_block =
++            vtn_value(b, block->branch[1], vtn_value_type_block)->block;
++
++         block->branch_type = vtn_get_branch_type(branch_block,
++                                                  switch_case, switch_break,
++                                                  loop_break, loop_cont);
++
++         if (block->branch_type != vtn_branch_type_none)
++            return;
++
++         block = branch_block;
++         continue;
++      }
++
++      case SpvOpReturn:
++      case SpvOpReturnValue:
++         block->branch_type = vtn_branch_type_return;
++         return;
++
++      case SpvOpKill:
++         block->branch_type = vtn_branch_type_discard;
++         return;
++
++      case SpvOpBranchConditional: {
++         struct vtn_block *then_block =
++            vtn_value(b, block->branch[2], vtn_value_type_block)->block;
++         struct vtn_block *else_block =
++            vtn_value(b, block->branch[3], vtn_value_type_block)->block;
++
++         struct vtn_if *if_stmt = ralloc(b, struct vtn_if);
++
++         if_stmt->node.type = vtn_cf_node_type_if;
++         if_stmt->condition = block->branch[1];
++         list_inithead(&if_stmt->then_body);
++         list_inithead(&if_stmt->else_body);
++
++         list_addtail(&if_stmt->node.link, cf_list);
++
++         if (block->merge &&
++             (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
++            if_stmt->control = block->merge[2];
++         }
++
++         if_stmt->then_type = vtn_get_branch_type(then_block,
++                                                  switch_case, switch_break,
++                                                  loop_break, loop_cont);
++         if_stmt->else_type = vtn_get_branch_type(else_block,
++                                                  switch_case, switch_break,
++                                                  loop_break, loop_cont);
++
++         if (if_stmt->then_type == vtn_branch_type_none &&
++             if_stmt->else_type == vtn_branch_type_none) {
++            /* Neither side of the if is something we can short-circuit. */
++            assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++            struct vtn_block *merge_block =
++               vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++            vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
++                                switch_case, switch_break,
++                                loop_break, loop_cont, merge_block);
++            vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
++                                switch_case, switch_break,
++                                loop_break, loop_cont, merge_block);
++
++            enum vtn_branch_type merge_type =
++               vtn_get_branch_type(merge_block, switch_case, switch_break,
++                                   loop_break, loop_cont);
++            if (merge_type == vtn_branch_type_none) {
++               block = merge_block;
++               continue;
++            } else {
++               return;
++            }
++         } else if (if_stmt->then_type != vtn_branch_type_none &&
++                    if_stmt->else_type != vtn_branch_type_none) {
++            /* Both sides were short-circuited. We're done here. */
++            return;
++         } else {
++            /* Exactly one side of the branch could be short-circuited.
++             * We set the branch up as a predicated break/continue and we
++             * continue on with the other side as if it were what comes
++             * after the if.
++             */
++            if (if_stmt->then_type == vtn_branch_type_none) {
++               block = then_block;
++            } else {
++               block = else_block;
++            }
++            continue;
++         }
++         unreachable("Should have returned or continued");
++      }
++
++      case SpvOpSwitch: {
++         assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
++         struct vtn_block *break_block =
++            vtn_value(b, block->merge[1], vtn_value_type_block)->block;
++
++         struct vtn_switch *swtch = ralloc(b, struct vtn_switch);
++
++         swtch->node.type = vtn_cf_node_type_switch;
++         swtch->selector = block->branch[1];
++         list_inithead(&swtch->cases);
++
++         list_addtail(&swtch->node.link, cf_list);
++
++         /* First, we go through and record all of the cases. */
++         const uint32_t *branch_end =
++            block->branch + (block->branch[0] >> SpvWordCountShift);
++
++         vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
++         for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
++            vtn_add_case(b, swtch, break_block, w[1], w[0], false);
++
++         /* Now, we go through and walk the blocks. While we walk through
++          * the blocks, we also gather the much-needed fall-through
++          * information.
++          */
++         list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
++            assert(cse->start_block != break_block);
++            vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
++                                break_block, NULL, loop_cont, NULL);
++         }
++
++         /* Finally, we walk over all of the cases one more time and put
++          * them in fall-through order.
++          */
++         for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
++            struct vtn_block *case_block =
++               vtn_value(b, *w, vtn_value_type_block)->block;
++
++            if (case_block == break_block)
++               continue;
++
++            assert(case_block->switch_case);
++
++            vtn_order_case(swtch, case_block->switch_case);
++         }
++
++         block = break_block;
++         continue;
++      }
++
++      case SpvOpUnreachable:
++         return;
++
++      default:
++         unreachable("Unhandled opcode");
++      }
++   }
++}
++
++void
++vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
++{
++   vtn_foreach_instruction(b, words, end,
++                           vtn_cfg_handle_prepass_instruction);
++
++   foreach_list_typed(struct vtn_function, func, node, &b->functions) {
++      vtn_cfg_walk_blocks(b, &func->body, func->start_block,
++                          NULL, NULL, NULL, NULL, NULL);
++   }
++}
++
++static bool
++vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
++                           const uint32_t *w, unsigned count)
++{
++   if (opcode == SpvOpLabel)
++      return true; /* Nothing to do */
++
++   /* If this isn't a phi node, stop. */
++   if (opcode != SpvOpPhi)
++      return false;
++
++   /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
++    * For each phi, we create a variable with the appropriate type and
++    * do a load from that variable. Then, in a second pass, we add
++    * stores to that variable to each of the predecessor blocks.
++    *
++    * We could do something more intelligent here. However, in order to
++    * handle loops and things properly, we really need dominance
++    * information. It would end up basically being the into-SSA
++    * algorithm all over again. It's easier if we just let
++    * lower_vars_to_ssa do that for us instead of repeating it here.
++ */ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ ++ struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ nir_variable *phi_var = ++ nir_local_variable_create(b->nb.impl, type->type, "phi"); ++ _mesa_hash_table_insert(b->phi_table, w, phi_var); ++ ++ val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var)); ++ ++ return true; ++} ++ ++static bool ++vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ if (opcode != SpvOpPhi) ++ return true; ++ ++ struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w); ++ assert(phi_entry); ++ nir_variable *phi_var = phi_entry->data; ++ ++ for (unsigned i = 3; i < count; i += 2) { ++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]); ++ struct vtn_block *pred = ++ vtn_value(b, w[i + 1], vtn_value_type_block)->block; ++ ++ b->nb.cursor = nir_after_block_before_jump(pred->end_block); ++ ++ vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); ++ } ++ ++ return true; ++} ++ ++static void ++vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type, ++ nir_variable *switch_fall_var, bool *has_switch_break) ++{ ++ switch (branch_type) { ++ case vtn_branch_type_switch_break: ++ nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); ++ *has_switch_break = true; ++ break; ++ case vtn_branch_type_switch_fallthrough: ++ break; /* Nothing to do */ ++ case vtn_branch_type_loop_break: ++ nir_jump(&b->nb, nir_jump_break); ++ break; ++ case vtn_branch_type_loop_continue: ++ nir_jump(&b->nb, nir_jump_continue); ++ break; ++ case vtn_branch_type_return: ++ nir_jump(&b->nb, nir_jump_return); ++ break; ++ case vtn_branch_type_discard: { ++ nir_intrinsic_instr *discard = ++ nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard); ++ nir_builder_instr_insert(&b->nb, &discard->instr); ++ break; ++ } ++ default: ++ unreachable("Invalid branch type"); ++ } ++} ++ ++static void ++vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, ++ nir_variable *switch_fall_var, bool *has_switch_break, ++ vtn_instruction_handler handler) ++{ ++ list_for_each_entry(struct vtn_cf_node, node, cf_list, link) { ++ switch (node->type) { ++ case vtn_cf_node_type_block: { ++ struct vtn_block *block = (struct vtn_block *)node; ++ ++ const uint32_t *block_start = block->label; ++ const uint32_t *block_end = block->merge ? 
block->merge :
++                                                    block->branch;
++
++         block_start = vtn_foreach_instruction(b, block_start, block_end,
++                                               vtn_handle_phis_first_pass);
++
++         vtn_foreach_instruction(b, block_start, block_end, handler);
++
++         block->end_block = nir_cursor_current_block(b->nb.cursor);
++
++         if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
++            struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
++            vtn_local_store(b, src,
++                            nir_deref_var_create(b, b->impl->return_var));
++         }
++
++         if (block->branch_type != vtn_branch_type_none) {
++            vtn_emit_branch(b, block->branch_type,
++                            switch_fall_var, has_switch_break);
++         }
++
++         break;
++      }
++
++      case vtn_cf_node_type_if: {
++         struct vtn_if *vtn_if = (struct vtn_if *)node;
++
++         nir_if *if_stmt = nir_if_create(b->shader);
++         if_stmt->condition =
++            nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
++         nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
++
++         bool sw_break = false;
++
++         b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++         if (vtn_if->then_type == vtn_branch_type_none) {
++            vtn_emit_cf_list(b, &vtn_if->then_body,
++                             switch_fall_var, &sw_break, handler);
++         } else {
++            vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
++         }
++
++         b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
++         if (vtn_if->else_type == vtn_branch_type_none) {
++            vtn_emit_cf_list(b, &vtn_if->else_body,
++                             switch_fall_var, &sw_break, handler);
++         } else {
++            vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
++         }
++
++         b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
++
++         /* If we encountered a switch break somewhere inside of the if,
++          * then it would have been handled correctly by calling
++          * emit_cf_list or emit_branch for the interior. However, we
++          * need to predicate everything following on whether or not we're
++          * still going.
++          */
++         if (sw_break) {
++            *has_switch_break = true;
++
++            nir_if *switch_if = nir_if_create(b->shader);
++            switch_if->condition =
++               nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
++            nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);
++
++            b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
++         }
++         break;
++      }
++
++      case vtn_cf_node_type_loop: {
++         struct vtn_loop *vtn_loop = (struct vtn_loop *)node;
++
++         nir_loop *loop = nir_loop_create(b->shader);
++         nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
++
++         b->nb.cursor = nir_after_cf_list(&loop->body);
++         vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);
++
++         if (!list_empty(&vtn_loop->cont_body)) {
++            /* If we have a non-trivial continue body then we need to put
++             * it at the beginning of the loop with a flag to ensure that
++             * it doesn't get executed in the first iteration.
++ */ ++ nir_variable *do_cont = ++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont"); ++ ++ b->nb.cursor = nir_before_cf_node(&loop->cf_node); ++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1); ++ ++ b->nb.cursor = nir_before_cf_list(&loop->body); ++ nir_if *cont_if = nir_if_create(b->shader); ++ cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont)); ++ nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node); ++ ++ b->nb.cursor = nir_after_cf_list(&cont_if->then_list); ++ vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler); ++ ++ b->nb.cursor = nir_after_cf_node(&cont_if->cf_node); ++ nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1); ++ ++ b->has_loop_continue = true; ++ } ++ ++ b->nb.cursor = nir_after_cf_node(&loop->cf_node); ++ break; ++ } ++ ++ case vtn_cf_node_type_switch: { ++ struct vtn_switch *vtn_switch = (struct vtn_switch *)node; ++ ++ /* First, we create a variable to keep track of whether or not the ++ * switch is still going at any given point. Any switch breaks ++ * will set this variable to false. ++ */ ++ nir_variable *fall_var = ++ nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall"); ++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1); ++ ++ /* Next, we gather up all of the conditions. We have to do this ++ * up-front because we also need to build an "any" condition so ++ * that we can use !any for default. ++ */ ++ const int num_cases = list_length(&vtn_switch->cases); ++ NIR_VLA(nir_ssa_def *, conditions, num_cases); ++ ++ nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def; ++ /* An accumulation of all conditions. Used for the default */ ++ nir_ssa_def *any = NULL; ++ ++ int i = 0; ++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { ++ if (cse->is_default) { ++ conditions[i++] = NULL; ++ continue; ++ } ++ ++ nir_ssa_def *cond = NULL; ++ nir_array_foreach(&cse->values, uint32_t, val) { ++ nir_ssa_def *is_val = ++ nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val)); ++ ++ cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val; ++ } ++ ++ any = any ? 
nir_ior(&b->nb, any, cond) : cond; ++ conditions[i++] = cond; ++ } ++ assert(i == num_cases); ++ ++ /* Now we can walk the list of cases and actually emit code */ ++ i = 0; ++ list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) { ++ /* Figure out the condition */ ++ nir_ssa_def *cond = conditions[i++]; ++ if (cse->is_default) { ++ assert(cond == NULL); ++ cond = nir_inot(&b->nb, any); ++ } ++ /* Take fallthrough into account */ ++ cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var)); ++ ++ nir_if *case_if = nir_if_create(b->nb.shader); ++ case_if->condition = nir_src_for_ssa(cond); ++ nir_cf_node_insert(b->nb.cursor, &case_if->cf_node); ++ ++ bool has_break = false; ++ b->nb.cursor = nir_after_cf_list(&case_if->then_list); ++ nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1); ++ vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler); ++ (void)has_break; /* We don't care */ ++ ++ b->nb.cursor = nir_after_cf_node(&case_if->cf_node); ++ } ++ assert(i == num_cases); ++ ++ break; ++ } ++ ++ default: ++ unreachable("Invalid CF node type"); ++ } ++ } ++} ++ ++void ++vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, ++ vtn_instruction_handler instruction_handler) ++{ ++ nir_builder_init(&b->nb, func->impl); ++ b->nb.cursor = nir_after_cf_list(&func->impl->body); ++ b->has_loop_continue = false; ++ b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer, ++ _mesa_key_pointer_equal); ++ ++ vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler); ++ ++ vtn_foreach_instruction(b, func->start_block->label, func->end, ++ vtn_handle_phi_second_pass); ++ ++ /* Continue blocks for loops get inserted before the body of the loop ++ * but instructions in the continue may use SSA defs in the loop body. ++ * Therefore, we need to repair SSA to insert the needed phi nodes. ++ */ ++ if (b->has_loop_continue) ++ nir_repair_ssa_impl(func->impl); ++} diff --cc src/compiler/nir/spirv/vtn_glsl450.c index 00000000000,00000000000..bc38aa4b1be new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@@ -1,0 -1,0 +1,684 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ *
++ * Authors:
++ *    Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "vtn_private.h"
++#include "GLSL.std.450.h"
++
++#define M_PIf ((float) M_PI)
++#define M_PI_2f ((float) M_PI_2)
++#define M_PI_4f ((float) M_PI_4)
++
++static nir_ssa_def *
++build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
++{
++   unsigned swiz[4] = {1, 0, 0, 0};
++   nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
++   return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
++}
++
++static nir_ssa_def *
++build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
++{
++   unsigned yzx[4] = {1, 2, 0, 0};
++   unsigned zxy[4] = {2, 0, 1, 0};
++
++   nir_ssa_def *prod0 =
++      nir_fmul(b, col[0],
++               nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
++                        nir_swizzle(b, col[2], zxy, 3, true)));
++   nir_ssa_def *prod1 =
++      nir_fmul(b, col[0],
++               nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
++                        nir_swizzle(b, col[2], yzx, 3, true)));
++
++   nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
++
++   return nir_fadd(b, nir_channel(b, diff, 0),
++                   nir_fadd(b, nir_channel(b, diff, 1),
++                            nir_channel(b, diff, 2)));
++}
++
++static nir_ssa_def *
++build_mat4_det(nir_builder *b, nir_ssa_def **col)
++{
++   nir_ssa_def *subdet[4];
++   for (unsigned i = 0; i < 4; i++) {
++      unsigned swiz[3];
++      for (unsigned j = 0, k = 0; j < 3; j++, k++) {
++         if (k == i)
++            k++; /* skip column */
++         swiz[j] = k;
++      }
++
++      nir_ssa_def *subcol[3];
++      subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
++      subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
++      subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
++
++      subdet[i] = build_mat3_det(b, subcol);
++   }
++
++   nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
++
++   return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
++                               nir_channel(b, prod, 1)),
++                   nir_fsub(b, nir_channel(b, prod, 2),
++                            nir_channel(b, prod, 3)));
++}
++
++static nir_ssa_def *
++build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
++{
++   unsigned size = glsl_get_vector_elements(src->type);
++
++   nir_ssa_def *cols[4];
++   for (unsigned i = 0; i < size; i++)
++      cols[i] = src->elems[i]->def;
++
++   switch(size) {
++   case 2: return build_mat2_det(&b->nb, cols);
++   case 3: return build_mat3_det(&b->nb, cols);
++   case 4: return build_mat4_det(&b->nb, cols);
++   default:
++      unreachable("Invalid matrix size");
++   }
++}
++
++/* Computes the determinant of the submatrix given by taking src and
++ * removing the specified row and column.
++ */ ++static nir_ssa_def * ++build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src, ++ unsigned size, unsigned row, unsigned col) ++{ ++ assert(row < size && col < size); ++ if (size == 2) { ++ return nir_channel(b, src->elems[1 - col]->def, 1 - row); ++ } else { ++ /* Swizzle to get all but the specified row */ ++ unsigned swiz[3]; ++ for (unsigned j = 0; j < 4; j++) ++ swiz[j - (j > row)] = j; ++ ++ /* Grab all but the specified column */ ++ nir_ssa_def *subcol[3]; ++ for (unsigned j = 0; j < size; j++) { ++ if (j != col) { ++ subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def, ++ swiz, size - 1, true); ++ } ++ } ++ ++ if (size == 3) { ++ return build_mat2_det(b, subcol); ++ } else { ++ assert(size == 4); ++ return build_mat3_det(b, subcol); ++ } ++ } ++} ++ ++static struct vtn_ssa_value * ++matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src) ++{ ++ nir_ssa_def *adj_col[4]; ++ unsigned size = glsl_get_vector_elements(src->type); ++ ++ /* Build up an adjugate matrix */ ++ for (unsigned c = 0; c < size; c++) { ++ nir_ssa_def *elem[4]; ++ for (unsigned r = 0; r < size; r++) { ++ elem[r] = build_mat_subdet(&b->nb, src, size, c, r); ++ ++ if ((r + c) % 2) ++ elem[r] = nir_fneg(&b->nb, elem[r]); ++ } ++ ++ adj_col[c] = nir_vec(&b->nb, elem, size); ++ } ++ ++ nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src)); ++ ++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type); ++ for (unsigned i = 0; i < size; i++) ++ val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv); ++ ++ return val; ++} ++ ++static nir_ssa_def* ++build_length(nir_builder *b, nir_ssa_def *vec) ++{ ++ switch (vec->num_components) { ++ case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); ++ case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); ++ case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); ++ case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); ++ default: ++ unreachable("Invalid number of components"); ++ } ++} ++ ++static inline nir_ssa_def * ++build_fclamp(nir_builder *b, ++ nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) ++{ ++ return nir_fmin(b, nir_fmax(b, x, min_val), max_val); ++} ++ ++/** ++ * Return e^x. ++ */ ++static nir_ssa_def * ++build_exp(nir_builder *b, nir_ssa_def *x) ++{ ++ return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E))); ++} ++ ++/** ++ * Return ln(x) - the natural logarithm of x. 
++ */ ++static nir_ssa_def * ++build_log(nir_builder *b, nir_ssa_def *x) ++{ ++ return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E)); ++} ++ ++static nir_ssa_def * ++build_asin(nir_builder *b, nir_ssa_def *x) ++{ ++ /* ++ * asin(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi / 4 - 1 + |x| * (0.086566724 + |x| * -0.03102955))) ++ */ ++ nir_ssa_def *abs_x = nir_fabs(b, x); ++ return nir_fmul(b, nir_fsign(b, x), ++ nir_fsub(b, nir_imm_float(b, M_PI_2f), ++ nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), ++ nir_fadd(b, nir_imm_float(b, M_PI_2f), ++ nir_fmul(b, abs_x, ++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), ++ nir_fmul(b, abs_x, ++ nir_fadd(b, nir_imm_float(b, 0.086566724f), ++ nir_fmul(b, abs_x, ++ nir_imm_float(b, -0.03102955f)))))))))); ++} ++ ++static nir_ssa_def * ++build_acos(nir_builder *b, nir_ssa_def *x) ++{ ++ /* ++ * poly(x) = sign(x) * sqrt(1 - |x|) * (pi / 2 + |x| * (pi / 4 - 1 + |x| * (0.08132463 + |x| * -0.02363318))) ++ */ ++ nir_ssa_def *abs_x = nir_fabs(b, x); ++ nir_ssa_def *poly = nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)), ++ nir_fadd(b, nir_imm_float(b, M_PI_2f), ++ nir_fmul(b, abs_x, ++ nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f), ++ nir_fmul(b, abs_x, ++ nir_fadd(b, nir_imm_float(b, 0.08132463f), ++ nir_fmul(b, abs_x, ++ nir_imm_float(b, -0.02363318f)))))))); ++ return nir_bcsel(b, nir_flt(b, x, nir_imm_float(b, 0)), ++ nir_fsub(b, nir_imm_float(b, M_PI), poly), ++ poly); ++} ++ ++/** ++ * Compute xs[0] + xs[1] + xs[2] + ... using fadd. ++ */ ++static nir_ssa_def * ++build_fsum(nir_builder *b, nir_ssa_def **xs, int terms) ++{ ++ nir_ssa_def *accum = xs[0]; ++ ++ for (int i = 1; i < terms; i++) ++ accum = nir_fadd(b, accum, xs[i]); ++ ++ return accum; ++} ++ ++static nir_ssa_def * ++build_atan(nir_builder *b, nir_ssa_def *y_over_x) ++{ ++ nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); ++ nir_ssa_def *one = nir_imm_float(b, 1.0f); ++ ++ /* ++ * range-reduction, first step: ++ * ++ * / y_over_x if |y_over_x| <= 1.0; ++ * x = < ++ * \ 1.0 / y_over_x otherwise ++ */ ++ nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), ++ nir_fmax(b, abs_y_over_x, one)); ++ ++ /* ++ * approximate atan by evaluating polynomial: ++ * ++ * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + ++ * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + ++ * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 ++ */ ++ nir_ssa_def *x_2 = nir_fmul(b, x, x); ++ nir_ssa_def *x_3 = nir_fmul(b, x_2, x); ++ nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); ++ nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); ++ nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); ++ nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); ++ ++ nir_ssa_def *polynomial_terms[] = { ++ nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)), ++ nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)), ++ nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)), ++ nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)), ++ nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)), ++ nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)), ++ }; ++ ++ nir_ssa_def *tmp = ++ build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); ++ ++ /* range-reduction fixup */ ++ tmp = nir_fadd(b, tmp, ++ nir_fmul(b, ++ nir_b2f(b, nir_flt(b, one, abs_y_over_x)), ++ nir_fadd(b, nir_fmul(b, tmp, ++ nir_imm_float(b, -2.0f)), ++ nir_imm_float(b, M_PI_2f)))); ++ ++ /* sign fixup */ ++ return nir_fmul(b, tmp, nir_fsign(b, y_over_x)); ++} ++ ++static nir_ssa_def * ++build_atan2(nir_builder *b, 
nir_ssa_def *y, nir_ssa_def *x) ++{ ++ nir_ssa_def *zero = nir_imm_float(b, 0.0f); ++ ++ /* If |x| >= 1.0e-8 * |y|: */ ++ nir_ssa_def *condition = ++ nir_fge(b, nir_fabs(b, x), ++ nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); ++ ++ /* Then...call atan(y/x) and fix it up: */ ++ nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); ++ nir_ssa_def *r_then = ++ nir_bcsel(b, nir_flt(b, x, zero), ++ nir_fadd(b, atan1, ++ nir_bcsel(b, nir_fge(b, y, zero), ++ nir_imm_float(b, M_PIf), ++ nir_imm_float(b, -M_PIf))), ++ atan1); ++ ++ /* Else... */ ++ nir_ssa_def *r_else = ++ nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); ++ ++ return nir_bcsel(b, condition, r_then, r_else); ++} ++ ++static nir_ssa_def * ++build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent) ++{ ++ nir_ssa_def *abs_x = nir_fabs(b, x); ++ nir_ssa_def *zero = nir_imm_float(b, 0.0f); ++ ++ /* Single-precision floating-point values are stored as ++ * 1 sign bit; ++ * 8 exponent bits; ++ * 23 mantissa bits. ++ * ++ * An exponent shift of 23 will shift the mantissa out, leaving only the ++ * exponent and sign bit (which itself may be zero, if the absolute value ++ * was taken before the bitcast and shift. ++ */ ++ nir_ssa_def *exponent_shift = nir_imm_int(b, 23); ++ nir_ssa_def *exponent_bias = nir_imm_int(b, -126); ++ ++ nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); ++ ++ /* Exponent of floating-point values in the range [0.5, 1.0). */ ++ nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u); ++ ++ nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); ++ ++ *exponent = ++ nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), ++ nir_bcsel(b, is_not_zero, exponent_bias, zero)); ++ ++ return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), ++ nir_bcsel(b, is_not_zero, exponent_value, zero)); ++} ++ ++static void ++handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, ++ const uint32_t *w, unsigned count) ++{ ++ struct nir_builder *nb = &b->nb; ++ const struct glsl_type *dest_type = ++ vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = vtn_create_ssa_value(b, dest_type); ++ ++ /* Collect the various SSA sources */ ++ unsigned num_inputs = count - 5; ++ nir_ssa_def *src[3]; ++ for (unsigned i = 0; i < num_inputs; i++) ++ src[i] = vtn_ssa_value(b, w[i + 5])->def; ++ ++ nir_op op; ++ switch (entrypoint) { ++ case GLSLstd450Round: op = nir_op_fround_even; break; /* TODO */ ++ case GLSLstd450RoundEven: op = nir_op_fround_even; break; ++ case GLSLstd450Trunc: op = nir_op_ftrunc; break; ++ case GLSLstd450FAbs: op = nir_op_fabs; break; ++ case GLSLstd450SAbs: op = nir_op_iabs; break; ++ case GLSLstd450FSign: op = nir_op_fsign; break; ++ case GLSLstd450SSign: op = nir_op_isign; break; ++ case GLSLstd450Floor: op = nir_op_ffloor; break; ++ case GLSLstd450Ceil: op = nir_op_fceil; break; ++ case GLSLstd450Fract: op = nir_op_ffract; break; ++ case GLSLstd450Radians: ++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251)); ++ return; ++ case GLSLstd450Degrees: ++ val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131)); ++ return; ++ case GLSLstd450Sin: op = nir_op_fsin; break; ++ case GLSLstd450Cos: op = nir_op_fcos; break; ++ case GLSLstd450Tan: ++ val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]), ++ nir_fcos(nb, src[0])); ++ return; ++ case GLSLstd450Pow: op = nir_op_fpow; break; ++ case GLSLstd450Exp2: op = nir_op_fexp2; break; ++ case GLSLstd450Log2: op = 
nir_op_flog2; break; ++ case GLSLstd450Sqrt: op = nir_op_fsqrt; break; ++ case GLSLstd450InverseSqrt: op = nir_op_frsq; break; ++ ++ case GLSLstd450Modf: { ++ nir_ssa_def *sign = nir_fsign(nb, src[0]); ++ nir_ssa_def *abs = nir_fabs(nb, src[0]); ++ val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); ++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), ++ nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf); ++ return; ++ } ++ ++ case GLSLstd450ModfStruct: { ++ nir_ssa_def *sign = nir_fsign(nb, src[0]); ++ nir_ssa_def *abs = nir_fabs(nb, src[0]); ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs)); ++ val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs)); ++ return; ++ } ++ ++ case GLSLstd450FMin: op = nir_op_fmin; break; ++ case GLSLstd450UMin: op = nir_op_umin; break; ++ case GLSLstd450SMin: op = nir_op_imin; break; ++ case GLSLstd450FMax: op = nir_op_fmax; break; ++ case GLSLstd450UMax: op = nir_op_umax; break; ++ case GLSLstd450SMax: op = nir_op_imax; break; ++ case GLSLstd450FMix: op = nir_op_flrp; break; ++ case GLSLstd450Step: ++ val->ssa->def = nir_sge(nb, src[1], src[0]); ++ return; ++ ++ case GLSLstd450Fma: op = nir_op_ffma; break; ++ case GLSLstd450Ldexp: op = nir_op_ldexp; break; ++ ++ /* Packing/Unpacking functions */ ++ case GLSLstd450PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; ++ case GLSLstd450PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; ++ case GLSLstd450PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; ++ case GLSLstd450PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; ++ case GLSLstd450PackHalf2x16: op = nir_op_pack_half_2x16; break; ++ case GLSLstd450UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; ++ case GLSLstd450UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; ++ case GLSLstd450UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; ++ case GLSLstd450UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; ++ case GLSLstd450UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; ++ ++ case GLSLstd450Length: ++ val->ssa->def = build_length(nb, src[0]); ++ return; ++ case GLSLstd450Distance: ++ val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1])); ++ return; ++ case GLSLstd450Normalize: ++ val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0])); ++ return; ++ ++ case GLSLstd450Exp: ++ val->ssa->def = build_exp(nb, src[0]); ++ return; ++ ++ case GLSLstd450Log: ++ val->ssa->def = build_log(nb, src[0]); ++ return; ++ ++ case GLSLstd450FClamp: ++ val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]); ++ return; ++ case GLSLstd450UClamp: ++ val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]); ++ return; ++ case GLSLstd450SClamp: ++ val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]); ++ return; ++ ++ case GLSLstd450Cross: { ++ unsigned yzx[4] = { 1, 2, 0, 0 }; ++ unsigned zxy[4] = { 2, 0, 1, 0 }; ++ val->ssa->def = ++ nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true), ++ nir_swizzle(nb, src[1], zxy, 3, true)), ++ nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true), ++ nir_swizzle(nb, src[1], yzx, 3, true))); ++ return; ++ } ++ ++ case GLSLstd450SmoothStep: { ++ /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ ++ nir_ssa_def *t = ++ build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]), ++ nir_fsub(nb, src[1], src[0])), ++ nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0)); ++ /* result = t * t * (3 - 2 * t) */ ++ val->ssa->def = ++ nir_fmul(nb, t, nir_fmul(nb, t, ++ nir_fsub(nb, nir_imm_float(nb, 3.0), ++ nir_fmul(nb, 
nir_imm_float(nb, 2.0), t)))); ++ return; ++ } ++ ++ case GLSLstd450FaceForward: ++ val->ssa->def = ++ nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]), ++ nir_imm_float(nb, 0.0)), ++ src[0], nir_fneg(nb, src[0])); ++ return; ++ ++ case GLSLstd450Reflect: ++ /* I - 2 * dot(N, I) * N */ ++ val->ssa->def = ++ nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0), ++ nir_fmul(nb, nir_fdot(nb, src[0], src[1]), ++ src[1]))); ++ return; ++ ++ case GLSLstd450Refract: { ++ nir_ssa_def *I = src[0]; ++ nir_ssa_def *N = src[1]; ++ nir_ssa_def *eta = src[2]; ++ nir_ssa_def *n_dot_i = nir_fdot(nb, N, I); ++ nir_ssa_def *one = nir_imm_float(nb, 1.0); ++ nir_ssa_def *zero = nir_imm_float(nb, 0.0); ++ /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */ ++ nir_ssa_def *k = ++ nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta, ++ nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i))))); ++ nir_ssa_def *result = ++ nir_fsub(nb, nir_fmul(nb, eta, I), ++ nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i), ++ nir_fsqrt(nb, k)), N)); ++ /* XXX: bcsel, or if statement? */ ++ val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result); ++ return; ++ } ++ ++ case GLSLstd450Sinh: ++ /* 0.5 * (e^x - e^(-x)) */ ++ val->ssa->def = ++ nir_fmul(nb, nir_imm_float(nb, 0.5f), ++ nir_fsub(nb, build_exp(nb, src[0]), ++ build_exp(nb, nir_fneg(nb, src[0])))); ++ return; ++ ++ case GLSLstd450Cosh: ++ /* 0.5 * (e^x + e^(-x)) */ ++ val->ssa->def = ++ nir_fmul(nb, nir_imm_float(nb, 0.5f), ++ nir_fadd(nb, build_exp(nb, src[0]), ++ build_exp(nb, nir_fneg(nb, src[0])))); ++ return; ++ ++ case GLSLstd450Tanh: ++ /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ ++ val->ssa->def = ++ nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), ++ nir_fsub(nb, build_exp(nb, src[0]), ++ build_exp(nb, nir_fneg(nb, src[0])))), ++ nir_fmul(nb, nir_imm_float(nb, 0.5f), ++ nir_fadd(nb, build_exp(nb, src[0]), ++ build_exp(nb, nir_fneg(nb, src[0]))))); ++ return; ++ ++ case GLSLstd450Asinh: ++ val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), ++ build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]), ++ nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]), ++ nir_imm_float(nb, 1.0f)))))); ++ return; ++ case GLSLstd450Acosh: ++ val->ssa->def = build_log(nb, nir_fadd(nb, src[0], ++ nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]), ++ nir_imm_float(nb, 1.0f))))); ++ return; ++ case GLSLstd450Atanh: { ++ nir_ssa_def *one = nir_imm_float(nb, 1.0); ++ val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f), ++ build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]), ++ nir_fsub(nb, one, src[0])))); ++ return; ++ } ++ ++ case GLSLstd450FindILsb: op = nir_op_find_lsb; break; ++ case GLSLstd450FindSMsb: op = nir_op_ifind_msb; break; ++ case GLSLstd450FindUMsb: op = nir_op_ufind_msb; break; ++ ++ case GLSLstd450Asin: ++ val->ssa->def = build_asin(nb, src[0]); ++ return; ++ ++ case GLSLstd450Acos: ++ val->ssa->def = build_acos(nb, src[0]); ++ return; ++ ++ case GLSLstd450Atan: ++ val->ssa->def = build_atan(nb, src[0]); ++ return; ++ ++ case GLSLstd450Atan2: ++ val->ssa->def = build_atan2(nb, src[0], src[1]); ++ return; ++ ++ case GLSLstd450Frexp: { ++ nir_ssa_def *exponent; ++ val->ssa->def = build_frexp(nb, src[0], &exponent); ++ nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf); ++ return; ++ } ++ ++ case GLSLstd450FrexpStruct: { ++ assert(glsl_type_is_struct(val->ssa->type)); ++ val->ssa->elems[0]->def = build_frexp(nb, src[0], ++ &val->ssa->elems[1]->def); ++ return; ++ } ++ ++ case GLSLstd450PackDouble2x32: ++ case 
GLSLstd450UnpackDouble2x32: ++ default: ++ unreachable("Unhandled opcode"); ++ } ++ ++ nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); ++ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, ++ glsl_get_vector_elements(val->ssa->type), val->name); ++ instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; ++ val->ssa->def = &instr->dest.dest.ssa; ++ ++ for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) ++ instr->src[i].src = nir_src_for_ssa(src[i]); ++ ++ nir_builder_instr_insert(nb, &instr->instr); ++} ++ ++bool ++vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch ((enum GLSLstd450)ext_opcode) { ++ case GLSLstd450Determinant: { ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = rzalloc(b, struct vtn_ssa_value); ++ val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; ++ val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5])); ++ break; ++ } ++ ++ case GLSLstd450MatrixInverse: { ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5])); ++ break; ++ } ++ ++ case GLSLstd450InterpolateAtCentroid: ++ case GLSLstd450InterpolateAtSample: ++ case GLSLstd450InterpolateAtOffset: ++ unreachable("Unhandled opcode"); ++ ++ default: ++ handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count); ++ } ++ ++ return true; ++} diff --cc src/compiler/nir/spirv/vtn_private.h index 00000000000,00000000000..3840d8c4b65 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_private.h @@@ -1,0 -1,0 +1,484 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ *
++ * Authors:
++ *    Jason Ekstrand (jason@jlekstrand.net)
++ *
++ */
++
++#include "nir/nir.h"
++#include "nir/nir_builder.h"
++#include "nir/nir_array.h"
++#include "nir_spirv.h"
++#include "spirv.h"
++
++struct vtn_builder;
++struct vtn_decoration;
++
++enum vtn_value_type {
++   vtn_value_type_invalid = 0,
++   vtn_value_type_undef,
++   vtn_value_type_string,
++   vtn_value_type_decoration_group,
++   vtn_value_type_type,
++   vtn_value_type_constant,
++   vtn_value_type_access_chain,
++   vtn_value_type_function,
++   vtn_value_type_block,
++   vtn_value_type_ssa,
++   vtn_value_type_extension,
++   vtn_value_type_image_pointer,
++   vtn_value_type_sampled_image,
++};
++
++enum vtn_branch_type {
++   vtn_branch_type_none,
++   vtn_branch_type_switch_break,
++   vtn_branch_type_switch_fallthrough,
++   vtn_branch_type_loop_break,
++   vtn_branch_type_loop_continue,
++   vtn_branch_type_discard,
++   vtn_branch_type_return,
++};
++
++enum vtn_cf_node_type {
++   vtn_cf_node_type_block,
++   vtn_cf_node_type_if,
++   vtn_cf_node_type_loop,
++   vtn_cf_node_type_switch,
++};
++
++struct vtn_cf_node {
++   struct list_head link;
++   enum vtn_cf_node_type type;
++};
++
++struct vtn_loop {
++   struct vtn_cf_node node;
++
++   /* The main body of the loop */
++   struct list_head body;
++
++   /* The "continue" part of the loop. This gets executed after the body
++    * and is where you go when you hit a continue.
++    */
++   struct list_head cont_body;
++
++   SpvLoopControlMask control;
++};
++
++struct vtn_if {
++   struct vtn_cf_node node;
++
++   uint32_t condition;
++
++   enum vtn_branch_type then_type;
++   struct list_head then_body;
++
++   enum vtn_branch_type else_type;
++   struct list_head else_body;
++
++   SpvSelectionControlMask control;
++};
++
++struct vtn_case {
++   struct list_head link;
++
++   struct list_head body;
++
++   /* The block that starts this case */
++   struct vtn_block *start_block;
++
++   /* The fallthrough case, if any */
++   struct vtn_case *fallthrough;
++
++   /* The uint32_t values that map to this case */
++   nir_array values;
++
++   /* True if this is the default case */
++   bool is_default;
++
++   /* Initialized to false; used when sorting the list of cases */
++   bool visited;
++};
++
++struct vtn_switch {
++   struct vtn_cf_node node;
++
++   uint32_t selector;
++
++   struct list_head cases;
++};
++
++struct vtn_block {
++   struct vtn_cf_node node;
++
++   /** A pointer to the label instruction */
++   const uint32_t *label;
++
++   /** A pointer to the merge instruction (or NULL if none exists) */
++   const uint32_t *merge;
++
++   /** A pointer to the branch instruction that ends this block */
++   const uint32_t *branch;
++
++   enum vtn_branch_type branch_type;
++
++   /** Points to the loop that this block starts (if it starts a loop) */
++   struct vtn_loop *loop;
++
++   /** Points to the switch case started by this block (if any) */
++   struct vtn_case *switch_case;
++
++   /** The last block in this SPIR-V block.
*/ ++ nir_block *end_block; ++}; ++ ++struct vtn_function { ++ struct exec_node node; ++ ++ nir_function_impl *impl; ++ struct vtn_block *start_block; ++ ++ struct list_head body; ++ ++ const uint32_t *end; ++ ++ SpvFunctionControlMask control; ++}; ++ ++typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, ++ const uint32_t *, unsigned); ++ ++void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, ++ const uint32_t *end); ++void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func, ++ vtn_instruction_handler instruction_handler); ++ ++const uint32_t * ++vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, ++ const uint32_t *end, vtn_instruction_handler handler); ++ ++struct vtn_ssa_value { ++ union { ++ nir_ssa_def *def; ++ struct vtn_ssa_value **elems; ++ }; ++ ++ /* For matrices, if this is non-NULL, then this value is actually the ++ * transpose of some other value. The value that `transposed` points to ++ * always dominates this value. ++ */ ++ struct vtn_ssa_value *transposed; ++ ++ const struct glsl_type *type; ++}; ++ ++struct vtn_type { ++ const struct glsl_type *type; ++ ++ /* The value that declares this type. Used for finding decorations */ ++ struct vtn_value *val; ++ ++ /* for matrices, whether the matrix is stored row-major */ ++ bool row_major; ++ ++ /* for structs, the offset of each member */ ++ unsigned *offsets; ++ ++ /* for structs, whether it was decorated as a "non-SSBO-like" block */ ++ bool block; ++ ++ /* for structs, whether it was decorated as an "SSBO-like" block */ ++ bool buffer_block; ++ ++ /* for structs with block == true, whether this is a builtin block (i.e. a ++ * block that contains only builtins). ++ */ ++ bool builtin_block; ++ ++ /* Image format for image_load_store type images */ ++ unsigned image_format; ++ ++ /* Access qualifier for storage images */ ++ SpvAccessQualifier access_qualifier; ++ ++ /* for arrays and matrices, the array stride */ ++ unsigned stride; ++ ++ /* for arrays, the vtn_type for the elements of the array */ ++ struct vtn_type *array_element; ++ ++ /* for structures, the vtn_type for each member */ ++ struct vtn_type **members; ++ ++ /* Whether this type, or a parent type, has been decorated as a builtin */ ++ bool is_builtin; ++ ++ SpvBuiltIn builtin; ++}; ++ ++struct vtn_variable; ++ ++enum vtn_access_mode { ++ vtn_access_mode_id, ++ vtn_access_mode_literal, ++}; ++ ++struct vtn_access_link { ++ enum vtn_access_mode mode; ++ uint32_t id; ++}; ++ ++struct vtn_access_chain { ++ struct vtn_variable *var; ++ ++ uint32_t length; ++ ++ /* Struct elements and array offsets */ ++ struct vtn_access_link link[0]; ++}; ++ ++enum vtn_variable_mode { ++ vtn_variable_mode_local, ++ vtn_variable_mode_global, ++ vtn_variable_mode_param, ++ vtn_variable_mode_ubo, ++ vtn_variable_mode_ssbo, ++ vtn_variable_mode_push_constant, ++ vtn_variable_mode_image, ++ vtn_variable_mode_sampler, ++ vtn_variable_mode_workgroup, ++ vtn_variable_mode_input, ++ vtn_variable_mode_output, ++}; ++ ++struct vtn_variable { ++ enum vtn_variable_mode mode; ++ ++ struct vtn_type *type; ++ ++ unsigned descriptor_set; ++ unsigned binding; ++ ++ nir_variable *var; ++ nir_variable **members; ++ ++ struct vtn_access_chain chain; ++}; ++ ++struct vtn_image_pointer { ++ struct vtn_access_chain *image; ++ nir_ssa_def *coord; ++ nir_ssa_def *sample; ++}; ++ ++struct vtn_sampled_image { ++ struct vtn_access_chain *image; /* Image or array of images */ ++ struct vtn_access_chain *sampler; /* Sampler */ ++}; ++ ++struct 
vtn_value { ++ enum vtn_value_type value_type; ++ const char *name; ++ struct vtn_decoration *decoration; ++ union { ++ void *ptr; ++ char *str; ++ struct vtn_type *type; ++ struct { ++ nir_constant *constant; ++ const struct glsl_type *const_type; ++ }; ++ struct vtn_access_chain *access_chain; ++ struct vtn_image_pointer *image; ++ struct vtn_sampled_image *sampled_image; ++ struct vtn_function *func; ++ struct vtn_block *block; ++ struct vtn_ssa_value *ssa; ++ vtn_instruction_handler ext_handler; ++ }; ++}; ++ ++#define VTN_DEC_DECORATION -1 ++#define VTN_DEC_EXECUTION_MODE -2 ++#define VTN_DEC_STRUCT_MEMBER0 0 ++ ++struct vtn_decoration { ++ struct vtn_decoration *next; ++ ++ /* Specifies how to apply this decoration. Negative values represent a ++ * decoration or execution mode. (See the VTN_DEC_ #defines above.) ++ * Non-negative values specify that it applies to a structure member. ++ */ ++ int scope; ++ ++ const uint32_t *literals; ++ struct vtn_value *group; ++ ++ union { ++ SpvDecoration decoration; ++ SpvExecutionMode exec_mode; ++ }; ++}; ++ ++struct vtn_builder { ++ nir_builder nb; ++ ++ nir_shader *shader; ++ nir_function_impl *impl; ++ struct vtn_block *block; ++ ++ /* Current file, line, and column. Useful for debugging. Set ++ * automatically by vtn_foreach_instruction. ++ */ ++ char *file; ++ int line, col; ++ ++ /* ++ * In SPIR-V, constants are global, whereas in NIR, the load_const ++ * instruction we use is per-function. So while we parse each function, we ++ * keep a hash table of constants we've resolved to nir_ssa_value's so ++ * far, and we lazily resolve them when we see them used in a function. ++ */ ++ struct hash_table *const_table; ++ ++ /* ++ * Map from phi instructions (pointer to the start of the instruction) ++ * to the variable corresponding to it. 
++ */ ++ struct hash_table *phi_table; ++ ++ unsigned num_specializations; ++ struct nir_spirv_specialization *specializations; ++ ++ unsigned value_id_bound; ++ struct vtn_value *values; ++ ++ gl_shader_stage entry_point_stage; ++ const char *entry_point_name; ++ struct vtn_value *entry_point; ++ bool origin_upper_left; ++ ++ struct vtn_function *func; ++ struct exec_list functions; ++ ++ /* Current function parameter index */ ++ unsigned func_param_idx; ++ ++ bool has_loop_continue; ++}; ++ ++static inline struct vtn_value * ++vtn_push_value(struct vtn_builder *b, uint32_t value_id, ++ enum vtn_value_type value_type) ++{ ++ assert(value_id < b->value_id_bound); ++ assert(b->values[value_id].value_type == vtn_value_type_invalid); ++ ++ b->values[value_id].value_type = value_type; ++ ++ return &b->values[value_id]; ++} ++ ++static inline struct vtn_value * ++vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) ++{ ++ assert(value_id < b->value_id_bound); ++ return &b->values[value_id]; ++} ++ ++static inline struct vtn_value * ++vtn_value(struct vtn_builder *b, uint32_t value_id, ++ enum vtn_value_type value_type) ++{ ++ struct vtn_value *val = vtn_untyped_value(b, value_id); ++ assert(val->value_type == value_type); ++ return val; ++} ++ ++struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); ++ ++struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b, ++ const struct glsl_type *type); ++ ++struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b, ++ struct vtn_ssa_value *src); ++ ++nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, ++ unsigned index); ++nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, ++ nir_ssa_def *index); ++nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, ++ nir_ssa_def *insert, unsigned index); ++nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, ++ nir_ssa_def *insert, nir_ssa_def *index); ++ ++nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id); ++ ++nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b, ++ struct vtn_access_chain *chain); ++nir_ssa_def * ++vtn_access_chain_to_offset(struct vtn_builder *b, ++ struct vtn_access_chain *chain, ++ nir_ssa_def **index_out, struct vtn_type **type_out, ++ unsigned *end_idx_out, bool stop_at_matrix); ++ ++struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src); ++ ++void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, ++ nir_deref_var *dest); ++ ++struct vtn_ssa_value * ++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src); ++ ++void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, ++ struct vtn_access_chain *dest); ++ ++void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count); ++ ++ ++typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, ++ struct vtn_value *, ++ int member, ++ const struct vtn_decoration *, ++ void *); ++ ++void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, ++ vtn_decoration_foreach_cb cb, void *data); ++ ++typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *, ++ struct vtn_value *, ++ const struct vtn_decoration *, ++ void *); ++ ++void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value, ++ vtn_execution_mode_foreach_cb cb, void *data); ++ ++nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap); ++ ++void vtn_handle_alu(struct 
vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count); ++ ++bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, ++ const uint32_t *words, unsigned count); diff --cc src/compiler/nir/spirv/vtn_variables.c index 00000000000,00000000000..3ad98aa5310 new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv/vtn_variables.c @@@ -1,0 -1,0 +1,1412 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Jason Ekstrand (jason@jlekstrand.net) ++ * ++ */ ++ ++#include "vtn_private.h" ++ ++static struct vtn_access_chain * ++vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old, ++ unsigned new_ids) ++{ ++ struct vtn_access_chain *chain; ++ ++ unsigned new_len = old->length + new_ids; ++ chain = ralloc_size(b, sizeof(*chain) + new_len * sizeof(chain->link[0])); ++ ++ chain->var = old->var; ++ chain->length = new_len; ++ ++ for (unsigned i = 0; i < old->length; i++) ++ chain->link[i] = old->link[i]; ++ ++ return chain; ++} ++ ++static nir_ssa_def * ++vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link, ++ unsigned stride) ++{ ++ assert(stride > 0); ++ if (link.mode == vtn_access_mode_literal) { ++ return nir_imm_int(&b->nb, link.id * stride); ++ } else if (stride == 1) { ++ return vtn_ssa_value(b, link.id)->def; ++ } else { ++ return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def, ++ nir_imm_int(&b->nb, stride)); ++ } ++} ++ ++static struct vtn_type * ++vtn_access_chain_tail_type(struct vtn_builder *b, ++ struct vtn_access_chain *chain) ++{ ++ struct vtn_type *type = chain->var->type; ++ for (unsigned i = 0; i < chain->length; i++) { ++ if (glsl_type_is_struct(type->type)) { ++ assert(chain->link[i].mode == vtn_access_mode_literal); ++ type = type->members[chain->link[i].id]; ++ } else { ++ type = type->array_element; ++ } ++ } ++ return type; ++} ++ ++/* Crawls a chain of array derefs and rewrites the types so that the ++ * lengths stay the same but the terminal type is the one given by ++ * tail_type. This is useful for split structures. 
++ */ ++static void ++rewrite_deref_types(nir_deref *deref, const struct glsl_type *type) ++{ ++ deref->type = type; ++ if (deref->child) { ++ assert(deref->child->deref_type == nir_deref_type_array); ++ assert(glsl_type_is_array(deref->type)); ++ rewrite_deref_types(deref->child, glsl_get_array_element(type)); ++ } ++} ++ ++nir_deref_var * ++vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain) ++{ ++ nir_deref_var *deref_var; ++ if (chain->var->var) { ++ deref_var = nir_deref_var_create(b, chain->var->var); ++ } else { ++ assert(chain->var->members); ++ /* Create the deref_var manually. It will get filled out later. */ ++ deref_var = rzalloc(b, nir_deref_var); ++ deref_var->deref.deref_type = nir_deref_type_var; ++ } ++ ++ struct vtn_type *deref_type = chain->var->type; ++ nir_deref *tail = &deref_var->deref; ++ nir_variable **members = chain->var->members; ++ ++ for (unsigned i = 0; i < chain->length; i++) { ++ enum glsl_base_type base_type = glsl_get_base_type(deref_type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_ARRAY: { ++ deref_type = deref_type->array_element; ++ ++ nir_deref_array *deref_arr = nir_deref_array_create(b); ++ deref_arr->deref.type = deref_type->type; ++ ++ if (chain->link[i].mode == vtn_access_mode_literal) { ++ deref_arr->deref_array_type = nir_deref_array_type_direct; ++ deref_arr->base_offset = chain->link[i].id; ++ } else { ++ assert(chain->link[i].mode == vtn_access_mode_id); ++ deref_arr->deref_array_type = nir_deref_array_type_indirect; ++ deref_arr->base_offset = 0; ++ deref_arr->indirect = ++ nir_src_for_ssa(vtn_ssa_value(b, chain->link[i].id)->def); ++ } ++ tail->child = &deref_arr->deref; ++ tail = tail->child; ++ break; ++ } ++ ++ case GLSL_TYPE_STRUCT: { ++ assert(chain->link[i].mode == vtn_access_mode_literal); ++ unsigned idx = chain->link[i].id; ++ deref_type = deref_type->members[idx]; ++ if (members) { ++ /* This is a pre-split structure. */ ++ deref_var->var = members[idx]; ++ rewrite_deref_types(&deref_var->deref, members[idx]->type); ++ assert(tail->type == deref_type->type); ++ members = NULL; ++ } else { ++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); ++ deref_struct->deref.type = deref_type->type; ++ tail->child = &deref_struct->deref; ++ tail = tail->child; ++ } ++ break; ++ } ++ default: ++ unreachable("Invalid type for deref"); ++ } ++ } ++ ++ assert(members == NULL); ++ return deref_var; ++} ++ ++static void ++_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, ++ nir_deref *tail, struct vtn_ssa_value *inout) ++{ ++ /* The deref tail may contain a deref to select a component of a vector (in ++ * other words, it might not be an actual tail) so we have to save it away ++ * here since we overwrite it later. ++ */ ++ nir_deref *old_child = tail->child; ++ ++ if (glsl_type_is_vector_or_scalar(tail->type)) { ++ /* Terminate the deref chain in case there is one more link to pick ++ * off a component of the vector. ++ */ ++ tail->child = NULL; ++ ++ nir_intrinsic_op op = load ? 
nir_intrinsic_load_var : ++ nir_intrinsic_store_var; ++ ++ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); ++ intrin->variables[0] = ++ nir_deref_as_var(nir_copy_deref(intrin, &deref->deref)); ++ intrin->num_components = glsl_get_vector_elements(tail->type); ++ ++ if (load) { ++ nir_ssa_dest_init(&intrin->instr, &intrin->dest, ++ intrin->num_components, NULL); ++ inout->def = &intrin->dest.ssa; ++ } else { ++ intrin->const_index[0] = (1 << intrin->num_components) - 1; ++ intrin->src[0] = nir_src_for_ssa(inout->def); ++ } ++ ++ nir_builder_instr_insert(&b->nb, &intrin->instr); ++ } else if (glsl_get_base_type(tail->type) == GLSL_TYPE_ARRAY || ++ glsl_type_is_matrix(tail->type)) { ++ unsigned elems = glsl_get_length(tail->type); ++ nir_deref_array *deref_arr = nir_deref_array_create(b); ++ deref_arr->deref_array_type = nir_deref_array_type_direct; ++ deref_arr->deref.type = glsl_get_array_element(tail->type); ++ tail->child = &deref_arr->deref; ++ for (unsigned i = 0; i < elems; i++) { ++ deref_arr->base_offset = i; ++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); ++ } ++ } else { ++ assert(glsl_get_base_type(tail->type) == GLSL_TYPE_STRUCT); ++ unsigned elems = glsl_get_length(tail->type); ++ nir_deref_struct *deref_struct = nir_deref_struct_create(b, 0); ++ tail->child = &deref_struct->deref; ++ for (unsigned i = 0; i < elems; i++) { ++ deref_struct->index = i; ++ deref_struct->deref.type = glsl_get_struct_field(tail->type, i); ++ _vtn_local_load_store(b, load, deref, tail->child, inout->elems[i]); ++ } ++ } ++ ++ tail->child = old_child; ++} ++ ++nir_deref_var * ++vtn_nir_deref(struct vtn_builder *b, uint32_t id) ++{ ++ struct vtn_access_chain *chain = ++ vtn_value(b, id, vtn_value_type_access_chain)->access_chain; ++ ++ return vtn_access_chain_to_deref(b, chain); ++} ++ ++/* ++ * Gets the NIR-level deref tail, which may have as a child an array deref ++ * selecting which component due to OpAccessChain supporting per-component ++ * indexing in SPIR-V. 
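++ *
++ * For example (hypothetical SPIR-V, the %names are only for illustration),
++ *
++ *    %p = OpAccessChain %ptr %array_of_vec4_var %i %j
++ *
++ * first selects vec4 number %i from the array and then component %j of that
++ * vec4.  get_deref_tail() returns the deref for the vec4 (the first link
++ * with a vector or scalar type); the trailing per-component array deref, if
++ * present, is left as its child for vtn_local_load/vtn_local_store to handle.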
++ */ ++static nir_deref * ++get_deref_tail(nir_deref_var *deref) ++{ ++ nir_deref *cur = &deref->deref; ++ while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) ++ cur = cur->child; ++ ++ return cur; ++} ++ ++struct vtn_ssa_value * ++vtn_local_load(struct vtn_builder *b, nir_deref_var *src) ++{ ++ nir_deref *src_tail = get_deref_tail(src); ++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type); ++ _vtn_local_load_store(b, true, src, src_tail, val); ++ ++ if (src_tail->child) { ++ nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); ++ assert(vec_deref->deref.child == NULL); ++ val->type = vec_deref->deref.type; ++ if (vec_deref->deref_array_type == nir_deref_array_type_direct) ++ val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); ++ else ++ val->def = vtn_vector_extract_dynamic(b, val->def, ++ vec_deref->indirect.ssa); ++ } ++ ++ return val; ++} ++ ++void ++vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src, ++ nir_deref_var *dest) ++{ ++ nir_deref *dest_tail = get_deref_tail(dest); ++ ++ if (dest_tail->child) { ++ struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type); ++ _vtn_local_load_store(b, true, dest, dest_tail, val); ++ nir_deref_array *deref = nir_deref_as_array(dest_tail->child); ++ assert(deref->deref.child == NULL); ++ if (deref->deref_array_type == nir_deref_array_type_direct) ++ val->def = vtn_vector_insert(b, val->def, src->def, ++ deref->base_offset); ++ else ++ val->def = vtn_vector_insert_dynamic(b, val->def, src->def, ++ deref->indirect.ssa); ++ _vtn_local_load_store(b, false, dest, dest_tail, val); ++ } else { ++ _vtn_local_load_store(b, false, dest, dest_tail, src); ++ } ++} ++ ++static nir_ssa_def * ++get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, ++ struct vtn_type **type, unsigned *chain_idx) ++{ ++ /* Push constants have no explicit binding */ ++ if (chain->var->mode == vtn_variable_mode_push_constant) { ++ *chain_idx = 0; ++ *type = chain->var->type; ++ return NULL; ++ } ++ ++ nir_ssa_def *array_index; ++ if (glsl_type_is_array(chain->var->type->type)) { ++ assert(chain->length > 0); ++ array_index = vtn_access_link_as_ssa(b, chain->link[0], 1); ++ *chain_idx = 1; ++ *type = chain->var->type->array_element; ++ } else { ++ array_index = nir_imm_int(&b->nb, 0); ++ *chain_idx = 0; ++ *type = chain->var->type; ++ } ++ ++ nir_intrinsic_instr *instr = ++ nir_intrinsic_instr_create(b->nb.shader, ++ nir_intrinsic_vulkan_resource_index); ++ instr->src[0] = nir_src_for_ssa(array_index); ++ instr->const_index[0] = chain->var->descriptor_set; ++ instr->const_index[1] = chain->var->binding; ++ ++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); ++ nir_builder_instr_insert(&b->nb, &instr->instr); ++ ++ return &instr->dest.ssa; ++} ++ ++nir_ssa_def * ++vtn_access_chain_to_offset(struct vtn_builder *b, ++ struct vtn_access_chain *chain, ++ nir_ssa_def **index_out, struct vtn_type **type_out, ++ unsigned *end_idx_out, bool stop_at_matrix) ++{ ++ unsigned idx = 0; ++ struct vtn_type *type; ++ *index_out = get_vulkan_resource_index(b, chain, &type, &idx); ++ ++ nir_ssa_def *offset = nir_imm_int(&b->nb, 0); ++ for (; idx < chain->length; idx++) { ++ enum glsl_base_type base_type = glsl_get_base_type(type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_DOUBLE: ++ case GLSL_TYPE_BOOL: ++ /* Some users may not want matrix or vector derefs */ ++ if (stop_at_matrix) ++ goto end; ++ /* Fall 
through */ ++ ++ case GLSL_TYPE_ARRAY: ++ offset = nir_iadd(&b->nb, offset, ++ vtn_access_link_as_ssa(b, chain->link[idx], ++ type->stride)); ++ ++ type = type->array_element; ++ break; ++ ++ case GLSL_TYPE_STRUCT: { ++ assert(chain->link[idx].mode == vtn_access_mode_literal); ++ unsigned member = chain->link[idx].id; ++ offset = nir_iadd(&b->nb, offset, ++ nir_imm_int(&b->nb, type->offsets[member])); ++ type = type->members[member]; ++ break; ++ } ++ ++ default: ++ unreachable("Invalid type for deref"); ++ } ++ } ++ ++end: ++ *type_out = type; ++ if (end_idx_out) ++ *end_idx_out = idx; ++ ++ return offset; ++} ++ ++static void ++_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, ++ nir_ssa_def *index, nir_ssa_def *offset, ++ struct vtn_ssa_value **inout, const struct glsl_type *type) ++{ ++ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op); ++ instr->num_components = glsl_get_vector_elements(type); ++ ++ int src = 0; ++ if (!load) { ++ instr->const_index[0] = (1 << instr->num_components) - 1; /* write mask */ ++ instr->src[src++] = nir_src_for_ssa((*inout)->def); ++ } ++ ++ /* We set the base and size for push constant load to the entire push ++ * constant block for now. ++ */ ++ if (op == nir_intrinsic_load_push_constant) { ++ instr->const_index[0] = 0; ++ instr->const_index[1] = 128; ++ } ++ ++ if (index) ++ instr->src[src++] = nir_src_for_ssa(index); ++ ++ instr->src[src++] = nir_src_for_ssa(offset); ++ ++ if (load) { ++ nir_ssa_dest_init(&instr->instr, &instr->dest, ++ instr->num_components, NULL); ++ (*inout)->def = &instr->dest.ssa; ++ } ++ ++ nir_builder_instr_insert(&b->nb, &instr->instr); ++ ++ if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL) ++ (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0)); ++} ++ ++static void ++_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load, ++ nir_ssa_def *index, nir_ssa_def *offset, ++ struct vtn_access_chain *chain, unsigned chain_idx, ++ struct vtn_type *type, struct vtn_ssa_value **inout) ++{ ++ if (chain && chain_idx >= chain->length) ++ chain = NULL; ++ ++ if (load && chain == NULL && *inout == NULL) ++ *inout = vtn_create_ssa_value(b, type->type); ++ ++ enum glsl_base_type base_type = glsl_get_base_type(type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ /* This is where things get interesting. At this point, we've hit ++ * a vector, a scalar, or a matrix. 
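++ *
++ * A made-up example of the row-major path below: loading a whole row_major
++ * mat2x3 (2 columns, 3 rows) whose row stride (type->stride) is 16 bytes
++ * issues three vec2 loads at offsets +0, +16 and +32 -- one per row -- and
++ * then transposes the result so the caller still sees two column vectors
++ * of three components each.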
++ */ ++ if (glsl_type_is_matrix(type->type)) { ++ if (chain == NULL) { ++ /* Loading the whole matrix */ ++ struct vtn_ssa_value *transpose; ++ unsigned num_ops, vec_width; ++ if (type->row_major) { ++ num_ops = glsl_get_vector_elements(type->type); ++ vec_width = glsl_get_matrix_columns(type->type); ++ if (load) { ++ const struct glsl_type *transpose_type = ++ glsl_matrix_type(base_type, vec_width, num_ops); ++ *inout = vtn_create_ssa_value(b, transpose_type); ++ } else { ++ transpose = vtn_ssa_transpose(b, *inout); ++ inout = &transpose; ++ } ++ } else { ++ num_ops = glsl_get_matrix_columns(type->type); ++ vec_width = glsl_get_vector_elements(type->type); ++ } ++ ++ for (unsigned i = 0; i < num_ops; i++) { ++ nir_ssa_def *elem_offset = ++ nir_iadd(&b->nb, offset, ++ nir_imm_int(&b->nb, i * type->stride)); ++ _vtn_load_store_tail(b, op, load, index, elem_offset, ++ &(*inout)->elems[i], ++ glsl_vector_type(base_type, vec_width)); ++ } ++ ++ if (load && type->row_major) ++ *inout = vtn_ssa_transpose(b, *inout); ++ } else if (type->row_major) { ++ /* Row-major but with an access chain. */ ++ nir_ssa_def *col_offset = ++ vtn_access_link_as_ssa(b, chain->link[chain_idx], ++ type->array_element->stride); ++ offset = nir_iadd(&b->nb, offset, col_offset); ++ ++ if (chain_idx + 1 < chain->length) { ++ /* Picking off a single element */ ++ nir_ssa_def *row_offset = ++ vtn_access_link_as_ssa(b, chain->link[chain_idx + 1], ++ type->stride); ++ offset = nir_iadd(&b->nb, offset, row_offset); ++ if (load) ++ *inout = vtn_create_ssa_value(b, glsl_scalar_type(base_type)); ++ _vtn_load_store_tail(b, op, load, index, offset, inout, ++ glsl_scalar_type(base_type)); ++ } else { ++ /* Grabbing a column; picking one element off each row */ ++ unsigned num_comps = glsl_get_vector_elements(type->type); ++ const struct glsl_type *column_type = ++ glsl_get_column_type(type->type); ++ ++ nir_ssa_def *comps[4]; ++ for (unsigned i = 0; i < num_comps; i++) { ++ nir_ssa_def *elem_offset = ++ nir_iadd(&b->nb, offset, ++ nir_imm_int(&b->nb, i * type->stride)); ++ ++ struct vtn_ssa_value *comp, temp_val; ++ if (!load) { ++ temp_val.def = nir_channel(&b->nb, (*inout)->def, i); ++ temp_val.type = glsl_scalar_type(base_type); ++ } ++ comp = &temp_val; ++ _vtn_load_store_tail(b, op, load, index, elem_offset, ++ &comp, glsl_scalar_type(base_type)); ++ comps[i] = comp->def; ++ } ++ ++ if (load) { ++ if (*inout == NULL) ++ *inout = vtn_create_ssa_value(b, column_type); ++ ++ (*inout)->def = nir_vec(&b->nb, comps, num_comps); ++ } ++ } ++ } else { ++ /* Column-major with a deref. Fall through to array case. */ ++ nir_ssa_def *col_offset = ++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); ++ offset = nir_iadd(&b->nb, offset, col_offset); ++ ++ _vtn_block_load_store(b, op, load, index, offset, ++ chain, chain_idx + 1, ++ type->array_element, inout); ++ } ++ } else if (chain == NULL) { ++ /* Single whole vector */ ++ assert(glsl_type_is_vector_or_scalar(type->type)); ++ _vtn_load_store_tail(b, op, load, index, offset, inout, type->type); ++ } else { ++ /* Single component of a vector. Fall through to array case.
*/ ++ nir_ssa_def *elem_offset = ++ vtn_access_link_as_ssa(b, chain->link[chain_idx], type->stride); ++ offset = nir_iadd(&b->nb, offset, elem_offset); ++ ++ _vtn_block_load_store(b, op, load, index, offset, NULL, 0, ++ type->array_element, inout); ++ } ++ return; ++ ++ case GLSL_TYPE_ARRAY: { ++ unsigned elems = glsl_get_length(type->type); ++ for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *elem_off = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); ++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, ++ type->array_element, &(*inout)->elems[i]); ++ } ++ return; ++ } ++ ++ case GLSL_TYPE_STRUCT: { ++ unsigned elems = glsl_get_length(type->type); ++ for (unsigned i = 0; i < elems; i++) { ++ nir_ssa_def *elem_off = ++ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); ++ _vtn_block_load_store(b, op, load, index, elem_off, NULL, 0, ++ type->members[i], &(*inout)->elems[i]); ++ } ++ return; ++ } ++ ++ default: ++ unreachable("Invalid block member type"); ++ } ++} ++ ++static struct vtn_ssa_value * ++vtn_block_load(struct vtn_builder *b, struct vtn_access_chain *src) ++{ ++ nir_intrinsic_op op; ++ switch (src->var->mode) { ++ case vtn_variable_mode_ubo: ++ op = nir_intrinsic_load_ubo; ++ break; ++ case vtn_variable_mode_ssbo: ++ op = nir_intrinsic_load_ssbo; ++ break; ++ case vtn_variable_mode_push_constant: ++ op = nir_intrinsic_load_push_constant; ++ break; ++ default: ++ assert(!"Invalid block variable mode"); ++ } ++ ++ nir_ssa_def *offset, *index = NULL; ++ struct vtn_type *type; ++ unsigned chain_idx; ++ offset = vtn_access_chain_to_offset(b, src, &index, &type, &chain_idx, true); ++ ++ struct vtn_ssa_value *value = NULL; ++ _vtn_block_load_store(b, op, true, index, offset, ++ src, chain_idx, type, &value); ++ return value; ++} ++ ++static void ++vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, ++ struct vtn_access_chain *dst) ++{ ++ nir_ssa_def *offset, *index = NULL; ++ struct vtn_type *type; ++ unsigned chain_idx; ++ offset = vtn_access_chain_to_offset(b, dst, &index, &type, &chain_idx, true); ++ ++ _vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset, ++ dst, chain_idx, type, &src); ++} ++ ++static bool ++vtn_variable_is_external_block(struct vtn_variable *var) ++{ ++ return var->mode == vtn_variable_mode_ssbo || ++ var->mode == vtn_variable_mode_ubo || ++ var->mode == vtn_variable_mode_push_constant; ++} ++ ++static void ++_vtn_variable_load_store(struct vtn_builder *b, bool load, ++ struct vtn_access_chain *chain, ++ struct vtn_type *tail_type, ++ struct vtn_ssa_value **inout) ++{ ++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ /* At this point, we have a scalar, vector, or matrix so we know that ++ * there cannot be any structure splitting still in the way. By ++ * stopping at the matrix level rather than the vector level, we ++ * ensure that matrices get loaded in the optimal way even if they ++ * are stored row-major in a UBO.
++ */ ++ if (load) { ++ *inout = vtn_local_load(b, vtn_access_chain_to_deref(b, chain)); ++ } else { ++ vtn_local_store(b, *inout, vtn_access_chain_to_deref(b, chain)); ++ } ++ return; ++ ++ case GLSL_TYPE_ARRAY: ++ case GLSL_TYPE_STRUCT: { ++ struct vtn_access_chain *new_chain = ++ vtn_access_chain_extend(b, chain, 1); ++ new_chain->link[chain->length].mode = vtn_access_mode_literal; ++ unsigned elems = glsl_get_length(tail_type->type); ++ if (load) { ++ assert(*inout == NULL); ++ *inout = rzalloc(b, struct vtn_ssa_value); ++ (*inout)->type = tail_type->type; ++ (*inout)->elems = rzalloc_array(b, struct vtn_ssa_value *, elems); ++ } ++ for (unsigned i = 0; i < elems; i++) { ++ new_chain->link[chain->length].id = i; ++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ? ++ tail_type->array_element : tail_type->members[i]; ++ _vtn_variable_load_store(b, load, new_chain, elem_type, ++ &(*inout)->elems[i]); ++ } ++ return; ++ } ++ ++ default: ++ unreachable("Invalid access chain type"); ++ } ++} ++ ++struct vtn_ssa_value * ++vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src) ++{ ++ if (vtn_variable_is_external_block(src->var)) { ++ return vtn_block_load(b, src); ++ } else { ++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); ++ struct vtn_ssa_value *val = NULL; ++ _vtn_variable_load_store(b, true, src, tail_type, &val); ++ return val; ++ } ++} ++ ++void ++vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, ++ struct vtn_access_chain *dest) ++{ ++ if (vtn_variable_is_external_block(dest->var)) { ++ assert(dest->var->mode == vtn_variable_mode_ssbo); ++ vtn_block_store(b, src, dest); ++ } else { ++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, dest); ++ _vtn_variable_load_store(b, false, dest, tail_type, &src); ++ } ++} ++ ++static void ++_vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, ++ struct vtn_access_chain *src, struct vtn_type *tail_type) ++{ ++ enum glsl_base_type base_type = glsl_get_base_type(tail_type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ /* At this point, we have a scalar, vector, or matrix so we know that ++ * there cannot be any structure splitting still in the way. By ++ * stopping at the matrix level rather than the vector level, we ++ * ensure that matrices get loaded in the optimal way even if they ++ * are stored row-major in a UBO. ++ */ ++ vtn_variable_store(b, vtn_variable_load(b, src), dest); ++ return; ++ ++ case GLSL_TYPE_ARRAY: ++ case GLSL_TYPE_STRUCT: { ++ struct vtn_access_chain *new_src, *new_dest; ++ new_src = vtn_access_chain_extend(b, src, 1); ++ new_dest = vtn_access_chain_extend(b, dest, 1); ++ new_src->link[src->length].mode = vtn_access_mode_literal; ++ new_dest->link[dest->length].mode = vtn_access_mode_literal; ++ unsigned elems = glsl_get_length(tail_type->type); ++ for (unsigned i = 0; i < elems; i++) { ++ new_src->link[src->length].id = i; ++ new_dest->link[dest->length].id = i; ++ struct vtn_type *elem_type = base_type == GLSL_TYPE_ARRAY ?
++ tail_type->array_element : tail_type->members[i]; ++ _vtn_variable_copy(b, new_dest, new_src, elem_type); ++ } ++ return; ++ } ++ ++ default: ++ unreachable("Invalid access chain type"); ++ } ++} ++ ++static void ++vtn_variable_copy(struct vtn_builder *b, struct vtn_access_chain *dest, ++ struct vtn_access_chain *src) ++{ ++ struct vtn_type *tail_type = vtn_access_chain_tail_type(b, src); ++ assert(vtn_access_chain_tail_type(b, dest)->type == tail_type->type); ++ ++ /* TODO: At some point, we should add a special-case for when we can ++ * just emit a copy_var intrinsic. ++ */ ++ _vtn_variable_copy(b, dest, src, tail_type); ++} ++ ++static void ++set_mode_system_value(nir_variable_mode *mode) ++{ ++ assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); ++ *mode = nir_var_system_value; ++} ++ ++static void ++vtn_get_builtin_location(struct vtn_builder *b, ++ SpvBuiltIn builtin, int *location, ++ nir_variable_mode *mode) ++{ ++ switch (builtin) { ++ case SpvBuiltInPosition: ++ *location = VARYING_SLOT_POS; ++ break; ++ case SpvBuiltInPointSize: ++ *location = VARYING_SLOT_PSIZ; ++ break; ++ case SpvBuiltInClipDistance: ++ *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ ++ break; ++ case SpvBuiltInCullDistance: ++ /* XXX figure this out */ ++ break; ++ case SpvBuiltInVertexIndex: ++ *location = SYSTEM_VALUE_VERTEX_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInVertexId: ++ /* Vulkan defines VertexID to be zero-based and reserves the new ++ * builtin keyword VertexIndex to indicate the non-zero-based value. ++ */ ++ *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInInstanceIndex: ++ *location = SYSTEM_VALUE_INSTANCE_INDEX; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInInstanceId: ++ *location = SYSTEM_VALUE_INSTANCE_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInPrimitiveId: ++ *location = VARYING_SLOT_PRIMITIVE_ID; ++ *mode = nir_var_shader_out; ++ break; ++ case SpvBuiltInInvocationId: ++ *location = SYSTEM_VALUE_INVOCATION_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInLayer: ++ *location = VARYING_SLOT_LAYER; ++ *mode = nir_var_shader_out; ++ break; ++ case SpvBuiltInViewportIndex: ++ *location = VARYING_SLOT_VIEWPORT; ++ if (b->shader->stage == MESA_SHADER_GEOMETRY) ++ *mode = nir_var_shader_out; ++ else if (b->shader->stage == MESA_SHADER_FRAGMENT) ++ *mode = nir_var_shader_in; ++ else ++ unreachable("invalid stage for SpvBuiltInViewportIndex"); ++ break; ++ case SpvBuiltInTessLevelOuter: ++ case SpvBuiltInTessLevelInner: ++ case SpvBuiltInTessCoord: ++ case SpvBuiltInPatchVertices: ++ unreachable("no tessellation support"); ++ case SpvBuiltInFragCoord: ++ *location = VARYING_SLOT_POS; ++ assert(*mode == nir_var_shader_in); ++ break; ++ case SpvBuiltInPointCoord: ++ *location = VARYING_SLOT_PNTC; ++ assert(*mode == nir_var_shader_in); ++ break; ++ case SpvBuiltInFrontFacing: ++ *location = VARYING_SLOT_FACE; ++ assert(*mode == nir_var_shader_in); ++ break; ++ case SpvBuiltInSampleId: ++ *location = SYSTEM_VALUE_SAMPLE_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInSamplePosition: ++ *location = SYSTEM_VALUE_SAMPLE_POS; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInSampleMask: ++ *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInFragDepth: ++ *location = FRAG_RESULT_DEPTH; ++ assert(*mode == nir_var_shader_out); ++ break; ++ case SpvBuiltInNumWorkgroups: ++ *location = SYSTEM_VALUE_NUM_WORK_GROUPS; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInWorkgroupSize: ++ /* This should already be handled */ ++ unreachable("unsupported builtin"); ++ break; ++ case SpvBuiltInWorkgroupId: ++ *location = SYSTEM_VALUE_WORK_GROUP_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInLocalInvocationId: ++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInLocalInvocationIndex: ++ *location = SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInGlobalInvocationId: ++ *location = SYSTEM_VALUE_GLOBAL_INVOCATION_ID; ++ set_mode_system_value(mode); ++ break; ++ case SpvBuiltInHelperInvocation: ++ default: ++ unreachable("unsupported builtin"); ++ } ++} ++ ++static void ++var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, ++ const struct vtn_decoration *dec, void *void_var) ++{ ++ struct vtn_variable *vtn_var = void_var; ++ ++ /* Handle decorations that apply to a vtn_variable as a whole */ ++ switch (dec->decoration) { ++ case SpvDecorationBinding: ++ vtn_var->binding = dec->literals[0]; ++ return; ++ case SpvDecorationDescriptorSet: ++ vtn_var->descriptor_set = dec->literals[0]; ++ return; ++ ++ case SpvDecorationLocation: { ++ unsigned location = dec->literals[0]; ++ bool is_vertex_input; ++ if (b->shader->stage == MESA_SHADER_FRAGMENT && ++ vtn_var->mode == vtn_variable_mode_output) { ++ is_vertex_input = false; ++ location += FRAG_RESULT_DATA0; ++ } else if (b->shader->stage == MESA_SHADER_VERTEX && ++ vtn_var->mode == vtn_variable_mode_input) { ++ is_vertex_input = true; ++ location += VERT_ATTRIB_GENERIC0; ++ } else if (vtn_var->mode == vtn_variable_mode_input || ++ vtn_var->mode == vtn_variable_mode_output) { ++ is_vertex_input = false; ++ location += VARYING_SLOT_VAR0; ++ } else { ++ assert(!"Location must be on input or output variable"); ++ } ++ ++ if (vtn_var->var) { ++ vtn_var->var->data.location = location; ++ vtn_var->var->data.explicit_location = true; ++ } else { ++ assert(vtn_var->members); ++ unsigned length = glsl_get_length(vtn_var->type->type); ++ for (unsigned i = 0; i < length; i++) { ++ vtn_var->members[i]->data.location = location; ++ vtn_var->members[i]->data.explicit_location = true; ++ location += ++ glsl_count_attribute_slots(vtn_var->members[i]->interface_type, ++ is_vertex_input); ++ } ++ } ++ return; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* Now we handle decorations that apply to a particular nir_variable */ ++ nir_variable *nir_var = vtn_var->var; ++ if (val->value_type == vtn_value_type_access_chain) { ++ assert(val->access_chain->length == 0); ++ assert(val->access_chain->var == void_var); ++ assert(member == -1); ++ } else { ++ assert(val->value_type == vtn_value_type_type); ++ if (member != -1) ++ nir_var = vtn_var->members[member]; ++ } ++ ++ if (nir_var == NULL) ++ return; ++ ++ switch (dec->decoration) { ++ case SpvDecorationRelaxedPrecision: ++ break; /* FIXME: Do nothing with this for now. 
*/ ++ case SpvDecorationNoPerspective: ++ nir_var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; ++ break; ++ case SpvDecorationFlat: ++ nir_var->data.interpolation = INTERP_QUALIFIER_FLAT; ++ break; ++ case SpvDecorationCentroid: ++ nir_var->data.centroid = true; ++ break; ++ case SpvDecorationSample: ++ nir_var->data.sample = true; ++ break; ++ case SpvDecorationInvariant: ++ nir_var->data.invariant = true; ++ break; ++ case SpvDecorationConstant: ++ assert(nir_var->constant_initializer != NULL); ++ nir_var->data.read_only = true; ++ break; ++ case SpvDecorationNonWritable: ++ nir_var->data.read_only = true; ++ break; ++ case SpvDecorationComponent: ++ nir_var->data.location_frac = dec->literals[0]; ++ break; ++ case SpvDecorationIndex: ++ nir_var->data.explicit_index = true; ++ nir_var->data.index = dec->literals[0]; ++ break; ++ case SpvDecorationBuiltIn: { ++ SpvBuiltIn builtin = dec->literals[0]; ++ ++ if (builtin == SpvBuiltInWorkgroupSize) { ++ /* This shouldn't be a builtin. It's actually a constant. */ ++ nir_var->data.mode = nir_var_global; ++ nir_var->data.read_only = true; ++ ++ nir_constant *c = rzalloc(nir_var, nir_constant); ++ c->value.u[0] = b->shader->info.cs.local_size[0]; ++ c->value.u[1] = b->shader->info.cs.local_size[1]; ++ c->value.u[2] = b->shader->info.cs.local_size[2]; ++ nir_var->constant_initializer = c; ++ break; ++ } ++ ++ nir_variable_mode mode = nir_var->data.mode; ++ vtn_get_builtin_location(b, builtin, &nir_var->data.location, &mode); ++ nir_var->data.explicit_location = true; ++ nir_var->data.mode = mode; ++ ++ if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) ++ nir_var->data.origin_upper_left = b->origin_upper_left; ++ break; ++ } ++ case SpvDecorationRowMajor: ++ case SpvDecorationColMajor: ++ case SpvDecorationGLSLShared: ++ case SpvDecorationPatch: ++ case SpvDecorationRestrict: ++ case SpvDecorationAliased: ++ case SpvDecorationVolatile: ++ case SpvDecorationCoherent: ++ case SpvDecorationNonReadable: ++ case SpvDecorationUniform: ++ /* This is really nice but we have no use for it right now. */ ++ case SpvDecorationCPacked: ++ case SpvDecorationSaturatedConversion: ++ case SpvDecorationStream: ++ case SpvDecorationOffset: ++ case SpvDecorationXfbBuffer: ++ case SpvDecorationFuncParamAttr: ++ case SpvDecorationFPRoundingMode: ++ case SpvDecorationFPFastMathMode: ++ case SpvDecorationLinkageAttributes: ++ case SpvDecorationSpecId: ++ break; ++ default: ++ unreachable("Unhandled variable decoration"); ++ } ++} ++ ++/* Tries to compute the size of an interface block based on the strides and ++ * offsets that are provided to us in the SPIR-V source. ++ */ ++static unsigned ++vtn_type_block_size(struct vtn_type *type) ++{ ++ enum glsl_base_type base_type = glsl_get_base_type(type->type); ++ switch (base_type) { ++ case GLSL_TYPE_UINT: ++ case GLSL_TYPE_INT: ++ case GLSL_TYPE_FLOAT: ++ case GLSL_TYPE_BOOL: ++ case GLSL_TYPE_DOUBLE: { ++ unsigned cols = type->row_major ? 
glsl_get_vector_elements(type->type) : ++ glsl_get_matrix_columns(type->type); ++ if (cols > 1) { ++ assert(type->stride > 0); ++ return type->stride * cols; ++ } else if (base_type == GLSL_TYPE_DOUBLE) { ++ return glsl_get_vector_elements(type->type) * 8; ++ } else { ++ return glsl_get_vector_elements(type->type) * 4; ++ } ++ } ++ ++ case GLSL_TYPE_STRUCT: ++ case GLSL_TYPE_INTERFACE: { ++ unsigned size = 0; ++ unsigned num_fields = glsl_get_length(type->type); ++ for (unsigned f = 0; f < num_fields; f++) { ++ unsigned field_end = type->offsets[f] + ++ vtn_type_block_size(type->members[f]); ++ size = MAX2(size, field_end); ++ } ++ return size; ++ } ++ ++ case GLSL_TYPE_ARRAY: ++ assert(type->stride > 0); ++ assert(glsl_get_length(type->type) > 0); ++ return type->stride * glsl_get_length(type->type); ++ ++ default: ++ assert(!"Invalid block type"); ++ return 0; ++ } ++} ++ ++void ++vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, ++ const uint32_t *w, unsigned count) ++{ ++ switch (opcode) { ++ case SpvOpVariable: { ++ struct vtn_variable *var = rzalloc(b, struct vtn_variable); ++ var->type = vtn_value(b, w[1], vtn_value_type_type)->type; ++ ++ var->chain.var = var; ++ var->chain.length = 0; ++ ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_access_chain); ++ val->access_chain = &var->chain; ++ ++ struct vtn_type *without_array = var->type; ++ while(glsl_type_is_array(without_array->type)) ++ without_array = without_array->array_element; ++ ++ nir_variable_mode nir_mode; ++ switch ((SpvStorageClass)w[3]) { ++ case SpvStorageClassUniform: ++ case SpvStorageClassUniformConstant: ++ if (without_array->block) { ++ var->mode = vtn_variable_mode_ubo; ++ b->shader->info.num_ubos++; ++ } else if (without_array->buffer_block) { ++ var->mode = vtn_variable_mode_ssbo; ++ b->shader->info.num_ssbos++; ++ } else if (glsl_type_is_image(without_array->type)) { ++ var->mode = vtn_variable_mode_image; ++ nir_mode = nir_var_uniform; ++ b->shader->info.num_images++; ++ } else if (glsl_type_is_sampler(without_array->type)) { ++ var->mode = vtn_variable_mode_sampler; ++ nir_mode = nir_var_uniform; ++ b->shader->info.num_textures++; ++ } else { ++ assert(!"Invalid uniform variable type"); ++ } ++ break; ++ case SpvStorageClassPushConstant: ++ var->mode = vtn_variable_mode_push_constant; ++ assert(b->shader->num_uniforms == 0); ++ b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; ++ break; ++ case SpvStorageClassInput: ++ var->mode = vtn_variable_mode_input; ++ nir_mode = nir_var_shader_in; ++ break; ++ case SpvStorageClassOutput: ++ var->mode = vtn_variable_mode_output; ++ nir_mode = nir_var_shader_out; ++ break; ++ case SpvStorageClassPrivate: ++ var->mode = vtn_variable_mode_global; ++ nir_mode = nir_var_global; ++ break; ++ case SpvStorageClassFunction: ++ var->mode = vtn_variable_mode_local; ++ nir_mode = nir_var_local; ++ break; ++ case SpvStorageClassWorkgroup: ++ var->mode = vtn_variable_mode_workgroup; ++ nir_mode = nir_var_shared; ++ break; ++ case SpvStorageClassCrossWorkgroup: ++ case SpvStorageClassGeneric: ++ case SpvStorageClassAtomicCounter: ++ default: ++ unreachable("Unhandled variable storage class"); ++ } ++ ++ switch (var->mode) { ++ case vtn_variable_mode_local: ++ case vtn_variable_mode_global: ++ case vtn_variable_mode_image: ++ case vtn_variable_mode_sampler: ++ case vtn_variable_mode_workgroup: ++ /* For these, we create the variable normally */ ++ var->var = rzalloc(b->shader, nir_variable); ++ var->var->name = ralloc_strdup(var->var, 
val->name); ++ var->var->type = var->type->type; ++ var->var->data.mode = nir_mode; ++ ++ switch (var->mode) { ++ case vtn_variable_mode_image: ++ case vtn_variable_mode_sampler: ++ var->var->interface_type = without_array->type; ++ break; ++ default: ++ var->var->interface_type = NULL; ++ break; ++ } ++ break; ++ ++ case vtn_variable_mode_input: ++ case vtn_variable_mode_output: { ++ /* For inputs and outputs, we immediately split structures. This ++ * is for a couple of reasons. For one, builtins may all come in ++ * a struct and we really want those split out into separate ++ * variables. For another, interpolation qualifiers can be ++ * applied to members of the top-level struct and we need to be ++ * able to preserve that information. ++ */ ++ ++ int array_length = -1; ++ struct vtn_type *interface_type = var->type; ++ if (b->shader->stage == MESA_SHADER_GEOMETRY && ++ glsl_type_is_array(var->type->type)) { ++ /* In Geometry shaders (and some tessellation), inputs come ++ * in per-vertex arrays. However, some builtins come in ++ * non-per-vertex, hence the need for the is_array check. In ++ * any case, there are no non-builtin arrays allowed so this ++ * check should be sufficient. ++ */ ++ interface_type = var->type->array_element; ++ array_length = glsl_get_length(var->type->type); ++ } ++ ++ if (glsl_type_is_struct(interface_type->type)) { ++ /* It's a struct. Split it. */ ++ unsigned num_members = glsl_get_length(interface_type->type); ++ var->members = ralloc_array(b, nir_variable *, num_members); ++ ++ for (unsigned i = 0; i < num_members; i++) { ++ const struct glsl_type *mtype = interface_type->members[i]->type; ++ if (array_length >= 0) ++ mtype = glsl_array_type(mtype, array_length); ++ ++ var->members[i] = rzalloc(b->shader, nir_variable); ++ var->members[i]->name = ++ ralloc_asprintf(var->members[i], "%s.%d", val->name, i); ++ var->members[i]->type = mtype; ++ var->members[i]->interface_type = ++ interface_type->members[i]->type; ++ var->members[i]->data.mode = nir_mode; ++ } ++ } else { ++ var->var = rzalloc(b->shader, nir_variable); ++ var->var->name = ralloc_strdup(var->var, val->name); ++ var->var->type = var->type->type; ++ var->var->interface_type = interface_type->type; ++ var->var->data.mode = nir_mode; ++ } ++ ++ /* For inputs and outputs, we need to grab locations and builtin ++ * information from the interface type. ++ */ ++ vtn_foreach_decoration(b, interface_type->val, var_decoration_cb, var); ++ break; ++ ++ case vtn_variable_mode_param: ++ unreachable("Not created through OpVariable"); ++ } ++ ++ case vtn_variable_mode_ubo: ++ case vtn_variable_mode_ssbo: ++ case vtn_variable_mode_push_constant: ++ /* These don't need actual variables. */ ++ break; ++ } ++ ++ if (count > 4) { ++ assert(count == 5); ++ nir_constant *constant = ++ vtn_value(b, w[4], vtn_value_type_constant)->constant; ++ var->var->constant_initializer = ++ nir_constant_clone(constant, var->var); ++ } ++ ++ vtn_foreach_decoration(b, val, var_decoration_cb, var); ++ ++ if (var->mode == vtn_variable_mode_image || ++ var->mode == vtn_variable_mode_sampler) { ++ /* XXX: We still need the binding information in the nir_variable ++ * for these. We should fix that.
++ */ ++ var->var->data.binding = var->binding; ++ var->var->data.descriptor_set = var->descriptor_set; ++ ++ if (var->mode == vtn_variable_mode_image) ++ var->var->data.image.format = without_array->image_format; ++ } ++ ++ if (var->mode == vtn_variable_mode_local) { ++ assert(var->members == NULL && var->var != NULL); ++ nir_function_impl_add_variable(b->impl, var->var); ++ } else if (var->var) { ++ nir_shader_add_variable(b->shader, var->var); ++ } else if (var->members) { ++ unsigned count = glsl_get_length(without_array->type); ++ for (unsigned i = 0; i < count; i++) { ++ assert(var->members[i]->data.mode != nir_var_local); ++ nir_shader_add_variable(b->shader, var->members[i]); ++ } ++ } else { ++ assert(var->mode == vtn_variable_mode_ubo || ++ var->mode == vtn_variable_mode_ssbo || ++ var->mode == vtn_variable_mode_push_constant); ++ } ++ break; ++ } ++ ++ case SpvOpAccessChain: ++ case SpvOpInBoundsAccessChain: { ++ struct vtn_access_chain *base, *chain; ++ struct vtn_value *base_val = vtn_untyped_value(b, w[3]); ++ if (base_val->value_type == vtn_value_type_sampled_image) { ++ /* This is rather insane. SPIR-V allows you to use OpSampledImage ++ * to combine an array of images with a single sampler to get an ++ * array of sampled images that all share the same sampler. ++ * Fortunately, this means that we can more-or-less ignore the ++ * sampler when crawling the access chain, but it does leave us ++ * with this rather awkward little special-case. ++ */ ++ base = base_val->sampled_image->image; ++ } else { ++ assert(base_val->value_type == vtn_value_type_access_chain); ++ base = base_val->access_chain; ++ } ++ ++ chain = vtn_access_chain_extend(b, base, count - 4); ++ ++ unsigned idx = base->length; ++ for (int i = 4; i < count; i++) { ++ struct vtn_value *link_val = vtn_untyped_value(b, w[i]); ++ if (link_val->value_type == vtn_value_type_constant) { ++ chain->link[idx].mode = vtn_access_mode_literal; ++ chain->link[idx].id = link_val->constant->value.u[0]; ++ } else { ++ chain->link[idx].mode = vtn_access_mode_id; ++ chain->link[idx].id = w[i]; ++ } ++ idx++; ++ } ++ ++ if (base_val->value_type == vtn_value_type_sampled_image) { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_sampled_image); ++ val->sampled_image = ralloc(b, struct vtn_sampled_image); ++ val->sampled_image->image = chain; ++ val->sampled_image->sampler = base_val->sampled_image->sampler; ++ } else { ++ struct vtn_value *val = ++ vtn_push_value(b, w[2], vtn_value_type_access_chain); ++ val->access_chain = chain; ++ } ++ break; ++ } ++ ++ case SpvOpCopyMemory: { ++ struct vtn_value *dest = vtn_value(b, w[1], vtn_value_type_access_chain); ++ struct vtn_value *src = vtn_value(b, w[2], vtn_value_type_access_chain); ++ ++ vtn_variable_copy(b, dest->access_chain, src->access_chain); ++ break; ++ } ++ ++ case SpvOpLoad: { ++ struct vtn_access_chain *src = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ ++ if (src->var->mode == vtn_variable_mode_image || ++ src->var->mode == vtn_variable_mode_sampler) { ++ vtn_push_value(b, w[2], vtn_value_type_access_chain)->access_chain = src; ++ return; ++ } ++ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = vtn_variable_load(b, src); ++ break; ++ } ++ ++ case SpvOpStore: { ++ struct vtn_access_chain *dest = ++ vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain; ++ struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); ++ vtn_variable_store(b, src, dest); ++ break; ++ } ++ ++ case SpvOpArrayLength: 
{ ++ struct vtn_access_chain *chain = ++ vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain; ++ ++ const uint32_t offset = chain->var->type->offsets[w[4]]; ++ const uint32_t stride = chain->var->type->members[w[4]]->stride; ++ ++ unsigned chain_idx; ++ struct vtn_type *type; ++ nir_ssa_def *index = ++ get_vulkan_resource_index(b, chain, &type, &chain_idx); ++ ++ nir_intrinsic_instr *instr = ++ nir_intrinsic_instr_create(b->nb.shader, ++ nir_intrinsic_get_buffer_size); ++ instr->src[0] = nir_src_for_ssa(index); ++ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); ++ nir_builder_instr_insert(&b->nb, &instr->instr); ++ nir_ssa_def *buf_size = &instr->dest.ssa; ++ ++ /* array_length = max(buffer_size - offset, 0) / stride */ ++ nir_ssa_def *array_length = ++ nir_idiv(&b->nb, ++ nir_imax(&b->nb, ++ nir_isub(&b->nb, ++ buf_size, ++ nir_imm_int(&b->nb, offset)), ++ nir_imm_int(&b->nb, 0u)), ++ nir_imm_int(&b->nb, stride)); ++ ++ struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); ++ val->ssa = vtn_create_ssa_value(b, glsl_uint_type()); ++ val->ssa->def = array_length; ++ break; ++ } ++ ++ case SpvOpCopyMemorySized: ++ default: ++ unreachable("Unhandled opcode"); ++ } ++} diff --cc src/compiler/nir/spirv2nir.c index 00000000000,00000000000..c837186bdfc new file mode 100644 --- /dev/null +++ b/src/compiler/nir/spirv2nir.c @@@ -1,0 -1,0 +1,55 @@@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Jason Ekstrand (jason@jlekstrand.net) ++ * ++ */ ++ ++/* ++ * A simple executable that opens a SPIR-V shader, converts it to NIR, and ++ * dumps out the result. This should be useful for testing the ++ * spirv_to_nir code. 
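++ *
++ * Typical usage (the file name is just an example):
++ *
++ *    spirv2nir my_shader.frag.spv
++ *
++ * The module is currently treated as a fragment shader with an entry point
++ * named "main", and the resulting NIR is printed to stderr.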
++ */ ++ ++#include "spirv/nir_spirv.h" ++ ++#include ++#include ++#include ++#include ++ ++int main(int argc, char **argv) ++{ ++ int fd = open(argv[1], O_RDONLY); ++ off_t len = lseek(fd, 0, SEEK_END); ++ ++ assert(len % 4 == 0); ++ size_t word_count = len / 4; ++ ++ const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); ++ assert(map != NULL); ++ ++ nir_function *func = spirv_to_nir(map, word_count, NULL, 0, ++ MESA_SHADER_FRAGMENT, "main", NULL); ++ nir_print_shader(func->shader, stderr); ++} diff --cc src/compiler/nir_types.cpp index 00000000000,a87dcd8dc6a..00703fe6f52 mode 000000,100644..100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@@ -1,0 -1,192 +1,336 @@@ + /* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #include "nir_types.h" + #include "compiler/glsl/ir.h" + + void + glsl_print_type(const glsl_type *type, FILE *fp) + { + if (type->base_type == GLSL_TYPE_ARRAY) { + glsl_print_type(type->fields.array, fp); + fprintf(fp, "[%u]", type->length); + } else if ((type->base_type == GLSL_TYPE_STRUCT) + && !is_gl_identifier(type->name)) { + fprintf(fp, "%s@%p", type->name, (void *) type); + } else { + fprintf(fp, "%s", type->name); + } + } + + void + glsl_print_struct(const glsl_type *type, FILE *fp) + { + assert(type->base_type == GLSL_TYPE_STRUCT); + + fprintf(fp, "struct {\n"); + for (unsigned i = 0; i < type->length; i++) { + fprintf(fp, "\t"); + glsl_print_type(type->fields.structure[i].type, fp); + fprintf(fp, " %s;\n", type->fields.structure[i].name); + } + fprintf(fp, "}\n"); + } + + const glsl_type * + glsl_get_array_element(const glsl_type* type) + { + if (type->is_matrix()) + return type->column_type(); + return type->fields.array; + } + + const glsl_type * + glsl_get_struct_field(const glsl_type *type, unsigned index) + { + return type->fields.structure[index].type; + } + ++const glsl_type * ++glsl_get_function_return_type(const glsl_type *type) ++{ ++ return type->fields.parameters[0].type; ++} ++ ++const glsl_function_param * ++glsl_get_function_param(const glsl_type *type, unsigned index) ++{ ++ return &type->fields.parameters[index + 1]; ++} ++ + const struct glsl_type * + glsl_get_column_type(const struct glsl_type *type) + { + return type->column_type(); + } + + enum glsl_base_type + glsl_get_base_type(const struct glsl_type *type) + { + return type->base_type; + } + + unsigned + glsl_get_vector_elements(const struct glsl_type *type) + { + return type->vector_elements; + } + + unsigned + glsl_get_components(const struct glsl_type *type) + { + return type->components(); + } + + unsigned + glsl_get_matrix_columns(const struct glsl_type *type) + { + return type->matrix_columns; + } + + unsigned + glsl_get_length(const struct glsl_type *type) + { + return type->is_matrix() ? 
type->matrix_columns : type->length; + } + + unsigned + glsl_get_aoa_size(const struct glsl_type *type) + { + return type->arrays_of_arrays_size(); + } + ++unsigned ++glsl_count_attribute_slots(const struct glsl_type *type, ++ bool vertex_input_slots) ++{ ++ return type->count_attribute_slots(vertex_input_slots); ++} ++ + const char * + glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) + { + return type->fields.structure[index].name; + } + ++glsl_sampler_dim ++glsl_get_sampler_dim(const struct glsl_type *type) ++{ ++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); ++ return (glsl_sampler_dim)type->sampler_dimensionality; ++} ++ ++glsl_base_type ++glsl_get_sampler_result_type(const struct glsl_type *type) ++{ ++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); ++ return (glsl_base_type)type->sampler_type; ++} ++ + unsigned + glsl_get_record_location_offset(const struct glsl_type *type, + unsigned length) + { + return type->record_location_offset(length); + } + + bool + glsl_type_is_void(const glsl_type *type) + { + return type->is_void(); + } + ++bool ++glsl_type_is_error(const glsl_type *type) ++{ ++ return type->is_error(); ++} ++ + bool + glsl_type_is_vector(const struct glsl_type *type) + { + return type->is_vector(); + } + + bool + glsl_type_is_scalar(const struct glsl_type *type) + { + return type->is_scalar(); + } + + bool + glsl_type_is_vector_or_scalar(const struct glsl_type *type) + { + return type->is_vector() || type->is_scalar(); + } + + bool + glsl_type_is_matrix(const struct glsl_type *type) + { + return type->is_matrix(); + } + ++bool ++glsl_type_is_array(const struct glsl_type *type) ++{ ++ return type->is_array(); ++} ++ ++bool ++glsl_type_is_struct(const struct glsl_type *type) ++{ ++ return type->is_record() || type->is_interface(); ++} ++ ++bool ++glsl_type_is_sampler(const struct glsl_type *type) ++{ ++ return type->is_sampler(); ++} ++ ++bool ++glsl_type_is_image(const struct glsl_type *type) ++{ ++ return type->is_image(); ++} ++ ++bool ++glsl_sampler_type_is_shadow(const struct glsl_type *type) ++{ ++ assert(glsl_type_is_sampler(type)); ++ return type->sampler_shadow; ++} ++ ++bool ++glsl_sampler_type_is_array(const struct glsl_type *type) ++{ ++ assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); ++ return type->sampler_array; ++} ++ + const glsl_type * + glsl_void_type(void) + { + return glsl_type::void_type; + } + + const glsl_type * + glsl_float_type(void) + { + return glsl_type::float_type; + } + + const glsl_type * + glsl_vec_type(unsigned n) + { + return glsl_type::vec(n); + } + + const glsl_type * + glsl_vec4_type(void) + { + return glsl_type::vec4_type; + } + ++const glsl_type * ++glsl_int_type(void) ++{ ++ return glsl_type::int_type; ++} ++ + const glsl_type * + glsl_uint_type(void) + { + return glsl_type::uint_type; + } + ++const glsl_type * ++glsl_bool_type(void) ++{ ++ return glsl_type::bool_type; ++} ++ ++const glsl_type * ++glsl_scalar_type(enum glsl_base_type base_type) ++{ ++ return glsl_type::get_instance(base_type, 1, 1); ++} ++ ++const glsl_type * ++glsl_vector_type(enum glsl_base_type base_type, unsigned components) ++{ ++ assert(components > 1 && components <= 4); ++ return glsl_type::get_instance(base_type, components, 1); ++} ++ ++const glsl_type * ++glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) ++{ ++ assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4); ++ return glsl_type::get_instance(base_type, rows, columns); ++} ++ + const 
glsl_type * + glsl_array_type(const glsl_type *base, unsigned elements) + { + return glsl_type::get_array_instance(base, elements); + } ++ ++const glsl_type * ++glsl_struct_type(const glsl_struct_field *fields, ++ unsigned num_fields, const char *name) ++{ ++ return glsl_type::get_record_instance(fields, num_fields, name); ++} ++ ++const struct glsl_type * ++glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, ++ enum glsl_base_type base_type) ++{ ++ return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); ++} ++ ++const struct glsl_type * ++glsl_image_type(enum glsl_sampler_dim dim, bool is_array, ++ enum glsl_base_type base_type) ++{ ++ return glsl_type::get_image_instance(dim, is_array, base_type); ++} ++ ++const glsl_type * ++glsl_function_type(const glsl_type *return_type, ++ const glsl_function_param *params, unsigned num_params) ++{ ++ return glsl_type::get_function_instance(return_type, params, num_params); ++} ++ ++const glsl_type * ++glsl_transposed_type(const struct glsl_type *type) ++{ ++ return glsl_type::get_instance(type->base_type, type->matrix_columns, ++ type->vector_elements); ++} diff --cc src/compiler/nir_types.h index 00000000000,32fc7661159..4ef0dcf9a31 mode 000000,100644..100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@@ -1,0 -1,87 +1,127 @@@ + /* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + + #pragma once + + #include <stdio.h> + #include <stdbool.h> + + /* C wrapper around compiler/glsl_types.h */ + + #include "glsl_types.h" + + #ifdef __cplusplus + extern "C" { + #else + struct glsl_type; + #endif + + void glsl_print_type(const struct glsl_type *type, FILE *fp); + void glsl_print_struct(const struct glsl_type *type, FILE *fp); + + const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type, + unsigned index); + + const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); + + const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); + ++const struct glsl_type * ++glsl_get_function_return_type(const struct glsl_type *type); ++ ++const struct glsl_function_param * ++glsl_get_function_param(const struct glsl_type *type, unsigned index); ++ + enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); + + unsigned glsl_get_vector_elements(const struct glsl_type *type); + + unsigned glsl_get_components(const struct glsl_type *type); + + unsigned glsl_get_matrix_columns(const struct glsl_type *type); + + unsigned glsl_get_length(const struct glsl_type *type); + + unsigned glsl_get_aoa_size(const struct glsl_type *type); + ++unsigned glsl_count_attribute_slots(const struct glsl_type *type, ++ bool vertex_input_slots); ++ + const char *glsl_get_struct_elem_name(const struct glsl_type *type, + unsigned index); + ++enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); ++enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); ++ + unsigned glsl_get_record_location_offset(const struct glsl_type *type, + unsigned length); + + bool glsl_type_is_void(const struct glsl_type *type); ++bool glsl_type_is_error(const struct glsl_type *type); + bool glsl_type_is_vector(const struct glsl_type *type); + bool glsl_type_is_scalar(const struct glsl_type *type); + bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); + bool glsl_type_is_matrix(const struct glsl_type *type); ++bool glsl_type_is_array(const struct glsl_type *type); ++bool glsl_type_is_struct(const struct glsl_type *type); ++bool glsl_type_is_sampler(const struct glsl_type *type); ++bool glsl_type_is_image(const struct glsl_type *type); ++bool glsl_sampler_type_is_shadow(const struct glsl_type *type); ++bool glsl_sampler_type_is_array(const struct glsl_type *type); + + const struct glsl_type *glsl_void_type(void); + const struct glsl_type *glsl_float_type(void); + const struct glsl_type *glsl_vec_type(unsigned n); + const struct glsl_type *glsl_vec4_type(void); ++const struct glsl_type *glsl_int_type(void); + const struct glsl_type *glsl_uint_type(void); ++const struct glsl_type *glsl_bool_type(void); ++ ++const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type); ++const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, ++ unsigned components); ++const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, ++ unsigned rows, unsigned columns); + const struct glsl_type *glsl_array_type(const struct glsl_type *base, + unsigned elements); ++const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, ++ unsigned num_fields, const char *name); ++const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, ++ bool is_shadow, bool is_array, ++ enum glsl_base_type base_type); ++const struct glsl_type *glsl_image_type(enum glsl_sampler_dim dim, ++ bool is_array, ++ enum glsl_base_type base_type); ++const struct
glsl_type * glsl_function_type(const struct glsl_type *return_type, ++ const struct glsl_function_param *params, ++ unsigned num_params); ++ ++const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); + + #ifdef __cplusplus + } + #endif diff --cc src/compiler/shader_enums.c index 00000000000,942d152b129..ff2f564dc98 mode 000000,100644..100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@@ -1,0 -1,257 +1,260 @@@ + /* + * Mesa 3-D graphics library + * + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Rob Clark + */ + + #include "shader_enums.h" + #include "util/macros.h" + #include "mesa/main/config.h" + + #define ENUM(x) [x] = #x + #define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN") + + const char * + gl_shader_stage_name(gl_shader_stage stage) + { + static const char *names[] = { + ENUM(MESA_SHADER_VERTEX), + ENUM(MESA_SHADER_TESS_CTRL), + ENUM(MESA_SHADER_TESS_EVAL), + ENUM(MESA_SHADER_GEOMETRY), + ENUM(MESA_SHADER_FRAGMENT), + ENUM(MESA_SHADER_COMPUTE), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == MESA_SHADER_STAGES); + return NAME(stage); + } + + /** + * Translate a gl_shader_stage to a short shader stage name for debug + * printouts and error messages. + */ + const char * + _mesa_shader_stage_to_string(unsigned stage) + { + switch (stage) { + case MESA_SHADER_VERTEX: return "vertex"; + case MESA_SHADER_FRAGMENT: return "fragment"; + case MESA_SHADER_GEOMETRY: return "geometry"; + case MESA_SHADER_COMPUTE: return "compute"; + case MESA_SHADER_TESS_CTRL: return "tessellation control"; + case MESA_SHADER_TESS_EVAL: return "tessellation evaluation"; + } + + unreachable("Unknown shader stage."); + } + + /** + * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS) + * for debug printouts and error messages. 
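+ *
+ * For example, _mesa_shader_stage_to_abbrev(MESA_SHADER_TESS_CTRL) returns
+ * "TCS".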
+ */ + const char * + _mesa_shader_stage_to_abbrev(unsigned stage) + { + switch (stage) { + case MESA_SHADER_VERTEX: return "VS"; + case MESA_SHADER_FRAGMENT: return "FS"; + case MESA_SHADER_GEOMETRY: return "GS"; + case MESA_SHADER_COMPUTE: return "CS"; + case MESA_SHADER_TESS_CTRL: return "TCS"; + case MESA_SHADER_TESS_EVAL: return "TES"; + } + + unreachable("Unknown shader stage."); + } + + const char * + gl_vert_attrib_name(gl_vert_attrib attrib) + { + static const char *names[] = { + ENUM(VERT_ATTRIB_POS), + ENUM(VERT_ATTRIB_WEIGHT), + ENUM(VERT_ATTRIB_NORMAL), + ENUM(VERT_ATTRIB_COLOR0), + ENUM(VERT_ATTRIB_COLOR1), + ENUM(VERT_ATTRIB_FOG), + ENUM(VERT_ATTRIB_COLOR_INDEX), + ENUM(VERT_ATTRIB_EDGEFLAG), + ENUM(VERT_ATTRIB_TEX0), + ENUM(VERT_ATTRIB_TEX1), + ENUM(VERT_ATTRIB_TEX2), + ENUM(VERT_ATTRIB_TEX3), + ENUM(VERT_ATTRIB_TEX4), + ENUM(VERT_ATTRIB_TEX5), + ENUM(VERT_ATTRIB_TEX6), + ENUM(VERT_ATTRIB_TEX7), + ENUM(VERT_ATTRIB_POINT_SIZE), + ENUM(VERT_ATTRIB_GENERIC0), + ENUM(VERT_ATTRIB_GENERIC1), + ENUM(VERT_ATTRIB_GENERIC2), + ENUM(VERT_ATTRIB_GENERIC3), + ENUM(VERT_ATTRIB_GENERIC4), + ENUM(VERT_ATTRIB_GENERIC5), + ENUM(VERT_ATTRIB_GENERIC6), + ENUM(VERT_ATTRIB_GENERIC7), + ENUM(VERT_ATTRIB_GENERIC8), + ENUM(VERT_ATTRIB_GENERIC9), + ENUM(VERT_ATTRIB_GENERIC10), + ENUM(VERT_ATTRIB_GENERIC11), + ENUM(VERT_ATTRIB_GENERIC12), + ENUM(VERT_ATTRIB_GENERIC13), + ENUM(VERT_ATTRIB_GENERIC14), + ENUM(VERT_ATTRIB_GENERIC15), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == VERT_ATTRIB_MAX); + return NAME(attrib); + } + + const char * + gl_varying_slot_name(gl_varying_slot slot) + { + static const char *names[] = { + ENUM(VARYING_SLOT_POS), + ENUM(VARYING_SLOT_COL0), + ENUM(VARYING_SLOT_COL1), + ENUM(VARYING_SLOT_FOGC), + ENUM(VARYING_SLOT_TEX0), + ENUM(VARYING_SLOT_TEX1), + ENUM(VARYING_SLOT_TEX2), + ENUM(VARYING_SLOT_TEX3), + ENUM(VARYING_SLOT_TEX4), + ENUM(VARYING_SLOT_TEX5), + ENUM(VARYING_SLOT_TEX6), + ENUM(VARYING_SLOT_TEX7), + ENUM(VARYING_SLOT_PSIZ), + ENUM(VARYING_SLOT_BFC0), + ENUM(VARYING_SLOT_BFC1), + ENUM(VARYING_SLOT_EDGE), + ENUM(VARYING_SLOT_CLIP_VERTEX), + ENUM(VARYING_SLOT_CLIP_DIST0), + ENUM(VARYING_SLOT_CLIP_DIST1), + ENUM(VARYING_SLOT_PRIMITIVE_ID), + ENUM(VARYING_SLOT_LAYER), + ENUM(VARYING_SLOT_VIEWPORT), + ENUM(VARYING_SLOT_FACE), + ENUM(VARYING_SLOT_PNTC), + ENUM(VARYING_SLOT_TESS_LEVEL_OUTER), + ENUM(VARYING_SLOT_TESS_LEVEL_INNER), + ENUM(VARYING_SLOT_VAR0), + ENUM(VARYING_SLOT_VAR1), + ENUM(VARYING_SLOT_VAR2), + ENUM(VARYING_SLOT_VAR3), + ENUM(VARYING_SLOT_VAR4), + ENUM(VARYING_SLOT_VAR5), + ENUM(VARYING_SLOT_VAR6), + ENUM(VARYING_SLOT_VAR7), + ENUM(VARYING_SLOT_VAR8), + ENUM(VARYING_SLOT_VAR9), + ENUM(VARYING_SLOT_VAR10), + ENUM(VARYING_SLOT_VAR11), + ENUM(VARYING_SLOT_VAR12), + ENUM(VARYING_SLOT_VAR13), + ENUM(VARYING_SLOT_VAR14), + ENUM(VARYING_SLOT_VAR15), + ENUM(VARYING_SLOT_VAR16), + ENUM(VARYING_SLOT_VAR17), + ENUM(VARYING_SLOT_VAR18), + ENUM(VARYING_SLOT_VAR19), + ENUM(VARYING_SLOT_VAR20), + ENUM(VARYING_SLOT_VAR21), + ENUM(VARYING_SLOT_VAR22), + ENUM(VARYING_SLOT_VAR23), + ENUM(VARYING_SLOT_VAR24), + ENUM(VARYING_SLOT_VAR25), + ENUM(VARYING_SLOT_VAR26), + ENUM(VARYING_SLOT_VAR27), + ENUM(VARYING_SLOT_VAR28), + ENUM(VARYING_SLOT_VAR29), + ENUM(VARYING_SLOT_VAR30), + ENUM(VARYING_SLOT_VAR31), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == VARYING_SLOT_MAX); + return NAME(slot); + } + + const char * + gl_system_value_name(gl_system_value sysval) + { + static const char *names[] = { + ENUM(SYSTEM_VALUE_VERTEX_ID), + ENUM(SYSTEM_VALUE_INSTANCE_ID), ++ 
ENUM(SYSTEM_VALUE_INSTANCE_INDEX), + ENUM(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE), + ENUM(SYSTEM_VALUE_BASE_VERTEX), + ENUM(SYSTEM_VALUE_INVOCATION_ID), + ENUM(SYSTEM_VALUE_FRONT_FACE), + ENUM(SYSTEM_VALUE_SAMPLE_ID), + ENUM(SYSTEM_VALUE_SAMPLE_POS), + ENUM(SYSTEM_VALUE_SAMPLE_MASK_IN), + ENUM(SYSTEM_VALUE_TESS_COORD), + ENUM(SYSTEM_VALUE_VERTICES_IN), + ENUM(SYSTEM_VALUE_PRIMITIVE_ID), + ENUM(SYSTEM_VALUE_TESS_LEVEL_OUTER), + ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER), + ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID), ++ ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX), ++ ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID), + ENUM(SYSTEM_VALUE_WORK_GROUP_ID), + ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS), + ENUM(SYSTEM_VALUE_VERTEX_CNT), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX); + return NAME(sysval); + } + + const char * + glsl_interp_qualifier_name(enum glsl_interp_qualifier qual) + { + static const char *names[] = { + ENUM(INTERP_QUALIFIER_NONE), + ENUM(INTERP_QUALIFIER_SMOOTH), + ENUM(INTERP_QUALIFIER_FLAT), + ENUM(INTERP_QUALIFIER_NOPERSPECTIVE), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == INTERP_QUALIFIER_COUNT); + return NAME(qual); + } + + const char * + gl_frag_result_name(gl_frag_result result) + { + static const char *names[] = { + ENUM(FRAG_RESULT_DEPTH), + ENUM(FRAG_RESULT_STENCIL), + ENUM(FRAG_RESULT_COLOR), + ENUM(FRAG_RESULT_SAMPLE_MASK), + ENUM(FRAG_RESULT_DATA0), + ENUM(FRAG_RESULT_DATA1), + ENUM(FRAG_RESULT_DATA2), + ENUM(FRAG_RESULT_DATA3), + ENUM(FRAG_RESULT_DATA4), + ENUM(FRAG_RESULT_DATA5), + ENUM(FRAG_RESULT_DATA6), + ENUM(FRAG_RESULT_DATA7), + }; + STATIC_ASSERT(ARRAY_SIZE(names) == FRAG_RESULT_MAX); + return NAME(result); + } diff --cc src/compiler/shader_enums.h index 00000000000,efc0b0d515e..e3f46e3d739 mode 000000,100644..100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@@ -1,0 -1,542 +1,551 @@@ + /* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + #ifndef SHADER_ENUMS_H + #define SHADER_ENUMS_H + + #ifdef __cplusplus + extern "C" { + #endif + + /** + * Shader stages. Note that these will become 5 with tessellation. + * + * The order must match how shaders are ordered in the pipeline. 
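+ * (Illustrative sketch, not part of this change: because of this ordering,
+ * driver code can walk the pipeline with a plain loop,
+ *
+ *    for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++)
+ *       setup_stage(s);   // setup_stage() is a hypothetical helper
+ *
+ * and visit the stages in pipeline order.)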
+ * The GLSL linker assumes that if ictx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); + va_end(args); +} + +static void +shader_perf_log_mesa(void *data, const char *fmt, ...) +{ + struct brw_context *brw = (struct brw_context *)data; + + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + va_list args_copy; + va_copy(args_copy, args); + vfprintf(stderr, fmt, args_copy); + va_end(args_copy); + } + + if (brw->perf_debug) { + GLuint msg_id = 0; + _mesa_gl_vdebug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_PERFORMANCE, + MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); + } + va_end(args); +} + +#define COMMON_OPTIONS \ + /* In order to help allow for better CSE at the NIR level we tell NIR to \ + * split all ffma instructions during opt_algebraic and we then re-combine \ + * them as a later step. \ + */ \ + .lower_ffma = true, \ + .lower_sub = true, \ + .lower_fdiv = true, \ + .lower_scmp = true, \ + .lower_fmod = true, \ + .lower_bitfield_extract = true, \ + .lower_bitfield_insert = true, \ + .lower_uadd_carry = true, \ + .lower_usub_borrow = true, \ + .lower_fdiv = true, \ + .native_integers = true, \ + .vertex_id_zero_based = true + +static const struct nir_shader_compiler_options scalar_nir_options = { + COMMON_OPTIONS, + .lower_pack_half_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, +}; + +static const struct nir_shader_compiler_options vector_nir_options = { + COMMON_OPTIONS, + + /* In the vec4 backend, our dpN instruction replicates its result to all the + * components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + */ + .fdot_replicates = true, + + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_extract_byte = true, + .lower_extract_word = true, +}; + +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) +{ + struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); + + compiler->devinfo = devinfo; + compiler->shader_debug_log = shader_debug_log_mesa; + compiler->shader_perf_log = shader_perf_log_mesa; + + brw_fs_alloc_reg_sets(compiler); + brw_vec4_alloc_reg_set(compiler); + + compiler->scalar_stage[MESA_SHADER_VERTEX] = + devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); + compiler->scalar_stage[MESA_SHADER_GEOMETRY] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true); + compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; + compiler->scalar_stage[MESA_SHADER_COMPUTE] = true; + + /* We want the GLSL compiler to emit code that uses condition codes */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; + compiler->glsl_compiler_options[i].MaxIfDepth = + devinfo->gen < 6 ? 
16 : UINT_MAX; + + compiler->glsl_compiler_options[i].EmitCondCodes = true; + compiler->glsl_compiler_options[i].EmitNoNoise = true; + compiler->glsl_compiler_options[i].EmitNoMainReturn = true; + compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; + compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; + compiler->glsl_compiler_options[i].LowerClipDistance = true; + + bool is_scalar = compiler->scalar_stage[i]; + + compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; + compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; + compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar; + + /* !ARB_gpu_shader5 */ + if (devinfo->gen < 7) + compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; + + compiler->glsl_compiler_options[i].NirOptions = + is_scalar ? &scalar_nir_options : &vector_nir_options; + + compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; + } + + compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; + compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; + + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; + + compiler->glsl_compiler_options[MESA_SHADER_COMPUTE] + .LowerShaderSharedVariables = true; + + return compiler; +} diff --cc src/vulkan/Makefile.am index 06f67cfd5f8,00000000000..58668c7cf88 mode 100644,000000..100644 --- a/src/vulkan/Makefile.am +++ b/src/vulkan/Makefile.am @@@ -1,180 -1,0 +1,180 @@@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . 
tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + +lib_LTLIBRARIES = libvulkan.la + +check_LTLIBRARIES = libvulkan-test.la + +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la \ + libanv-gen8.la \ + libanv-gen9.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/glsl/nir \ ++ -I$(top_srcdir)/src/compiler \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_builddir)/src \ - -I$(top_builddir)/src/glsl/nir \ ++ -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/vulkan + +libvulkan_la_CFLAGS = $(CFLAGS) -Wno-override-init + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_descriptor_set.c \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_meta_clear.c \ + anv_meta_resolve.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ + anv_pass.c \ + anv_pipeline.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=75 + +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen8_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=80 + +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_la_CFLAGS) -DANV_GENx10=90 + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + +libvulkan_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) 
+ $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/isl/libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + ../mesa/libmesa.la \ + ../mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) + +# Libvulkan with dummy gem. Used for unit tests. + +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --cc src/vulkan/anv_meta.c index b40151c2be6,00000000000..72a927a08ee mode 100644,000000..100644 --- a/src/vulkan/anv_meta.c +++ b/src/vulkan/anv_meta.c @@@ -1,1596 -1,0 +1,1596 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_meta.h" +#include "anv_private.h" - #include "glsl/nir/nir_builder.h" ++#include "nir/nir_builder.h" + +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + +static nir_shader * +build_nir_vertex_shader(bool attr_flat) +{ + nir_builder b; + + const struct glsl_type *vertex_type = glsl_vec4_type(); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vertex_type, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vertex_type, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + /* Add one more pass-through attribute. For clear shaders, this is used + * to store the color and for blit shaders it's the texture coordinate. + */ + const struct glsl_type *attr_type = glsl_vec4_type(); + nir_variable *attr_in = nir_variable_create(b.shader, nir_var_shader_in, + attr_type, "a_attr"); + attr_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *attr_out = nir_variable_create(b.shader, nir_var_shader_out, + attr_type, "v_attr"); + attr_out->data.location = VARYING_SLOT_VAR0; + attr_out->data.interpolation = attr_flat ? 
INTERP_QUALIFIER_FLAT : + INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, attr_out, attr_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + glsl_vec4_type(), "v_attr"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(color_type)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + color_type, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + + state->dynamic_mask = dynamic_mask; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_mask); +} + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. 
+ */ + cmd_buffer->state.need_query_wa = true; +} + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) +{ + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad VkImageViewType"); + } +} + +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. + */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->baseArrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. + */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. 
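+ * (Note: in this file the placeholder vertex shader is in fact built
+ * directly as NIR by build_nir_vertex_shader() above, rather than
+ * compiled from GLSL source.)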
+ */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(false), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
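+                     (filled in below, once per sampler dimension,
+                      right before each anv_graphics_pipeline_create() call)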
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + 
ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
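+ *
+ * Illustrative example (assumed numbers): for a block-compressed format
+ * with a 4x4 block (bw = bh = 4, bd = 1), an imageExtent of 10x10x1 texels
+ * maps to DIV_ROUND_UP(10, 4) = 3 x 3 x 1 elements.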
+ */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
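+ *
+ * Illustrative example (assumed formats): an RGB <-> RGBX copy with 32-bit
+ * channels ends up calling
+ *
+ *    vk_format_for_size(12);   // VK_FORMAT_R32G32B32_UINT
+ *    vk_format_for_size(16);   // VK_FORMAT_R32G32B32A32_UINT
+ *
+ * and the 12-byte format is exactly 3/4 the size of the 16-byte one.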
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t 
dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. 
Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatable UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get a + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. 
+ */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = 
VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->surface_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + 
const VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + 
pRegions[r].imageExtent, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + +VkResult +anv_device_init_meta(struct anv_device *device) +{ + VkResult result; + + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + goto fail_blit; + + return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); +fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + anv_device_finish_meta_resolve_state(device); + anv_device_finish_meta_clear_state(device); + + /* Blit */ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} diff --cc src/vulkan/anv_meta_clear.c index 027217b88dc,00000000000..15e24a32a75 mode 100644,000000..100644 --- a/src/vulkan/anv_meta_clear.c +++ b/src/vulkan/anv_meta_clear.c @@@ -1,1097 -1,0 +1,1097 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+ * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "anv_private.h" - #include "glsl/nir/nir_builder.h" ++#include "nir/nir_builder.h" + +/** Vertex attributes for color clears. */ +struct color_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + VkClearColorValue color; +}; + +/** Vertex attributes for depthstencil clears. */ +struct depthstencil_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /*<< 3DPRIM_RECTLIST */ +}; + +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, + "a_color"); + vs_in_color->data.location = VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + 
frag_output; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t samples, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc, + bool use_repclear, + struct anv_pipeline **pipeline) +{ + VkDevice device_h = anv_device_to_handle(device); + VkResult result; + + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; + + VkPipeline pipeline_h = VK_NULL_HANDLE; + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = fs_nir ? 2 : 1, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = NULL, /* dynamic */ + .scissorCount = 1, + .pScissors = NULL, /* dynamic */ + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { ~0 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state was vkCmdSet. 
+ */ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, + .use_repclear = use_repclear, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + alloc, + &pipeline_h); + + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; +} + +static VkResult +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir, frag_output); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct color_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }, + }, + }; + + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. 
+ */ + return + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); +} + +static void +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) +{ + if (!pipeline) + return; + + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); + } +} + +static void +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + struct anv_pipeline *pipeline = + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); + assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(clear_att->colorAttachment < subpass->color_count); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + .color = clear_value, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + 
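The three vertices emitted above are corners of the clear rectangle; the pipeline is created with use_rectlist = true, so the primitive is a rectangle list and the remaining corner is implied rather than supplied. A small sketch of that corner computation, using a hypothetical rect type in place of VkClearRect (illustrative only, not part of the patch):

#include <stdint.h>

/* Illustrative only: the three RECTLIST corners in the order the clear
 * path emits them -- top-left, top-right, bottom-right (y grows downward).
 * The bottom-left corner is implied by the rectangle-list primitive.
 */
struct rect2d { int32_t x, y; uint32_t w, h; };

static void rectlist_corners(struct rect2d r, float out[3][2])
{
   out[0][0] = (float)r.x;                out[0][1] = (float)r.y;
   out[1][0] = (float)r.x + (float)r.w;   out[1][1] = (float)r.y;
   out[2][0] = (float)r.x + (float)r.w;   out[2][1] = (float)r.y + (float)r.h;
}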
ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shader(struct nir_shader **out_vs) +{ + nir_builder vs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; +} + +static VkResult +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects, + uint32_t samples, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + + build_depthstencil_shader(&vs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct depthstencil_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct depthstencil_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthBoundsTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ + }, + .back = { 0 /* dont care */ }, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); +} + +static void +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer 
cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(pass_att != VK_ATTACHMENT_UNUSED); + + const struct depthstencil_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + + /* Ignored when clearing only stencil. */ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + anv_pipeline_to_handle(pipeline)); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +VkResult +anv_device_init_meta_clear_state(struct anv_device *device) +{ + VkResult res; + struct anv_meta_state *state = &device->meta_state; + + zero(device->meta_state.clear); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; + + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; 
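On the indexing used in this file: state->clear[i] holds the pipelines for a sample count of 1 << i (see the "samples = 1 << i" in the init loop above), and the emit paths recover the index from an image's sample count with ffs(samples) - 1. A self-contained round-trip check of that mapping, in standard C with nothing driver-specific:

#include <assert.h>
#include <stdint.h>
#include <strings.h>   /* ffs() */

/* samples must be a power of two (1, 2, 4, 8, 16). */
static inline uint32_t samples_to_index(uint32_t samples)
{
   return ffs(samples) - 1;
}

static inline uint32_t index_to_samples(uint32_t i)
{
   return 1u << i;
}

static void check_sample_index_mapping(void)
{
   for (uint32_t i = 0; i < 5; i++)
      assert(samples_to_index(index_to_samples(i)) == i);
}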
+ +fail: + anv_device_finish_meta_clear_state(device); + return res; +} + +/** + * The parameters mean that same as those in vkCmdClearAttachments. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att, clear_rect); + } else { + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + } +} + +static bool +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { + return true; + } + } + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + + return false; +} + +/** + * Emit any pending attachment clears for the current subpass. + * + * @see anv_attachment_state::pending_clear_aspects + */ +void +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_meta_saved_state saved_state; + + if (!subpass_needs_clear(cmd_buffer)) + return; + + meta_clear_begin(&saved_state, cmd_buffer); + + if (cmd_state->framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; + + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[a].pending_clear_aspects = 0; + } + + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, 
cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(image), + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; + } else { + subpass_desc.pDepthStencilAttachment = &att_ref; + } + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); + } + } + } +} + +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, 
imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. + */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize > max_fill_size) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} diff --cc src/vulkan/anv_meta_resolve.c index 2107a758fde,00000000000..f1c985e04cf mode 100644,000000..100644 --- a/src/vulkan/anv_meta_resolve.c +++ b/src/vulkan/anv_meta_resolve.c @@@ -1,865 -1,0 +1,865 @@@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "anv_meta.h" +#include "anv_private.h" - #include "glsl/nir/nir_builder.h" ++#include "nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. + */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + + v_tex_position = 
nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + &device->meta_state.resolve.pass); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = device->meta_state.resolve.pass, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + get_pipeline_h(device, num_samples)); + if (result != VK_SUCCESS) + goto cleanup; + + goto cleanup; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + VkRenderPass pass_h = device->meta_state.resolve.pass; + VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; + VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; + const VkAllocationCallbacks *alloc = 
&device->meta_state.alloc; + + if (pass_h) + ANV_CALL(DestroyRenderPass)(device_h, pass_h, + &device->meta_state.alloc); + + if (pipeline_layout_h) + ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); + + if (ds_layout_h) + ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { + VkPipeline pipeline_h = state->resolve.pipelines[i]; + + if (pipeline_h) { + ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); + } + } +} + +VkResult +anv_device_init_meta_resolve_state(struct anv_device *device) +{ + VkResult res = VK_SUCCESS; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + const isl_sample_count_mask_t sample_count_mask = + isl_device_get_sample_counts(&device->isl_dev); + + zero(device->meta_state.resolve); + + struct anv_shader_module vs_module = { .nir = build_nir_vs() }; + if (!vs_module.nir) { + /* XXX: Need more accurate error */ + res = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); + + res = anv_CreateDescriptorSetLayout(device_h, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }, + }, + alloc, + &device->meta_state.resolve.ds_layout); + if (res != VK_SUCCESS) + goto fail; + + res = anv_CreatePipelineLayout(device_h, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + alloc, + &device->meta_state.resolve.pipeline_layout); + if (res != VK_SUCCESS) + goto fail; + + res = create_pass(device); + if (res != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { + + uint32_t sample_count = 1 << (1 + i); + if (!(sample_count_mask & sample_count)) + continue; + + res = create_pipeline(device, sample_count, vs_module_h); + if (res != VK_SUCCESS) + goto fail; + } + + goto cleanup; + +fail: + anv_device_finish_meta_resolve_state(device); + +cleanup: + ralloc_free(vs_module.nir); + + return res; +} + +static void +emit_resolve(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src_iview, + const VkOffset2D *src_offset, + struct anv_image_view *dest_iview, + const VkOffset2D *dest_offset, + const VkExtent2D *resolve_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice device_h = anv_device_to_handle(device); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image *src_image = src_iview->image; + VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; + + const struct vertex_attrs vertex_data[3] = { + { + .vue_header = {0}, + .position = { + dest_offset->x + resolve_extent->width, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + src_offset->y + 
resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, + }, + }, + }; + + struct anv_state vertex_mem = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, + sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, + .offset = vertex_mem.offset, + }; + + VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); + + anv_CmdBindVertexBuffers(cmd_buffer_h, + /*firstBinding*/ 0, + /*bindingCount*/ 1, + (VkBuffer[]) { vertex_buffer_h }, + (VkDeviceSize[]) { 0 }); + + VkSampler sampler_h; + ANV_CALL(CreateSampler)(device_h, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0, + .anisotropyEnable = false, + .compareEnable = false, + .minLod = 0.0, + .maxLod = 0.0, + .unnormalizedCoordinates = false, + }, + &cmd_buffer->pool->alloc, + &sampler_h); + + VkDescriptorSet desc_set_h; + anv_AllocateDescriptorSets(device_h, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool_h, + .descriptorSetCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.blit.ds_layout, + }, + }, + &desc_set_h); + + ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); + + anv_UpdateDescriptorSets(device_h, + /*writeCount*/ 1, + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = desc_set_h, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler_h, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, + /*firstViewport*/ 0, + /*viewportCount*/ 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, + /*firstScissor*/ 0, + /*scissorCount*/ 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = (VkExtent2D) { fb->width, fb->height }, + }, + }); + + VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); + ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); + + if (cmd_buffer->state.pipeline != pipeline) { + anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + anv_CmdBindDescriptorSets(cmd_buffer_h, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline_layout, + /*firstSet*/ 0, + /* setCount */ 1, + (VkDescriptorSet[]) { + desc_set_h, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); + + /* All objects below are consumed by the draw call. We may safely destroy + * them. 
+ */ + anv_descriptor_set_destroy(device, desc_set); + anv_DestroySampler(device_h, sampler_h, + &cmd_buffer->pool->alloc); +} + +void anv_CmdResolveImage( + VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve* regions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, src_image, src_image_h); + ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_device *device = cmd_buffer->device; + struct anv_meta_saved_state state; + VkDevice device_h = anv_device_to_handle(device); + + meta_resolve_save(&state, cmd_buffer); + + assert(src_image->samples > 1); + assert(dest_image->samples == 1); + + if (src_image->samples >= 16) { + /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the + * glBlitFramebuffer workaround for samples >= 16. + */ + anv_finishme("vkCmdResolveImage: need interpolation workaround when " + "samples >= 16"); + } + + if (src_image->array_size > 1) + anv_finishme("vkCmdResolveImage: multisample array images"); + + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = ®ions[r]; + + /* From the Vulkan 1.0 spec: + * + * - The aspectMask member of srcSubresource and dstSubresource must + * only contain VK_IMAGE_ASPECT_COLOR_BIT + * + * - The layerCount member of srcSubresource and dstSubresource must + * match + */ + assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->srcSubresource.layerCount == + region->dstSubresource.layerCount); + + const uint32_t src_base_layer = + anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, + ®ion->srcOffset); + + const uint32_t dest_base_layer = + anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, + ®ion->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; + ++layer) { + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image_h, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image_h, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dest_iview), + }, + .width = anv_minify(dest_image->extent.width, + region->dstSubresource.mipLevel), + .height = anv_minify(dest_image->extent.height, + region->dstSubresource.mipLevel), + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + 
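For reference, the fragment shader built by build_nir_fs() earlier in this file fetches every sample of the source pixel with txf_ms and writes their arithmetic mean, i.e. a box-filter resolve. The equivalent computation over already-fetched RGBA samples, written as plain C purely for illustration (the real work is done by the generated NIR):

#include <stdint.h>

/* Average num_samples RGBA values -- the same accumulate-and-divide that
 * build_nir_fs() expresses with nir_fadd/nir_fdiv per sample fetch.
 */
static void resolve_average(const float samples[][4], uint32_t num_samples,
                            float out[4])
{
   float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f };

   for (uint32_t s = 0; s < num_samples; s++)
      for (int c = 0; c < 4; c++)
         accum[c] += samples[s][c];

   for (int c = 0; c < 4; c++)
      out[c] = accum[c] / (float)num_samples;
}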
ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { + region->dstOffset.x, + region->dstOffset.y, + }, + .extent = { + region->extent.width, + region->extent.height, + } + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + emit_resolve(cmd_buffer, + &src_iview, + &(VkOffset2D) { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + &dest_iview, + &(VkOffset2D) { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + &(VkExtent2D) { + .width = region->extent.width, + .height = region->extent.height, + }); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); + } + } + + meta_resolve_restore(&state, cmd_buffer); +} + +/** + * Emit any needed resolves for the current subpass. + */ +void +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_meta_saved_state saved_state; + + /* FINISHME(perf): Skip clears for resolve attachments. + * + * From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a resolve + * attachment, then the loadOp is effectively ignored as the resolve is + * guaranteed to overwrite all pixels in the render area. + */ + + if (!subpass->has_resolve) + return; + + meta_resolve_save(&saved_state, cmd_buffer); + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i]; + uint32_t dest_att = subpass->resolve_attachments[i]; + + if (dest_att == VK_ATTACHMENT_UNUSED) + continue; + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dest_iview = fb->attachments[dest_att]; + + struct anv_subpass resolve_subpass = { + .color_count = 1, + .color_attachments = (uint32_t[]) { dest_att }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + /* Subpass resolves must respect the render area. We can ignore the + * render area here because vkCmdBeginRenderPass set the render area + * with 3DSTATE_DRAWING_RECTANGLE. + * + * XXX(chadv): Does the hardware really respect + * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+ */ + emit_resolve(cmd_buffer, + src_iview, + &(VkOffset2D) { 0, 0 }, + dest_iview, + &(VkOffset2D) { 0, 0 }, + &(VkExtent2D) { fb->width, fb->height }); + } + + cmd_buffer->state.subpass = subpass; + meta_resolve_restore(&saved_state, cmd_buffer); +} diff --cc src/vulkan/anv_nir.h index 9a7a76fe216,00000000000..c76314d9df6 mode 100644,000000..100644 --- a/src/vulkan/anv_nir.h +++ b/src/vulkan/anv_nir.h @@@ -1,44 -1,0 +1,44 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + - #include "glsl/nir/nir.h" ++#include "nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); +bool anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, + const struct anv_pipeline_layout *layout); + +#ifdef __cplusplus +} +#endif diff --cc src/vulkan/anv_nir_apply_dynamic_offsets.c index 6837a80460d,00000000000..a5e3238a36a mode 100644,000000..100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@@ -1,171 -1,0 +1,171 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" - #include "glsl/nir/nir_builder.h" ++#include "nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + unsigned block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo: + block_idx_src = 1; + break; + default: + continue; /* the loop */ + } + + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; + + set_layout = state->layout->set[set].layout; + if (set_layout->binding[binding].dynamic_offset_index < 0) + continue; + + b->cursor = nir_before_instr(&intrin->instr); + + /* First, we need to generate the uniform load for the buffer offset */ + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 2; + offset_load->const_index[0] = state->indices_start + index * 8; + offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, + nir_imm_int(b, 8))); + + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); + nir_builder_instr_insert(b, &offset_load->instr); + + nir_src *offset_src = nir_get_io_offset_src(intrin); + nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, + &offset_load->dest.ssa); + + /* In order to avoid out-of-bounds access, we predicate */ + nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), + offset_src->ssa); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + nir_instr_remove(&intrin->instr); + *offset_src = nir_src_for_ssa(new_offset); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); + + if (intrin->intrinsic != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); + src1->src = nir_src_for_ssa(&intrin->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = 
nir_src_for_ssa(zero); + exec_list_push_tail(&phi->srcs, &src2->node); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&phi->dest.ssa)); + + nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); + } + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].range; + } + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --cc src/vulkan/anv_nir_apply_pipeline_layout.c index b7b8bd18ef9,00000000000..00ed7766acb mode 100644,000000..100644 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/vulkan/anv_nir_apply_pipeline_layout.c @@@ -1,322 -1,0 +1,322 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "program/prog_parameter.h" - #include "glsl/nir/nir_builder.h" ++#include "nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + const struct anv_pipeline_layout *layout; + + bool progress; +}; + +static uint32_t +get_surface_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + gl_shader_stage stage = state->shader->stage; + + assert(binding < set_layout->binding_count); + + assert(set_layout->binding[binding].stage[stage].surface_index >= 0); + + uint32_t surface_index = + state->layout->set[set].stage[stage].surface_start + + set_layout->binding[binding].stage[stage].surface_index; + + assert(surface_index < state->layout->stage[stage].surface_count); + + return surface_index; +} + +static uint32_t +get_sampler_index(unsigned set, unsigned binding, nir_texop tex_op, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + if (set_layout->binding[binding].stage[stage].sampler_index < 0) { + assert(tex_op == nir_texop_txf); + return 0; + } + + uint32_t sampler_index = + state->layout->set[set].stage[stage].sampler_start + + set_layout->binding[binding].stage[stage].sampler_index; + + assert(sampler_index < state->layout->stage[stage].sampler_count); + + return sampler_index; +} + +static uint32_t +get_image_index(unsigned set, unsigned binding, + struct apply_pipeline_layout_state *state) +{ + assert(set < state->layout->num_sets); + struct anv_descriptor_set_layout *set_layout = + state->layout->set[set].layout; + + assert(binding < set_layout->binding_count); + + gl_shader_stage stage = state->shader->stage; + + assert(set_layout->binding[binding].stage[stage].image_index >= 0); + + uint32_t image_index = + state->layout->set[set].stage[stage].image_start + + set_layout->binding[binding].stage[stage].image_index; + + assert(image_index < state->layout->stage[stage].image_count); + + return image_index; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = intrin->const_index[0]; + uint32_t binding = intrin->const_index[1]; + + uint32_t surface_index = get_surface_index(set, binding, state); + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if 
(deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->sampler); + + nir_deref_var *tex_deref = tex->texture ? tex->texture : tex->sampler; + tex->texture_index = + get_surface_index(tex_deref->var->data.descriptor_set, + tex_deref->var->data.binding, state); + lower_tex_deref(tex, tex_deref, &tex->texture_index, + nir_tex_src_texture_offset, state); + + tex->sampler_index = + get_sampler_index(tex->sampler->var->data.descriptor_set, + tex->sampler->var->data.binding, tex->op, state); + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. 
+ */ + tex->texture_array_size = 1; + + if (tex->texture) + cleanup_tex_deref(tex, tex->texture); + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + state->progress = true; + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + /* All texture instructions need lowering */ + state->progress = true; + break; + default: + continue; + } + } + + return true; +} + +static void +setup_vec4_uniform_value(const union gl_constant_value **params, + const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +bool +anv_nir_apply_pipeline_layout(nir_shader *shader, + struct brw_stage_prog_data *prog_data, + const struct anv_pipeline_layout *layout) +{ + struct apply_pipeline_layout_state state = { + .shader = shader, + .layout = layout, + }; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (layout->stage[shader->stage].image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * stored in the uniform.
+ */ + unsigned image_index = get_image_index(var->data.descriptor_set, + var->data.binding, &state); + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE * 4; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += layout->stage[shader->stage].image_count * + BRW_IMAGE_PARAM_SIZE * 4; + } + + return state.progress; +} diff --cc src/vulkan/anv_pipeline.c index 106b9221dd7,00000000000..3c5072ba924 mode 100644,000000..100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@@ -1,1300 -1,0 +1,1300 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" - #include "glsl/nir/spirv/nir_spirv.h" ++#include "nir/spirv/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly.
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); + nir = entry_point->shader; + assert(nir->stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + + nir_lower_system_values(nir); + nir_validate_shader(nir); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entry_point->impl); + + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + + return nir; +} + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); +} + +static uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + pthread_mutex_lock(&cache->mutex); + + struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + pthread_mutex_unlock(&cache->mutex); + + assert(size < cache->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset; +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* 
pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + size_t* pDataSize, + void* pData) +{ + *pDataSize = 0; + + return VK_SUCCESS; +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_SUCCESS); +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. 
(Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. 
+ */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage, + spec_info); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0) + prog_data->nr_params += pipeline->layout->stage[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ + prog_data->param = (const union gl_constant_value **) + malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + + /* We now set the param values to be offsets into an + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the binding table (plus MAX_RTS for VS). + */ + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + nir = brw_nir_lower_io(nir, &pipeline->device->info, + compiler->scalar_stage[stage]); + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms.
+ */ + nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + + populate_vs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + const uint32_t offset = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + + populate_gs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 
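+ /* From here on, the compiler temporaries (and the NIR itself, unless it + * came pre-built from a meta shader module) live in a throwaway ralloc + * context that is freed once the compiled kernel has been uploaded to the + * pipeline cache. + */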
+ + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + pipeline->gs_kernel = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + pipeline->gs_vertex_count = nir->info.gs.vertices_in; + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t offset = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const 
struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + + populate_cs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.work_groups_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + prog_data->base.total_shared = nir->num_shared; + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pipeline->cs_simd = + anv_pipeline_cache_upload_kernel(cache, shader_code, code_size); + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. 
+ */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. 
+ */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. 
+ */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. + */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + 
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
+        */
+       switch (desc->inputRate) {
+       default:
+       case VK_VERTEX_INPUT_RATE_VERTEX:
+          pipeline->instancing_enable[desc->binding] = false;
+          break;
+       case VK_VERTEX_INPUT_RATE_INSTANCE:
+          pipeline->instancing_enable[desc->binding] = true;
+          break;
+       }
+    }
+
+    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
+       pCreateInfo->pInputAssemblyState;
+    pipeline->primitive_restart = ia_info->primitiveRestartEnable;
+    pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
+
+    if (extra && extra->use_rectlist)
+       pipeline->topology = _3DPRIM_RECTLIST;
+
+    while (anv_block_pool_size(&device->scratch_block_pool) <
+           pipeline->total_scratch)
+       anv_block_pool_alloc(&device->scratch_block_pool);
+
+    return VK_SUCCESS;
+ }
+
+ VkResult
+ anv_graphics_pipeline_create(
+    VkDevice _device,
+    VkPipelineCache _cache,
+    const VkGraphicsPipelineCreateInfo *pCreateInfo,
+    const struct anv_graphics_pipeline_create_info *extra,
+    const VkAllocationCallbacks *pAllocator,
+    VkPipeline *pPipeline)
+ {
+    ANV_FROM_HANDLE(anv_device, device, _device);
+    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+    if (cache == NULL)
+       cache = &device->default_pipeline_cache;
+
+    switch (device->info.gen) {
+    case 7:
+       if (device->info.is_haswell)
+          return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+       else
+          return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+    case 8:
+       return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+    case 9:
+       return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline);
+    default:
+       unreachable("unsupported gen\n");
+    }
+ }
+
+ VkResult anv_CreateGraphicsPipelines(
+     VkDevice                                    _device,
+     VkPipelineCache                             pipelineCache,
+     uint32_t                                    count,
+     const VkGraphicsPipelineCreateInfo*         pCreateInfos,
+     const VkAllocationCallbacks*                pAllocator,
+     VkPipeline*                                 pPipelines)
+ {
+    VkResult result = VK_SUCCESS;
+
+    unsigned i = 0;
+    for (; i < count; i++) {
+       result = anv_graphics_pipeline_create(_device,
+                                             pipelineCache,
+                                             &pCreateInfos[i],
+                                             NULL, pAllocator, &pPipelines[i]);
+       if (result != VK_SUCCESS) {
+          for (unsigned j = 0; j < i; j++) {
+             anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+          }
+
+          return result;
+       }
+    }
+
+    return VK_SUCCESS;
+ }
+
+ static VkResult anv_compute_pipeline_create(
+     VkDevice                                    _device,
+     VkPipelineCache                             _cache,
+     const VkComputePipelineCreateInfo*          pCreateInfo,
+     const VkAllocationCallbacks*                pAllocator,
+     VkPipeline*                                 pPipeline)
+ {
+    ANV_FROM_HANDLE(anv_device, device, _device);
+    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
+
+    if (cache == NULL)
+       cache = &device->default_pipeline_cache;
+
+    switch (device->info.gen) {
+    case 7:
+       if (device->info.is_haswell)
+          return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+       else
+          return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+    case 8:
+       return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+    case 9:
+       return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline);
+    default:
+       unreachable("unsupported gen\n");
+    }
+ }
+
+ VkResult anv_CreateComputePipelines(
+     VkDevice                                    _device,
+     VkPipelineCache                             pipelineCache,
+     uint32_t                                    count,
+     const VkComputePipelineCreateInfo*          pCreateInfos,
+     const VkAllocationCallbacks*                pAllocator,
+     VkPipeline*                                 pPipelines)
+ {
+    VkResult result = VK_SUCCESS;
+
+    unsigned i = 0;
+    for (; i < count; i++) {
+       result = anv_compute_pipeline_create(_device, pipelineCache,
+                                            &pCreateInfos[i],
+                                            pAllocator, &pPipelines[i]);
+       if (result != VK_SUCCESS) {
+          for (unsigned j = 0; j < i; j++) {
+             anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
+          }
+
+          return result;
+       }
+    }
+
+    return VK_SUCCESS;
+ }
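
For reference, a minimal application-side sketch of the dynamic-state path that
anv_pipeline_init_dynamic_state() services above: when a state such as the
stencil reference or the depth bounds is listed in
VkPipelineDynamicStateCreateInfo, the values in pDepthStencilState are ignored
at pipeline-creation time and are supplied on the command buffer instead. The
snippet below is illustrative only and is not part of this patch;
record_dynamic_example is a hypothetical helper.

#include <vulkan/vulkan.h>

/* Hypothetical example, not part of the patch: mark stencil reference and
 * depth bounds as dynamic, then provide the values at record time. */
static const VkDynamicState example_dynamic_states[] = {
   VK_DYNAMIC_STATE_STENCIL_REFERENCE,
   VK_DYNAMIC_STATE_DEPTH_BOUNDS,
};

static const VkPipelineDynamicStateCreateInfo example_dynamic_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   .dynamicStateCount = 2,
   .pDynamicStates = example_dynamic_states,
};

/* Chain &example_dynamic_info into
 * VkGraphicsPipelineCreateInfo::pDynamicState, then at record time: */
static void
record_dynamic_example(VkCommandBuffer cmd)
{
   vkCmdSetStencilReference(cmd, VK_STENCIL_FACE_FRONT_BIT |
                                 VK_STENCIL_FACE_BACK_BIT, 0x80);
   vkCmdSetDepthBounds(cmd, 0.0f, 1.0f);
}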
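
Similarly, a sketch of the vertex-input setup consumed by the binding loop in
anv_pipeline_init(): binding 0 steps per vertex while binding 1 steps per
instance, so the driver records binding 1 in instancing_enable[] and its stride
in binding_stride[]. Illustrative only; the variable names below are the
application's, not the driver's.

#include <vulkan/vulkan.h>

/* Hypothetical application-side setup, not part of the patch. */
static const VkVertexInputBindingDescription example_bindings[] = {
   { .binding = 0, .stride = 8 * sizeof(float),
     .inputRate = VK_VERTEX_INPUT_RATE_VERTEX },    /* per-vertex data */
   { .binding = 1, .stride = 16 * sizeof(float),
     .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE },  /* per-instance data */
};

static const VkVertexInputAttributeDescription example_attributes[] = {
   { .location = 0, .binding = 0,
     .format = VK_FORMAT_R32G32B32_SFLOAT, .offset = 0 },
   { .location = 1, .binding = 1,
     .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 0 },
};

static const VkPipelineVertexInputStateCreateInfo example_vertex_input = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
   .vertexBindingDescriptionCount = 2,
   .pVertexBindingDescriptions = example_bindings,
   .vertexAttributeDescriptionCount = 2,
   .pVertexAttributeDescriptions = example_attributes,
};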
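
Finally, a note on the batch entry points: anv_CreateGraphicsPipelines() and
anv_CreateComputePipelines() return a single VkResult for the whole array and,
in this implementation, destroy any pipelines created earlier in the same call
before returning the failing result. A minimal caller (hypothetical name) looks
like:

#include <vulkan/vulkan.h>

/* Hypothetical caller, not part of the patch: one VkResult covers all
 * `count` pipelines created in a single call. */
static VkResult
create_pipelines_example(VkDevice device, VkPipelineCache cache,
                         uint32_t count,
                         const VkGraphicsPipelineCreateInfo *infos,
                         VkPipeline *pipelines)
{
   return vkCreateGraphicsPipelines(device, cache, count, infos,
                                    NULL /* pAllocator */, pipelines);
}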