LIBDRM_INTEL_REQUIRED=2.4.61
LIBDRM_NVVIEUX_REQUIRED=2.4.33
LIBDRM_NOUVEAU_REQUIRED=2.4.62
- LIBDRM_FREEDRENO_REQUIRED=2.4.64
+ LIBDRM_FREEDRENO_REQUIRED=2.4.65
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
PRESENTPROTO_REQUIRED=1.0
AM_PROG_AS
AX_CHECK_GNU_MAKE
AC_CHECK_PROGS([PYTHON2], [python2 python])
+AC_CHECK_PROGS([PYTHON3], [python3])
AC_PROG_SED
AC_PROG_MKDIR_P
dnl
dnl library names
dnl
+ dnl Unfortunately we need to do a few things that libtool can't help us with,
+ dnl so we need some knowledge of shared library filenames:
+ dnl
+ dnl LIB_EXT is the extension used when creating symlinks for alternate
+ dnl filenames for a shared library which will be dynamically loaded
+ dnl
+ dnl IMP_LIB_EXT is the extension used when checking for the presence of
+ dnl the file for a shared library we wish to link with
+ dnl
case "$host_os" in
darwin* )
- LIB_EXT='dylib' ;;
+ LIB_EXT='dylib'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
cygwin* )
- LIB_EXT='dll' ;;
+ LIB_EXT='dll'
+ IMP_LIB_EXT='dll.a'
+ ;;
aix* )
- LIB_EXT='a' ;;
+ LIB_EXT='a'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
* )
- LIB_EXT='so' ;;
+ LIB_EXT='so'
+ IMP_LIB_EXT=$LIB_EXT
+ ;;
esac
AC_SUBST([LIB_EXT])
AC_SUBST(SHA1_LIBS)
AC_SUBST(SHA1_CFLAGS)
+ # Enable a define for SHA1
+ if test "x$with_sha1" != "x"; then
+ DEFINES="$DEFINES -DHAVE_SHA1"
+ fi
+
# Allow user to configure out the shader-cache feature
AC_ARG_ENABLE([shader-cache],
AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
[DEFINES="${DEFINES} -DGLX_USE_TLS"])
+ dnl Read-only text section on x86 hardened platforms
+ AC_ARG_ENABLE([glx-read-only-text],
+ [AS_HELP_STRING([--enable-glx-read-only-text],
+ [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+ [enable_glx_read_only_text="$enableval"],
+ [enable_glx_read_only_text=no])
+ if test "x$enable_glx_read_only_text" = xyes; then
+ DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+ fi
+
dnl
dnl More DRI setup
dnl
AC_SUBST([GBM_PC_REQ_PRIV])
AC_SUBST([GBM_PC_LIB_PRIV])
+AM_CONDITIONAL(HAVE_VULKAN, true)
+
+AC_ARG_VAR([GLSLC], [Path to the glslc executable])
+AC_CHECK_PROGS([GLSLC], [glslc])
+AC_SUBST([GLSLC])
+
dnl
dnl EGL configuration
dnl
if test "x$enable_gallium_llvm" != "xyes"; then
AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
fi
- llvm_check_version_for "3" "4" "2" $1
+ llvm_check_version_for "3" "5" "0" $1
if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
fi
gallium_require_drm "vc4"
gallium_require_drm_loader
- case "$host_cpu" in
- i?86 | x86_64 | amd64)
- USE_VC4_SIMULATOR=yes
- ;;
- esac
+ PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+ [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
;;
*)
AC_MSG_ERROR([Unknown Gallium driver: $driver])
LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+ dnl llvm-config may not give the right answer when llvm is built as a
+ dnl single shared library, so we must work the library name out for
+ dnl ourselves.
+ dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
if test "x$enable_llvm_shared_libs" = xyes; then
dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
- AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+ AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
if test "x$llvm_have_one_so" = xyes; then
dnl LLVM was built using auto*, so there is only one shared object.
else
dnl If LLVM was built with CMake, there will be one shared object per
dnl component.
- AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+ AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
[AC_MSG_ERROR([Could not find llvm shared libraries:
Please make sure you have built llvm with the --enable-shared option
and that your llvm libraries are installed in $LLVM_LIBDIR
AC_SUBST([XA_TINY], $XA_TINY)
AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY")
+PKG_CHECK_MODULES(VALGRIND, [valgrind],
+ [have_valgrind=yes], [have_valgrind=no])
+if test "x$have_valgrind" = "xyes"; then
+ AC_DEFINE([HAVE_VALGRIND], 1,
+ [Use valgrind intrinsics to suppress false warnings])
+fi
+
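For context, the new HAVE_VALGRIND define is typically consumed from C/C++ behind a guarded include. A minimal sketch follows; the MAKE_MEM_DEFINED wrapper name is hypothetical, while VALGRIND_MAKE_MEM_DEFINED itself comes from valgrind/memcheck.h:

   #ifdef HAVE_VALGRIND
   #include <valgrind/memcheck.h>
   /* Tell memcheck a region is initialized, e.g. after a GPU write. */
   #define MAKE_MEM_DEFINED(ptr, size) VALGRIND_MAKE_MEM_DEFINED(ptr, size)
   #else
   #define MAKE_MEM_DEFINED(ptr, size) ((void) 0)
   #endif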
dnl Restore LDFLAGS and CPPFLAGS
LDFLAGS="$_SAVE_LDFLAGS"
CPPFLAGS="$_SAVE_CPPFLAGS"
src/mesa/drivers/osmesa/osmesa.pc
src/mesa/drivers/x11/Makefile
src/mesa/main/tests/Makefile
+ src/vulkan/Makefile
+ src/vulkan/anv_icd.json
+ src/vulkan/tests/Makefile
src/util/Makefile
src/util/tests/hash_table/Makefile])
echo ""
fi
echo " PYTHON2: $PYTHON2"
+echo " PYTHON3: $PYTHON3"
echo ""
echo " Run '${MAKE-make}' to build Mesa"
AM_CFLAGS = $(VISIBILITY_CFLAGS)
AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
+if HAVE_VULKAN
+SUBDIRS += vulkan
+endif
+
AM_CPPFLAGS = \
-I$(top_srcdir)/include/ \
-I$(top_srcdir)/src/mapi/ \
noinst_LTLIBRARIES = libglsl_util.la
libglsl_util_la_SOURCES = \
+ glsl/shader_enums.c \
mesa/main/imports.c \
mesa/program/prog_hash_table.c \
mesa/program/symbol_table.c \
nir/nir_opcodes_c.py \
nir/nir_opcodes_h.py \
nir/nir_opt_algebraic.py \
+ nir/tests \
SConscript
include Makefile.sources
TESTS = glcpp/tests/glcpp-test \
glcpp/tests/glcpp-test-cr-lf \
+ nir/tests/control_flow_tests \
tests/blob-test \
tests/general-ir-test \
tests/optimization-test \
check_PROGRAMS = \
glcpp/glcpp \
glsl_test \
+ nir/tests/control_flow_tests \
tests/blob-test \
tests/general-ir-test \
tests/sampler-types-test \
tests/uniform-initializer-test
-noinst_PROGRAMS = glsl_compiler
+noinst_PROGRAMS = glsl_compiler spirv2nir
tests_blob_test_SOURCES = \
tests/blob_test.c
glsl_parser.cpp \
glsl_parser.h \
$(LIBGLSL_FILES) \
- $(NIR_FILES)
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
+
libnir_la_SOURCES = \
glsl_types.cpp \
builtin_types.cpp \
glsl_symbol_table.cpp \
- $(NIR_FILES)
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
glsl_compiler_SOURCES = \
$(GLSL_COMPILER_CXX_FILES)
glsl_compiler_LDADD = \
libglsl.la \
$(top_builddir)/src/libglsl_util.la \
+ $(PTHREAD_LIBS)
+
+spirv2nir_SOURCES = \
+ standalone_scaffolding.cpp \
+ standalone_scaffolding.h \
+ nir/spirv2nir.c
+
+spirv2nir_LDADD = \
+ libglsl.la \
+ $(top_builddir)/src/libglsl_util.la \
$(PTHREAD_LIBS)
glsl_test_SOURCES = \
am__v_YACC_0 = @echo " YACC " $@;
am__v_YACC_1 =
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
glsl_parser.cpp glsl_parser.h: glsl_parser.yy
- $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+ $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
glsl_lexer.cpp: glsl_lexer.ll
- $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+ $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
- $(AM_V_at)$(MKDIR_P) glcpp
- $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+ $(MKDIR_GEN)
+ $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
- $(AM_V_at)$(MKDIR_P) glcpp
- $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+ $(MKDIR_GEN)
+ $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
# Only the parsers (specifically the header files generated at the same time)
# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
glsl_lexer.cpp \
glcpp/glcpp-parse.c \
glcpp/glcpp-lex.c \
- nir/nir_builder_opcodes.h \
- nir/nir_constant_expressions.c \
- nir/nir_opcodes.c \
- nir/nir_opcodes.h \
- nir/nir_opt_algebraic.c
+ $(NIR_GENERATED_FILES)
CLEANFILES = \
glcpp/glcpp-parse.h \
glsl_parser.h \
$(RM) glcpp/tests/*.out
$(RM) glcpp/tests/subtest*/*.out
+ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
- nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
- $(AM_V_at)$(MKDIR_P) nir
- $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+
+ nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+ nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+ nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/glsl/libnir.la \
+ $(top_builddir)/src/libglsl_util.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
nir/nir_control_flow_private.h \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
+ nir/nir_gs_count_vertices.c \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
nir/nir_live_variables.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
nir/nir_lower_load_const_to_scalar.c \
nir/nir_lower_locals_to_regs.c \
nir/nir_lower_idiv.c \
nir/nir_lower_io.c \
nir/nir_lower_outputs_to_temporaries.c \
nir/nir_lower_phis_to_scalar.c \
- nir/nir_lower_samplers.cpp \
+ nir/nir_lower_samplers.c \
nir/nir_lower_system_values.c \
- nir/nir_lower_tex_projector.c \
+ nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
nir/nir_lower_vec_to_movs.c \
nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_propagate.c \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_peephole_ffma.c \
nir/nir_remove_dead_variables.c \
nir/nir_search.c \
nir/nir_search.h \
+ nir/nir_spirv.h \
nir/nir_split_var_copies.c \
nir/nir_sweep.c \
nir/nir_to_ssa.c \
nir/nir_vla.h \
nir/nir_worklist.c \
nir/nir_worklist.h \
- nir/nir_types.cpp \
- nir/spirv_glsl450_to_nir.c \
- $(NIR_GENERATED_FILES)
+ nir/nir_types.cpp \
+ nir/spirv_to_nir.c \
+ nir/spirv_glsl450_to_nir.c
# libglsl
remove_per_vertex_blocks(exec_list *instructions,
_mesa_glsl_parse_state *state, ir_variable_mode mode);
+ /**
+ * Visitor class that finds the first read of a write-only variable, if
+ * any exists.
+ */
+ class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+ {
+ public:
+ read_from_write_only_variable_visitor() : found(NULL)
+ {
+ }
+
+ virtual ir_visitor_status visit(ir_dereference_variable *ir)
+ {
+ if (this->in_assignee)
+ return visit_continue;
+
+ ir_variable *var = ir->variable_referenced();
+ /* We can have image_write_only set on both images and buffer variables,
+ * but in the former there is a distinction between reads from
+ * the variable itself (write_only) and from the memory it points to
+ * (image_write_only), while for buffer variables there is no such
+ * distinction, which is why this check is limited to buffer variables
+ * alone.
+ */
+ if (!var || var->data.mode != ir_var_shader_storage)
+ return visit_continue;
+
+ if (var->data.image_write_only) {
+ found = var;
+ return visit_stop;
+ }
+
+ return visit_continue;
+ }
+
+ ir_variable *get_variable() {
+ return found;
+ }
+
+ private:
+ ir_variable *found;
+ };
void
_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
*/
remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+ /* Check that we don't have reads from write-only variables */
+ read_from_write_only_variable_visitor v;
+ v.run(instructions);
+ ir_variable *error_var = v.get_variable();
+ if (error_var) {
+ /* It would be nice to have proper location information, but for that
+ * we would need to check this as we process each kind of AST node
+ */
+ YYLTYPE loc;
+ memset(&loc, 0, sizeof(loc));
+ _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+ error_var->name);
+ }
}
"assignment to %s",
non_lvalue_description);
error_emitted = true;
- } else if (lhs_var != NULL && lhs_var->data.read_only) {
+ } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+ (lhs_var->data.mode == ir_var_shader_storage &&
+ lhs_var->data.image_read_only))) {
+ /* We can have image_read_only set on both images and buffer variables,
+ * but in the former there is a distinction between assignments to
+ * the variable itself (read_only) and to the memory it points to
+ * (image_read_only), while for buffer variables there is no such
+ * distinction, which is why this check is limited to buffer variables
+ * alone.
+ */
_mesa_glsl_error(&lhs_loc, state,
"assignment to read-only variable '%s'",
lhs_var->name);
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_SUBROUTINE:
/* I assume a comparison of a struct containing a sampler just
}
const struct gl_context *const ctx = state->ctx;
- unsigned elements = type->is_array() ? type->length : 1;
+ unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
unsigned max_index = qual->binding + elements - 1;
const glsl_type *base_type = type->without_array();
var->data.depth_layout = ir_depth_layout_none;
if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
qual->flags.q.packed ||
qual->flags.q.shared) {
_mesa_glsl_error(loc, state,
- "uniform block layout qualifiers std140, packed, and "
- "shared can only be applied to uniform blocks, not "
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
"members");
}
if (state->es_shader && state->language_version >= 300) {
/* Local shader has no exact candidates; check the built-ins. */
_mesa_glsl_initialize_builtin_functions();
- if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+ if (_mesa_glsl_find_builtin_function_by_name(name)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"A shader cannot redefine or overload built-in "
bool is_interface,
enum glsl_matrix_layout matrix_layout,
bool allow_reserved_names,
- ir_variable_mode var_mode)
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout)
{
unsigned decl_count = 0;
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
/* Make an initial pass over the list of fields to determine how
* many there are. Each element in this list is an ast_declarator_list.
* This means that we actually need to count the number of elements in the
* is_interface case, will have resulted in compilation having
* already halted due to a syntax error.
*/
- const struct glsl_type *field_type =
- decl_type != NULL ? decl_type : glsl_type::error_type;
+ assert(decl_type);
- if (is_interface && field_type->contains_opaque()) {
+ if (is_interface && decl_type->contains_opaque()) {
YYLTYPE loc = decl_list->get_location();
_mesa_glsl_error(&loc, state,
"uniform/buffer in non-default interface block contains "
"opaque variable");
}
- if (field_type->contains_atomic()) {
+ if (decl_type->contains_atomic()) {
/* From section 4.1.7.3 of the GLSL 4.40 spec:
*
* "Members of structures cannot be declared as atomic counter
"shader storage block or uniform block");
}
- if (field_type->contains_image()) {
+ if (decl_type->contains_image()) {
/* FINISHME: Same problem as with atomic counters.
* FINISHME: Request clarification from Khronos and add
* FINISHME: spec quotation here.
const struct ast_type_qualifier *const qual =
& decl_list->type->qualifier;
if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
qual->flags.q.packed ||
qual->flags.q.shared) {
_mesa_glsl_error(&loc, state,
"uniform/shader storage block layout qualifiers "
- "std140, packed, and shared can only be applied "
- "to uniform/shader storage blocks, not members");
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
}
if (qual->flags.q.constant) {
"to struct or interface block members");
}
- field_type = process_array_type(&loc, decl_type,
- decl->array_specifier, state);
+ const struct glsl_type *field_type =
+ process_array_type(&loc, decl_type, decl->array_specifier, state);
fields[i].type = field_type;
fields[i].name = decl->identifier;
fields[i].location = -1;
|| fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
}
+ /* Image qualifiers are allowed on buffer variables, which can only
+ * be defined inside shader storage buffer objects
+ */
+ if (layout && var_mode == ir_var_shader_storage) {
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "buffer variable `%s' can't be "
+ "readonly and writeonly.", fields[i].name);
+ }
+
+ /* For the readonly and writeonly qualifiers, a qualifier set on the
+ * field overrides the layout qualifier.
+ */
+ bool read_only = layout->flags.q.read_only;
+ bool write_only = layout->flags.q.write_only;
+
+ if (qual->flags.q.read_only) {
+ read_only = true;
+ write_only = false;
+ } else if (qual->flags.q.write_only) {
+ read_only = false;
+ write_only = true;
+ }
+
+ fields[i].image_read_only = read_only;
+ fields[i].image_write_only = write_only;
+
+ /* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier is set.
+ */
+ fields[i].image_coherent = qual->flags.q.coherent ||
+ layout->flags.q.coherent;
+ fields[i].image_volatile = qual->flags.q._volatile ||
+ layout->flags.q._volatile;
+ fields[i].image_restrict = qual->flags.q.restrict_flag ||
+ layout->flags.q.restrict_flag;
+ }
+
i++;
}
}
false,
GLSL_MATRIX_LAYOUT_INHERITED,
false /* allow_reserved_names */,
- ir_var_auto);
+ ir_var_auto,
+ NULL);
validate_identifier(this->name, loc, state);
bool found;
};
+ static bool
+ is_unsized_array_last_element(ir_variable *v)
+ {
+ const glsl_type *interface_type = v->get_interface_type();
+ int length = interface_type->length;
+
+ assert(v->type->is_unsized_array());
+
+ /* Check if it is the last element of the interface */
+ if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+ return true;
+ return false;
+ }
ir_rvalue *
ast_interface_block::hir(exec_list *instructions,
this->block_name);
}
+ if (!this->layout.flags.q.buffer &&
+ this->layout.flags.q.std430) {
+ _mesa_glsl_error(&loc, state,
+ "std430 storage block layout qualifier is supported "
+ "only for shader storage blocks");
+ }
+
/* The ast_interface_block has a list of ast_declarator_lists. We
* need to turn those into ir_variables with an association
* with this uniform block.
packing = GLSL_INTERFACE_PACKING_SHARED;
} else if (this->layout.flags.q.packed) {
packing = GLSL_INTERFACE_PACKING_PACKED;
+ } else if (this->layout.flags.q.std430) {
+ packing = GLSL_INTERFACE_PACKING_STD430;
} else {
/* The default layout is std140.
*/
true,
matrix_layout,
redeclaring_per_vertex,
- var_mode);
+ var_mode,
+ &this->layout);
state->struct_specifier_depth--;
else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
handle_tess_ctrl_shader_output_decl(state, loc, var);
+ for (unsigned i = 0; i < num_variables; i++) {
+ if (fields[i].type->is_unsized_array()) {
+ if (var_mode == ir_var_shader_storage) {
+ if (i != (num_variables - 1)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ fields[i].name);
+ }
+ }
+ }
+ }
+
if (ir_variable *earlier =
state->symbols->get_variable(this->instance_name)) {
if (!redeclaring_per_vertex) {
var->data.stream = this->layout.stream;
+ if (var->data.mode == ir_var_shader_storage) {
+ var->data.image_read_only = fields[i].image_read_only;
+ var->data.image_write_only = fields[i].image_write_only;
+ var->data.image_coherent = fields[i].image_coherent;
+ var->data.image_volatile = fields[i].image_volatile;
+ var->data.image_restrict = fields[i].image_restrict;
+ }
+
/* Examine var name here since var may get deleted in the next call */
bool var_is_gl_id = is_gl_identifier(var->name);
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
var->data.binding = this->layout.binding;
+ if (var->type->is_unsized_array()) {
+ if (var->is_in_shader_storage_block()) {
+ if (!is_unsized_array_last_element(var)) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ var->data.from_ssbo_unsized_array = true;
+ } else {
+ /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+ *
+ * "If an array is declared as the last member of a shader storage
+ * block and the size is not specified at compile-time, it is
+ * sized at run-time. In all other cases, arrays are sized only
+ * at compile-time."
+ */
+ if (state->es_shader) {
+ _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+ "only last member of a shader storage block "
+ "can be defined as unsized array",
+ var->name);
+ }
+ }
+ }
+
state->symbols->add_variable(var);
instructions->push_tail(var);
}
this->extensions = &ctx->Extensions;
+ this->ARB_compute_shader_enable = true;
+
this->Const.MaxLights = ctx->Const.MaxLights;
this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes;
this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits;
this->default_uniform_qualifier = new(this) ast_type_qualifier();
this->default_uniform_qualifier->flags.q.shared = 1;
this->default_uniform_qualifier->flags.q.column_major = 1;
+ this->default_uniform_qualifier->is_default_qualifier = true;
+
+ this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+ this->default_shader_storage_qualifier->flags.q.shared = 1;
+ this->default_shader_storage_qualifier->flags.q.column_major = 1;
+ this->default_shader_storage_qualifier->is_default_qualifier = true;
this->fs_uses_gl_fragcoord = false;
this->fs_redeclares_gl_fragcoord = false;
EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
EXT(ARB_shader_precision, true, false, ARB_shader_precision),
EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export),
- EXT(ARB_shader_storage_buffer_object, true, false, ARB_shader_storage_buffer_object),
+ EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object),
EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine),
+ EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples),
EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod),
EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack),
EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing),
}
}
+ void
+ _mesa_ast_process_interface_block(YYLTYPE *locp,
+ _mesa_glsl_parse_state *state,
+ ast_interface_block *const block,
+ const struct ast_type_qualifier q)
+ {
+ if (q.flags.q.buffer) {
+ if (!state->has_shader_storage_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ } else if (state->ARB_shader_storage_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 430 / GL_ARB_shader_storage_buffer_object "
+ "required for defining shader storage blocks");
+ }
+ } else if (q.flags.q.uniform) {
+ if (!state->has_uniform_buffer_objects()) {
+ _mesa_glsl_error(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ } else if (state->ARB_uniform_buffer_object_warn) {
+ _mesa_glsl_warning(locp, state,
+ "#version 140 / GL_ARB_uniform_buffer_object "
+ "required for defining uniform blocks");
+ }
+ } else {
+ if (state->es_shader || state->language_version < 150) {
+ _mesa_glsl_error(locp, state,
+ "#version 150 required for using "
+ "interface blocks");
+ }
+ }
+
+ /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+ * "It is illegal to have an input block in a vertex shader
+ * or an output block in a fragment shader"
+ */
+ if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+ _mesa_glsl_error(locp, state,
+ "`in' interface block is not allowed for "
+ "a vertex shader");
+ } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+ _mesa_glsl_error(locp, state,
+ "`out' interface block is not allowed for "
+ "a fragment shader");
+ }
+
+ /* Since block arrays require names, and both features are added in
+ * the same language versions, we don't have to explicitly
+ * version-check both things.
+ */
+ if (block->instance_name != NULL) {
+ state->check_version(150, 300, locp, "interface blocks with "
+ "an instance name are not allowed");
+ }
+
+ uint64_t interface_type_mask;
+ struct ast_type_qualifier temp_type_qualifier;
+
+ /* Get a bitmask containing only the in/out/uniform/buffer
+ * flags, allowing us to ignore other irrelevant flags like
+ * interpolation qualifiers.
+ */
+ temp_type_qualifier.flags.i = 0;
+ temp_type_qualifier.flags.q.uniform = true;
+ temp_type_qualifier.flags.q.in = true;
+ temp_type_qualifier.flags.q.out = true;
+ temp_type_qualifier.flags.q.buffer = true;
+ interface_type_mask = temp_type_qualifier.flags.i;
+
+ /* Get the block's interface qualifier. The interface_qualifier
+ * production rule guarantees that only one bit will be set (and
+ * it will be in/out/uniform).
+ */
+ uint64_t block_interface_qualifier = q.flags.i;
+
+ block->layout.flags.i |= block_interface_qualifier;
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ state->has_explicit_attrib_stream()) {
+ /* Assign global layout's stream value. */
+ block->layout.flags.q.stream = 1;
+ block->layout.flags.q.explicit_stream = 0;
+ block->layout.stream = state->out_qualifier->stream;
+ }
+
+ foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+ ast_type_qualifier& qualifier = member->type->qualifier;
+ if ((qualifier.flags.i & interface_type_mask) == 0) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If no optional qualifier is used in a member declaration, the
+ * qualifier of the variable is just in, out, or uniform as declared
+ * by interface-qualifier."
+ */
+ qualifier.flags.i |= block_interface_qualifier;
+ } else if ((qualifier.flags.i & interface_type_mask) !=
+ block_interface_qualifier) {
+ /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+ * "If optional qualifiers are used, they can include interpolation
+ * and storage qualifiers and they must declare an input, output,
+ * or uniform variable consistent with the interface qualifier of
+ * the block."
+ */
+ _mesa_glsl_error(locp, state,
+ "uniform/in/out qualifier on "
+ "interface block member does not match "
+ "the interface block");
+ }
+
+ /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+ *
+ * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+ * outputs."
+ *
+ * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+ *
+ * "Only variables output from a shader can be candidates for
+ * invariance."
+ *
+ * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+ *
+ * "If optional qualifiers are used, they can include interpolation
+ * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+ * and they must declare an input, output, or uniform member
+ * consistent with the interface qualifier of the block"
+ */
+ if (qualifier.flags.q.invariant)
+ _mesa_glsl_error(locp, state,
+ "invariant qualifiers cannot be used "
+ "with interface blocks members");
+ }
+ }
void
_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
}
}
+ _mesa_glsl_initialize_derived_variables(shader);
+
delete state->symbols;
ralloc_free(state);
}
hash_table *glsl_type::array_types = NULL;
hash_table *glsl_type::record_types = NULL;
hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::function_types = NULL;
hash_table *glsl_type::subroutine_types = NULL;
void *glsl_type::mem_ctx = NULL;
}
glsl_type::glsl_type(GLenum gl_type,
- glsl_base_type base_type, unsigned vector_elements,
- unsigned matrix_columns, const char *name) :
+ glsl_base_type base_type, unsigned vector_elements,
+ unsigned matrix_columns, const char *name) :
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
}
glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
- enum glsl_sampler_dim dim, bool shadow, bool array,
- unsigned type, const char *name) :
+ enum glsl_sampler_dim dim, bool shadow, bool array,
+ unsigned type, const char *name) :
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(dim), sampler_shadow(shadow),
}
glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
- const char *name) :
+ const char *name) :
gl_type(0),
base_type(GLSL_TYPE_STRUCT),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
assert(name != NULL);
this->name = ralloc_strdup(this->mem_ctx, name);
this->fields.structure = ralloc_array(this->mem_ctx,
- glsl_struct_field, length);
+ glsl_struct_field, length);
for (i = 0; i < length; i++) {
this->fields.structure[i].type = fields[i].type;
this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
- fields[i].name);
+ fields[i].name);
this->fields.structure[i].location = fields[i].location;
this->fields.structure[i].interpolation = fields[i].interpolation;
this->fields.structure[i].centroid = fields[i].centroid;
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
}
mtx_unlock(&glsl_type::mutex);
}
glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
- enum glsl_interface_packing packing, const char *name) :
+ enum glsl_interface_packing packing, const char *name) :
gl_type(0),
base_type(GLSL_TYPE_INTERFACE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
assert(name != NULL);
this->name = ralloc_strdup(this->mem_ctx, name);
this->fields.structure = ralloc_array(this->mem_ctx,
- glsl_struct_field, length);
+ glsl_struct_field, length);
for (i = 0; i < length; i++) {
this->fields.structure[i].type = fields[i].type;
this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
- fields[i].name);
+ fields[i].name);
this->fields.structure[i].location = fields[i].location;
this->fields.structure[i].interpolation = fields[i].interpolation;
this->fields.structure[i].centroid = fields[i].centroid;
mtx_unlock(&glsl_type::mutex);
}
+glsl_type::glsl_type(const glsl_type *return_type,
+ const glsl_function_param *params, unsigned num_params) :
+ gl_type(0),
+ base_type(GLSL_TYPE_FUNCTION),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(num_params)
+{
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+
+ this->fields.parameters = rzalloc_array(this->mem_ctx,
+ glsl_function_param, num_params + 1);
+
+ /* We store the return type as the first parameter */
+ this->fields.parameters[0].type = return_type;
+ this->fields.parameters[0].in = false;
+ this->fields.parameters[0].out = true;
+
+ /* We store the i'th parameter in slot i+1 */
+ for (i = 0; i < length; i++) {
+ this->fields.parameters[i + 1].type = params[i].type;
+ this->fields.parameters[i + 1].in = params[i].in;
+ this->fields.parameters[i + 1].out = params[i].out;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+}
+
glsl_type::glsl_type(const char *subroutine_name) :
gl_type(0),
base_type(GLSL_TYPE_SUBROUTINE),
return this->fields.array->contains_sampler();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_sampler())
- return true;
+ if (this->fields.structure[i].type->contains_sampler())
+ return true;
}
return false;
} else {
return this->fields.array->contains_integer();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_integer())
- return true;
+ if (this->fields.structure[i].type->contains_integer())
+ return true;
}
return false;
} else {
return this->fields.array->contains_double();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_double())
- return true;
+ if (this->fields.structure[i].type->contains_double())
+ return true;
}
return false;
} else {
return this->fields.array->contains_subroutine();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_subroutine())
- return true;
+ if (this->fields.structure[i].type->contains_subroutine())
+ return true;
}
return false;
} else {
return this->fields.array->contains_image();
} else if (this->is_record()) {
for (unsigned int i = 0; i < this->length; i++) {
- if (this->fields.structure[i].type->contains_image())
- return true;
+ if (this->fields.structure[i].type->contains_image())
+ return true;
}
return false;
} else {
if (columns == 1) {
switch (base_type) {
case GLSL_TYPE_UINT:
- return uvec(rows);
+ return uvec(rows);
case GLSL_TYPE_INT:
- return ivec(rows);
+ return ivec(rows);
case GLSL_TYPE_FLOAT:
- return vec(rows);
+ return vec(rows);
case GLSL_TYPE_DOUBLE:
- return dvec(rows);
+ return dvec(rows);
case GLSL_TYPE_BOOL:
- return bvec(rows);
+ return bvec(rows);
default:
- return error_type;
+ return error_type;
}
} else {
if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
- return error_type;
+ return error_type;
/* GLSL matrix types are named mat{COLUMNS}x{ROWS}. Only the following
* combinations are valid:
for (unsigned i = 0; i < this->length; i++) {
if (this->fields.structure[i].type != b->fields.structure[i].type)
- return false;
+ return false;
if (strcmp(this->fields.structure[i].name,
- b->fields.structure[i].name) != 0)
- return false;
+ b->fields.structure[i].name) != 0)
+ return false;
if (this->fields.structure[i].matrix_layout
!= b->fields.structure[i].matrix_layout)
return false;
if (this->fields.structure[i].patch
!= b->fields.structure[i].patch)
return false;
+ if (this->fields.structure[i].image_read_only
+ != b->fields.structure[i].image_read_only)
+ return false;
+ if (this->fields.structure[i].image_write_only
+ != b->fields.structure[i].image_write_only)
+ return false;
+ if (this->fields.structure[i].image_coherent
+ != b->fields.structure[i].image_coherent)
+ return false;
+ if (this->fields.structure[i].image_volatile
+ != b->fields.structure[i].image_volatile)
+ return false;
+ if (this->fields.structure[i].image_restrict
+ != b->fields.structure[i].image_restrict)
+ return false;
}
return true;
const glsl_type *
glsl_type::get_record_instance(const glsl_struct_field *fields,
- unsigned num_fields,
- const char *name)
+ unsigned num_fields,
+ const char *name)
{
const glsl_type key(fields, num_fields, name);
const glsl_type *
glsl_type::get_interface_instance(const glsl_struct_field *fields,
- unsigned num_fields,
- enum glsl_interface_packing packing,
- const char *block_name)
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *block_name)
{
const glsl_type key(fields, num_fields, packing, block_name);
}
+static bool
+function_key_compare(const void *a, const void *b)
+{
+ const glsl_type *const key1 = (glsl_type *) a;
+ const glsl_type *const key2 = (glsl_type *) b;
+
+ if (key1->length != key2->length)
+ return false;
+
+ /* The key_equals callback must return true when the keys match; the
+ * return type is stored in parameters[0], hence length + 1 entries.
+ */
+ return memcmp(key1->fields.parameters, key2->fields.parameters,
+ (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
+}
+
+
+static uint32_t
+function_key_hash(const void *a)
+{
+ const glsl_type *const key = (glsl_type *) a;
+ char hash_key[128];
+ unsigned size = 0;
+
+ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
+
+ /* The fields union holds glsl_function_param for function types, so
+ * index fields.parameters (fields.structure has a different stride).
+ */
+ for (unsigned i = 0; i < key->length; i++) {
+ if (size >= sizeof(hash_key))
+ break;
+
+ size += snprintf(& hash_key[size], sizeof(hash_key) - size,
+ "%p", (void *) key->fields.parameters[i].type);
+ }
+
+ return _mesa_hash_string(hash_key);
+}
+
+const glsl_type *
+glsl_type::get_function_instance(const glsl_type *return_type,
+ const glsl_function_param *params,
+ unsigned num_params)
+{
+ const glsl_type key(return_type, params, num_params);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (function_types == NULL) {
+ function_types = _mesa_hash_table_create(NULL, function_key_hash,
+ function_key_compare);
+ }
+
+ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
+ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
+ const glsl_type *t = new glsl_type(return_type, params, num_params);
+ mtx_lock(&glsl_type::mutex);
+
+ entry = _mesa_hash_table_insert(function_types, t, (void *) t);
+ }
+
+ const glsl_type *t = (const glsl_type *)entry->data;
+
+ assert(t->base_type == GLSL_TYPE_FUNCTION);
+ assert(t->length == num_params);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return t;
+}
+
+
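A minimal usage sketch for the new function-type API, assuming the builtin type shortcuts (glsl_type::float_type and friends) declared in glsl_types.h; this builds the type of a hypothetical `float f(vec3 p, out int q)`:

   glsl_function_param params[2];
   params[0].type = glsl_type::vec3_type;            /* in  vec3 p */
   params[0].in = true;   params[0].out = false;
   params[1].type = glsl_type::int_type;             /* out int  q */
   params[1].in = false;  params[1].out = true;

   const glsl_type *fn_type =
      glsl_type::get_function_instance(glsl_type::float_type, params, 2);
   /* fn_type->fields.parameters[0].type is the return type (float). */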
const glsl_type *
glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
{
for (unsigned i = 0; i < this->length; i++) {
if (strcmp(name, this->fields.structure[i].name) == 0)
- return this->fields.structure[i].type;
+ return this->fields.structure[i].type;
}
return error_type;
for (unsigned i = 0; i < this->length; i++) {
if (strcmp(name, this->fields.structure[i].name) == 0)
- return i;
+ return i;
}
return -1;
unsigned size = 0;
for (unsigned i = 0; i < this->length; i++)
- size += this->fields.structure[i].type->component_slots();
+ size += this->fields.structure[i].type->component_slots();
return size;
}
return 1;
case GLSL_TYPE_SUBROUTINE:
return 1;
+
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
return 0;
}
+ unsigned
+ glsl_type::record_location_offset(unsigned length) const
+ {
+ unsigned offset = 0;
+ const glsl_type *t = this->without_array();
+ if (t->is_record()) {
+ assert(length <= t->length);
+
+ for (unsigned i = 0; i < length; i++) {
+ const glsl_type *st = t->fields.structure[i].type;
+ const glsl_type *wa = st->without_array();
+ if (wa->is_record()) {
+ unsigned r_offset = wa->record_location_offset(wa->length);
+ offset += st->is_array() ? st->length * r_offset : r_offset;
+ } else {
+ /* We don't worry about arrays here because unless the array
+ * contains a structure or another array it only takes up a single
+ * uniform slot.
+ */
+ offset += 1;
+ }
+ }
+ }
+ return offset;
+ }
+
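A worked example of the helper above (hypothetical types; uses the get_record_instance/get_array_instance constructors from this file and assert from <assert.h>):

   glsl_struct_field inner_fields[] = {
      glsl_struct_field(glsl_type::float_type, "a"),
      glsl_struct_field(glsl_type::float_type, "b"),
   };
   const glsl_type *inner =
      glsl_type::get_record_instance(inner_fields, 2, "Inner");

   glsl_struct_field outer_fields[] = {
      glsl_struct_field(glsl_type::vec4_type, "x"),                     /* 1 slot  */
      glsl_struct_field(glsl_type::get_array_instance(inner, 3), "y"),  /* 3 * 2   */
      glsl_struct_field(glsl_type::float_type, "z"),
   };
   const glsl_type *outer =
      glsl_type::get_record_instance(outer_fields, 3, "Outer");

   /* Offset of member 2 ("z"): 1 (x) + 3 * 2 (y) = 7. */
   assert(outer->record_location_offset(2) == 7);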
unsigned
glsl_type::uniform_locations() const
{
if (this->is_scalar() || this->is_vector()) {
switch (this->vector_elements) {
case 1:
- return N;
+ return N;
case 2:
- return 2 * N;
+ return 2 * N;
case 3:
case 4:
- return 4 * N;
+ return 4 * N;
}
}
*/
if (this->is_array()) {
if (this->fields.array->is_scalar() ||
- this->fields.array->is_vector() ||
- this->fields.array->is_matrix()) {
- return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+ this->fields.array->is_vector() ||
+ this->fields.array->is_matrix()) {
+ return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
} else {
- assert(this->fields.array->is_record() ||
+ assert(this->fields.array->is_record() ||
this->fields.array->is_array());
- return this->fields.array->std140_base_alignment(row_major);
+ return this->fields.array->std140_base_alignment(row_major);
}
}
int r = this->vector_elements;
if (row_major) {
- vec_type = get_instance(base_type, c, 1);
- array_type = glsl_type::get_array_instance(vec_type, r);
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
} else {
- vec_type = get_instance(base_type, r, 1);
- array_type = glsl_type::get_array_instance(vec_type, c);
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
}
return array_type->std140_base_alignment(false);
field_row_major = false;
}
- const struct glsl_type *field_type = this->fields.structure[i].type;
- base_alignment = MAX2(base_alignment,
- field_type->std140_base_alignment(field_row_major));
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std140_base_alignment(field_row_major));
}
return base_alignment;
}
unsigned int array_len;
if (this->is_array()) {
- element_type = this->fields.array;
- array_len = this->length;
+ element_type = this->fields.array;
+ array_len = this->length;
} else {
- element_type = this;
- array_len = 1;
+ element_type = this;
+ array_len = 1;
}
if (row_major) {
vec_type = get_instance(element_type->base_type,
element_type->matrix_columns, 1);
- array_len *= element_type->vector_elements;
+ array_len *= element_type->vector_elements;
} else {
- vec_type = get_instance(element_type->base_type,
- element_type->vector_elements, 1);
- array_len *= element_type->matrix_columns;
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
}
const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
- array_len);
+ array_len);
return array_type->std140_size(false);
}
*/
if (this->is_array()) {
if (this->fields.array->is_record()) {
- return this->length * this->fields.array->std140_size(row_major);
+ return this->length * this->fields.array->std140_size(row_major);
} else {
- unsigned element_base_align =
- this->fields.array->std140_base_alignment(row_major);
- return this->length * MAX2(element_base_align, 16);
+ unsigned element_base_align =
+ this->fields.array->std140_base_alignment(row_major);
+ return this->length * MAX2(element_base_align, 16);
}
}
* rounded up to the next multiple of the base alignment of the
* structure.
*/
- if (this->is_record()) {
+ if (this->is_record() || this->is_interface()) {
unsigned size = 0;
unsigned max_align = 0;
field_row_major = false;
}
- const struct glsl_type *field_type = this->fields.structure[i].type;
- unsigned align = field_type->std140_base_alignment(field_row_major);
- size = glsl_align(size, align);
- size += field_type->std140_size(field_row_major);
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std140_base_alignment(field_row_major);
+
+ /* Ignore unsized arrays when calculating size */
+ if (field_type->is_unsized_array())
+ continue;
+
+ size = glsl_align(size, align);
+ size += field_type->std140_size(field_row_major);
max_align = MAX2(align, max_align);
return -1;
}
+ unsigned
+ glsl_type::std430_base_alignment(bool row_major) const
+ {
+
+ unsigned N = is_double() ? 8 : 4;
+
+ /* (1) If the member is a scalar consuming <N> basic machine units, the
+ * base alignment is <N>.
+ *
+ * (2) If the member is a two- or four-component vector with components
+ * consuming <N> basic machine units, the base alignment is 2<N> or
+ * 4<N>, respectively.
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_scalar() || this->is_vector()) {
+ switch (this->vector_elements) {
+ case 1:
+ return N;
+ case 2:
+ return 2 * N;
+ case 3:
+ case 4:
+ return 4 * N;
+ }
+ }
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up to a multiple of the base alignment of a
+ * vec4."
+ */
+
+ /* (4) If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single array
+ * element, according to rules (1), (2), and (3); under std430 the result
+ * is not rounded up to the base alignment of a vec4.
+ */
+ if (this->is_array())
+ return this->fields.array->std430_base_alignment(row_major);
+
+ /* (5) If the member is a column-major matrix with <C> columns and
+ * <R> rows, the matrix is stored identically to an array of
+ * <C> column vectors with <R> components each, according to
+ * rule (4).
+ *
+ * (7) If the member is a row-major matrix with <C> columns and <R>
+ * rows, the matrix is stored identically to an array of <R>
+ * row vectors with <C> components each, according to rule (4).
+ */
+ if (this->is_matrix()) {
+ const struct glsl_type *vec_type, *array_type;
+ int c = this->matrix_columns;
+ int r = this->vector_elements;
+
+ if (row_major) {
+ vec_type = get_instance(base_type, c, 1);
+ array_type = glsl_type::get_array_instance(vec_type, r);
+ } else {
+ vec_type = get_instance(base_type, r, 1);
+ array_type = glsl_type::get_array_instance(vec_type, c);
+ }
+
+ return array_type->std430_base_alignment(false);
+ }
+
+ /* (9) If the member is a structure, the base alignment of the
+ * structure is <N>, where <N> is the largest base alignment
+ * value of any of its members, and rounded up to the base
+ * alignment of a vec4. The individual members of this
+ * sub-structure are then assigned offsets by applying this set
+ * of rules recursively, where the base offset of the first
+ * member of the sub-structure is equal to the aligned offset
+ * of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is
+ * rounded up to the next multiple of the base alignment of the
+ * structure.
+ */
+ if (this->is_record()) {
+ unsigned base_alignment = 0;
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ base_alignment = MAX2(base_alignment,
+ field_type->std430_base_alignment(field_row_major));
+ }
+ return base_alignment;
+ }
+ assert(!"not reached");
+ return -1;
+ }
+
+ unsigned
+ glsl_type::std430_array_stride(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+ * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+ *
+ * (3) If the member is a three-component vector with components consuming
+ * <N> basic machine units, the base alignment is 4<N>.
+ */
+ if (this->is_vector() && this->vector_elements == 3)
+ return 4 * N;
+
+ /* By default use std430_size(row_major) */
+ return this->std430_size(row_major);
+ }
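A quick illustration of the vec3 special case above (hypothetical check): the array stride is padded to four components even though the vector itself only consumes three:

   assert(glsl_type::vec3_type->std430_array_stride(false) == 16); /* 4 * 4B */
   assert(glsl_type::vec3_type->std430_size(false) == 12);         /* 3 * 4B */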
+
+ unsigned
+ glsl_type::std430_size(bool row_major) const
+ {
+ unsigned N = is_double() ? 8 : 4;
+
+ /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+ *
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures
+ * in rule 9 are not rounded up to a multiple of the base alignment of a
+ * vec4."
+ */
+ if (this->is_scalar() || this->is_vector())
+ return this->vector_elements * N;
+
+ if (this->without_array()->is_matrix()) {
+ const struct glsl_type *element_type;
+ const struct glsl_type *vec_type;
+ unsigned int array_len;
+
+ if (this->is_array()) {
+ element_type = this->fields.array;
+ array_len = this->length;
+ } else {
+ element_type = this;
+ array_len = 1;
+ }
+
+ if (row_major) {
+ vec_type = get_instance(element_type->base_type,
+ element_type->matrix_columns, 1);
+
+ array_len *= element_type->vector_elements;
+ } else {
+ vec_type = get_instance(element_type->base_type,
+ element_type->vector_elements, 1);
+ array_len *= element_type->matrix_columns;
+ }
+ const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+ array_len);
+
+ return array_type->std430_size(false);
+ }
+
+ if (this->is_array()) {
+ if (this->fields.array->is_record())
+ return this->length * this->fields.array->std430_size(row_major);
+ else
+ return this->length * this->fields.array->std430_base_alignment(row_major);
+ }
+
+ if (this->is_record() || this->is_interface()) {
+ unsigned size = 0;
+ unsigned max_align = 0;
+
+ for (unsigned i = 0; i < this->length; i++) {
+ bool field_row_major = row_major;
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+ if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+ field_row_major = true;
+ } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+ field_row_major = false;
+ }
+
+ const struct glsl_type *field_type = this->fields.structure[i].type;
+ unsigned align = field_type->std430_base_alignment(field_row_major);
+ size = glsl_align(size, align);
+ size += field_type->std430_size(field_row_major);
+
+ max_align = MAX2(align, max_align);
+ }
+ size = glsl_align(size, max_align);
+ return size;
+ }
+
+ assert(!"not reached");
+ return -1;
+ }
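To make the std140/std430 difference concrete, a small sketch (hypothetical check): for a float[4], std140 rounds the array stride up to the base alignment of a vec4, while std430 keeps the scalar stride:

   const glsl_type *arr =
      glsl_type::get_array_instance(glsl_type::float_type, 4);

   assert(arr->std140_size(false) == 64);   /* stride rounded up to 16 bytes */
   assert(arr->std430_size(false) == 16);   /* stride stays at 4 bytes       */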
unsigned
glsl_type::count_attribute_slots() const
case GLSL_TYPE_ARRAY:
return this->length * this->fields.array->count_attribute_slots();
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
GLSL_TYPE_IMAGE,
GLSL_TYPE_ATOMIC_UINT,
GLSL_TYPE_STRUCT,
+ GLSL_TYPE_FUNCTION,
GLSL_TYPE_INTERFACE,
GLSL_TYPE_ARRAY,
GLSL_TYPE_VOID,
enum glsl_interface_packing {
GLSL_INTERFACE_PACKING_STD140,
GLSL_INTERFACE_PACKING_SHARED,
- GLSL_INTERFACE_PACKING_PACKED
+ GLSL_INTERFACE_PACKING_PACKED,
+ GLSL_INTERFACE_PACKING_STD430
};
enum glsl_matrix_layout {
*/
union {
const struct glsl_type *array; /**< Type of array elements. */
- const struct glsl_type *parameters; /**< Parameters to function. */
+ struct glsl_function_param *parameters; /**< Parameters to function. */
struct glsl_struct_field *structure; /**< List of struct fields. */
} fields;
*/
static const glsl_type *get_subroutine_instance(const char *subroutine_name);
+ /**
+ * Get the instance of a function type
+ */
+ static const glsl_type *get_function_instance(const struct glsl_type *return_type,
+ const glsl_function_param *parameters,
+ unsigned num_params);
+
/**
* Get the type resulting from a multiplication of \p type_a * \p type_b
*/
*/
unsigned component_slots() const;
+ /**
+ * Calculate offset between the base location of the struct in
+ * uniform storage and a struct member.
+ * For the initial call, length is the index of the member to find the
+ * offset for.
+ */
+ unsigned record_location_offset(unsigned length) const;
+
/**
* Calculate the number of unique values from glGetUniformLocation for the
* elements of the type.
*/
unsigned std140_size(bool row_major) const;
+ /**
+ * Alignment in bytes of the start of this type in a std430 shader
+ * storage block.
+ */
+ unsigned std430_base_alignment(bool row_major) const;
+
+ /**
+ * Calculate array stride in bytes of this type in a std430 shader storage
+ * block.
+ */
+ unsigned std430_array_stride(bool row_major) const;
+
+ /**
+ * Size in bytes of this type in a std430 shader storage block.
+ *
+ * Note that this is not GL_BUFFER_SIZE
+ */
+ unsigned std430_size(bool row_major) const;
+
/**
* \brief Can this type be implicitly converted to another?
*
return t;
}
+ /**
+ * Return the total number of elements in an array including the elements
+ * in arrays of arrays.
+ */
+ unsigned arrays_of_arrays_size() const
+ {
+ if (!is_array())
+ return 0;
+
+ unsigned size = length;
+ const glsl_type *base_type = fields.array;
+
+ while (base_type->is_array()) {
+ size = size * base_type->length;
+ base_type = base_type->fields.array;
+ }
+ return size;
+ }
+
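For example (hypothetical type), a float[3][4] built from nested array instances yields 12 elements in total:

   const glsl_type *row =
      glsl_type::get_array_instance(glsl_type::float_type, 4);
   const glsl_type *aoa = glsl_type::get_array_instance(row, 3);
   assert(aoa->arrays_of_arrays_size() == 12);   /* 3 * 4 */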
/**
* Return the amount of atomic counter storage required for a type.
*/
glsl_type(const glsl_struct_field *fields, unsigned num_fields,
enum glsl_interface_packing packing, const char *name);
+ /** Constructor for function types */
+ glsl_type(const glsl_type *return_type,
+ const glsl_function_param *params, unsigned num_params);
+
/** Constructor for array types */
glsl_type(const glsl_type *array, unsigned length);
/** Hash table containing the known subroutine types. */
static struct hash_table *subroutine_types;
+ /** Hash table containing the known function types. */
+ static struct hash_table *function_types;
+
static bool record_key_compare(const void *a, const void *b);
static unsigned record_key_hash(const void *key);
/*@}*/
};
+#undef DECL_TYPE
+#undef STRUCT_TYPE
+#endif /* __cplusplus */
+
struct glsl_struct_field {
const struct glsl_type *type;
const char *name;
*/
int stream;
-
+ /**
+ * Image qualifiers, applicable to buffer variables defined in shader
+ * storage buffer objects (SSBOs)
+ */
+ unsigned image_read_only:1;
+ unsigned image_write_only:1;
+ unsigned image_coherent:1;
+ unsigned image_volatile:1;
+ unsigned image_restrict:1;
+
+#ifdef __cplusplus
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
{
/* empty */
}
+#endif
+};
+
+struct glsl_function_param {
+ const struct glsl_type *type;
+
+ bool in;
+ bool out;
};
static inline unsigned int
return (a + align - 1) / align * align;
}
-#undef DECL_TYPE
-#undef STRUCT_TYPE
-#endif /* __cplusplus */
-
#endif /* GLSL_TYPES_H */
case ir_tex:
case ir_lod:
case ir_query_levels:
+ case ir_texture_samples:
break;
case ir_txb:
new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
return c;
}
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
get_uniform_block_index(const gl_shader_program *shProg,
const char *uniformBlockName)
{
- for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
return i;
}
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_VOID:
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
class nir_visitor : public ir_visitor
{
public:
- nir_visitor(nir_shader *shader);
+ nir_visitor(nir_shader *shader, gl_shader *sh);
~nir_visitor();
virtual void visit(ir_variable *);
bool supports_ints;
+ struct gl_shader *sh;
+
nir_shader *shader;
nir_function_impl *impl;
exec_list *cf_node_list;
{
nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
- nir_visitor v1(shader);
+ nir_visitor v1(shader, sh);
nir_function_visitor v2(&v1);
v2.run(sh->ir);
visit_exec_list(sh->ir, &v1);
+ nir_lower_outputs_to_temporaries(shader);
+
+ shader->gs.vertices_out = sh->Geom.VerticesOut;
+ shader->gs.invocations = sh->Geom.Invocations;
+
return shader;
}
-nir_visitor::nir_visitor(nir_shader *shader)
+nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh)
{
this->supports_ints = shader->options->native_integers;
this->shader = shader;
+ this->sh = sh;
this->is_global = true;
this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
}
var->data.index = ir->data.index;
+ var->data.descriptor_set = 0;
var->data.binding = ir->data.binding;
/* XXX Get rid of buffer_index */
var->data.atomic.buffer_index = ir->data.binding;
op = nir_intrinsic_memory_barrier;
} else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
op = nir_intrinsic_image_size;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+ op = nir_intrinsic_image_samples;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
} else {
unreachable("not reached");
}
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+ nir_dest *dest = &instr->dest;
switch (op) {
case nir_intrinsic_atomic_counter_read_var:
(ir_dereference *) ir->actual_parameters.get_head();
instr->variables[0] = evaluate_deref(&instr->instr, param);
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
}
case nir_intrinsic_image_load:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_samples:
case nir_intrinsic_image_size: {
nir_ssa_undef_instr *instr_undef =
nir_ssa_undef_instr_create(shader, 1);
info->dest_components, NULL);
}
- if (op == nir_intrinsic_image_size)
+ if (op == nir_intrinsic_image_size ||
+ op == nir_intrinsic_image_samples) {
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
+ }
/* Set the address argument, extending the coordinate vector to four
* components.
instr->src[3] = evaluate_rvalue((ir_dereference *)param);
param = param->get_next();
}
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
}
case nir_intrinsic_memory_barrier:
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
+ case nir_intrinsic_store_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+ op = nir_intrinsic_store_ssbo_indirect;
+ ralloc_free(instr);
+ instr = nir_intrinsic_instr_create(shader, op);
+ instr->src[2] = evaluate_rvalue(offset);
+ instr->const_index[0] = 0;
+ dest = &instr->dest;
+ } else {
+ instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = evaluate_rvalue(val);
+ instr->num_components = val->type->vector_elements;
+
+ instr->src[1] = evaluate_rvalue(block);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_ssbo: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ /* Check if we need the indirect version */
+ ir_constant *const_offset = offset->as_constant();
+ if (!const_offset) {
+ op = nir_intrinsic_load_ssbo_indirect;
+ ralloc_free(instr);
+ instr = nir_intrinsic_instr_create(shader, op);
+ instr->src[1] = evaluate_rvalue(offset);
+ instr->const_index[0] = 0;
+ dest = &instr->dest;
+ } else {
+ instr->const_index[0] = const_offset->value.u[0];
+ }
+
+ instr->src[0] = evaluate_rvalue(block);
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Setup destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now, since for a boolean result we
+ * will need to emit another instruction after it.
+ */
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+ /*
+ * In SSBOs and UBOs, a true boolean value is any non-zero value, but we
+ * consider a true boolean to be ~0. Fix this up with a != 0
+ * comparison.
+ */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+ nir_load_const_instr *const_zero =
+ nir_load_const_instr_create(shader, 1);
+ const_zero->value.u[0] = 0;
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &const_zero->instr);
+
+ nir_alu_instr *load_ssbo_compare =
+ nir_alu_instr_create(shader, nir_op_ine);
+ load_ssbo_compare->src[0].src.is_ssa = true;
+ load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+ load_ssbo_compare->src[1].src.is_ssa = true;
+ load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+ for (unsigned i = 0; i < type->vector_elements; i++)
+ load_ssbo_compare->src[1].swizzle[i] = 0;
+ nir_ssa_dest_init(&load_ssbo_compare->instr,
+ &load_ssbo_compare->dest.dest,
+ type->vector_elements, NULL);
+ load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+ nir_instr_insert_after_cf_list(this->cf_node_list,
+ &load_ssbo_compare->instr);
+ dest = &load_ssbo_compare->dest.dest;
+ }
+ break;
+ }
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_min:
+ case nir_intrinsic_ssbo_atomic_max:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+ assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+ break;
+ }
default:
unreachable("not reached");
}
- nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
-
if (ir->return_deref) {
nir_intrinsic_instr *store_instr =
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->variables[0] =
evaluate_deref(&store_instr->instr, ir->return_deref);
- store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+ store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
nir_instr_insert_after_cf_list(this->cf_node_list,
&store_instr->instr);
{
nir_dest *dest = get_instr_dest(instr);
- nir_ssa_dest_init(instr, dest, num_components, NULL);
+ if (dest)
+ nir_ssa_dest_init(instr, dest, num_components, NULL);
nir_instr_insert_after_cf_list(this->cf_node_list, instr);
this->result = instr;
} else {
op = nir_intrinsic_load_ubo_indirect;
}
+
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
load->num_components = ir->type->vector_elements;
load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
- load->const_index[1] = 1; /* number of vec4's */
load->src[0] = evaluate_rvalue(ir->operands[0]);
if (!const_index)
load->src[1] = evaluate_rvalue(ir->operands[1]);
unreachable("not reached");
}
break;
+ case ir_unop_get_buffer_size: {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = evaluate_rvalue(ir->operands[0]);
+ add_instr(&load->instr, ir->type->vector_elements);
+ return;
+ }
+
case ir_binop_add:
case ir_binop_sub:
case ir_binop_mul:
num_srcs = 0;
break;
+ case ir_texture_samples:
+ op = nir_texop_texture_samples;
+ num_srcs = 0;
+ break;
+
default:
unreachable("not reached");
}
* - Fragment shader output: one of the values from \c gl_frag_result.
* - Uniforms: Per-stage uniform slot number for default uniform block.
* - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: uniform slot number.
* - Other: This field is not currently used.
*
* If the variable is a uniform, shader input, or shader output, and the
*/
int index;
+ /**
+ * Descriptor set binding for sampler or UBO.
+ */
+ int descriptor_set;
+
/**
* Initial binding point for a sampler or UBO.
*
nir_instr_type type;
struct nir_block *block;
+ /** Generic instruction index, assigned by nir_index_instrs(). */
+ unsigned index;
+
/* A temporary for optimization and analysis passes to use for storing
* flags. For instance, DCE uses this to store the "dead/live" info.
*/
bool is_ssa;
} nir_src;
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+#ifdef __cplusplus
+# define NIR_SRC_INIT nir_src()
+#else
+# define NIR_SRC_INIT (nir_src) { { NULL } }
+#endif
#define nir_foreach_use(reg_or_ssa_def, src) \
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
bool is_ssa;
} nir_dest;
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+#ifdef __cplusplus
+# define NIR_DEST_INIT nir_dest()
+#else
+# define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+#endif
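A quick usage sketch (illustrative only): the C++ guard matters because compound literals are not valid C++, so the same initializer must be spelled differently per language.

static void
init_example(void)
{
   nir_src src = NIR_SRC_INIT;    /* nir_src() when compiled as C++ */
   nir_dest dest = NIR_DEST_INIT; /* nir_dest() when compiled as C++ */
   (void) src;
   (void) dest;
}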
#define nir_foreach_def(reg, dest) \
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
return dest;
}
- void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
- void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
typedef struct {
nir_src src;
unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
} nir_alu_dest;
- void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
- void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
- void *mem_ctx);
-
typedef enum {
nir_type_invalid = 0, /* Not a valid type */
nir_type_float,
nir_alu_src src[];
} nir_alu_instr;
+ void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr);
+ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr);
+
/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+/** Returns the tail of a deref chain */
+static inline nir_deref *
+nir_deref_tail(nir_deref *deref)
+{
+ while (deref->child)
+ deref = deref->child;
+ return deref;
+}
+
typedef struct {
nir_instr instr;
nir_texop_txs, /**< Texture size */
nir_texop_lod, /**< Texture lod query */
nir_texop_tg4, /**< Texture gather */
- nir_texop_query_levels /**< Texture levels query */
+ nir_texop_query_levels, /**< Texture levels query */
+ nir_texop_texture_samples, /**< Texture samples query */
} nir_texop;
typedef struct {
/* gather component selector */
unsigned component : 2;
+ /* The descriptor set containing this texture */
+ unsigned sampler_set;
+
/** The sampler index
*
* If this texture instruction has a nir_tex_src_sampler_offset source,
case nir_texop_lod:
return 2;
+ case nir_texop_texture_samples:
case nir_texop_query_levels:
return 1;
typedef struct nir_shader_compiler_options {
bool lower_ffma;
+ bool lower_fdiv;
bool lower_flrp;
bool lower_fpow;
bool lower_fsat;
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
bool lower_scmp;
+ /* Does the native fdot instruction replicate its result for four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
+ /** lowers ffract to fsub+ffloor: */
+ bool lower_ffract;
+
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
* are simulated by floats.)
} nir_shader_compiler_options;
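As a hedged sketch, a driver wanting the new lowerings might populate the added flags like so; the particular combination is illustrative, not taken from any real backend:

static const nir_shader_compiler_options example_options = {
   .lower_fdiv = true,      /* fdiv -> fmul + frcp, via nir_opt_algebraic */
   .lower_ffract = true,    /* ffract -> fsub + ffloor */
   .fdot_replicates = true, /* fdotN -> fdot_replicatedN in opt_algebraic_late */
};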
typedef struct nir_shader {
- /** hash table of name -> uniform nir_variable */
+ /** list of uniforms (nir_variable) */
struct exec_list uniforms;
- /** hash table of name -> input nir_variable */
+ /** list of inputs (nir_variable) */
struct exec_list inputs;
- /** hash table of name -> output nir_variable */
+ /** list of outputs (nir_variable) */
struct exec_list outputs;
/** Set of driver-specific options for the shader.
*/
const struct nir_shader_compiler_options *options;
- /** list of global variables in the shader */
+ /** list of global variables in the shader (nir_variable) */
struct exec_list globals;
- /** list of system value variables in the shader */
+ /** list of system value variables in the shader (nir_variable) */
struct exec_list system_values;
struct exec_list functions; /** < list of nir_function */
/** The shader stage, such as MESA_SHADER_VERTEX. */
gl_shader_stage stage;
+
+ struct {
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+ } gs;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
};
} nir_cursor;
+static inline nir_block *
+nir_cursor_current_block(nir_cursor cursor)
+{
+ if (cursor.option == nir_cursor_before_instr ||
+ cursor.option == nir_cursor_after_instr) {
+ return cursor.instr->block;
+ } else {
+ return cursor.block;
+ }
+}
+
static inline nir_cursor
nir_before_block(nir_block *block)
{
void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+ nir_dest new_dest);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name);
- void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
/* visits basic blocks in source-code order */
typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
void *state);
bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
void *state);
+ bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state);
/* If the following CF node is an if, this function returns that if.
* Otherwise, it returns NULL.
*/
nir_if *nir_block_get_following_if(nir_block *block);
+ nir_loop *nir_block_get_following_loop(nir_block *block);
+
void nir_index_local_regs(nir_function_impl *impl);
void nir_index_global_regs(nir_shader *shader);
void nir_index_ssa_defs(nir_function_impl *impl);
+ unsigned nir_index_instrs(nir_function_impl *impl);
void nir_index_blocks(nir_function_impl *impl);
void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);
- void nir_split_var_copies(nir_shader *shader);
+ int nir_gs_count_vertices(nir_shader *shader);
+
+ bool nir_split_var_copies(nir_shader *shader);
void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
void nir_lower_var_copies(nir_shader *shader);
- void nir_lower_global_vars_to_local(nir_shader *shader);
+ bool nir_lower_global_vars_to_local(nir_shader *shader);
- void nir_lower_locals_to_regs(nir_shader *shader);
+ bool nir_lower_locals_to_regs(nir_shader *shader);
+
+ void nir_lower_outputs_to_temporaries(nir_shader *shader);
+
void nir_assign_var_locations(struct exec_list *var_list,
unsigned *size,
int (*type_size)(const struct glsl_type *));
int (*type_size)(const struct glsl_type *));
void nir_lower_vars_to_ssa(nir_shader *shader);
- void nir_remove_dead_variables(nir_shader *shader);
+ bool nir_remove_dead_variables(nir_shader *shader);
- void nir_lower_vec_to_movs(nir_shader *shader);
+ void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+ bool nir_lower_vec_to_movs(nir_shader *shader);
void nir_lower_alu_to_scalar(nir_shader *shader);
void nir_lower_load_const_to_scalar(nir_shader *shader);
void nir_lower_samplers(nir_shader *shader,
const struct gl_shader_program *shader_program);
+void nir_lower_samplers_for_vk(nir_shader *shader);
- void nir_lower_system_values(nir_shader *shader);
- void nir_lower_tex_projector(nir_shader *shader);
+ bool nir_lower_system_values(nir_shader *shader);
+
+ typedef struct nir_lower_tex_options {
+ /**
+ * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+ * sampler types a texture projector is lowered.
+ */
+ unsigned lower_txp;
+
+ /**
+ * If true, lower rect textures to 2D, using txs to fetch the
+ * texture dimensions and dividing the texture coords by the
+ * texture dims to normalize.
+ */
+ bool lower_rect;
+
+ /**
+ * To emulate certain texture wrap modes, this can be used
+ * to saturate the specified tex coord to [0.0, 1.0]. The
+ * bits are indexed by sampler number, i.e. if, for example:
+ *
+ * (conf->saturate_s & (1 << n))
+ *
+ * is set, then the s coordinate for sampler n is saturated.
+ *
+ * Note that clamping must happen *after* projector lowering
+ * so any projected texture sample instruction with a clamped
+ * coordinate gets automatically lowered, regardless of the
+ * 'lower_txp' setting.
+ */
+ unsigned saturate_s;
+ unsigned saturate_t;
+ unsigned saturate_r;
+ } nir_lower_tex_options;
+
+ void nir_lower_tex(nir_shader *shader,
+ const nir_lower_tex_options *options);
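A minimal sketch of driving the new pass for a shader that only needs RECT handling; the option values here are illustrative:

static void
lower_tex_example(nir_shader *shader)
{
   nir_lower_tex_options tex_options = {0};

   tex_options.lower_txp = 1 << GLSL_SAMPLER_DIM_RECT; /* projectors on RECT only */
   tex_options.lower_rect = true;                      /* rewrite rect as 2D */

   nir_lower_tex(shader, &tex_options);
}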
+
void nir_lower_idiv(nir_shader *shader);
+ void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+ void nir_lower_two_sided_color(nir_shader *shader);
+
void nir_lower_atomics(nir_shader *shader);
void nir_lower_to_source_mods(nir_shader *shader);
- void nir_normalize_cubemap_coords(nir_shader *shader);
+ bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+ bool nir_normalize_cubemap_coords(nir_shader *shader);
void nir_live_variables_impl(nir_function_impl *impl);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
bool nir_opt_dce_impl(nir_function_impl *impl);
bool nir_opt_dce(nir_shader *shader);
+ bool nir_opt_dead_cf(nir_shader *shader);
+
void nir_opt_gcm(nir_shader *shader);
bool nir_opt_peephole_select(nir_shader *shader);
void nir_sweep(nir_shader *shader);
+ nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+ gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
static inline nir_ssa_def *
nir_imm_float(nir_builder *build, float x)
{
- nir_const_value v = { { .f = {x, 0, 0, 0} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+
return nir_build_imm(build, 1, v);
}
static inline nir_ssa_def *
nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
{
- nir_const_value v = { { .f = {x, y, z, w} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.f[0] = x;
+ v.f[1] = y;
+ v.f[2] = z;
+ v.f[3] = w;
+
return nir_build_imm(build, 4, v);
}
static inline nir_ssa_def *
nir_imm_int(nir_builder *build, int x)
{
- nir_const_value v = { { .i = {x, 0, 0, 0} } };
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+ v.i[0] = x;
+
return nir_build_imm(build, 1, v);
}
#include "nir_builder_opcodes.h"
+ static inline nir_ssa_def *
+ nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+ {
+ switch (num_components) {
+ case 4:
+ return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+ case 3:
+ return nir_vec3(build, comp[0], comp[1], comp[2]);
+ case 2:
+ return nir_vec2(build, comp[0], comp[1]);
+ case 1:
+ return comp[0];
+ default:
+ unreachable("bad component count");
+ return NULL;
+ }
+ }
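For example (a sketch; the scalar components are assumed to exist in the caller):

static nir_ssa_def *
build_vec3_example(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
                   nir_ssa_def *z)
{
   nir_ssa_def *comps[3] = { x, y, z };
   return nir_vec(b, comps, 3); /* dispatches to nir_vec3() */
}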
+
/**
* Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
*/
nir_imov_alu(build, alu_src, num_components);
}
+/* Selects the right fdot given the number of components in each source. */
+static inline nir_ssa_def *
+nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+ assert(src0->num_components == src1->num_components);
+ switch (src0->num_components) {
+ case 1: return nir_fmul(build, src0, src1);
+ case 2: return nir_fdot2(build, src0, src1);
+ case 3: return nir_fdot3(build, src0, src1);
+ case 4: return nir_fdot4(build, src0, src1);
+ default:
+ unreachable("bad component size");
+ }
+
+ return NULL;
+}
+
+ static inline nir_ssa_def *
+ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+ {
+ unsigned swizzle[4] = {c, c, c, c};
+ return nir_swizzle(b, def, swizzle, 1, false);
+ }
+
/**
* Turns a nir_src into a nir_ssa_def * so it can be passed to
* nir_build_alu()-based builder calls.
return nir_imov_alu(build, alu, num_components);
}
+ static inline nir_ssa_def *
+ nir_load_var(nir_builder *build, nir_variable *var)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+ load->num_components = num_components;
+ load->variables[0] = nir_deref_var_create(load, var);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_builder_instr_insert(build, &load->instr);
+ return &load->dest.ssa;
+ }
+
+ static inline void
+ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+ {
+ const unsigned num_components = glsl_get_vector_elements(var->type);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+ store->num_components = num_components;
+ store->variables[0] = nir_deref_var_create(store, var);
+ store->src[0] = nir_src_for_ssa(value);
+ nir_builder_instr_insert(build, &store->instr);
+ }
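Together these make variable-to-variable moves a one-liner; a hedged sketch:

static void
copy_var_example(nir_builder *b, nir_variable *dst, nir_variable *src)
{
   /* assumes dst and src share the same vector type */
   nir_store_var(b, dst, nir_load_var(b, src));
}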
+
#endif /* NIR_BUILDER_H */
INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ /*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as its only source.
+ */
+ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
/*
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
* around/optimized in general
/** A conditional discard, with a single boolean source. */
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+ /**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+ /**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+ INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
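As a hedged sketch of what nir_lower_gs_intrinsics() is expected to produce, each emit is rewritten against a counter; `vertex_count` here is an assumed local nir_variable:

static void
emit_vertex_with_counter_example(nir_builder *b, nir_shader *shader,
                                 nir_variable *vertex_count, unsigned stream)
{
   nir_ssa_def *count = nir_load_var(b, vertex_count);

   nir_intrinsic_instr *emit =
      nir_intrinsic_instr_create(shader, nir_intrinsic_emit_vertex_with_counter);
   emit->src[0] = nir_src_for_ssa(count); /* the extra vertex-count source */
   emit->const_index[0] = stream;         /* stream ID, as for emit_vertex */
   nir_builder_instr_insert(b, &emit->instr);

   nir_store_var(b, vertex_count, nir_iadd(b, count, nir_imm_int(b, 1)));
}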
+
/*
* Atomic counters
*
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
- #define SYSTEM_VALUE(name, components) \
- INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+ /*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources, except CompSwap, which takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc.).
+ * 3: For CompSwap only: the second data parameter.
+ */
+ INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
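A hedged builder sketch of the source layout described above, using ssbo_atomic_add; `block_index`, `offset` and `data` are assumed SSA values:

static nir_ssa_def *
ssbo_atomic_add_example(nir_builder *b, nir_shader *shader,
                        nir_ssa_def *block_index, nir_ssa_def *offset,
                        nir_ssa_def *data)
{
   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(shader, nir_intrinsic_ssbo_atomic_add);

   atomic->src[0] = nir_src_for_ssa(block_index); /* SSBO buffer index */
   atomic->src[1] = nir_src_for_ssa(offset);      /* byte offset into the SSBO */
   atomic->src[2] = nir_src_for_ssa(data);        /* value to add */

   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
   nir_builder_instr_insert(b, &atomic->instr);
   return &atomic->dest.ssa; /* the original value read from memory */
}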
+
+ #define SYSTEM_VALUE(name, components, num_indices) \
+ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
- SYSTEM_VALUE(front_face, 1)
- SYSTEM_VALUE(vertex_id, 1)
- SYSTEM_VALUE(vertex_id_zero_base, 1)
- SYSTEM_VALUE(base_vertex, 1)
- SYSTEM_VALUE(instance_id, 1)
- SYSTEM_VALUE(sample_id, 1)
- SYSTEM_VALUE(sample_pos, 2)
- SYSTEM_VALUE(sample_mask_in, 1)
- SYSTEM_VALUE(invocation_id, 1)
+ SYSTEM_VALUE(front_face, 1, 0)
+ SYSTEM_VALUE(vertex_id, 1, 0)
+ SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+ SYSTEM_VALUE(base_vertex, 1, 0)
+ SYSTEM_VALUE(instance_id, 1, 0)
+ SYSTEM_VALUE(sample_id, 1, 0)
+ SYSTEM_VALUE(sample_pos, 2, 0)
+ SYSTEM_VALUE(sample_mask_in, 1, 0)
+ SYSTEM_VALUE(invocation_id, 1, 0)
+ SYSTEM_VALUE(local_invocation_id, 3, 0)
+ SYSTEM_VALUE(work_group_id, 3, 0)
+ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+ SYSTEM_VALUE(num_work_groups, 3, 0)
/*
* The format of the indices depends on the type of the load. For uniforms,
* the first index is the base address and the second index is an offset that
* should be added to the base address. (This way you can determine in the
* back-end which variable is being accessed even in an array.) For inputs,
- * the one and only index corresponds to the attribute slot. UBO loads also
- * have a single index which is the base address to load from.
+ * the one and only index corresponds to the attribute slot. UBO loads
+ * have two indices: the first is the descriptor set and the second is
+ * the base address to load from.
*
* UBO loads have a (possibly constant) source which is the UBO buffer index.
* For each type of load, the _indirect variant has one additional source
true, 0, 0, indices, flags)
LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
- /* LOAD(ssbo, 1, 0) */
+ LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
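For reference, a direct (constant-offset) SSBO load built by hand, mirroring the nir_visitor code earlier in this series; the offset value is illustrative:

static nir_ssa_def *
load_ssbo_example(nir_builder *b, nir_shader *shader, nir_ssa_def *block_index)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_ssbo);

   load->num_components = 4;
   load->const_index[0] = 16; /* constant byte offset */
   load->src[0] = nir_src_for_ssa(block_index);

   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}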
/*
* Stores work the same way as loads, except now the first register input is
* the value or array to store and the optional second input is the indirect
- * offset.
+ * offset. SSBO stores are similar, but they accept an extra source for the
+ * block index and an extra index with the writemask to use.
*/
- #define STORE(name, num_indices, flags) \
- INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
- num_indices, flags) \
+ #define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+ INTRINSIC(store_##name, 1 + extra_srcs, \
+ ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags) \
+ INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+ ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+ false, 0, 0, 1 + extra_indices, flags)
- STORE(output, 1, 0)
- /* STORE(ssbo, 2, 0) */
+ STORE(output, 0, 0, 0, 0)
+ STORE(ssbo, 1, 1, 1, 0)
- LAST_INTRINSIC(store_output_indirect)
+ LAST_INTRINSIC(store_ssbo_indirect)
--- /dev/null
+ /*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "nir.h"
+ #include "nir_builder.h"
+ #include "../program.h"
+ #include "program/hash_table.h"
+ #include "ir_uniform.h"
+
+ #include "main/compiler.h"
+ #include "main/mtypes.h"
+ #include "program/prog_parameter.h"
+ #include "program/program.h"
+
++static void
++add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect)
++{
++ /* First, we have to resize the array of texture sources */
++ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
++ instr->num_srcs + 1);
++
++ for (unsigned i = 0; i < instr->num_srcs; i++) {
++ new_srcs[i].src_type = instr->src[i].src_type;
++ nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src);
++ }
++
++ ralloc_free(instr->src);
++ instr->src = new_srcs;
++
++ /* Now we can go ahead and move the source over to being a
++ * first-class texture source.
++ */
++ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
++ instr->num_srcs++;
++ nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src,
++ indirect);
++}
++
+ /* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
+ */
+ static void
+ calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+ unsigned *array_elements, nir_ssa_def **indirect,
+ nir_builder *b, unsigned *location)
+ {
+ if (tail->child == NULL)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ instr->sampler_index += deref_array->base_offset * *array_elements;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, *array_elements),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+ NIR_SRC_INIT);
+
+ if (*indirect) {
+ *indirect = nir_iadd(b, *indirect, mul);
+ } else {
+ *indirect = mul;
+ }
+ }
+
+ *array_elements *= glsl_get_length(tail->type);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+ }
+
+ static void
+ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, nir_builder *builder)
+ {
+ if (instr->sampler == NULL)
+ return;
+
+ instr->sampler_index = 0;
+ unsigned location = instr->sampler->var->data.location;
+ unsigned array_elements = 1;
+ nir_ssa_def *indirect = NULL;
+
+ builder->cursor = nir_before_instr(&instr->instr);
+ calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+ &indirect, builder, &location);
+
+ if (indirect) {
+ /* First, we have to resize the array of texture sources */
+ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+ instr->num_srcs + 1);
+
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ new_srcs[i].src_type = instr->src[i].src_type;
+ nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+ &instr->src[i].src);
+ }
+
+ ralloc_free(instr->src);
+ instr->src = new_srcs;
+
+ /* Now we can go ahead and move the source over to being a
+ * first-class texture source.
+ */
+ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+ instr->num_srcs++;
+ nir_instr_rewrite_src(&instr->instr,
+ &instr->src[instr->num_srcs - 1].src,
+ nir_src_for_ssa(indirect));
+
+ instr->sampler_array_size = array_elements;
+ }
+
+ if (location > shader_program->NumUniformStorage - 1 ||
+ !shader_program->UniformStorage[location].sampler[stage].active) {
+ assert(!"cannot return a sampler");
+ return;
+ }
+
+ instr->sampler_index +=
+ shader_program->UniformStorage[location].sampler[stage].index;
+
+ instr->sampler = NULL;
+ }
+
+ typedef struct {
+ nir_builder builder;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
+ } lower_state;
+
+ static bool
+ lower_block_cb(nir_block *block, void *_state)
+ {
+ lower_state *state = (lower_state *) _state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ lower_sampler(tex_instr, state->shader_program, state->stage,
+ &state->builder);
+ }
+ }
+
+ return true;
+ }
+
+ static void
+ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
+ {
+ lower_state state;
+
+ nir_builder_init(&state.builder, impl);
+ state.shader_program = shader_program;
+ state.stage = stage;
+
+ nir_foreach_block(impl, lower_block_cb, &state);
+ }
+
+ void
+ nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
+ {
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ lower_impl(overload->impl, shader_program, shader->stage);
+ }
+ }
++
++static bool
++lower_samplers_for_vk_block(nir_block *block, void *data)
++{
++ nir_foreach_instr(block, instr) {
++ if (instr->type != nir_instr_type_tex)
++ continue;
++
++ nir_tex_instr *tex = nir_instr_as_tex(instr);
++
++ assert(tex->sampler);
++
++ tex->sampler_set = tex->sampler->var->data.descriptor_set;
++ tex->sampler_index = tex->sampler->var->data.binding;
++
++ if (tex->sampler->deref.child) {
++ assert(tex->sampler->deref.child->deref_type == nir_deref_type_array);
++ nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child);
++
++ /* Only one-level arrays are allowed in Vulkan */
++ assert(arr->deref.child == NULL);
++
++ tex->sampler_index += arr->base_offset;
++ if (arr->deref_array_type == nir_deref_array_type_indirect) {
++ add_indirect_to_tex(tex, arr->indirect);
++ nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT);
++
++ tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type);
++ }
++ }
++
++ tex->sampler = NULL;
++ }
++
++ return true;
++}
++
++void
++nir_lower_samplers_for_vk(nir_shader *shader)
++{
++ nir_foreach_overload(shader, overload) {
++ if (overload->impl) {
++ nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL);
++ }
++ }
++}
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
+ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
(('ffma', 0.0, a, b), b),
(('ffma', a, 0.0, b), b),
(('ffma', a, b, 0.0), ('fmul', a, b)),
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
(('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
(('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
(('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
(('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+ (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
]
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
struct split_var_copies_state {
void *mem_ctx;
void *dead_ctx;
+ bool progress;
};
-static nir_deref *
-get_deref_tail(nir_deref *deref)
-{
- while (deref->child != NULL)
- deref = deref->child;
- return deref;
-}
-
/* Recursively constructs deref chains to split a copy instruction into
* multiple (if needed) copy instructions with full-length deref chains.
* External callers of this function should pass the tail and head of the
* remove the old one later.
*/
nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+ state->progress = true;
}
break;
nir_deref *dest_head = &intrinsic->variables[0]->deref;
nir_deref *src_head = &intrinsic->variables[1]->deref;
- nir_deref *dest_tail = get_deref_tail(dest_head);
- nir_deref *src_tail = get_deref_tail(src_head);
+ nir_deref *dest_tail = nir_deref_tail(dest_head);
+ nir_deref *src_tail = nir_deref_tail(src_head);
switch (glsl_get_base_type(src_tail->type)) {
case GLSL_TYPE_ARRAY:
return true;
}
- static void
+ static bool
split_var_copies_impl(nir_function_impl *impl)
{
struct split_var_copies_state state;
state.mem_ctx = ralloc_parent(impl);
state.dead_ctx = ralloc_context(NULL);
+ state.progress = false;
nir_foreach_block(impl, split_var_copies_block, &state);
ralloc_free(state.dead_ctx);
+
+ return state.progress;
}
- void
+ bool
nir_split_var_copies(nir_shader *shader)
{
+ bool progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
- split_var_copies_impl(overload->impl);
+ progress = split_var_copies_impl(overload->impl) || progress;
}
+
+ return progress;
}
return type->fields.structure[index].type;
}
+const glsl_type *
+glsl_get_function_return_type(const glsl_type *type)
+{
+ return type->fields.parameters[0].type;
+}
+
+const glsl_function_param *
+glsl_get_function_param(const glsl_type *type, unsigned index)
+{
+ return &type->fields.parameters[index + 1];
+}
+
const struct glsl_type *
glsl_get_column_type(const struct glsl_type *type)
{
return type->fields.structure[index].name;
}
+glsl_sampler_dim
+glsl_get_sampler_dim(const struct glsl_type *type)
+{
+ assert(glsl_type_is_sampler(type));
+ return (glsl_sampler_dim)type->sampler_dimensionality;
+}
+
+glsl_base_type
+glsl_get_sampler_result_type(const struct glsl_type *type)
+{
+ assert(glsl_type_is_sampler(type));
+ return (glsl_base_type)type->sampler_type;
+}
+
+ unsigned
+ glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length)
+ {
+ return type->record_location_offset(length);
+ }
+
bool
glsl_type_is_void(const glsl_type *type)
{
return type->is_scalar();
}
+bool
+glsl_type_is_vector_or_scalar(const struct glsl_type *type)
+{
+ return type->is_vector() || type->is_scalar();
+}
+
bool
glsl_type_is_matrix(const struct glsl_type *type)
{
return type->is_matrix();
}
+bool
+glsl_type_is_array(const struct glsl_type *type)
+{
+ return type->is_array();
+}
+
+bool
+glsl_type_is_struct(const struct glsl_type *type)
+{
+ return type->is_record() || type->is_interface();
+}
+
+bool
+glsl_type_is_sampler(const struct glsl_type *type)
+{
+ return type->is_sampler();
+}
+
+bool
+glsl_sampler_type_is_shadow(const struct glsl_type *type)
+{
+ assert(glsl_type_is_sampler(type));
+ return type->sampler_shadow;
+}
+
+bool
+glsl_sampler_type_is_array(const struct glsl_type *type)
+{
+ assert(glsl_type_is_sampler(type));
+ return type->sampler_array;
+}
+
const glsl_type *
glsl_void_type(void)
{
}
const glsl_type *
-glsl_vec4_type(void)
+glsl_int_type(void)
{
- return glsl_type::vec4_type;
+ return glsl_type::int_type;
}
const glsl_type *
return glsl_type::uint_type;
}
+const glsl_type *
+glsl_bool_type(void)
+{
+ return glsl_type::bool_type;
+}
+
+const glsl_type *
+glsl_vec4_type(void)
+{
+ return glsl_type::vec4_type;
+}
+
+const glsl_type *
+glsl_scalar_type(enum glsl_base_type base_type)
+{
+ return glsl_type::get_instance(base_type, 1, 1);
+}
+
+const glsl_type *
+glsl_vector_type(enum glsl_base_type base_type, unsigned components)
+{
+ assert(components > 1 && components <= 4);
+ return glsl_type::get_instance(base_type, components, 1);
+}
+
+const glsl_type *
+glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
+{
+ assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
+ return glsl_type::get_instance(base_type, rows, columns);
+}
+
const glsl_type *
glsl_array_type(const glsl_type *base, unsigned elements)
{
return glsl_type::get_array_instance(base, elements);
}
+
+const glsl_type *
+glsl_struct_type(const glsl_struct_field *fields,
+ unsigned num_fields, const char *name)
+{
+ return glsl_type::get_record_instance(fields, num_fields, name);
+}
+
+const struct glsl_type *
+glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
+ enum glsl_base_type base_type)
+{
+ return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
+}
+
+const glsl_type *
+glsl_function_type(const glsl_type *return_type,
+ const glsl_function_param *params, unsigned num_params)
+{
+ return glsl_type::get_function_instance(return_type, params, num_params);
+}
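A hedged sketch of the C API above, constructing the type of `void f(float in_param)`:

static const struct glsl_type *
function_type_example(void)
{
   struct glsl_function_param params[1];

   params[0].type = glsl_float_type();
   params[0].in = true;
   params[0].out = false;

   return glsl_function_type(glsl_void_type(), params, 1);
}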
+
+const glsl_type *
+glsl_transposed_type(const struct glsl_type *type)
+{
+ return glsl_type::get_instance(type->base_type, type->matrix_columns,
+ type->vector_elements);
+}
#pragma once
+ #include <stdio.h>
+
/* C wrapper around glsl_types.h */
#include "../glsl_types.h"
struct glsl_type;
#endif
- #include <stdio.h>
-
void glsl_print_type(const struct glsl_type *type, FILE *fp);
void glsl_print_struct(const struct glsl_type *type, FILE *fp);
const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
+const struct glsl_type *
+glsl_get_function_return_type(const struct glsl_type *type);
+
+const struct glsl_function_param *
+glsl_get_function_param(const struct glsl_type *type, unsigned index);
+
enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
unsigned glsl_get_vector_elements(const struct glsl_type *type);
const char *glsl_get_struct_elem_name(const struct glsl_type *type,
unsigned index);
+enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
+enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
+
+ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+ unsigned length);
+
bool glsl_type_is_void(const struct glsl_type *type);
bool glsl_type_is_vector(const struct glsl_type *type);
bool glsl_type_is_scalar(const struct glsl_type *type);
+bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
bool glsl_type_is_matrix(const struct glsl_type *type);
+bool glsl_type_is_array(const struct glsl_type *type);
+bool glsl_type_is_struct(const struct glsl_type *type);
+bool glsl_type_is_sampler(const struct glsl_type *type);
+bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
+bool glsl_sampler_type_is_array(const struct glsl_type *type);
const struct glsl_type *glsl_void_type(void);
const struct glsl_type *glsl_float_type(void);
-const struct glsl_type *glsl_vec4_type(void);
+const struct glsl_type *glsl_int_type(void);
const struct glsl_type *glsl_uint_type(void);
+const struct glsl_type *glsl_bool_type(void);
+
+const struct glsl_type *glsl_vec4_type(void);
+const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
+const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
+ unsigned components);
+const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
+ unsigned rows, unsigned columns);
const struct glsl_type *glsl_array_type(const struct glsl_type *base,
unsigned elements);
+const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
+ unsigned num_fields, const char *name);
+const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
+ bool is_shadow, bool is_array,
+ enum glsl_base_type base_type);
+const struct glsl_type *glsl_function_type(const struct glsl_type *return_type,
+ const struct glsl_function_param *params,
+ unsigned num_params);
+
+const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
#ifdef __cplusplus
}
#include "util/ralloc.h"
#include "util/strtod.h"
+extern "C" void
+_mesa_error_no_memory(const char *caller)
+{
+ fprintf(stderr, "Mesa error: out of memory in %s\n", caller);
+}
+
void
_mesa_warning(struct gl_context *ctx, const char *fmt, ...)
{
}
void
- _mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+ _mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
const char *, int)
{
}
ralloc_free(shProg->UniformBlocks);
shProg->UniformBlocks = NULL;
- shProg->NumUniformBlocks = 0;
+ shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->UniformBlockStageIndex[i]);
shProg->UniformBlockStageIndex[i] = NULL;
BRW_STATE_GS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
- BRW_STATE_VUE_MAP_VS,
BRW_STATE_VUE_MAP_GEOM_OUT,
BRW_STATE_TRANSFORM_FEEDBACK,
BRW_STATE_RASTERIZER_DISCARD,
BRW_STATE_SAMPLER_STATE_TABLE,
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
BRW_STATE_COMPUTE_PROGRAM,
+ BRW_STATE_CS_WORK_GROUPS,
BRW_NUM_STATE_BITS
};
#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS)
- #define BRW_NEW_VUE_MAP_VS (1ull << BRW_STATE_VUE_MAP_VS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
+ #define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
/** @} */
} binding_table;
+ uint32_t *map_entries;
+ struct {
+ uint32_t index_count;
+ uint32_t *index;
+ } bind_map[8]; /* MAX_SETS from vulkan/private.h */
+
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
unsigned nr_image_params;
GLuint dispatch_grf_start_reg_16;
unsigned local_size[3];
unsigned simd_size;
+ bool uses_barrier;
+ bool uses_num_work_groups;
+
+ struct {
+ /** @{
+ * surface indices the CS-specific surfaces
+ */
+ uint32_t work_groups_start;
+ /** @} */
+ } binding_table;
};
/**
*/
GLbitfield64 slots_valid;
+ /**
+ * Is this VUE map for a separate shader pipeline?
+ *
+ * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+ * without the linker having a chance to dead-code eliminate unused varyings.
+ *
+ * This means that we have to use a fixed slot layout, based on the output's
+ * location field, rather than assigning slots in a compact contiguous block.
+ */
+ bool separate;
+
/**
* Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
* not stored in a slot (because they are not written, or because
void brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid);
+ GLbitfield64 slots_valid,
+ bool separate_shader);
/**
12 + /* ubo */ \
BRW_MAX_ABO + \
BRW_MAX_IMAGES + \
- 2 /* shader time, pull constants */)
+ 2 + /* shader time, pull constants */ \
+ 1 /* cs num work groups */)
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
bool include_primitive_id;
+ /**
+ * The number of vertices emitted, if constant - otherwise -1.
+ */
+ int static_vertex_count;
+
int invocations;
/**
uint32_t draw_params_offset;
} draw;
+ struct {
+ /**
+ * For gl_NumWorkGroups: if num_work_groups_bo is non-NULL, then this is
+ * an indirect dispatch, and num_work_groups_offset is valid. Otherwise,
+ * num_work_groups is set based on glDispatchCompute.
+ */
+ drm_intel_bo *num_work_groups_bo;
+ GLintptr num_work_groups_offset;
+ const GLuint *num_work_groups;
+ } compute;
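A hedged sketch of how a state atom might consume these fields when emitting the walker; `emit_walker_direct()` and `emit_walker_indirect()` are hypothetical helpers, not real Mesa functions:

static void
dispatch_example(struct brw_context *brw)
{
   if (brw->compute.num_work_groups_bo != NULL) {
      /* Indirect: read the three GLuint counts from the BO at the offset
       * recorded by glDispatchComputeIndirect(). */
      emit_walker_indirect(brw, brw->compute.num_work_groups_bo,
                           brw->compute.num_work_groups_offset);
   } else {
      /* Direct: the counts were captured from glDispatchCompute(). */
      emit_walker_direct(brw, brw->compute.num_work_groups);
   }
}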
+
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
GLuint curbe_offset;
} curbe;
- /**
- * Layout of vertex data exiting the vertex shader.
- *
- * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
- */
- struct brw_vue_map vue_map_vs;
-
/**
* Layout of vertex data exiting the geometry portion of the pipeline.
- * This comes from the geometry shader if one exists, otherwise from the
- * vertex shader.
+ * This comes from the last enabled shader stage (GS, DS, or VS).
*
* BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
*/
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[60];
- const struct brw_tracked_state compute_atoms[4];
+ const struct brw_tracked_state compute_atoms[7];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
uint32_t size,
uint32_t *out_offset,
bool dword_pitch);
+ void brw_create_buffer_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t *out_offset,
+ bool dword_pitch);
void brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *surf_offset);
uint32_t get_hw_prim_for_gl_prim(int mode);
- void
- brw_setup_vue_key_clip_info(struct brw_context *brw,
- struct brw_vue_prog_key *key,
- bool program_uses_clip_distance);
-
void
gen6_upload_push_constants(struct brw_context *brw,
const struct gl_program *prog,
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
+#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
- #define _3DPRIM_LINELIST_ADJ 0x09
- #define _3DPRIM_LINESTRIP_ADJ 0x0A
- #define _3DPRIM_TRILIST_ADJ 0x0B
- #define _3DPRIM_TRISTRIP_ADJ 0x0C
+ #define _3DPRIM_LINELIST_ADJ 0x09 /* G45+ */
+ #define _3DPRIM_LINESTRIP_ADJ 0x0A /* G45+ */
+ #define _3DPRIM_TRILIST_ADJ 0x0B /* G45+ */
+ #define _3DPRIM_TRISTRIP_ADJ 0x0C /* G45+ */
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
- #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+ #define _3DPRIM_TRIFAN_NOSTIPPLE 0x16
+#endif
/* We use this offset to be able to pass native primitive types in struct
* _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET +
SHADER_OPCODE_TG4_LOGICAL,
SHADER_OPCODE_TG4_OFFSET,
SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+ SHADER_OPCODE_SAMPLEINFO,
/**
* Combines multiple sources of size 1 into a larger virtual GRF.
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_GET_BUFFER_SIZE,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+ VS_OPCODE_GET_BUFFER_SIZE,
+
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
/**
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4 8
#define GEN5_SAMPLER_MESSAGE_LOD 9
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
+ #define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO 11
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C 16
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define BRW_URB_OPCODE_WRITE_HWORD 0
#define BRW_URB_OPCODE_WRITE_OWORD 1
- #define GEN8_URB_OPCODE_SIMD8_WRITE 7
+ #define BRW_URB_OPCODE_READ_HWORD 2
+ #define BRW_URB_OPCODE_READ_OWORD 3
+ #define GEN7_URB_OPCODE_ATOMIC_MOV 4
+ #define GEN7_URB_OPCODE_ATOMIC_INC 5
+ #define GEN8_URB_OPCODE_ATOMIC_ADD 6
+ #define GEN8_URB_OPCODE_SIMD8_WRITE 7
+ #define GEN8_URB_OPCODE_SIMD8_READ 8
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
/* DW3: PS */
#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782C /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782D /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
#define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES (5*128)
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_HS 0x7913 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_DS 0x7914 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
+ /* Gen8+ DW8 */
+ # define GEN8_GS_STATIC_OUTPUT (1 << 30)
+ # define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT 16
+ # define GEN8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16)
+
/* Gen8+ DW9 */
# define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
# define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16
#define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES (62*16)
#define _3DSTATE_HS 0x781B /* GEN7+ */
+ /* DW1 */
+ # define GEN7_HS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
+ # define GEN7_HS_SAMPLER_COUNT_SHIFT 27
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+ # define GEN7_HS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+ # define GEN7_HS_FLOATING_POINT_MODE_ALT (1 << 16)
+ # define GEN7_HS_MAX_THREADS_SHIFT 0
+ /* DW2 */
+ # define GEN7_HS_ENABLE (1 << 31)
+ # define GEN7_HS_STATISTICS_ENABLE (1 << 29)
+ # define GEN8_HS_MAX_THREADS_SHIFT 8
+ # define GEN7_HS_INSTANCE_COUNT_MASK INTEL_MASK(3, 0)
+ # define GEN7_HS_INSTANCE_COUNT_SHIFT 0
+ /* DW5 */
+ # define GEN7_HS_SINGLE_PROGRAM_FLOW (1 << 27)
+ # define GEN7_HS_VECTOR_MASK_ENABLE (1 << 26)
+ # define HSW_HS_ACCESSES_UAV (1 << 25)
+ # define GEN7_HS_INCLUDE_VERTEX_HANDLES (1 << 24)
+ # define GEN7_HS_DISPATCH_START_GRF_MASK INTEL_MASK(23, 19)
+ # define GEN7_HS_DISPATCH_START_GRF_SHIFT 19
+ # define GEN7_HS_URB_READ_LENGTH_MASK INTEL_MASK(16, 11)
+ # define GEN7_HS_URB_READ_LENGTH_SHIFT 11
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4
+
#define _3DSTATE_TE 0x781C /* GEN7+ */
+ /* DW1 */
+ # define GEN7_TE_PARTITIONING_SHIFT 12
+ # define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT 8
+ # define GEN7_TE_DOMAIN_SHIFT 4
+ //# define GEN7_TE_MODE_SW (1 << 1)
+ # define GEN7_TE_ENABLE (1 << 0)
+
#define _3DSTATE_DS 0x781D /* GEN7+ */
+ /* DW2 */
+ # define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH (1 << 31)
+ # define GEN7_DS_VECTOR_MASK_ENABLE (1 << 30)
+ # define GEN7_DS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27)
+ # define GEN7_DS_SAMPLER_COUNT_SHIFT 27
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18)
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+ # define GEN7_DS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+ # define GEN7_DS_FLOATING_POINT_MODE_ALT (1 << 16)
+ # define HSW_DS_ACCESSES_UAV (1 << 14)
+ /* DW4 */
+ # define GEN7_DS_DISPATCH_START_GRF_MASK INTEL_MASK(24, 20)
+ # define GEN7_DS_DISPATCH_START_GRF_SHIFT 20
+ # define GEN7_DS_URB_READ_LENGTH_MASK INTEL_MASK(17, 11)
+ # define GEN7_DS_URB_READ_LENGTH_SHIFT 11
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4)
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT 4
+ /* DW5 */
+ # define GEN7_DS_MAX_THREADS_SHIFT 25
+ # define HSW_DS_MAX_THREADS_SHIFT 21
+ # define GEN7_DS_STATISTICS_ENABLE (1 << 10)
+ # define GEN7_DS_SIMD8_DISPATCH_ENABLE (1 << 3)
+ # define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2)
+ # define GEN7_DS_CACHE_DISABLE (1 << 1)
+ # define GEN7_DS_ENABLE (1 << 0)
+ /* Gen8+ DW8 */
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21)
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21
+ # define GEN8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16)
+ # define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT 16
+ # define GEN8_DS_USER_CLIP_DISTANCE_MASK INTEL_MASK(15, 8)
+ # define GEN8_DS_USER_CLIP_DISTANCE_SHIFT 8
+ # define GEN8_DS_USER_CULL_DISTANCE_MASK INTEL_MASK(7, 0)
+ # define GEN8_DS_USER_CULL_DISTANCE_SHIFT 0
+
#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
};
+ enum brw_pixel_shader_coverage_mask_mode {
+ BRW_PSICMS_OFF = 0, /* PS does not use input coverage masks. */
+ BRW_PSICMS_NORMAL = 1, /* Input Coverage masks based on outer conservatism
+ * and factors in SAMPLE_MASK. If Pixel is
+ * conservatively covered, all samples are enabled.
+ */
+
+ BRW_PSICMS_INNER = 2, /* Input Coverage masks based on inner conservatism
+ * and factors in SAMPLE_MASK. If Pixel is
+ * conservatively *FULLY* covered, all samples are
+ * enabled.
+ */
+ BRW_PSICMS_DEPTH = 3,
+ };
+
#define _3DSTATE_PS_EXTRA 0x784F /* GEN8+ */
/* DW1 */
# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
+ # define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0
enum brw_wm_barycentric_interp_mode {
BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0
# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
+ #define MEDIA_CURBE_LOAD 0x7001
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+ /* GEN7 DW4, GEN8+ DW5 */
+ # define MEDIA_CURBE_READ_LENGTH_SHIFT 16
+ # define MEDIA_CURBE_READ_LENGTH_MASK INTEL_MASK(31, 16)
+ # define MEDIA_CURBE_READ_OFFSET_SHIFT 0
+ # define MEDIA_CURBE_READ_OFFSET_MASK INTEL_MASK(15, 0)
/* GEN7 DW5, GEN8+ DW6 */
+ # define MEDIA_BARRIER_ENABLE_SHIFT 21
+ # define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21)
# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
+ /* GEN7 DW0 */
+ # define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
.max_wm_threads = 64 * 6, \
.max_cs_threads = 56, \
.urb = { \
- .size = 192, \
+ .size = 384, \
.min_vs_entries = 64, \
.max_vs_entries = 1856, \
.max_hs_entries = 672, \
static const struct brw_device_info brw_device_info_skl_gt1 = {
GEN9_FEATURES, .gt = 1,
+ .urb.size = 192,
};
static const struct brw_device_info brw_device_info_skl_gt2 = {
return devinfo;
}
+
+const char *
+brw_get_device_name(int devid)
+{
+ switch (devid) {
+#undef CHIPSET
+#define CHIPSET(id, family, name) case id: return name;
+#include "pci_ids/i965_pci_ids.h"
+ default:
+ return NULL;
+ }
+}
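/* Usage sketch (hypothetical caller): brw_get_device_name() returns NULL for
 * ids not listed in i965_pci_ids.h, so callers should handle that case:
 *
 *    const char *name = brw_get_device_name(devid);
 *    fprintf(stderr, "GPU: %s\n", name ? name : "unknown");
 */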
#include "brw_eu.h"
#include "brw_wm.h"
#include "brw_fs.h"
+ #include "brw_cs.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
break;
case CS_OPCODE_CS_TERMINATE:
+ case SHADER_OPCODE_BARRIER:
return 1;
default:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
+ case SHADER_OPCODE_SAMPLEINFO:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
+ case FS_OPCODE_GET_BUFFER_SIZE:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GEN4_SCRATCH_READ:
return 1;
}
}
}
+
+ /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+ this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
}
void
*/
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
- key->input_slots_valid);
+ key->input_slots_valid,
+ shader_prog->SeparateShader);
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
}
/* Each attribute is 4 setup channels, each of which is half a reg. */
- this->first_non_payload_grf =
- urb_start + prog_data->num_varying_inputs * 2;
+ this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
}
void
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf =
- payload.num_regs + prog_data->curb_read_length + count * 4;
+ this->first_non_payload_grf += count * 4;
unsigned vue_entries =
MAX2(count, vs_prog_data->base.vue_map.num_slots);
+ /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
+ * command). Each attribute is 16 bytes (4 floats/dwords), so each unit
+ * fits four attributes.
+ */
vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
vs_prog_data->base.urb_read_length = (count + 1) / 2;
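/* Worked example: vue_entries = 9 gives ALIGN(9, 4) / 4 = 3 64-byte URB
 * units (room for 12 attributes, 3 of them padding), while count = 9 input
 * attributes gives urb_read_length = (9 + 1) / 2 = 5, since attributes are
 * read in pairs.
 */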
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
- retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+ stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+ inst->src[i].subreg_offset),
+ inst->exec_size * inst->src[i].stride,
+ inst->exec_size, inst->src[i].stride);
}
}
}
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
int base_mrf = 1;
int color_mrf = base_mrf + 2;
+ fs_inst *mov;
- fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ if (uniforms == 1) {
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ } else {
+ struct brw_reg reg =
+ brw_reg(BRW_GENERAL_REGISTER_FILE,
+ 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(reg));
+ }
fs_inst *write;
if (key->nr_color_regions == 1) {
assign_curb_setup();
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
- assert(mov->src[0].file == HW_REG);
- mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ if (uniforms == 1) {
+ assert(mov->src[0].file == HW_REG);
+ mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ }
}
/**
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
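/* Note that needs_dep is now a variable-length array: BRW_MAX_MRF(gen)
 * expands to the per-generation MRF count instead of a single compile-time
 * constant, so the assert and memset above size it via a runtime sizeof.
 */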
{
int write_len = inst->regs_written;
int first_write_grf = inst->dst.reg;
- bool needs_dep[BRW_MAX_MRF];
+ bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
memset(needs_dep, false, sizeof(needs_dep));
* schedule multi-component multiplications much better.
*/
- if (inst->conditional_mod && inst->dst.is_null()) {
+ fs_reg orig_dst = inst->dst;
+ if (orig_dst.is_null() || orig_dst.file == MRF) {
inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
ibld.ADD(dst, low, high);
- if (inst->conditional_mod) {
- fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+ if (inst->conditional_mod || orig_dst.file == MRF) {
set_condmod(inst->conditional_mod,
- ibld.MOV(null, inst->dst));
+ ibld.MOV(orig_dst, inst->dst));
}
}
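/* An illustration of the case this fixes (hypothetical registers): a multiply
 * such as "mul m2, g4, g5" writing a message register can't hold the partial
 * products of the lowered sequence, so the destination is redirected to a
 * fresh GRF and a trailing "mov m2, <grf>" restores the original semantics.
 */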
assert(devinfo->gen >= 7);
payload.num_regs = 1;
+
+ if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ const unsigned local_id_dwords =
+ brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+ assert((local_id_dwords & 0x7) == 0);
+ const unsigned local_id_regs = local_id_dwords / 8;
+ payload.local_invocation_id_reg = payload.num_regs;
+ payload.num_regs += local_id_regs;
+ }
}
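/* Worked example, assuming brw_cs_prog_local_id_payload_dwords() returns one
 * dword per dimension per channel: a SIMD8 shader needs 3 * 8 = 24 dwords,
 * i.e. 3 payload registers for gl_LocalInvocationID; SIMD16 needs 6.
 */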
void
- fs_visitor::assign_binding_table_offsets()
+ fs_visitor::assign_fs_binding_table_offsets()
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
assign_common_binding_table_offsets(next_binding_table_offset);
}
+ void
+ fs_visitor::assign_cs_binding_table_offsets()
+ {
+ assert(stage == MESA_SHADER_COMPUTE);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+ uint32_t next_binding_table_offset = 0;
+
+ /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+ prog_data->binding_table.work_groups_start = next_binding_table_offset;
+ next_binding_table_offset++;
+
+ assign_common_binding_table_offsets(next_binding_table_offset);
+ }
+
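/* Resulting binding table layout: entry 0 is reserved for the
 * gl_NumWorkGroups buffer (work_groups_start), and the common entries
 * (textures, UBOs, images, ...) follow from index 1 via
 * assign_common_binding_table_offsets().
 */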
void
fs_visitor::calculate_register_pressure()
{
void
fs_visitor::optimize()
{
+ /* Start by validating the shader we currently have. */
+ validate();
+
/* bld is the common builder object pointing at the end of the program we
* used to translate it into i965 IR. For the optimization and lowering
* passes coming next, any code added after the end of the program without
assign_constant_locations();
demote_pull_constants();
+ validate();
+
split_virtual_grfs();
+ validate();
#define OPT(pass, args...) ({ \
pass_num++; \
backend_shader::dump_instructions(filename); \
} \
\
+ validate(); \
+ \
progress = progress || this_progress; \
this_progress; \
})
OPT(lower_integer_multiplication);
lower_uniform_pull_constant_loads();
+
+ validate();
}
/**
{
assert(stage == MESA_SHADER_VERTEX);
- assign_common_binding_table_offsets(0);
+ if (prog_data->map_entries == NULL)
+ assign_common_binding_table_offsets(0);
setup_vs_payload();
if (shader_time_index >= 0)
assert(stage == MESA_SHADER_FRAGMENT);
- assign_fs_binding_table_offsets();
+ sanity_param_count = prog->Parameters->NumParameters;
+
- assign_binding_table_offsets();
+ if (prog_data->map_entries == NULL)
+ assign_fs_binding_table_offsets();
if (devinfo->gen >= 6)
setup_payload_gen6();
else
wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
sanity_param_count = prog->Parameters->NumParameters;
- assign_common_binding_table_offsets(0);
+ assign_cs_binding_table_offsets();
setup_cs_payload();
struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
- bool start_busy = false;
- double start_time = 0;
-
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- drm_intel_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (unlikely(INTEL_DEBUG & DEBUG_WM))
+ if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir)
brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
int st_index8 = -1, st_index16 = -1;
if (simd16_cfg)
prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
- if (unlikely(brw->perf_debug) && shader) {
- if (shader->compiled_once)
- brw_wm_debug_recompile(brw, prog, key);
- shader->compiled_once = true;
-
- if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
- perf_debug("FS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- }
-
return g.get_assembly(final_assembly_size);
}
- extern "C" bool
- brw_fs_precompile(struct gl_context *ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog)
+ fs_reg *
+ fs_visitor::emit_cs_local_invocation_id_setup()
{
- struct brw_context *brw = brw_context(ctx);
- struct brw_wm_prog_key key;
-
- struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
- struct brw_fragment_program *bfp = brw_fragment_program(fp);
- bool program_uses_dfdy = fp->UsesDFdy;
-
- memset(&key, 0, sizeof(key));
+ assert(stage == MESA_SHADER_COMPUTE);
- if (brw->gen < 6) {
- if (fp->UsesKill)
- key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+ fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
- if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
- key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+ struct brw_reg src =
+ brw_vec8_grf(payload.local_invocation_id_reg, 0);
+ src = retype(src, BRW_REGISTER_TYPE_UD);
+ bld.MOV(*reg, src);
+ src.nr += dispatch_width / 8;
+ bld.MOV(offset(*reg, bld, 1), src);
+ src.nr += dispatch_width / 8;
+ bld.MOV(offset(*reg, bld, 2), src);
- /* Just assume depth testing. */
- key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
- key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
- }
+ return reg;
+ }
- if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
- BRW_FS_VARYING_INPUT_MASK) > 16)
- key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+ fs_reg *
+ fs_visitor::emit_cs_work_group_id_setup()
+ {
+ assert(stage == MESA_SHADER_COMPUTE);
- brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+ fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
- if (fp->Base.InputsRead & VARYING_BIT_POS) {
- key.drawable_height = ctx->DrawBuffer->Height;
- }
+ struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+ struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+ struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
- key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
- ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
- BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+ bld.MOV(*reg, r0_1);
+ bld.MOV(offset(*reg, bld, 1), r0_6);
+ bld.MOV(offset(*reg, bld, 2), r0_7);
- if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
- key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
- key.nr_color_regions > 1;
- }
+ return reg;
+ }
- key.program_string_id = bfp->id;
+ const unsigned *
+ brw_cs_emit(struct brw_context *brw,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ struct gl_compute_program *cp,
+ struct gl_shader_program *prog,
+ unsigned *final_assembly_size)
+ {
+ struct brw_shader *shader =
+ (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
- uint32_t old_prog_offset = brw->wm.base.prog_offset;
- struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ brw_dump_ir("compute", prog, &shader->base, &cp->Base);
- bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+ prog_data->local_size[0] = cp->LocalSize[0];
+ prog_data->local_size[1] = cp->LocalSize[1];
+ prog_data->local_size[2] = cp->LocalSize[2];
+ unsigned local_workgroup_size =
+ cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
- brw->wm.base.prog_offset = old_prog_offset;
- brw->wm.prog_data = old_prog_data;
+ cfg_t *cfg = NULL;
+ const char *fail_msg = NULL;
- return success;
- }
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
- void
- brw_setup_tex_for_precompile(struct brw_context *brw,
- struct brw_sampler_prog_key_data *tex,
- struct gl_program *prog)
- {
- const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
- unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
- for (unsigned i = 0; i < sampler_count; i++) {
- if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- tex->swizzles[i] =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+ /* Now the main event: Visit the shader IR and generate our CS IR for it.
+ */
+ fs_visitor v8(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 8, st_index);
+ if (!v8.run_cs()) {
+ fail_msg = v8.fail_msg;
+ } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+ cfg = v8.cfg;
+ prog_data->simd_size = 8;
+ }
+
+ fs_visitor v16(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ &cp->Base, 16, st_index);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+ !fail_msg && !v8.simd16_unsupported &&
+ local_workgroup_size <= 16 * brw->max_cs_threads) {
+ /* Try a SIMD16 compile */
+ v16.import_uniforms(&v8);
+ if (!v16.run_cs()) {
+ perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+ if (!cfg) {
+ fail_msg =
+ "Couldn't generate SIMD16 program and not "
+ "enough threads for SIMD8";
+ }
} else {
- /* Color sampler: assume no swizzling. */
- tex->swizzles[i] = SWIZZLE_XYZW;
+ cfg = v16.cfg;
+ prog_data->simd_size = 16;
}
}
+
+ if (unlikely(cfg == NULL)) {
+ assert(fail_msg);
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, fail_msg);
+ _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+ fail_msg);
+ return NULL;
+ }
+
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+ if (INTEL_DEBUG & DEBUG_CS) {
+ char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+ prog->Label ? prog->Label : "unnamed",
+ prog->Name);
+ g.enable_debug(name);
+ }
+
+ g.generate_code(cfg, prog_data->simd_size);
+
+ return g.get_assembly(final_assembly_size);
}
#include "brw_fs.h"
#include "brw_fs_surface_builder.h"
#include "brw_nir.h"
using namespace brw;
+ using namespace brw::surface_access;
void
fs_visitor::emit_nir_code()
*/
nir_setup_inputs(nir);
nir_setup_outputs(nir);
- nir_setup_uniforms(nir);
+ uniforms = nir->num_uniforms;
+ //nir_setup_uniforms(nir);
nir_emit_system_values(nir);
/* get the main function and emit it */
BRW_REGISTER_TYPE_D));
break;
+ case nir_intrinsic_load_local_invocation_id:
+ assert(v->stage == MESA_SHADER_COMPUTE);
+ reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *v->emit_cs_local_invocation_id_setup();
+ break;
+
+ case nir_intrinsic_load_work_group_id:
+ assert(v->stage == MESA_SHADER_COMPUTE);
+ reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *v->emit_cs_work_group_id_setup();
+ break;
+
default:
break;
}
break;
}
+ case nir_intrinsic_image_samples:
+ /* The driver does not support multi-sampled images. */
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+ break;
+
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
*emit_frontfacing_interpolation());
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
- case nir_intrinsic_load_vertex_id_zero_base: {
- fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
- assert(vertex_id.file != BAD_FILE);
- dest.type = vertex_id.type;
- bld.MOV(dest, vertex_id);
- break;
- }
-
- case nir_intrinsic_load_base_vertex: {
- fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
- assert(base_vertex.file != BAD_FILE);
- dest.type = base_vertex.type;
- bld.MOV(dest, base_vertex);
- break;
- }
-
- case nir_intrinsic_load_instance_id: {
- fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
- assert(instance_id.file != BAD_FILE);
- dest.type = instance_id.type;
- bld.MOV(dest, instance_id);
- break;
- }
-
- case nir_intrinsic_load_sample_mask_in: {
- fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
- assert(sample_mask_in.file != BAD_FILE);
- dest.type = sample_mask_in.type;
- bld.MOV(dest, sample_mask_in);
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_sample_mask_in:
+ case nir_intrinsic_load_sample_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ bld.MOV(dest, val);
break;
}
break;
}
- case nir_intrinsic_load_sample_id: {
- fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
- assert(sample_id.file != BAD_FILE);
- dest.type = sample_id.type;
- bld.MOV(dest, sample_id);
- break;
- }
-
case nir_intrinsic_load_uniform_indirect:
has_indirect = true;
/* fallthrough */
has_indirect = true;
/* fallthrough */
case nir_intrinsic_load_ubo: {
+ uint32_t set = instr->const_index[0];
nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
fs_reg surf_index;
if (const_index) {
- surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
- const_index->u[0]);
+ uint32_t binding = const_index->u[0];
+
+ /* FIXME: We should probably assert here, but dota2 seems to hit
+ * it and we'd like to keep going.
+ */
+ if (binding >= stage_prog_data->bind_map[set].index_count)
+ binding = 0;
+
+ surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]);
} else {
+ assert(0 && "need more info from the ir for this.");
/* The block index is not a constant. Evaluate the index expression
* per-channel and add the base UBO index; we have to select a value
* from any live channel.
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
if (has_indirect) {
BRW_REGISTER_TYPE_D),
fs_reg(2));
- unsigned vec4_offset = instr->const_index[0] / 4;
+ unsigned vec4_offset = instr->const_index[1] / 4;
for (int i = 0; i < instr->num_components; i++)
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
base_offset, vec4_offset + i);
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
- fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
+ fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
surf_index, const_offset_reg);
break;
}
+ case nir_intrinsic_load_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+ fs_reg surf_index;
+ if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+ } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Get the offset to read from */
+ fs_reg offset_reg = vgrf(glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ }
+
+ /* Read the vector */
+ for (int i = 0; i < instr->num_components; i++) {
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+
+ /* Vector components are stored contiguously in memory, so advance the
+ * offset to the next component (no advance is needed after the last one).
+ */
+ if (i < instr->num_components - 1) {
+ if (!has_indirect) {
+ const_offset_bytes += 4;
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ } else {
+ bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+ }
+ }
+ }
+
+ break;
+ }
+
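/* Emitted pattern, sketched: a constant-offset load of a vec4 at byte offset
 * 16 runs the loop above four times, issuing untyped reads with offset_reg
 * set to 16, 20, 24 and 28, each followed by a MOV into the next component
 * of dest.
 */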
case nir_intrinsic_load_input_indirect:
has_indirect = true;
/* fallthrough */
break;
}
+ case nir_intrinsic_store_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ fs_reg surf_index;
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+ if (const_uniform_block) {
+ unsigned index = stage_prog_data->binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = fs_reg(index);
+ brw_mark_surface_used(prog_data, index);
+ } else {
+ surf_index = vgrf(glsl_type::uint_type);
+ bld.ADD(surf_index, get_nir_src(instr->src[1]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+ surf_index = bld.emit_uniformize(surf_index);
+
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Offset */
+ fs_reg offset_reg = vgrf(glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ }
+
+ /* Value */
+ fs_reg val_reg = get_nir_src(instr->src[0]);
+
+ /* Writemask */
+ unsigned writemask = instr->const_index[1];
+
+ /* Write each component present in the writemask */
+ unsigned skipped_channels = 0;
+ for (int i = 0; i < instr->num_components; i++) {
+ int component_mask = 1 << i;
+ if (writemask & component_mask) {
+ if (skipped_channels) {
+ if (!has_indirect) {
+ const_offset_bytes += 4 * skipped_channels;
+ bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ } else {
+ bld.ADD(offset_reg, offset_reg,
+ brw_imm_ud(4 * skipped_channels));
+ }
+ skipped_channels = 0;
+ }
+
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, i),
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ }
+
+ skipped_channels++;
+ }
+ break;
+ }
+
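/* Writemask handling, sketched: a store with writemask .xyw at base offset 0
 * writes x at offset 0 and y at offset 4, skips z, then advances offset_reg
 * to 12 for w; every enabled channel goes out as its own one-dword untyped
 * write message.
 */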
case nir_intrinsic_store_output_indirect:
has_indirect = true;
/* fallthrough */
case nir_intrinsic_barrier:
emit_barrier();
+ if (stage == MESA_SHADER_COMPUTE)
+ ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+ break;
+
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ fs_reg val = nir_system_values[sv];
+ assert(val.file != BAD_FILE);
+ dest.type = val.type;
+ for (unsigned i = 0; i < 3; i++)
+ bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+ break;
+ }
+
+ case nir_intrinsic_ssbo_atomic_add:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_min:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+ else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_max:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+ else
+ nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+ break;
+
+ case nir_intrinsic_get_buffer_size: {
+ nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+ unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ int reg_width = dispatch_width / 8;
+
+ assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+ /* Set LOD = 0 */
+ fs_reg source = fs_reg(0);
+
+ int mlen = 1 * reg_width;
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+ BRW_REGISTER_TYPE_UD);
+ bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+
+ fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+ fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+ src_payload, surf_index);
+ inst->header_size = 0;
+ inst->mlen = mlen;
+ break;
+ }
+
+ case nir_intrinsic_load_num_work_groups: {
+ assert(devinfo->gen >= 7);
+ assert(stage == MESA_SHADER_COMPUTE);
+
+ struct brw_cs_prog_data *cs_prog_data =
+ (struct brw_cs_prog_data *) prog_data;
+ const unsigned surface =
+ cs_prog_data->binding_table.work_groups_start;
+
+ cs_prog_data->uses_num_work_groups = true;
+
+ fs_reg surf_index = fs_reg(surface);
+ brw_mark_surface_used(prog_data, surface);
+
+ /* Read the 3 GLuint components of gl_NumWorkGroups */
+ for (unsigned i = 0; i < 3; i++) {
+ fs_reg read_result =
+ emit_untyped_read(bld, surf_index,
+ fs_reg(i << 2),
+ 1 /* dims */, 1 /* size */,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ bld.MOV(dest, read_result);
+ dest = offset(dest, bld, 1);
+ }
+ break;
+ }
default:
unreachable("unknown intrinsic");
}
}
+ void
+ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+ int op, nir_intrinsic_instr *instr)
+ {
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ fs_reg surface;
+ nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+ if (const_surface) {
+ unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+ const_surface->u[0];
+ surface = fs_reg(surf_index);
+ brw_mark_surface_used(prog_data, surf_index);
+ } else {
+ surface = vgrf(glsl_type::uint_type);
+ bld.ADD(surface, get_nir_src(instr->src[0]),
+ fs_reg(stage_prog_data->binding_table.ubo_start));
+
+ /* Assume this may touch any UBO. This is the same we do for other
+ * UBO/SSBO accesses with non-constant surface.
+ */
+ brw_mark_surface_used(prog_data,
+ stage_prog_data->binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ fs_reg offset = get_nir_src(instr->src[1]);
+ fs_reg data1 = get_nir_src(instr->src[2]);
+ fs_reg data2;
+ if (op == BRW_AOP_CMPWR)
+ data2 = get_nir_src(instr->src[3]);
+
+ /* Emit the actual atomic operation */
+
+ fs_reg atomic_result =
+ surface_access::emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+ }
+
void
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
{
- unsigned sampler = instr->sampler_index;
+ uint32_t set = instr->sampler_set;
+ uint32_t binding = instr->sampler_index;
+
+ assert(binding < stage_prog_data->bind_map[set].index_count);
+ assert(stage_prog_data->bind_map[set].index[binding] < 1000);
+
+ unsigned sampler = stage_prog_data->bind_map[set].index[binding];
fs_reg sampler_reg(sampler);
/* FINISHME: We're failing to recompile our programs when the sampler is
case nir_texop_txf_ms: op = ir_txf_ms; break;
case nir_texop_txl: op = ir_txl; break;
case nir_texop_txs: op = ir_txs; break;
+ case nir_texop_texture_samples: {
+ fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+ fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+ bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+ sampler_reg);
+ inst->mlen = 1;
+ inst->header_size = 1;
+ inst->base_mrf = -1;
+ return;
+ }
default:
unreachable("unknown texture opcode");
}
bld.emit(BRW_OPCODE_CONTINUE);
break;
case nir_jump_return:
+ /* This has to be the last block in the shader. We don't handle
+ * early returns.
+ */
+ assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL &&
+ instr->instr.block->cf_node.parent->type == nir_cf_node_function);
+ break;
default:
unreachable("unknown jump");
}
#include "brw_state.h"
#include "brw_ff_gs.h"
-
bool
-brw_codegen_gs_prog(struct brw_context *brw,
+brw_compile_gs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_geometry_program *gp,
- struct brw_gs_prog_key *key)
+ struct brw_gs_prog_key *key,
+ struct brw_gs_compile_output *output)
{
- struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_compile c;
memset(&c, 0, sizeof(c));
c.key = *key;
c.gp = gp;
+ /* We receive the bind map as input via the output struct... */
+ c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries;
+ memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map,
+ sizeof(c.prog_data.base.base.bind_map));
+
c.prog_data.include_primitive_id =
(gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
int param_count = gs->num_uniform_components * 4;
- /* We also upload clip plane data as uniforms */
- param_count += MAX_CLIP_PLANES * 4;
param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
c.prog_data.base.base.param =
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
+ if (brw->gen >= 8) {
+ c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+ nir_gs_count_vertices(gp->program.Base.nir);
+ }
+
if (brw->gen >= 7) {
if (gp->program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
- /* In order for legacy clipping to work, we need to populate the clip
- * distance varying slots whenever clipping is enabled, even if the vertex
- * shader doesn't write to gl_ClipDistance.
- */
- if (c.key.base.userclip_active) {
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
- outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
- }
-
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.prog_data.base.vue_map, outputs_written);
+ &c.prog_data.base.vue_map, outputs_written,
+ prog ? prog->SeparateShader : false);
/* Compute the output vertex size.
*
c.prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);
+ /* The GLSL linker will have already matched up GS inputs and the outputs
+ * of prior stages. The driver does extend VS outputs in some cases, but
+ * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+ * geometry shader support. So we can safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location making this work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ GLbitfield64 inputs_read =
+ gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.input_vue_map, c.key.input_varyings);
+ &c.input_vue_map, inputs_read,
+ prog->SeparateShader);
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
return false;
}
- /* Scratch space is used for register spilling */
- if (c.prog_data.base.base.total_scratch) {
+ output->mem_ctx = mem_ctx;
+ output->program = program;
+ output->program_size = program_size;
+ memcpy(&output->prog_data, &c.prog_data,
+ sizeof(output->prog_data));
+
+ return true;
+}
+
+bool
+brw_codegen_gs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key)
+{
+ struct brw_gs_compile_output output;
+ struct brw_stage_state *stage_state = &brw->gs.base;
+
+ if (!brw_compile_gs_prog(brw, prog, gp, key, &output))
+ return false;
+
+ if (output.prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- c.prog_data.base.base.total_scratch *
+ output.prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
- &c.key, sizeof(c.key),
- program, program_size,
- &c.prog_data, sizeof(c.prog_data),
+ key, sizeof(*key),
+ output.program, output.program_size,
+ &output.prog_data, sizeof(output.prog_data),
&stage_state->prog_offset, &brw->gs.prog_data);
- ralloc_free(mem_ctx);
+ ralloc_free(output.mem_ctx);
return true;
}
return brw_state_dirty(brw,
_NEW_TEXTURE,
BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_TRANSFORM_FEEDBACK |
- BRW_NEW_VUE_MAP_VS);
+ BRW_NEW_TRANSFORM_FEEDBACK);
}
static void
memset(key, 0, sizeof(*key));
- key->base.program_string_id = gp->id;
- brw_setup_vue_key_clip_info(brw, &key->base,
- gp->program.Base.UsesClipDistanceOut);
+ key->program_string_id = gp->id;
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
- &key->base.tex);
-
- /* BRW_NEW_VUE_MAP_VS */
- key->input_varyings = brw->vue_map_vs.slots_valid;
+ &key->tex);
}
void
if (gp == NULL) {
/* No geometry shader. Vertex data just passes straight through. */
- if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
- brw->vue_map_geom_out = brw->vue_map_vs;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
-
if (brw->gen == 6 &&
(brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
gen6_brw_upload_ff_gs_prog(brw);
(void)success;
}
brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-
- if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
- sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
- brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
- }
}
bool
memset(&key, 0, sizeof(key));
- brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
-
- /* Assume that the set of varyings coming in from the vertex shader exactly
- * matches what the geometry shader requires.
- */
- key.input_varyings = gp->Base.InputsRead;
+ brw_setup_tex_for_precompile(brw, &key.tex, prog);
+ key.program_string_id = bgp->id;
success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
nir_validate_shader(nir);
progress |= nir_opt_constant_folding(nir);
nir_validate_shader(nir);
+ progress |= nir_opt_dead_cf(nir);
+ nir_validate_shader(nir);
progress |= nir_opt_remove_phis(nir);
nir_validate_shader(nir);
progress |= nir_opt_undef(nir);
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- static const nir_lower_tex_options tex_options = {
- .lower_txp = ~0,
- };
struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
- bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
nir_shader *nir;
/* First, lower the GLSL IR or Mesa IR to NIR */
}
nir_validate_shader(nir);
+ brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar);
+
+ static GLuint msg_id = 0;
+ _mesa_gl_debug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION,
+ "%s NIR shader:\n",
+ _mesa_shader_stage_to_abbrev(stage));
+
+ return nir;
+}
+
+void
+brw_process_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ gl_shader_stage stage, bool is_scalar)
+{
+ bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ };
+
+ if (stage == MESA_SHADER_GEOMETRY) {
+ nir_lower_gs_intrinsics(nir);
+ nir_validate_shader(nir);
+ }
nir_lower_global_vars_to_local(nir);
nir_validate_shader(nir);
- nir_lower_tex_projector(nir);
+ nir_lower_tex(nir, &tex_options);
nir_validate_shader(nir);
nir_normalize_cubemap_coords(nir);
if (shader_prog) {
nir_lower_samplers(nir, shader_prog);
- nir_validate_shader(nir);
+ } else {
+ nir_lower_samplers_for_vk(nir);
}
+ nir_validate_shader(nir);
nir_lower_system_values(nir);
nir_validate_shader(nir);
nir_optimize(nir, is_scalar);
- if (brw->gen >= 6) {
+ if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
nir_opt_peephole_ffma(nir);
nir_validate_shader(nir);
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir, is_scalar);
+ nir_convert_from_ssa(nir, true);
nir_validate_shader(nir);
if (!is_scalar) {
+ nir_move_vec_src_uses_to_dest(nir);
+ nir_validate_shader(nir);
+
nir_lower_vec_to_movs(nir);
nir_validate_shader(nir);
}
* run it last because it stashes data in instr->pass_flags and we don't
* want that to be squashed by other NIR passes.
*/
- if (brw->gen <= 5)
+ if (devinfo->gen <= 5)
brw_nir_analyze_boolean_resolves(nir);
nir_sweep(nir);
_mesa_shader_stage_to_string(stage));
nir_print_shader(nir, stderr);
}
-
- return nir;
}
enum brw_reg_type
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
- assert(functions->ProgramStringNotify == _tnl_program_string);
+ /* assert(functions->ProgramStringNotify == _tnl_program_string); */
functions->NewProgram = brwNewProgram;
functions->DeleteProgram = brwDeleteProgram;
_mesa_print_program(prog);
}
}
+
+ void
+ brw_setup_tex_for_precompile(struct brw_context *brw,
+ struct brw_sampler_prog_key_data *tex,
+ struct gl_program *prog)
+ {
+ const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+ unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+ for (unsigned i = 0; i < sampler_count; i++) {
+ if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+ /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+ tex->swizzles[i] =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+ } else {
+ /* Color sampler: assume no swizzling. */
+ tex->swizzles[i] = SWIZZLE_XYZW;
+ }
+ }
+ }
*/
nir_options->lower_ffma = true;
nir_options->lower_sub = true;
+ nir_options->lower_fdiv = true;
+
+ /* In the vec4 backend, our dpN instruction replicates its result to all
+ * the components of a vec4. We would like NIR to give us replicated fdot
+ * instructions because it can optimize better for us.
+ *
+ * For the FS backend, it should be lowered away by the scalarizing pass so
+ * we should never see fdot anyway.
+ */
+ nir_options->fdot_replicates = true;
+
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
compiler->glsl_compiler_options[i].EmitNoNoise = true;
compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
- compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
- (i == MESA_SHADER_FRAGMENT);
- compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
- (i == MESA_SHADER_FRAGMENT);
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
+ bool is_scalar;
+ switch (i) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ is_scalar = true;
+ break;
+ case MESA_SHADER_VERTEX:
+ is_scalar = compiler->scalar_vs;
+ break;
+ default:
+ is_scalar = false;
+ break;
+ }
+
+ compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+ compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+ compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
+
/* !ARB_gpu_shader5 */
if (devinfo->gen < 7)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
- }
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
- compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
- if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
- if (compiler->scalar_vs) {
- /* If we're using the scalar backend for vertex shaders, we need to
- * configure these accordingly.
- */
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
- }
-
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
- }
-
- if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
- compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+ if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+ compiler->glsl_compiler_options[i].NirOptions = nir_options;
}
- compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
- compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
-
return compiler;
}
{
switch (stage) {
case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
return true;
case MESA_SHADER_VERTEX:
return brw->intelScreen->compiler->scalar_vs;
options, ctx->Const.NativeIntegers) || progress;
} while (progress);
- if (options->NirOptions != NULL)
- lower_output_reads(stage, shader->ir);
-
validate_ir_tree(shader->ir);
/* Now that we've finished altering the linked IR, reparent any live IR back
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
return "tg4_offset";
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
return "tg4_offset_logical";
+ case SHADER_OPCODE_SAMPLEINFO:
+ return "sampleinfo";
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
case FS_OPCODE_PIXEL_Y:
return "pixel_y";
+ case FS_OPCODE_GET_BUFFER_SIZE:
+ return "fs_get_buffer_size";
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
return "set_simd4x2_header_gen9";
+ case VS_OPCODE_GET_BUFFER_SIZE:
+ return "vs_get_buffer_size";
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
this->swizzle = brw_swizzle_for_size(type->vector_elements);
else
this->swizzle = BRW_SWIZZLE_XYZW;
+ if (type)
+ this->type = brw_type_for_base_type(type);
}
/** Generic unset register constructor. */
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
+ case VS_OPCODE_GET_BUFFER_SIZE:
return inst->header_size;
default:
unreachable("not reached");
}
bool
- vec4_instruction::can_reswizzle(int dst_writemask,
+ vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+ int dst_writemask,
int swizzle,
int swizzle_mask)
{
+ /* Gen6 MATH instructions can not execute in align16 mode, so swizzles
+ * or writemasking are not allowed.
+ */
+ if (devinfo->gen == 6 && is_math() &&
+ (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+ return false;
+
/* If this instruction sets anything not referenced by swizzle, then we'd
* totally break it when we reswizzle.
*/
if (mlen > 0)
return false;
+ /* We can't use swizzles on the accumulator and that's really the only
+ * HW_REG we would care to reswizzle so just disallow them all.
+ */
+ for (int i = 0; i < 3; i++) {
+ if (src[i].file == HW_REG)
+ return false;
+ }
+
return true;
}
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
continue;
+ /* Remove no-op MOVs */
+ if (inst->dst.file == inst->src[0].file &&
+ inst->dst.reg == inst->src[0].reg &&
+ inst->dst.reg_offset == inst->src[0].reg_offset) {
+ bool is_nop_mov = true;
+
+ for (unsigned c = 0; c < 4; c++) {
+ if ((inst->dst.writemask & (1 << c)) == 0)
+ continue;
+
+ if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+ is_nop_mov = false;
+ break;
+ }
+ }
+
+ if (is_nop_mov) {
+ inst->remove(block);
+ continue;
+ }
+ }
+
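/* Example: "mov vgrf4.xyzw:F, vgrf4.xyzw:F" is removed outright, while
 * "mov vgrf4.xyzw:F, vgrf4.yxzw:F" survives, since a swizzle that permutes
 * channels under the writemask is real data movement.
 */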
bool to_mrf = (inst->dst.file == MRF);
/* Can't coalesce this GRF if someone else was going to
}
}
+ /* We can't coalesce away a saturating instruction if the register
+ * types do not match. However, if scan_inst is a non-type-converting
+ * 'mov', we can fix up its types later instead.
+ */
+ if (inst->saturate &&
+ inst->dst.type != scan_inst->dst.type &&
+ !(scan_inst->opcode == BRW_OPCODE_MOV &&
+ scan_inst->dst.type == scan_inst->src[0].type))
+ break;
+
/* If we can't handle the swizzle, bail. */
- if (!scan_inst->can_reswizzle(inst->dst.writemask,
+ if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
inst->src[0].swizzle,
chans_needed)) {
break;
if (interfered)
break;
- /* If somebody else writes our destination here, we can't coalesce
- * before that.
+ /* If somebody else writes the same channels of our destination here,
+ * we can't coalesce before that.
*/
- if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
- break;
+ if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+ (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+ break;
+ }
/* Check for reads of the register we're trying to coalesce into. We
* can't go rewriting instructions above that to put some other value
scan_inst->dst.file = inst->dst.file;
scan_inst->dst.reg = inst->dst.reg;
scan_inst->dst.reg_offset = inst->dst.reg_offset;
+ if (inst->saturate &&
+ inst->dst.type != scan_inst->dst.type) {
+ /* If we have reached this point, scan_inst is a non
+ * type-converting 'mov' and we can modify its register types
+ * to match the ones in inst. Otherwise, we could have an
+ * incorrect saturation result.
+ */
+ scan_inst->dst.type = inst->dst.type;
+ scan_inst->src[0].type = inst->src[0].type;
+ }
scan_inst->saturate |= inst->saturate;
}
scan_inst = (vec4_instruction *)scan_inst->next;
}
bool
- vec4_visitor::run(gl_clip_plane *clip_planes)
+ vec4_visitor::run()
{
bool use_vec4_nir =
compiler->glsl_compiler_options[stage].NirOptions != NULL;
}
base_ir = NULL;
- if (key->userclip_active && !prog->UsesClipDistanceOut)
- setup_uniform_clipplane_values(clip_planes);
-
emit_thread_end();
calculate_cfg();
setup_payload();
- if (false) {
+ if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
/* Debug of register spilling: Go spill everything. */
const int grf_count = alloc.count;
float spill_costs[alloc.count];
struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
- bool start_busy = false;
- double start_time = 0;
const unsigned *assembly = NULL;
- if (unlikely(brw->perf_debug)) {
- start_busy = (brw->batch.last_bo &&
- drm_intel_bo_busy(brw->batch.last_bo));
- start_time = get_time();
- }
-
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
- if (unlikely(INTEL_DEBUG & DEBUG_VS))
+ if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
if (!vp->Base.nir &&
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
- vp, prog, mem_ctx, st_index,
+ vp, prog, brw_select_clip_planes(&brw->ctx),
+ mem_ctx, st_index,
!_mesa_is_gles3(&brw->ctx));
- if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+ if (!v.run()) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
assembly = g.generate_assembly(v.cfg, final_assembly_size);
}
- if (unlikely(brw->perf_debug) && shader) {
- if (shader->compiled_once) {
- brw_vs_debug_recompile(brw, prog, key);
- }
- if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
- perf_debug("VS compile took %.03f ms and stalled the GPU\n",
- (get_time() - start_time) * 1000);
- }
- shader->compiled_once = true;
- }
-
return assembly;
}
-
- void
- brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
- struct brw_vue_prog_key *key,
- GLuint id, struct gl_program *prog)
- {
- struct brw_context *brw = brw_context(ctx);
- key->program_string_id = id;
-
- brw_setup_tex_for_precompile(brw, &key->tex, prog);
- }
-
} /* extern "C" */
#include "brw_nir.h"
#include "brw_vec4.h"
+ #include "brw_vec4_builder.h"
+ #include "brw_vec4_surface_builder.h"
#include "glsl/ir_uniform.h"
+ using namespace brw;
+ using namespace brw::surface_access;
+
namespace brw {
void
unreachable("should be lowered by lower_vertex_id().");
case nir_intrinsic_load_vertex_id_zero_base:
- reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+ reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
if (reg->file == BAD_FILE)
- *reg =
- *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+ glsl_type::int_type);
break;
case nir_intrinsic_load_base_vertex:
- reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+ reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
if (reg->file == BAD_FILE)
- *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+ glsl_type::int_type);
break;
case nir_intrinsic_load_instance_id:
- reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+ reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
if (reg->file == BAD_FILE)
- *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
- glsl_type::int_type);
+ *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+ glsl_type::int_type);
break;
default:
}
assert(uniforms < uniform_array_size);
- this->uniform_size[uniforms] = type_size_vec4(var->type);
+ uniform_size[uniforms] = type_size_vec4(var->type);
if (strncmp(var->name, "gl_", 3) == 0)
nir_setup_builtin_uniform(var);
strcmp(var->name, "parameters") == 0);
assert(uniforms < uniform_array_size);
- this->uniform_size[uniforms] = type_size_vec4(var->type);
+ uniform_size[uniforms] = type_size_vec4(var->type);
struct gl_program_parameter_list *plist = prog->Parameters;
for (unsigned p = 0; p < plist->NumParameters; p++) {
* ParameterValues directly, since unlike brw_fs.cpp, we never
* add new state references during compile.
*/
- int index = _mesa_add_state_reference(this->prog->Parameters,
+ int index = _mesa_add_state_reference(prog->Parameters,
(gl_state_index *)slots[i].tokens);
gl_constant_value *values =
- &this->prog->Parameters->ParameterValues[index][0];
+ &prog->Parameters->ParameterValues[index][0];
assert(uniforms < uniform_array_size);
stage_prog_data->param[uniforms * 4 + j] =
&values[GET_SWZ(slots[i].swizzle, j)];
- this->uniform_vector_size[uniforms] =
+ uniform_vector_size[uniforms] =
(var->type->is_scalar() || var->type->is_vector() ||
var->type->is_matrix() ? var->type->vector_elements : 4);
void
vec4_visitor::nir_emit_instr(nir_instr *instr)
{
- this->base_ir = instr;
+ base_ir = instr;
switch (instr->type) {
case nir_instr_type_load_const:
nir_emit_texture(nir_instr_as_tex(instr));
break;
+ case nir_instr_type_ssa_undef:
+ nir_emit_undef(nir_instr_as_ssa_undef(instr));
+ break;
+
default:
fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
break;
dst_reg
vec4_visitor::get_nir_dest(nir_dest dest)
{
- assert(!dest.is_ssa);
- return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
- dest.reg.indirect);
+ if (dest.is_ssa) {
+ dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+ nir_ssa_values[dest.ssa.index] = dst;
+ return dst;
+ } else {
+ return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+ dest.reg.indirect);
+ }
}
dst_reg
vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
dst_reg reg = dst_reg(GRF, alloc.allocate(1));
- reg.type = BRW_REGISTER_TYPE_F;
+ reg.type = BRW_REGISTER_TYPE_D;
unsigned remaining = brw_writemask_for_size(instr->def.num_components);
}
reg.writemask = writemask;
- emit(MOV(reg, src_reg(instr->value.f[i])));
+ emit(MOV(reg, src_reg(instr->value.i[i])));
remaining &= ~writemask;
}
break;
}
- case nir_intrinsic_load_vertex_id:
- unreachable("should be lowered by lower_vertex_id()");
+ case nir_intrinsic_get_buffer_size: {
+ nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+ unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+
+ assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+ src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+ ubo_index);
+ dst_reg result_dst = get_nir_dest(instr->dest);
+ vec4_instruction *inst = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+
+ inst->base_mrf = 2;
+ inst->mlen = 1; /* always at least one */
+ inst->src[1] = src_reg(surf_index);
+
+ /* MRF for the first parameter */
+ src_reg lod = src_reg(0);
+ int param_base = inst->base_mrf;
+ int writemask = WRITEMASK_X;
+ emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+ emit(inst);
+ break;
+ }
+
+ case nir_intrinsic_store_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ src_reg surf_index;
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[1]);
+ if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = src_reg(index);
+ brw_mark_surface_used(&prog_data->base, index);
+ } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ /* Offset */
+ src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ }
+
+ /* Value */
+ src_reg val_reg = get_nir_src(instr->src[0], 4);
+
+ /* Writemask */
+ unsigned write_mask = instr->const_index[1];
+
+ /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+ * writes will use SIMD8 mode. In order to hide this and keep symmetry across
+ * typed and untyped messages and across hardware platforms, the
+ * current implementation of the untyped messages will transparently convert
+ * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+ * and enabling only channel X on the SEND instruction.
+ *
+ * The above works well for full vector writes, but not for partial writes
+ * where we want to write some channels and not others, like when we have
+ * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+ * quite restrictive with regards to the channel enables we can configure in
+ * the message descriptor (not all combinations are allowed) we cannot simply
+ * implement these scenarios with a single message while keeping the
+ * aforementioned symmetry in the implementation. For now we have decided that
+ * it is better to keep the symmetry to reduce complexity, so in situations
+ * such as the one described we end up emitting two untyped write messages
+ * (one for xy and another for w).
+ *
+ * The code below packs consecutive channels into a single write message,
+ * detects gaps in the vector write and if needed, sends a second message
+ * with the remaining channels. If in the future we decide that we want to
+ * emit a single message at the expense of losing the symmetry in the
+ * implementation we can:
+ *
+ * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+ * message payload. In this mode we can write up to 8 offsets and dwords
+ * to the red channel only (for the two vec4s in the SIMD4x2 execution)
+ * and select which of the 8 channels carry data to write by setting the
+ * appropriate writemask in the dst register of the SEND instruction.
+ * It would require writing a new generator opcode specifically for
+ * IvyBridge since we would need to prepare a SIMD8 payload that could
+ * use any channel, not just X.
+ *
+ * 2) For Haswell+: Simply send a single write message but set the writemask
+ * on the dst of the SEND instruction to select the channels we want to
+ * write. It would require modifying the current messages to receive
+ * and honor the writemask provided.
+ */
+ const vec4_builder bld = vec4_builder(this).at_end()
+ .annotate(current_annotation, base_ir);
+
+ int swizzle[4] = { 0, 0, 0, 0 };
+ int num_channels = 0;
+ unsigned skipped_channels = 0;
+ int num_components = instr->num_components;
+ for (int i = 0; i < num_components; i++) {
+ /* Check if this channel needs to be written. If so, record the
+ * channel we need to take the data from in the swizzle array
+ */
+ int component_mask = 1 << i;
+ int write_test = write_mask & component_mask;
+ if (write_test)
+ swizzle[num_channels++] = i;
+
+ /* If we don't have to write this channel it means we have a gap in the
+ * vector, so write the channels we accumulated until now, if any. Do
+ * the same if this was the last component in the vector.
+ */
+ if (!write_test || i == num_components - 1) {
+ if (num_channels > 0) {
+ /* We have channels to write, so update the offset we need to
+ * write at to skip the channels we skipped, if any.
+ */
+ if (skipped_channels > 0) {
+ if (!has_indirect) {
+ const_offset_bytes += 4 * skipped_channels;
+ offset_reg = src_reg(const_offset_bytes);
+ } else {
+ emit(ADD(dst_reg(offset_reg), offset_reg,
+ brw_imm_ud(4 * skipped_channels)));
+ }
+ }
+
+ /* Swizzle the data register so we take the data from the channels
+ * we need to write and send the write message. This will write
+ * num_channels consecutive dwords starting at offset.
+ */
+ val_reg.swizzle =
+ BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+ emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+ 1 /* dims */, num_channels /* size */,
+ BRW_PREDICATE_NONE);
+
+ /* If we have to do a second write we will have to update the
+ * offset so that we jump over the channels we have just written.
+ */
+ skipped_channels = num_channels;
+
+ /* Restart the count for the next write message */
+ num_channels = 0;
+ }
+
+ /* We did not write the current channel, so increase skipped count */
+ skipped_channels++;
+ }
+ }
- case nir_intrinsic_load_vertex_id_zero_base: {
- src_reg vertex_id =
- src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
- assert(vertex_id.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, vertex_id.type);
- emit(MOV(dest, vertex_id));
break;
}
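
An illustrative trace of the packing loop above, using the v.xyw =
vec3(1,2,4) example from the comment (write_mask = 0b1011):

   /* i=0: hit  -> swizzle[0] = 0, num_channels = 1
    * i=1: hit  -> swizzle[1] = 1, num_channels = 2
    * i=2: miss -> flush message #1: 2 dwords (x,y) at byte offset +0;
    *              skipped_channels = 2, then incremented to 3
    * i=3: hit, last component -> swizzle[0] = 3;
    *              flush message #2: 1 dword (w) at byte offset +4*3 = +12
    */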
- case nir_intrinsic_load_base_vertex: {
- src_reg base_vertex =
- src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
- assert(base_vertex.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, base_vertex.type);
- emit(MOV(dest, base_vertex));
+ case nir_intrinsic_load_ssbo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ssbo: {
+ assert(devinfo->gen >= 7);
+
+ nir_const_value *const_uniform_block =
+ nir_src_as_const_value(instr->src[0]);
+
+ src_reg surf_index;
+ if (const_uniform_block) {
+ unsigned index = prog_data->base.binding_table.ubo_start +
+ const_uniform_block->u[0];
+ surf_index = src_reg(index);
+
+ brw_mark_surface_used(&prog_data->base, index);
+ } else {
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+ unsigned const_offset_bytes = 0;
+ if (has_indirect) {
+ emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+ } else {
+ const_offset_bytes = instr->const_index[0];
+ emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+ }
+
+ /* Read the vector */
+ const vec4_builder bld = vec4_builder(this).at_end()
+ .annotate(current_annotation, base_ir);
+
+ src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */, 4 /* size */,
+ BRW_PREDICATE_NONE);
+ dst_reg dest = get_nir_dest(instr->dest);
+ read_result.type = dest.type;
+ read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+ emit(MOV(dest, read_result));
+
break;
}
+ case nir_intrinsic_ssbo_atomic_add:
+ nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_min:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+ else
+ nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_max:
+ if (dest.type == BRW_REGISTER_TYPE_D)
+ nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+ else
+ nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+ break;
+
+ case nir_intrinsic_load_vertex_id:
+ unreachable("should be lowered by lower_vertex_id()");
+
+ case nir_intrinsic_load_vertex_id_zero_base:
+ case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id: {
- src_reg instance_id =
- src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
- assert(instance_id.file != BAD_FILE);
- dest = get_nir_dest(instr->dest, instance_id.type);
- emit(MOV(dest, instance_id));
+ gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+ src_reg val = src_reg(nir_system_values[sv]);
+ assert(val.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, val.type);
+ emit(MOV(dest, val));
break;
}
has_indirect = true;
/* fallthrough */
case nir_intrinsic_load_ubo: {
+ const uint32_t set = instr->const_index[0];
nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
src_reg surf_index;
dest = get_nir_dest(instr->dest);
if (const_block_index) {
+ uint32_t binding = const_block_index->u[0];
+
/* The block index is a constant, so just emit the binding table entry
* as an immediate.
*/
- surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- const_block_index->u[0]);
+ surf_index = src_reg(stage_prog_data->bind_map[set].index[binding]);
} else {
/* The block index is not a constant. Evaluate the index expression
* per-channel and add the base UBO index; we have to select a value
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
- unsigned const_offset = instr->const_index[0];
+ unsigned const_offset = instr->const_index[1];
src_reg offset;
if (!has_indirect) {
}
}
+ void
+ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+ {
+ dst_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ src_reg surface;
+ nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+ if (const_surface) {
+ unsigned surf_index = prog_data->base.binding_table.ubo_start +
+ const_surface->u[0];
+ surface = src_reg(surf_index);
+ brw_mark_surface_used(&prog_data->base, surf_index);
+ } else {
+ surface = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+
+ /* Assume this may touch any UBO. This is the same as we do for other
+ * UBO/SSBO accesses with non-constant surface.
+ */
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumBufferInterfaceBlocks - 1);
+ }
+
+ src_reg offset = get_nir_src(instr->src[1], 1);
+ src_reg data1 = get_nir_src(instr->src[2], 1);
+ src_reg data2;
+ if (op == BRW_AOP_CMPWR)
+ data2 = get_nir_src(instr->src[3], 1);
+
+ /* Emit the actual atomic operation */
+ const vec4_builder bld =
+ vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+
+ src_reg atomic_result =
+ surface_access::emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
+ dest.type = atomic_result.type;
+ bld.MOV(dest, atomic_result);
+ }
+
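
For orientation, a minimal sketch of how a GLSL SSBO atomic reaches this
helper; the GLSL and NIR lines are illustrative, only the intrinsic and
opcode names appear in the patch:

   /* GLSL:  uint old = atomicAdd(b.counter, 1u);
    * NIR:   ssa_N = intrinsic ssbo_atomic_add (block, offset, 1u)
    * here:  nir_emit_ssbo_atomic(BRW_AOP_ADD, instr)
    *          -> surface_access::emit_untyped_atomic(bld, surface, offset,
    *                                                 data1, data2, 1, 1,
    *                                                 BRW_AOP_ADD, ...)
    */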
static unsigned
brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
{
inst->predicate = BRW_PREDICATE_NORMAL;
break;
- case nir_op_fdot2:
+ case nir_op_fdot_replicated2:
inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_fdot3:
+ case nir_op_fdot_replicated3:
inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
- case nir_op_fdot4:
+ case nir_op_fdot_replicated4:
inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_fdph_replicated:
+ inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
case nir_op_bany2:
case nir_op_bany3:
case nir_op_bany4: {
switch (texop) {
case nir_texop_lod: op = ir_lod; break;
case nir_texop_query_levels: op = ir_query_levels; break;
+ case nir_texop_texture_samples: op = ir_texture_samples; break;
case nir_texop_tex: op = ir_tex; break;
case nir_texop_tg4: op = ir_tg4; break;
case nir_texop_txb: op = ir_txb; break;
* emitting anything other than setting up the constant result.
*/
if (instr->op == nir_texop_tg4) {
- int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
return;
sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
assert(coord_type != NULL);
if (devinfo->gen >= 7 &&
- key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+ key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
} else {
mcs = src_reg(0u);
mcs, is_cube_array, sampler, sampler_reg);
}
+ void
+ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+ {
+ nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+ }
+
}
#include "glsl/ir_uniform.h"
#include "program/sampler.h"
+ #define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
+
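
A short summary of what the new macro encodes; the MRF ranges follow from
the base_mrf/mlen values used in this patch, and the gen6 special case
reflects gen6's larger MRF file:

   /* FIRST_SPILL_MRF(gen) -> 21 on gen6, 13 everywhere else:
    *   scratch writes:      base_mrf,     mlen 3   (m13-m15 / m21-m23)
    *   scratch/pull reads:  base_mrf + 1, mlen 1-2 (m14-m15 / m22-m23)
    */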
namespace brw {
vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
dst, index);
- inst->base_mrf = 14;
+ inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
inst->mlen = 2;
return inst;
inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
dst, src, index);
- inst->base_mrf = 13;
+ inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
inst->mlen = 3;
return inst;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
}
}
- void
- vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
- {
- for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
- assert(this->uniforms < uniform_array_size);
- this->uniform_vector_size[this->uniforms] = 4;
- this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
- this->userplane[i].type = BRW_REGISTER_TYPE_F;
- for (int j = 0; j < 4; ++j) {
- stage_prog_data->param[this->uniforms * 4 + j] =
- (gl_constant_value *) &clip_planes[i][j];
- }
- ++this->uniforms;
- }
- }
-
/* Our support for builtin uniforms is even scarier than non-builtin.
* It sits on top of the PROG_STATE_VAR parameters that are
* automatically updated from GL context state.
break;
case ir_var_uniform:
+ case ir_var_shader_storage:
reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
/* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
- * variables, so no need for them to be in variable_ht.
+ * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+ * for UBO/SSBO variables, so no need for them to be in variable_ht.
*
* Some uniforms, such as samplers and atomic counters, have no actual
* storage, so we should ignore them.
dst,
surf_index,
offset_reg);
- pull->base_mrf = 14;
+ pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
pull->mlen = 1;
}
emit(MOV(result_dst, op[0]));
break;
+ case ir_unop_ssbo_unsized_array_length:
+ unreachable("not reached: should be handled by lower_ubo_reference");
+ break;
+
case ir_binop_add:
emit(ADD(result_dst, op[0], op[1]));
break;
emit(RNDE(result_dst, op[0]));
break;
+ case ir_unop_get_buffer_size:
+ unreachable("not reached: not implemented");
+ break;
+
case ir_binop_min:
emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
break;
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumUniformBlocks - 1);
+ shader_prog->NumBufferInterfaceBlocks - 1);
}
if (const_offset_ir) {
case ir_tg4: opcode = offset_value.file != BAD_FILE
? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+ case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
case ir_txb:
unreachable("TXB is not valid for vertex shaders.");
case ir_lod:
* - Texel offsets
* - Gather channel selection
* - Sampler indices too large to fit in a 4-bit value.
+ * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
*/
inst->header_size =
(devinfo->gen < 5 || devinfo->gen >= 9 ||
inst->offset != 0 || op == ir_tg4 ||
+ op == ir_texture_samples ||
is_high_sampler(sampler_reg)) ? 1 : 0;
inst->base_mrf = 2;
- inst->mlen = inst->header_size + 1; /* always at least one */
+ inst->mlen = inst->header_size;
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
if (op == ir_txs || op == ir_query_levels) {
int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+ inst->mlen++;
+ } else if (op == ir_texture_samples) {
+ inst->dst.writemask = WRITEMASK_X;
} else {
/* Load the coordinate */
/* FINISHME: gl_clamp_mask and saturate */
emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
coordinate));
+ inst->mlen++;
if (zero_mask != 0) {
emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
mrf = param_base;
writemask = WRITEMASK_W;
}
- lod.swizzle = BRW_SWIZZLE_XXXX;
emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
} else if (op == ir_txf) {
emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
}
if (devinfo->gen == 6 && op == ir_tg4) {
- emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
}
swizzle_result(op, dest,
*/
if (ir->op == ir_tg4) {
ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
dst_reg result(this, ir->type);
this->result = src_reg(result);
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
- if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+ if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
else
mcs = src_reg(0u);
case ir_txb:
case ir_lod:
case ir_tg4:
+ case ir_texture_samples:
break;
}
uint32_t
vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
{
- int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+ int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
switch (swiz) {
case SWIZZLE_X: return 0;
case SWIZZLE_Y:
/* gather4 sampler is broken for green channel on RG32F --
* we must ask for blue instead.
*/
- if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+ if (key_tex->gather_channel_quirk_mask & (1 << sampler))
return 2;
return 1;
case SWIZZLE_Z: return 2;
src_reg orig_val, uint32_t sampler,
const glsl_type *dest_type)
{
- int s = key->tex.swizzles[sampler];
+ int s = key_tex->swizzles[sampler];
dst_reg swizzled_result = dest;
{
if (devinfo->gen < 6 &&
((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
- key->userclip_active || devinfo->has_negative_rhw_bug)) {
+ output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+ devinfo->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
}
- if (key->userclip_active) {
+ if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
current_annotation = "Clipping flags";
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
}
}
- void
- vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
- {
- /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
- *
- * "If a linked set of shaders forming the vertex stage contains no
- * static write to gl_ClipVertex or gl_ClipDistance, but the
- * application has requested clipping against user clip planes through
- * the API, then the coordinate written to gl_Position is used for
- * comparison against the user clip planes."
- *
- * This function is only called if the shader didn't write to
- * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
- * if the user wrote to it; otherwise we use gl_Position.
- */
- gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
- if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
- clip_vertex = VARYING_SLOT_POS;
- }
-
- for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
- ++i) {
- reg.writemask = 1 << i;
- emit(DP4(reg,
- src_reg(output_reg[clip_vertex]),
- src_reg(this->userplane[i + offset])));
- }
- }
-
vec4_instruction *
vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
{
case BRW_VARYING_SLOT_PAD:
/* No need to write to this slot */
break;
- case VARYING_SLOT_COL0:
- case VARYING_SLOT_COL1:
- case VARYING_SLOT_BFC0:
- case VARYING_SLOT_BFC1: {
- /* These built-in varyings are only supported in compatibility mode,
- * and we only support GS in core profile. So, this must be a vertex
- * shader.
- */
- assert(stage == MESA_SHADER_VERTEX);
- vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
- if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
- inst->saturate = true;
- break;
- }
-
default:
emit_generic_urb_slot(reg, varying);
break;
* may need to unspill a register or load from an array. Those
* reads would use MRFs 14-15.
*/
- int max_usable_mrf = 13;
+ int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
/* The following assertion verifies that max_usable_mrf causes an
* even-numbered amount of URB write data, which will meet gen6's
emit_ndc_computation();
}
- /* Lower legacy ff and ClipVertex clipping to clip distances */
- if (key->userclip_active && !prog->UsesClipDistanceOut) {
- current_annotation = "user clip distances";
-
- output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
- output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-
- emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
- emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
- }
-
/* We may need to split this up into several URB writes, so do them in a
* loop.
*/
prog_data->vue_map.slot_to_varying[slot]);
/* If this was max_usable_mrf, we can't fit anything more into this
- * URB WRITE.
+ * URB WRITE. Same thing if we reached the maximum length available.
*/
- if (mrf > max_usable_mrf) {
+ if (mrf > max_usable_mrf ||
+ align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
slot++;
break;
}
vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
struct gl_program *prog,
- const struct brw_vue_prog_key *key,
+ const struct brw_sampler_prog_key_data *key_tex,
struct brw_vue_prog_data *prog_data,
struct gl_shader_program *shader_prog,
gl_shader_stage stage,
int shader_time_index)
: backend_shader(compiler, log_data, mem_ctx,
shader_prog, prog, &prog_data->base, stage),
- key(key),
+ key_tex(key_tex),
prog_data(prog_data),
sanity_param_count(0),
fail_msg(NULL),
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
+ #include "brw_shader.h"
#include "main/enums.h"
#include "main/formats.h"
#include "main/fbobject.h"
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
*/
-static unsigned
+unsigned
brw_compute_barycentric_interp_modes(struct brw_context *brw,
bool shade_model_flat,
bool persample_shading,
void *mem_ctx = ralloc_context(NULL);
struct brw_wm_prog_data prog_data;
const GLuint *program;
- struct gl_shader *fs = NULL;
+ struct brw_shader *fs = NULL;
GLuint program_size;
+ bool start_busy = false;
+ double start_time = 0;
if (prog)
- fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
memset(&prog_data, 0, sizeof(prog_data));
/* key->alpha_test_func means simulating alpha testing via discards,
fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
- prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+ prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
*/
int param_count;
if (fs) {
- param_count = fs->num_uniform_components +
- fs->NumImages * BRW_IMAGE_PARAM_SIZE;
- prog_data.base.nr_image_params = fs->NumImages;
+ param_count = fs->base.num_uniform_components +
+ fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+ prog_data.base.nr_image_params = fs->base.NumImages;
} else {
param_count = fp->program.Base.Parameters->NumParameters * 4;
}
key->persample_shading,
&fp->program);
+ if (unlikely(brw->perf_debug)) {
+ start_busy = (brw->batch.last_bo &&
+ drm_intel_bo_busy(brw->batch.last_bo));
+ start_time = get_time();
+ }
+
program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
&fp->program, prog, &program_size);
if (program == NULL) {
return false;
}
+ if (unlikely(brw->perf_debug) && fs) {
+ if (fs->compiled_once)
+ brw_wm_debug_recompile(brw, prog, key);
+ fs->compiled_once = true;
+
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ }
+
if (prog_data.base.total_scratch) {
brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
prog_data.base.total_scratch * brw->max_wm_threads);
}
brw->wm.base.prog_data = &brw->wm.prog_data->base;
}
+
+ bool
+ brw_fs_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog)
+ {
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_wm_prog_key key;
+
+ struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+ bool program_uses_dfdy = fp->UsesDFdy;
+
+ memset(&key, 0, sizeof(key));
+
+ if (brw->gen < 6) {
+ if (fp->UsesKill)
+ key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* Just assume depth testing. */
+ key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+ key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+ }
+
+ if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+ key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+ brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+
+ if (fp->Base.InputsRead & VARYING_BIT_POS) {
+ key.drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+ ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+ BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+
+ if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+ key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+ key.nr_color_regions > 1;
+ }
+
+ key.program_string_id = bfp->id;
+
+ uint32_t old_prog_offset = brw->wm.base.prog_offset;
+ struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+ bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+
+ brw->wm.base.prog_offset = old_prog_offset;
+ brw->wm.prog_data = old_prog_data;
+
+ return success;
+ }
*/
struct gl_sampler_object
{
+ mtx_t Mutex;
GLuint Name;
GLint RefCount;
GLchar *Label; /**< GL_KHR_debug */
*/
struct gl_program
{
+ mtx_t Mutex;
GLuint Id;
GLint RefCount;
GLubyte *String; /**< Null-terminated program text */
struct gl_uniform_block *UniformBlocks;
struct exec_list *ir;
+ struct exec_list *packed_varyings;
struct glsl_symbol_table *symbols;
bool uses_builtin_functions;
{
ubo_packing_std140,
ubo_packing_shared,
- ubo_packing_packed
+ ubo_packing_packed,
+ ubo_packing_std430
};
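
For orientation (not part of the diff): std430 tightens array and struct
layout relative to std140; for example, a "float a[4]" block member has a
16-byte array stride under std140 but a 4-byte stride under std430.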
*/
GLuint Binding;
+ /**
+ * Vulkan descriptor set qualifier for this block.
+ */
+ GLuint Set;
+
/**
* Minimum size (in bytes) of a buffer object to back this uniform buffer
* (GL_UNIFORM_BLOCK_DATA_SIZE).
*/
unsigned LastClipDistanceArraySize;
- unsigned NumUniformBlocks;
+ unsigned NumBufferInterfaceBlocks;
struct gl_uniform_block *UniformBlocks;
/**
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_storage_buffer_object;
GLboolean ARB_shader_subroutine;
+ GLboolean ARB_shader_texture_image_samples;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;
GLboolean ARB_shading_language_420pack;
struct gl_perf_monitor_state PerfMonitor;
struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+ struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
assert(!"Invalid type in type_size");
break;
}
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
case ir_unop_subroutine_to_int:
+ case ir_unop_get_buffer_size:
assert(!"not supported");
break;
+ case ir_unop_ssbo_unsized_array_length:
case ir_quadop_vector:
/* This operation should have already been handled.
*/
case ir_query_levels:
assert(!"Unexpected ir_query_levels opcode");
break;
+ case ir_texture_samples:
+ unreachable("Unexpected ir_texture_samples opcode");
}
const glsl_type *sampler_type = ir->sampler->type;
case GLSL_TYPE_STRUCT:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
assert(!"Should not get here.");
break;
}
if (!ctx->Driver.LinkShader(ctx, prog)) {
prog->LinkStatus = GL_FALSE;
} else {
- build_program_resource_list(ctx, prog);
+ build_program_resource_list(prog);
}
}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+#include "anv_nir.h"
+
+#include <brw_context.h>
+#include <brw_wm.h> /* brw_new_shader_program is here */
+#include <brw_nir.h>
+
+#include <brw_vs.h>
+#include <brw_gs.h>
+#include <brw_cs.h>
+
+#include <mesa/main/shaderobj.h>
+#include <mesa/main/fbobject.h>
+#include <mesa/main/context.h>
+#include <mesa/program/program.h>
+#include <glsl/program.h>
+
+/* XXX: We need this to keep symbols in nir.h from conflicting with the
+ * generated GEN command packing headers. We need to fix *both* to not
+ * define something as generic as LOAD.
+ */
+#undef LOAD
+
+#include <glsl/nir/nir_spirv.h>
+
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+static void
+fail_if(int cond, const char *format, ...)
+{
+ va_list args;
+
+ if (!cond)
+ return;
+
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+
+ exit(1);
+}
+
+static VkResult
+set_binding_table_layout(struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline *pipeline, uint32_t stage)
+{
+ uint32_t bias, count, k, *map;
+ struct anv_pipeline_layout *layout = pipeline->layout;
+
+ /* A null pipeline layout is valid for shaders that don't bind any resources. */
+ if (pipeline->layout == NULL)
+ return VK_SUCCESS;
+
+ if (stage == VK_SHADER_STAGE_FRAGMENT)
+ bias = MAX_RTS;
+ else
+ bias = 0;
+
+ count = layout->stage[stage].surface_count;
+ prog_data->map_entries =
+ (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
+ if (prog_data->map_entries == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ k = bias;
+ map = prog_data->map_entries;
+ for (uint32_t i = 0; i < layout->num_sets; i++) {
+ prog_data->bind_map[i].index = map;
+ for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
+ *map++ = k++;
+
+ prog_data->bind_map[i].index_count =
+ layout->set[i].layout->stage[stage].surface_count;
+ }
+
+ return VK_SUCCESS;
+}
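
A sketch of the resulting map; the counts are hypothetical, only the
MAX_RTS bias for the fragment stage comes from the code above:

   /* Fragment stage, MAX_RTS = 8 (hypothetical), two descriptor sets
    * with 3 and 2 surfaces:
    *   bind_map[0].index = { 8, 9, 10 }
    *   bind_map[1].index = { 11, 12 }
    * i.e. (set, binding) pairs land in consecutive binding-table slots
    * placed after the render-target entries.
    */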
+
+static uint32_t
+upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
+{
+ struct anv_state state =
+ anv_state_stream_alloc(&pipeline->program_stream, size, 64);
+
+ assert(size < pipeline->program_stream.block_pool->block_size);
+
+ memcpy(state.map, data, size);
+
+ return state.offset;
+}
+
+static void
+create_params_array(struct anv_pipeline *pipeline,
+ struct gl_shader *shader,
+ struct brw_stage_prog_data *prog_data)
+{
+ VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage);
+ unsigned num_params = 0;
+
+ if (shader->num_uniform_components) {
+ /* If the shader uses any push constants at all, we'll just give
+ * them the maximum possible number
+ */
+ num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
+ }
+
+ if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
+ num_params += MAX_DYNAMIC_BUFFERS;
+
+ if (num_params == 0)
+ return;
+
+ prog_data->param = (const gl_constant_value **)
+ anv_device_alloc(pipeline->device,
+ num_params * sizeof(gl_constant_value *),
+ 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
+
+ /* We now set the param values to be offsets into an
+ * anv_push_constant_data structure. Since the compiler doesn't
+ * actually dereference any of the gl_constant_value pointers in the
+ * params array, it doesn't really matter what we put here.
+ */
+ struct anv_push_constants *null_data = NULL;
+ for (unsigned i = 0; i < num_params; i++)
+ prog_data->param[i] =
+ (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
+}
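
In other words, assuming client_data is a byte array, each param entry is
just the byte offset offsetof(struct anv_push_constants, client_data) +
i * sizeof(float) smuggled through a pointer; nothing ever dereferences it.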
+
+static void
+brw_vs_populate_key(struct brw_context *brw,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct gl_program *prog = (struct gl_program *) vp;
+
+ memset(key, 0, sizeof(*key));
+
+ /* Just upload the program verbatim for now. Always send it all
+ * the inputs it asks for, whether they are varying or not.
+ */
+  key->program_string_id = vp->id;
+
+ /* _NEW_POLYGON */
+ if (brw->gen < 6) {
+ key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL);
+ }
+
+ if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
+ VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
+ /* _NEW_LIGHT | _NEW_BUFFERS */
+ key->clamp_vertex_color = ctx->Light._ClampVertexColor;
+ }
+
+ /* _NEW_POINT */
+ if (brw->gen < 6 && ctx->Point.PointSprite) {
+ for (int i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i])
+ key->point_coord_replace |= (1 << i);
+ }
+ }
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
+                                     &key->tex);
+}
+
+static bool
+really_do_vs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ GLuint program_size;
+ const GLuint *program;
+ struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+ void *mem_ctx;
+ struct gl_shader *vs = NULL;
+
+ if (prog)
+ vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ mem_ctx = ralloc_context(NULL);
+
+ create_params_array(pipeline, vs, &prog_data->base.base);
+ anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir,
+ &prog_data->base.base);
+
+ GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
+ prog_data->inputs_read = vp->program.Base.InputsRead;
+
+ if (key->copy_edgeflag) {
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
+ prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
+ }
+
+ if (brw->gen < 6) {
+ /* Put dummy slots into the VUE for the SF to put the replaced
+ * point sprite coords in. We shouldn't need these dummy slots,
+ * which take up precious URB space, but it would mean that the SF
+ * doesn't get nice aligned pairs of input coords into output
+ * coords, which would be a pain to handle.
+ */
+ for (int i = 0; i < 8; i++) {
+ if (key->point_coord_replace & (1 << i))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
+ }
+
+ /* if back colors are written, allocate slots for front colors too */
+ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+ }
+
+ /* In order for legacy clipping to work, we need to populate the clip
+ * distance varying slots whenever clipping is enabled, even if the vertex
+ * shader doesn't write to gl_ClipDistance.
+ */
+   if (key->nr_userclip_plane_consts) {
+      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
+      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
+   }
+
+   brw_compute_vue_map(brw->intelScreen->devinfo,
+                       &prog_data->base.vue_map, outputs_written,
+                       prog ? prog->SeparateShader : false);
+
+ set_binding_table_layout(&prog_data->base.base, pipeline,
+ VK_SHADER_STAGE_VERTEX);
+
+ /* Emit GEN4 code.
+ */
+ program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
+ prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ const uint32_t offset = upload_kernel(pipeline, program, program_size);
+ if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
+ pipeline->vs_simd8 = offset;
+ pipeline->vs_vec4 = NO_KERNEL;
+ } else {
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = offset;
+ }
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+void brw_wm_populate_key(struct brw_context *brw,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_program *prog = (struct gl_program *) brw->fragment_program;
+ GLuint lookup = 0;
+ GLuint line_aa;
+ bool program_uses_dfdy = fp->program.UsesDFdy;
+ struct gl_framebuffer draw_buffer;
+ bool multisample_fbo;
+
+ memset(key, 0, sizeof(*key));
+
+ for (int i = 0; i < MAX_SAMPLERS; i++) {
+ /* Assume color sampler, no swizzling. */
+ key->tex.swizzles[i] = SWIZZLE_XYZW;
+ }
+
+ /* A non-zero framebuffer name indicates that the framebuffer was created by
+ * the user rather than the window system. */
+ draw_buffer.Name = 1;
+ draw_buffer.Visual.samples = 1;
+ draw_buffer._NumColorDrawBuffers = 1;
+ draw_buffer.Width = 400;
+ draw_buffer.Height = 400;
+ ctx->DrawBuffer = &draw_buffer;
+
+ multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
+
+ /* Build the index for table lookup
+ */
+ if (brw->gen < 6) {
+ /* _NEW_COLOR */
+ if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+ lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* _NEW_DEPTH */
+ if (ctx->Depth.Test)
+ lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
+ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ /* _NEW_STENCIL | _NEW_BUFFERS */
+ if (ctx->Stencil._Enabled) {
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (ctx->Stencil.WriteMask[0] ||
+ ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+ key->iz_lookup = lookup;
+ }
+
+ line_aa = AA_NEVER;
+
+ /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+ if (ctx->Line.SmoothFlag) {
+ if (brw->reduced_primitive == GL_LINES) {
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->reduced_primitive == GL_TRIANGLES) {
+ if (ctx->Polygon.FrontMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (ctx->Polygon.BackMode == GL_LINE ||
+ (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_BACK))
+ line_aa = AA_ALWAYS;
+ }
+ else if (ctx->Polygon.BackMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if ((ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT))
+ line_aa = AA_ALWAYS;
+ }
+ }
+ }
+
+ key->line_aa = line_aa;
+
+ /* _NEW_HINT */
+ key->high_quality_derivatives =
+ ctx->Hint.FragmentShaderDerivative == GL_NICEST;
+
+ if (brw->gen < 6)
+ key->stats_wm = brw->stats_wm;
+
+ /* _NEW_LIGHT */
+ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+
+ /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
+ key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
+ &key->tex);
+
+ /* _NEW_BUFFERS */
+ /*
+ * Include the draw buffer origin and height so that we can calculate
+ * fragment position values relative to the bottom left of the drawable,
+ * from the incoming screen origin relative position we get as part of our
+ * payload.
+ *
+ * This is only needed for the WM_WPOSXY opcode when the fragment program
+ * uses the gl_FragCoord input.
+ *
+ * We could avoid recompiling by including this as a constant referenced by
+ * our program, but if we were to do that it would also be nice to handle
+ * getting that constant updated at batchbuffer submit time (when we
+ * hold the lock and know where the buffer really is) rather than at emit
+ * time when we don't hold the lock and are just guessing. We could also
+ * just avoid using this as key data if the program doesn't use
+ * fragment.position.
+ *
+ * For DRI2 the origin_x/y will always be (0,0) but we still need the
+ * drawable height in order to invert the Y axis.
+ */
+ if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
+ key->drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+ key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ }
+
+ /* _NEW_BUFFERS */
+ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
+
+ /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
+ key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
+ (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
+
+ /* _NEW_BUFFERS _NEW_MULTISAMPLE */
+ /* Ignore sample qualifier while computing this flag. */
+ key->persample_shading =
+ _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
+ if (key->persample_shading)
+ key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
+
+ key->compute_pos_offset =
+ _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
+ fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
+
+ key->compute_sample_id =
+ multisample_fbo &&
+ ctx->Multisample.Enabled &&
+ (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
+
+ /* BRW_NEW_VUE_MAP_GEOM_OUT */
+ if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+ key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
+
+ /* _NEW_COLOR | _NEW_BUFFERS */
+ /* Pre-gen6, the hardware alpha test always used each render
+ * target's alpha to do alpha test, as opposed to render target 0's alpha
+ * like GL requires. Fix that by building the alpha test into the
+ * shader, and we'll skip enabling the fixed function alpha test.
+ */
+ if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
+ key->alpha_test_func = ctx->Color.AlphaFunc;
+ key->alpha_test_ref = ctx->Color.AlphaRef;
+ }
+
+ /* The unique fragment program ID */
+ key->program_string_id = fp->id;
+
+ ctx->DrawBuffer = NULL;
+}
+
+static uint8_t
+computed_depth_mode(struct gl_fragment_program *fp)
+{
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ switch (fp->FragDepthLayout) {
+ case FRAG_DEPTH_LAYOUT_NONE:
+ case FRAG_DEPTH_LAYOUT_ANY:
+ return BRW_PSCDEPTH_ON;
+ case FRAG_DEPTH_LAYOUT_GREATER:
+ return BRW_PSCDEPTH_ON_GE;
+ case FRAG_DEPTH_LAYOUT_LESS:
+ return BRW_PSCDEPTH_ON_LE;
+ case FRAG_DEPTH_LAYOUT_UNCHANGED:
+ return BRW_PSCDEPTH_OFF;
+ }
+ }
+ return BRW_PSCDEPTH_OFF;
+}
+
+static bool
+really_do_wm_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
+{
+ void *mem_ctx = ralloc_context(NULL);
+ struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+ struct gl_shader *fs = NULL;
+ unsigned int program_size;
+ const uint32_t *program;
+
+ if (prog)
+ fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ /* key->alpha_test_func means simulating alpha testing via discards,
+ * so the shader definitely kills pixels.
+ */
+ prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
+
+ prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
+
+ create_params_array(pipeline, fs, &prog_data->base);
+ anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base);
+
+ prog_data->barycentric_interp_modes =
+ brw_compute_barycentric_interp_modes(brw, key->flat_shade,
+ key->persample_shading,
+ &fp->program);
+
+ set_binding_table_layout(&prog_data->base, pipeline,
+ VK_SHADER_STAGE_FRAGMENT);
+ /* This needs to come after shader time and pull constant entries, but we
+ * don't have those set up now, so just put it after the layout entries.
+ */
+ prog_data->binding_table.render_target_start = 0;
+
+ program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
+ &fp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ uint32_t offset = upload_kernel(pipeline, program, program_size);
+
+ if (prog_data->no_8)
+ pipeline->ps_simd8 = NO_KERNEL;
+ else
+ pipeline->ps_simd8 = offset;
+
+ if (prog_data->no_8 || prog_data->prog_offset_16) {
+ pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+ } else {
+ pipeline->ps_simd16 = NO_KERNEL;
+ }
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+static void
+brw_gs_populate_key(struct brw_context *brw,
+ struct anv_pipeline *pipeline,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct brw_stage_state *stage_state = &brw->gs.base;
+ struct gl_program *prog = &gp->program.Base;
+
+ memset(key, 0, sizeof(*key));
+
+   key->program_string_id = gp->id;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+                                     &key->tex);
+}
+
+static bool
+really_do_gs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ struct brw_gs_compile_output output;
+
+ /* FIXME: We pass the bind map to the compile in the output struct. Need
+ * something better. */
+ set_binding_table_layout(&output.prog_data.base.base,
+ pipeline, VK_SHADER_STAGE_GEOMETRY);
+
+ brw_compile_gs_prog(brw, prog, gp, key, &output);
+
+ pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
+ pipeline->gs_vertex_count = gp->program.VerticesIn;
+
+ ralloc_free(output.mem_ctx);
+
+ return true;
+}
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_compute_program *cp,
+ struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ const GLuint *program;
+ void *mem_ctx = ralloc_context(NULL);
+ GLuint program_size;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+
+ struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+ assert(cs);
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
+
+ create_params_array(pipeline, cs, &prog_data->base);
+ anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
+
+ program = brw_cs_emit(brw, mem_ctx, key, prog_data,
+ &cp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ fprintf(stderr, "\n");
+
+ pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+static void
+brw_cs_populate_key(struct brw_context *brw,
+ struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* The unique compute program ID */
+ key->program_string_id = bcp->id;
+}
+
+struct anv_compiler {
+ struct anv_device *device;
+ struct intel_screen *screen;
+ struct brw_context *brw;
+ struct gl_pipeline_object pipeline;
+};
+
+extern "C" {
+
+struct anv_compiler *
+anv_compiler_create(struct anv_device *device)
+{
+ const struct brw_device_info *devinfo = &device->info;
+ struct anv_compiler *compiler;
+ struct gl_context *ctx;
+
+ compiler = rzalloc(NULL, struct anv_compiler);
+ if (compiler == NULL)
+ return NULL;
+
+ compiler->screen = rzalloc(compiler, struct intel_screen);
+ if (compiler->screen == NULL)
+ goto fail;
+
+ compiler->brw = rzalloc(compiler, struct brw_context);
+ if (compiler->brw == NULL)
+ goto fail;
+
+ compiler->device = device;
+
+ compiler->brw->gen = devinfo->gen;
+ compiler->brw->is_g4x = devinfo->is_g4x;
+ compiler->brw->is_baytrail = devinfo->is_baytrail;
+ compiler->brw->is_haswell = devinfo->is_haswell;
+ compiler->brw->is_cherryview = devinfo->is_cherryview;
+
+ /* We need this at least for CS, which will check brw->max_cs_threads
+ * against the work group size. */
+ compiler->brw->max_vs_threads = devinfo->max_vs_threads;
+ compiler->brw->max_hs_threads = devinfo->max_hs_threads;
+ compiler->brw->max_ds_threads = devinfo->max_ds_threads;
+ compiler->brw->max_gs_threads = devinfo->max_gs_threads;
+ compiler->brw->max_wm_threads = devinfo->max_wm_threads;
+ compiler->brw->max_cs_threads = devinfo->max_cs_threads;
+ compiler->brw->urb.size = devinfo->urb.size;
+ compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
+ compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
+ compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
+ compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
+ compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
+
+ compiler->brw->intelScreen = compiler->screen;
+ compiler->screen->devinfo = &device->info;
+
+ brw_process_intel_debug_variable(compiler->screen);
+
+ compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
+
+ ctx = &compiler->brw->ctx;
+ _mesa_init_shader_object_functions(&ctx->Driver);
+
+ /* brw_select_clip_planes() needs this for bogus reasons. */
+ ctx->_Shader = &compiler->pipeline;
+
+ return compiler;
+
+ fail:
+ ralloc_free(compiler);
+ return NULL;
+}
+
+void
+anv_compiler_destroy(struct anv_compiler *compiler)
+{
+ _mesa_free_errors_data(&compiler->brw->ctx);
+ ralloc_free(compiler);
+}
+
+/* From gen7_urb.c */
+
+/* FIXME: Add to struct intel_device_info */
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+ const struct brw_device_info *devinfo = &pipeline->device->info;
+ bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
+ unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+ unsigned vs_entry_size_bytes = vs_size * 64;
+ bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
+ unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+ unsigned gs_entry_size_bytes = gs_size * 64;
+
+ /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+ *
+ * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+ * Allocation Size is less than 9 512-bit URB entries.
+ *
+ * Similar text exists for GS.
+ */
+ unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+ unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+ /* URB allocations must be done in 8k chunks. */
+ unsigned chunk_size_bytes = 8192;
+
+ /* Determine the size of the URB in chunks. */
+ unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+ /* Reserve space for push constants */
+ unsigned push_constant_bytes = gen8_push_size;
+ unsigned push_constant_chunks =
+ push_constant_bytes / chunk_size_bytes;
+
+ /* Initially, assign each stage the minimum amount of URB space it needs,
+ * and make a note of how much additional space it "wants" (the amount of
+ * additional space it could actually make use of).
+ */
+
+ /* VS has a lower limit on the number of URB entries */
+ unsigned vs_chunks =
+ ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ unsigned vs_wants =
+ ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+ unsigned gs_chunks = 0;
+ unsigned gs_wants = 0;
+ if (gs_present) {
+ /* There are two constraints on the minimum amount of URB space we can
+ * allocate:
+ *
+ * (1) We need room for at least 2 URB entries, since we always operate
+ * the GS in DUAL_OBJECT mode.
+ *
+ * (2) We can't allocate less than nr_gs_entries_granularity.
+ */
+ gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ gs_wants =
+ ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+ }
+
+ /* There should always be enough URB space to satisfy the minimum
+ * requirements of each stage.
+ */
+ unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+ assert(total_needs <= urb_chunks);
+
+ /* Mete out remaining space (if any) in proportion to "wants". */
+ unsigned total_wants = vs_wants + gs_wants;
+ unsigned remaining_space = urb_chunks - total_needs;
+ if (remaining_space > total_wants)
+ remaining_space = total_wants;
+ if (remaining_space > 0) {
+ unsigned vs_additional = (unsigned)
+ round(vs_wants * (((double) remaining_space) / total_wants));
+ vs_chunks += vs_additional;
+ remaining_space -= vs_additional;
+ gs_chunks += remaining_space;
+ }
+
+ /* Sanity check that we haven't over-allocated. */
+ assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+ /* Finally, compute the number of entries that can fit in the space
+ * allocated to each stage.
+ */
+ unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+ unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+ /* Since we rounded up when computing *_wants, this may be slightly more
+ * than the maximum allowed amount, so correct for that.
+ */
+ nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+ nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+ /* Ensure that we program a multiple of the granularity. */
+ nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+ nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+ /* Finally, sanity check to make sure we have at least the minimum number
+ * of entries needed for each stage.
+ */
+ assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+ if (gs_present)
+ assert(nr_gs_entries >= 2);
+
+ /* Lay out the URB in the following order:
+ * - push constants
+ * - VS
+ * - GS
+ */
+ pipeline->urb.vs_start = push_constant_chunks;
+ pipeline->urb.vs_size = vs_size;
+ pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+ pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+ pipeline->urb.gs_size = gs_size;
+ pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
+
+static const struct {
+ uint32_t token;
+ gl_shader_stage stage;
+ const char *name;
+} stage_info[] = {
+ { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
+ { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1, "tess control" },
+ { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
+ { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
+ { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
+ { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
+};
+
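+/* The first words of every SPIR-V module; a valid module begins with
+ * magic == SPIR_V_MAGIC_NUMBER (0x07230203), which we assert below.
+ */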
+struct spirv_header {
+ uint32_t magic;
+ uint32_t version;
+ uint32_t gen_magic;
+};
+
+static void
+setup_nir_io(struct gl_shader *mesa_shader,
+ nir_shader *shader)
+{
+ struct gl_program *prog = mesa_shader->Program;
+ foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ prog->InputsRead |= BITFIELD64_BIT(var->data.location);
+ if (shader->stage == MESA_SHADER_FRAGMENT) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog;
+
+ fprog->InterpQualifier[var->data.location] =
+ (glsl_interp_qualifier)var->data.interpolation;
+ if (var->data.centroid)
+ fprog->IsCentroid |= BITFIELD64_BIT(var->data.location);
+ if (var->data.sample)
+ fprog->IsSample |= BITFIELD64_BIT(var->data.location);
+ }
+ }
+
+ foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+ prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
+ }
+
+ mesa_shader->num_uniform_components = shader->num_uniforms;
+}
+
+static void
+anv_compile_shader_spirv(struct anv_compiler *compiler,
+ struct gl_shader_program *program,
+ struct anv_pipeline *pipeline, uint32_t stage)
+{
+ struct brw_context *brw = compiler->brw;
+ struct anv_shader *shader = pipeline->shaders[stage];
+ struct gl_shader *mesa_shader;
+ int name = 0;
+ uint32_t *spirv;
+
+ mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
+ fail_if(mesa_shader == NULL,
+ "failed to create %s shader\n", stage_info[stage].name);
+
+#define CREATE_PROGRAM(stage) \
+ _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0)
+
+ bool is_scalar;
+ struct gl_program *prog;
+ switch (stage) {
+ case VK_SHADER_STAGE_VERTEX:
+ prog = CREATE_PROGRAM(vertex);
+ is_scalar = compiler->screen->compiler->scalar_vs;
+ break;
+ case VK_SHADER_STAGE_GEOMETRY:
+ prog = CREATE_PROGRAM(geometry);
+ is_scalar = false;
+ break;
+ case VK_SHADER_STAGE_FRAGMENT:
+ prog = CREATE_PROGRAM(fragment);
+ is_scalar = true;
+ break;
+ case VK_SHADER_STAGE_COMPUTE:
+ prog = CREATE_PROGRAM(compute);
+ is_scalar = true;
+ break;
+ default:
+ unreachable("Unsupported shader stage");
+ }
+ _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
+
+ mesa_shader->Program->Parameters =
+ rzalloc(mesa_shader, struct gl_program_parameter_list);
+
+ mesa_shader->Type = stage_info[stage].token;
+ mesa_shader->Stage = stage_info[stage].stage;
+
+ struct gl_shader_compiler_options *glsl_options =
+ &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
+
+ spirv = (uint32_t *) shader->module->data;
+ assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
+ assert(shader->module->size % 4 == 0);
+
+ mesa_shader->Program->nir =
+ spirv_to_nir(spirv, shader->module->size / 4,
+ stage_info[stage].stage, glsl_options->NirOptions);
+ fail_if(mesa_shader->Program->nir == NULL,
+ "failed to translate SPIR-V to NIR\n");
+
+ nir_validate_shader(mesa_shader->Program->nir);
+
+ brw_process_nir(mesa_shader->Program->nir,
+ compiler->screen->devinfo,
+ NULL, mesa_shader->Stage, is_scalar);
+
+ setup_nir_io(mesa_shader, mesa_shader->Program->nir);
+
+ _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders],
+ mesa_shader);
+ program->NumShaders++;
+}
+
+static void
+add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct brw_device_info *devinfo = &pipeline->device->info;
+ uint32_t max_threads[] = {
+ [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 0,
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
+ [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
+ [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
+ [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
+ };
+
+ pipeline->prog_data[stage] = prog_data;
+ pipeline->active_stages |= 1 << stage;
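+ /* Scratch space is per hardware thread: e.g. (hypothetical numbers) a
+ * stage that needs 2 kB of scratch per thread on a device with 280
+ * threads for that stage reserves 2048 * 280 = 573440 bytes here.
+ */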
+ pipeline->scratch_start[stage] = pipeline->total_scratch;
+ pipeline->total_scratch =
+ align_u32(pipeline->total_scratch, 1024) +
+ prog_data->total_scratch * max_threads[stage];
+}
+
+int
+anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
+{
+ struct gl_shader_program *program;
+ int name = 0;
+ struct brw_context *brw = compiler->brw;
+
+ pipeline->writes_point_size = false;
+
+ /* When we free the pipeline, we detect stages based on the NULL status
+ * of various prog_data pointers. Make them NULL by default.
+ */
+ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+ memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
+
+ brw->use_rep_send = pipeline->use_repclear;
+ brw->no_simd8 = pipeline->use_repclear;
+
+ program = brw->ctx.Driver.NewShaderProgram(name);
+ fail_if(program == NULL, "failed to create program\n");
+ program->Shaders = (struct gl_shader **)
+ calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
+ fail_if(program->Shaders == NULL, "failed to allocate shader array\n");
+
+ for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
+ if (pipeline->shaders[i])
+ anv_compile_shader_spirv(compiler, program, pipeline, i);
+ }
+
+ for (unsigned i = 0; i < program->NumShaders; i++) {
+ struct gl_shader *shader = program->Shaders[i];
+ program->_LinkedShaders[shader->Stage] = shader;
+ }
+
+ bool success;
+ pipeline->active_stages = 0;
+ pipeline->total_scratch = 0;
+
+ if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
+ struct brw_vs_prog_key vs_key;
+ struct gl_vertex_program *vp = (struct gl_vertex_program *)
+ program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
+ struct brw_vertex_program *bvp = brw_vertex_program(vp);
+
+ brw_vs_populate_key(brw, bvp, &vs_key);
+
+ success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
+ fail_if(!success, "do_vs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
+ &pipeline->vs_prog_data.base.base);
+
+ if (vp->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+ } else {
+ memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+ pipeline->vs_simd8 = NO_KERNEL;
+ pipeline->vs_vec4 = NO_KERNEL;
+ }
+
+ if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
+ struct brw_gs_prog_key gs_key;
+ struct gl_geometry_program *gp = (struct gl_geometry_program *)
+ program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
+ struct brw_geometry_program *bgp = brw_geometry_program(gp);
+
+ brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
+
+ success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
+ fail_if(!success, "do_gs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
+ &pipeline->gs_prog_data.base.base);
+
+ if (gp->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+ } else {
+ pipeline->gs_vec4 = NO_KERNEL;
+ }
+
+ if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
+ struct brw_wm_prog_key wm_key;
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)
+ program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+
+ brw_wm_populate_key(brw, bfp, &wm_key);
+
+ success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
+ fail_if(!success, "do_wm_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
+ &pipeline->wm_prog_data.base);
+ }
+
+ if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
+ struct brw_cs_prog_key cs_key;
+ struct gl_compute_program *cp = (struct gl_compute_program *)
+ program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
+ struct brw_compute_program *bcp = brw_compute_program(cp);
+
+ brw_cs_populate_key(brw, bcp, &cs_key);
+
+ success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
+ fail_if(!success, "brw_codegen_cs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
+ &pipeline->cs_prog_data.base);
+ }
+
+ brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
+
+ struct anv_device *device = compiler->device;
+ while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
+ anv_block_pool_alloc(&device->scratch_block_pool);
+
+ gen7_compute_urb_partition(pipeline);
+
+ return 0;
+}
+
+/* Despite the name, this function does not free the compiler; it frees the
+ * struct anv_pipeline data that the compiler allocated (currently just the
+ * prog_data structs).
+ */
+void
+anv_compiler_free(struct anv_pipeline *pipeline)
+{
+ for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
+ if (pipeline->prog_data[stage]) {
+ free(pipeline->prog_data[stage]->map_entries);
+ /* Only the param array is ever set up, since we don't do non-UBO
+ * pull constants, so it is all we need to free.
+ */
+ anv_device_free(pipeline->device, pipeline->prog_data[stage]->param);
+ }
+ }
+}
+
+}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "glsl/nir/nir_builder.h"
+
+struct apply_dynamic_offsets_state {
+ nir_shader *shader;
+ nir_builder builder;
+
+ VkShaderStage stage;
+ struct anv_pipeline_layout *layout;
+
+ uint32_t indices_start;
+};
+
+static bool
+apply_dynamic_offsets_block(nir_block *block, void *void_state)
+{
+ struct apply_dynamic_offsets_state *state = void_state;
+ struct anv_descriptor_set_layout *set_layout;
+ const struct anv_descriptor_slot *slot;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ bool has_indirect = false;
+ uint32_t set, binding;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_ubo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ubo: {
+ set = intrin->const_index[0];
+
+ nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]);
+ if (const_binding) {
+ binding = const_binding->u[0];
+ } else {
+ assert(0 && "need more info from the ir for this.");
+ }
+ break;
+ }
+ default:
+ continue; /* not a UBO load; move on to the next instruction */
+ }
+
+ set_layout = state->layout->set[set].layout;
+ slot = &set_layout->stage[state->stage].surface_start[binding];
+ if (slot->dynamic_slot < 0)
+ continue;
+
+ uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start +
+ slot->dynamic_slot;
+
+ state->builder.cursor = nir_before_instr(&intrin->instr);
+
+ nir_intrinsic_instr *offset_load =
+ nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
+ offset_load->num_components = 1;
+ offset_load->const_index[0] = state->indices_start + dynamic_index;
+ offset_load->const_index[1] = 0;
+ nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL);
+ nir_builder_instr_insert(&state->builder, &offset_load->instr);
+
+ nir_ssa_def *offset = &offset_load->dest.ssa;
+ if (has_indirect) {
+ assert(intrin->src[1].is_ssa);
+ offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset);
+ }
+
+ assert(intrin->dest.is_ssa);
+
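+ /* Replace the original load with an indirect load whose offset source
+ * includes the dynamic offset. Schematically:
+ *
+ * load_ubo(binding, const_offset)
+ * becomes
+ * load_ubo_indirect(binding, dynamic_offset [+ old_indirect], const_offset)
+ */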
+ nir_intrinsic_instr *new_load =
+ nir_intrinsic_instr_create(state->shader,
+ nir_intrinsic_load_ubo_indirect);
+ new_load->num_components = intrin->num_components;
+ new_load->const_index[0] = intrin->const_index[0];
+ new_load->const_index[1] = intrin->const_index[1];
+ nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr);
+ new_load->src[1] = nir_src_for_ssa(offset);
+ nir_ssa_dest_init(&new_load->instr, &new_load->dest,
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.name);
+ nir_builder_instr_insert(&state->builder, &new_load->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&new_load->dest.ssa),
+ state->shader);
+
+ nir_instr_remove(&intrin->instr);
+ }
+
+ return true;
+}
+
+void
+anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
+ nir_shader *shader,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct apply_dynamic_offsets_state state = {
+ .shader = shader,
+ .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage),
+ .layout = pipeline->layout,
+ .indices_start = shader->num_uniforms,
+ };
+
+ if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets)
+ return;
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ nir_builder_init(&state.builder, overload->impl);
+ nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state);
+ nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
+
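+ /* Record where each dynamic offset lives within struct anv_push_constants
+ * by "indexing" a NULL pointer: the stored pointer encodes an offset into
+ * the per-stage push constant data (an offsetof() in disguise) rather than
+ * a real address, and is never dereferenced as one.
+ */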
+ struct anv_push_constants *null_data = NULL;
+ for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
+ prog_data->param[i + shader->num_uniforms] =
+ (const gl_constant_value *)&null_data->dynamic_offsets[i];
+
+ shader->num_uniforms += MAX_DYNAMIC_BUFFERS;
+}