Merge branch 'master' of ../mesa into vulkan
author	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Wed, 30 Sep 2015 00:10:50 +0000 (17:10 -0700)
committer	Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 1 Oct 2015 21:24:29 +0000 (14:24 -0700)
37 files changed:
configure.ac
src/Makefile.am
src/glsl/Makefile.am
src/glsl/Makefile.sources
src/glsl/ast_to_hir.cpp
src/glsl/glsl_parser_extras.cpp
src/glsl/glsl_types.cpp
src/glsl/glsl_types.h
src/glsl/ir_clone.cpp
src/glsl/link_uniform_initializers.cpp
src/glsl/nir/glsl_to_nir.cpp
src/glsl/nir/nir.h
src/glsl/nir/nir_builder.h
src/glsl/nir/nir_intrinsics.h
src/glsl/nir/nir_lower_samplers.c
src/glsl/nir/nir_opt_algebraic.py
src/glsl/nir/nir_split_var_copies.c
src/glsl/nir/nir_types.cpp
src/glsl/nir/nir_types.h
src/glsl/standalone_scaffolding.cpp
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_device_info.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_program.c
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/main/mtypes.h
src/mesa/program/ir_to_mesa.cpp
src/vulkan/anv_compiler.cpp
src/vulkan/anv_nir_apply_dynamic_offsets.c

diff --combined configure.ac
index 4669a67dc04709213453ce17d0745b1df1fc0208,217281f79507a449bdc94a15b7ed0751c09f41ef..fa64dab4e48c0fa9865da81eaf46e8bef642797f
@@@ -74,7 -74,7 +74,7 @@@ LIBDRM_AMDGPU_REQUIRED=2.4.6
  LIBDRM_INTEL_REQUIRED=2.4.61
  LIBDRM_NVVIEUX_REQUIRED=2.4.33
  LIBDRM_NOUVEAU_REQUIRED=2.4.62
- LIBDRM_FREEDRENO_REQUIRED=2.4.64
+ LIBDRM_FREEDRENO_REQUIRED=2.4.65
  DRI2PROTO_REQUIRED=2.6
  DRI3PROTO_REQUIRED=1.0
  PRESENTPROTO_REQUIRED=1.0
@@@ -99,7 -99,6 +99,7 @@@ AM_PROG_CC_C_
  AM_PROG_AS
  AX_CHECK_GNU_MAKE
  AC_CHECK_PROGS([PYTHON2], [python2 python])
 +AC_CHECK_PROGS([PYTHON3], [python3])
  AC_PROG_SED
  AC_PROG_MKDIR_P
  
@@@ -534,15 -533,32 +534,32 @@@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, te
  dnl
  dnl library names
  dnl
+ dnl Unfortunately we need to do a few things that libtool can't help us with,
+ dnl so we need some knowledge of shared library filenames:
+ dnl
+ dnl LIB_EXT is the extension used when creating symlinks for alternate
+ dnl filenames for a shared library which will be dynamically loaded
+ dnl
+ dnl IMP_LIB_EXT is the extension used when checking for the presence of a
+ dnl the file for a shared library we wish to link with
+ dnl
  case "$host_os" in
  darwin* )
-     LIB_EXT='dylib' ;;
+     LIB_EXT='dylib'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  cygwin* )
-     LIB_EXT='dll' ;;
+     LIB_EXT='dll'
+     IMP_LIB_EXT='dll.a'
+     ;;
  aix* )
-     LIB_EXT='a' ;;
+     LIB_EXT='a'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  * )
-     LIB_EXT='so' ;;
+     LIB_EXT='so'
+     IMP_LIB_EXT=$LIB_EXT
+     ;;
  esac
  
  AC_SUBST([LIB_EXT])
@@@ -1111,6 -1127,11 +1128,11 @@@ AC_MSG_RESULT([$with_sha1]
  AC_SUBST(SHA1_LIBS)
  AC_SUBST(SHA1_CFLAGS)
  
+ # Enable a define for SHA1
+ if test "x$with_sha1" != "x"; then
+       DEFINES="$DEFINES -DHAVE_SHA1"
+ fi
  # Allow user to configure out the shader-cache feature
  AC_ARG_ENABLE([shader-cache],
      AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
@@@ -1290,6 -1311,16 +1312,16 @@@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS}
  AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
        [DEFINES="${DEFINES} -DGLX_USE_TLS"])
  
+ dnl Read-only text section on x86 hardened platforms
+ AC_ARG_ENABLE([glx-read-only-text],
+     [AS_HELP_STRING([--enable-glx-read-only-text],
+         [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+     [enable_glx_read_only_text="$enableval"],
+     [enable_glx_read_only_text=no])
+ if test "x$enable_glx_read_only_text" = xyes; then
+     DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+ fi
  dnl
  dnl More DRI setup
  dnl
@@@ -1518,12 -1549,6 +1550,12 @@@ GBM_PC_LIB_PRIV="$DLOPEN_LIBS
  AC_SUBST([GBM_PC_REQ_PRIV])
  AC_SUBST([GBM_PC_LIB_PRIV])
  
 +AM_CONDITIONAL(HAVE_VULKAN, true)
 +
 +AC_ARG_VAR([GLSLC], [Path to the glslc executable])
 +AC_CHECK_PROGS([GLSLC], [glslc])
 +AC_SUBST([GLSLC])
 +
  dnl
  dnl EGL configuration
  dnl
@@@ -2058,7 -2083,7 +2090,7 @@@ radeon_llvm_check() 
      if test "x$enable_gallium_llvm" != "xyes"; then
          AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
      fi
-     llvm_check_version_for "3" "4" "2" $1 
+     llvm_check_version_for "3" "5" "0" $1
      if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
          AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
      fi
@@@ -2146,11 -2171,8 +2178,8 @@@ if test -n "$with_gallium_drivers"; the
              gallium_require_drm "vc4"
              gallium_require_drm_loader
  
-             case "$host_cpu" in
-                 i?86 | x86_64 | amd64)
-                 USE_VC4_SIMULATOR=yes
-                 ;;
-             esac
+             PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                               [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
              ;;
          *)
              AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@@ -2170,10 -2192,14 +2199,14 @@@ if test "x$MESA_LLVM" != x0; the
  
      LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
  
+     dnl llvm-config may not give the right answer when llvm is a built as a
+     dnl single shared library, so we must work the library name out for
+     dnl ourselves.
+     dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
      if test "x$enable_llvm_shared_libs" = xyes; then
          dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
          LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-         AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+         AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
  
          if test "x$llvm_have_one_so" = xyes; then
              dnl LLVM was built using auto*, so there is only one shared object.
          else
              dnl If LLVM was built with CMake, there will be one shared object per
              dnl component.
-             AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+             AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                      [AC_MSG_ERROR([Could not find llvm shared libraries:
        Please make sure you have built llvm with the --enable-shared option
        and that your llvm libraries are installed in $LLVM_LIBDIR
@@@ -2298,13 -2324,6 +2331,13 @@@ AC_SUBST([XA_MINOR], $XA_MINOR
  AC_SUBST([XA_TINY], $XA_TINY)
  AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY")
  
 +PKG_CHECK_MODULES(VALGRIND, [valgrind],
 +                  [have_valgrind=yes], [have_valgrind=no])
 +if test "x$have_valgrind" = "xyes"; then
 +    AC_DEFINE([HAVE_VALGRIND], 1,
 +              [Use valgrind intrinsics to suppress false warnings])
 +fi
 +
  dnl Restore LDFLAGS and CPPFLAGS
  LDFLAGS="$_SAVE_LDFLAGS"
  CPPFLAGS="$_SAVE_CPPFLAGS"
@@@ -2414,9 -2433,6 +2447,9 @@@ AC_CONFIG_FILES([Makefil
                src/mesa/drivers/osmesa/osmesa.pc
                src/mesa/drivers/x11/Makefile
                src/mesa/main/tests/Makefile
 +              src/vulkan/Makefile
 +              src/vulkan/anv_icd.json
 +              src/vulkan/tests/Makefile
                src/util/Makefile
                src/util/tests/hash_table/Makefile])
  
@@@ -2535,7 -2551,6 +2568,7 @@@ if test "x$MESA_LLVM" = x1; the
      echo ""
  fi
  echo "        PYTHON2:         $PYTHON2"
 +echo "        PYTHON3:         $PYTHON3"
  
  echo ""
  echo "        Run '${MAKE-make}' to build Mesa"
diff --combined src/Makefile.am
index da638a811fbd2eec25c1814257071b3300b04730,9e15cca5ea4a3c9dde9a73e437a30de90cb9b8e1..13cfaa5b367b97d422f44f52ba4a8e5092480df4
@@@ -53,10 -53,6 +53,10 @@@ EXTRA_DIST = 
  AM_CFLAGS = $(VISIBILITY_CFLAGS)
  AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
  
 +if HAVE_VULKAN
 +SUBDIRS += vulkan
 +endif
 +
  AM_CPPFLAGS = \
        -I$(top_srcdir)/include/ \
        -I$(top_srcdir)/src/mapi/ \
@@@ -66,6 -62,7 +66,7 @@@
  noinst_LTLIBRARIES = libglsl_util.la
  
  libglsl_util_la_SOURCES = \
+       glsl/shader_enums.c \
        mesa/main/imports.c \
        mesa/program/prog_hash_table.c \
        mesa/program/symbol_table.c \
diff --combined src/glsl/Makefile.am
index 2ab40506e979c04370568b34a57500cffe69a90b,32653911f6c07e7003698bad77506f2201eeb77c..08368311b8a8535439533b1da465a0b846bac41e
@@@ -50,12 -50,14 +50,14 @@@ EXTRA_DIST = tests glcpp/tests README T
        nir/nir_opcodes_c.py                            \
        nir/nir_opcodes_h.py                            \
        nir/nir_opt_algebraic.py                        \
+       nir/tests                                       \
        SConscript
  
  include Makefile.sources
  
  TESTS = glcpp/tests/glcpp-test                                \
        glcpp/tests/glcpp-test-cr-lf                    \
+         nir/tests/control_flow_tests                  \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/optimization-test                         \
@@@ -70,12 -72,13 +72,13 @@@ noinst_LTLIBRARIES = libnir.la libglsl.
  check_PROGRAMS =                                      \
        glcpp/glcpp                                     \
        glsl_test                                       \
+       nir/tests/control_flow_tests                    \
        tests/blob-test                                 \
        tests/general-ir-test                           \
        tests/sampler-types-test                        \
        tests/uniform-initializer-test
  
 -noinst_PROGRAMS = glsl_compiler
 +noinst_PROGRAMS = glsl_compiler spirv2nir
  
  tests_blob_test_SOURCES =                             \
        tests/blob_test.c
@@@ -140,13 -143,16 +143,16 @@@ libglsl_la_SOURCES =                                    
        glsl_parser.cpp                                 \
        glsl_parser.h                                   \
        $(LIBGLSL_FILES)                                \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
  
  libnir_la_SOURCES =                                   \
        glsl_types.cpp                                  \
        builtin_types.cpp                               \
        glsl_symbol_table.cpp                           \
-       $(NIR_FILES)
+       $(NIR_FILES)                                    \
+       $(NIR_GENERATED_FILES)
  
  glsl_compiler_SOURCES = \
        $(GLSL_COMPILER_CXX_FILES)
  glsl_compiler_LDADD =                                 \
        libglsl.la                                      \
        $(top_builddir)/src/libglsl_util.la             \
 +      $(PTHREAD_LIBS)
 +
 +spirv2nir_SOURCES = \
 +      standalone_scaffolding.cpp \
 +      standalone_scaffolding.h \
 +      nir/spirv2nir.c
 +
 +spirv2nir_LDADD =                                     \
 +      libglsl.la                                      \
 +      $(top_builddir)/src/libglsl_util.la             \
        $(PTHREAD_LIBS)
  
  glsl_test_SOURCES = \
@@@ -207,19 -203,23 +213,23 @@@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT
  am__v_YACC_0 = @echo "  YACC    " $@;
  am__v_YACC_1 =
  
+ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+ YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+ LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
  glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+       $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
  
  glsl_lexer.cpp: glsl_lexer.ll
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
  
  glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+       $(MKDIR_GEN)
+       $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
  
  glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-       $(AM_V_at)$(MKDIR_P) glcpp
-       $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+       $(MKDIR_GEN)
+       $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
  
  # Only the parsers (specifically the header files generated at the same time)
  # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@@ -232,11 -232,7 +242,7 @@@ BUILT_SOURCES =                                           
        glsl_lexer.cpp                                  \
        glcpp/glcpp-parse.c                             \
        glcpp/glcpp-lex.c                               \
-       nir/nir_builder_opcodes.h                               \
-       nir/nir_constant_expressions.c                  \
-       nir/nir_opcodes.c                               \
-       nir/nir_opcodes.h                               \
-       nir/nir_opt_algebraic.c
+       $(NIR_GENERATED_FILES)
  CLEANFILES =                                          \
        glcpp/glcpp-parse.h                             \
        glsl_parser.h                                   \
@@@ -249,22 -245,35 +255,35 @@@ dist-hook
        $(RM) glcpp/tests/*.out
        $(RM) glcpp/tests/subtest*/*.out
  
+ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
  nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
  
- nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+ nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
  
  nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
  
  nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
  
  nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-       $(AM_V_at)$(MKDIR_P) nir
-       $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+       $(MKDIR_GEN)
+       $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
+ nir_tests_control_flow_tests_SOURCES =                        \
+       nir/tests/control_flow_tests.cpp
+ nir_tests_control_flow_tests_CFLAGS =                 \
+       $(PTHREAD_CFLAGS)
+ nir_tests_control_flow_tests_LDADD =                  \
+       $(top_builddir)/src/gtest/libgtest.la           \
+       $(top_builddir)/src/glsl/libnir.la              \
+       $(top_builddir)/src/libglsl_util.la             \
+       $(top_builddir)/src/util/libmesautil.la         \
+       $(PTHREAD_LIBS)
diff --combined src/glsl/Makefile.sources
index acd13ef7745e20efef62245252a185c2f8fcb768,32b6dba2e910987b5126f079a5c8c537f302ea02..0cd3d2847564bab16d7d86975c139d898d83ccc6
@@@ -30,31 -30,37 +30,37 @@@ NIR_FILES = 
        nir/nir_control_flow_private.h \
        nir/nir_dominance.c \
        nir/nir_from_ssa.c \
+       nir/nir_gs_count_vertices.c \
        nir/nir_intrinsics.c \
        nir/nir_intrinsics.h \
        nir/nir_live_variables.c \
        nir/nir_lower_alu_to_scalar.c \
        nir/nir_lower_atomics.c \
+       nir/nir_lower_clip.c \
        nir/nir_lower_global_vars_to_local.c \
+       nir/nir_lower_gs_intrinsics.c \
        nir/nir_lower_load_const_to_scalar.c \
        nir/nir_lower_locals_to_regs.c \
        nir/nir_lower_idiv.c \
        nir/nir_lower_io.c \
        nir/nir_lower_outputs_to_temporaries.c \
        nir/nir_lower_phis_to_scalar.c \
-       nir/nir_lower_samplers.cpp \
+       nir/nir_lower_samplers.c \
        nir/nir_lower_system_values.c \
-       nir/nir_lower_tex_projector.c \
+       nir/nir_lower_tex.c \
        nir/nir_lower_to_source_mods.c \
+       nir/nir_lower_two_sided_color.c \
        nir/nir_lower_vars_to_ssa.c \
        nir/nir_lower_var_copies.c \
        nir/nir_lower_vec_to_movs.c \
        nir/nir_metadata.c \
+       nir/nir_move_vec_src_uses_to_dest.c \
        nir/nir_normalize_cubemap_coords.c \
        nir/nir_opt_constant_folding.c \
        nir/nir_opt_copy_propagate.c \
        nir/nir_opt_cse.c \
        nir/nir_opt_dce.c \
+       nir/nir_opt_dead_cf.c \
        nir/nir_opt_gcm.c \
        nir/nir_opt_global_to_local.c \
        nir/nir_opt_peephole_ffma.c \
@@@ -65,7 -71,6 +71,7 @@@
        nir/nir_remove_dead_variables.c \
        nir/nir_search.c \
        nir/nir_search.h \
 +      nir/nir_spirv.h \
        nir/nir_split_var_copies.c \
        nir/nir_sweep.c \
        nir/nir_to_ssa.c \
        nir/nir_vla.h \
        nir/nir_worklist.c \
        nir/nir_worklist.h \
 -      nir/nir_types.cpp
 +      nir/nir_types.cpp \
 +      nir/spirv_to_nir.c \
-       nir/spirv_glsl450_to_nir.c \
-       $(NIR_GENERATED_FILES)
++      nir/spirv_glsl450_to_nir.c
  
  # libglsl
  
diff --combined src/glsl/ast_to_hir.cpp
index 81b44bd6786c8c8f427650159bf3953da9d26dfe,6899a554f2c92cb5779bfa42c4301855be6c304a..351aafc1a72aee29aa274403725ed8b5453da5db
@@@ -67,6 -67,48 +67,48 @@@ static voi
  remove_per_vertex_blocks(exec_list *instructions,
                           _mesa_glsl_parse_state *state, ir_variable_mode mode);
  
+ /**
+  * Visitor class that finds the first instance of any write-only variable that
+  * is ever read, if any
+  */
+ class read_from_write_only_variable_visitor : public ir_hierarchical_visitor
+ {
+ public:
+    read_from_write_only_variable_visitor() : found(NULL)
+    {
+    }
+    virtual ir_visitor_status visit(ir_dereference_variable *ir)
+    {
+       if (this->in_assignee)
+          return visit_continue;
+       ir_variable *var = ir->variable_referenced();
+       /* We can have image_write_only set on both images and buffer variables,
+        * but in the former there is a distinction between reads from
+        * the variable itself (write_only) and from the memory they point to
+        * (image_write_only), while in the case of buffer variables there is
+        * no such distinction, that is why this check here is limited to
+        * buffer variables alone.
+        */
+       if (!var || var->data.mode != ir_var_shader_storage)
+          return visit_continue;
+       if (var->data.image_write_only) {
+          found = var;
+          return visit_stop;
+       }
+       return visit_continue;
+    }
+    ir_variable *get_variable() {
+       return found;
+    }
+ private:
+    ir_variable *found;
+ };
  
  void
  _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
      */
     remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
     remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+    /* Check that we don't have reads from write-only variables */
+    read_from_write_only_variable_visitor v;
+    v.run(instructions);
+    ir_variable *error_var = v.get_variable();
+    if (error_var) {
+       /* It would be nice to have proper location information, but for that
+        * we would need to check this as we process each kind of AST node
+        */
+       YYLTYPE loc;
+       memset(&loc, 0, sizeof(loc));
+       _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'",
+                        error_var->name);
+    }
  }
  
  
@@@ -820,7 -876,16 +876,16 @@@ do_assignment(exec_list *instructions, 
                            "assignment to %s",
                            non_lvalue_description);
           error_emitted = true;
-       } else if (lhs_var != NULL && lhs_var->data.read_only) {
+       } else if (lhs_var != NULL && (lhs_var->data.read_only ||
+                  (lhs_var->data.mode == ir_var_shader_storage &&
+                   lhs_var->data.image_read_only))) {
+          /* We can have image_read_only set on both images and buffer variables,
+           * but in the former there is a distinction between assignments to
+           * the variable itself (read_only) and to the memory they point to
+           * (image_read_only), while in the case of buffer variables there is
+           * no such distinction, that is why this check here is limited to
+           * buffer variables alone.
+           */
           _mesa_glsl_error(&lhs_loc, state,
                            "assignment to read-only variable '%s'",
                            lhs_var->name);
@@@ -1019,7 -1084,6 +1084,7 @@@ do_comparison(void *mem_ctx, int operat
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_ATOMIC_UINT:
     case GLSL_TYPE_SUBROUTINE:
        /* I assume a comparison of a struct containing a sampler just
@@@ -2115,7 -2179,7 +2180,7 @@@ validate_binding_qualifier(struct _mesa
     }
  
     const struct gl_context *const ctx = state->ctx;
-    unsigned elements = type->is_array() ? type->length : 1;
+    unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
     unsigned max_index = qual->binding + elements - 1;
     const glsl_type *base_type = type->without_array();
  
@@@ -2921,11 -2985,13 +2986,13 @@@ apply_type_qualifier_to_variable(const 
         var->data.depth_layout = ir_depth_layout_none;
  
     if (qual->flags.q.std140 ||
+        qual->flags.q.std430 ||
         qual->flags.q.packed ||
         qual->flags.q.shared) {
        _mesa_glsl_error(loc, state,
-                        "uniform block layout qualifiers std140, packed, and "
-                        "shared can only be applied to uniform blocks, not "
+                        "uniform and shader storage block layout qualifiers "
+                        "std140, std430, packed, and shared can only be "
+                        "applied to uniform or shader storage blocks, not "
                         "members");
     }
  
@@@ -4579,7 -4645,7 +4646,7 @@@ ast_function::hir(exec_list *instructio
     if (state->es_shader && state->language_version >= 300) {
        /* Local shader has no exact candidates; check the built-ins. */
        _mesa_glsl_initialize_builtin_functions();
-       if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+       if (_mesa_glsl_find_builtin_function_by_name(name)) {
           YYLTYPE loc = this->get_location();
           _mesa_glsl_error(& loc, state,
                            "A shader cannot redefine or overload built-in "
@@@ -5605,10 -5671,19 +5672,19 @@@ ast_process_structure_or_interface_bloc
                                           bool is_interface,
                                           enum glsl_matrix_layout matrix_layout,
                                           bool allow_reserved_names,
-                                          ir_variable_mode var_mode)
+                                          ir_variable_mode var_mode,
+                                          ast_type_qualifier *layout)
  {
     unsigned decl_count = 0;
  
+    /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+     * that we don't have incompatible qualifiers
+     */
+    if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
+       _mesa_glsl_error(&loc, state,
+                        "Interface block sets both readonly and writeonly");
+    }
     /* Make an initial pass over the list of fields to determine how
      * many there are.  Each element in this list is an ast_declarator_list.
      * This means that we actually need to count the number of elements in the
            * is_interface case, will have resulted in compilation having
            * already halted due to a syntax error.
            */
-          const struct glsl_type *field_type =
-             decl_type != NULL ? decl_type : glsl_type::error_type;
+          assert(decl_type);
  
-          if (is_interface && field_type->contains_opaque()) {
+          if (is_interface && decl_type->contains_opaque()) {
              YYLTYPE loc = decl_list->get_location();
              _mesa_glsl_error(&loc, state,
                               "uniform/buffer in non-default interface block contains "
                               "opaque variable");
           }
  
-          if (field_type->contains_atomic()) {
+          if (decl_type->contains_atomic()) {
              /* From section 4.1.7.3 of the GLSL 4.40 spec:
               *
               *    "Members of structures cannot be declared as atomic counter
                               "shader storage block or uniform block");
           }
  
-          if (field_type->contains_image()) {
+          if (decl_type->contains_image()) {
              /* FINISHME: Same problem as with atomic counters.
               * FINISHME: Request clarification from Khronos and add
               * FINISHME: spec quotation here.
           const struct ast_type_qualifier *const qual =
              & decl_list->type->qualifier;
           if (qual->flags.q.std140 ||
+              qual->flags.q.std430 ||
               qual->flags.q.packed ||
               qual->flags.q.shared) {
              _mesa_glsl_error(&loc, state,
                               "uniform/shader storage block layout qualifiers "
-                              "std140, packed, and shared can only be applied "
-                              "to uniform/shader storage blocks, not members");
+                              "std140, std430, packed, and shared can only be "
+                              "applied to uniform/shader storage blocks, not "
+                              "members");
           }
  
           if (qual->flags.q.constant) {
                               "to struct or interface block members");
           }
  
-          field_type = process_array_type(&loc, decl_type,
-                                          decl->array_specifier, state);
+          const struct glsl_type *field_type =
+             process_array_type(&loc, decl_type, decl->array_specifier, state);
           fields[i].type = field_type;
           fields[i].name = decl->identifier;
           fields[i].location = -1;
                     || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
           }
  
+          /* Image qualifiers are allowed on buffer variables, which can only
+           * be defined inside shader storage buffer objects
+           */
+          if (layout && var_mode == ir_var_shader_storage) {
+             if (qual->flags.q.read_only && qual->flags.q.write_only) {
+                _mesa_glsl_error(&loc, state,
+                                 "buffer variable `%s' can't be "
+                                 "readonly and writeonly.", fields[i].name);
+             }
+             /* For readonly and writeonly qualifiers the field definition,
+              * if set, overwrites the layout qualifier.
+              */
+             bool read_only = layout->flags.q.read_only;
+             bool write_only = layout->flags.q.write_only;
+             if (qual->flags.q.read_only) {
+                read_only = true;
+                write_only = false;
+             } else if (qual->flags.q.write_only) {
+                read_only = false;
+                write_only = true;
+             }
+             fields[i].image_read_only = read_only;
+             fields[i].image_write_only = write_only;
+             /* For other qualifiers, we set the flag if either the layout
+              * qualifier or the field qualifier are set
+              */
+             fields[i].image_coherent = qual->flags.q.coherent ||
+                                         layout->flags.q.coherent;
+             fields[i].image_volatile = qual->flags.q._volatile ||
+                                         layout->flags.q._volatile;
+             fields[i].image_restrict = qual->flags.q.restrict_flag ||
+                                         layout->flags.q.restrict_flag;
+          }
           i++;
        }
     }
@@@ -5822,7 -5936,8 +5937,8 @@@ ast_struct_specifier::hir(exec_list *in
                                                 false,
                                                 GLSL_MATRIX_LAYOUT_INHERITED,
                                                 false /* allow_reserved_names */,
-                                                ir_var_auto);
+                                                ir_var_auto,
+                                                NULL);
  
     validate_identifier(this->name, loc, state);
  
@@@ -5881,6 -5996,19 +5997,19 @@@ private
     bool found;
  };
  
+ static bool
+ is_unsized_array_last_element(ir_variable *v)
+ {
+    const glsl_type *interface_type = v->get_interface_type();
+    int length = interface_type->length;
+    assert(v->type->is_unsized_array());
+    /* Check if it is the last element of the interface */
+    if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0)
+       return true;
+    return false;
+ }
  
  ir_rvalue *
  ast_interface_block::hir(exec_list *instructions,
                         this->block_name);
     }
  
+    if (!this->layout.flags.q.buffer &&
+        this->layout.flags.q.std430) {
+       _mesa_glsl_error(&loc, state,
+                        "std430 storage block layout qualifier is supported "
+                        "only for shader storage blocks");
+    }
     /* The ast_interface_block has a list of ast_declarator_lists.  We
      * need to turn those into ir_variables with an association
      * with this uniform block.
        packing = GLSL_INTERFACE_PACKING_SHARED;
     } else if (this->layout.flags.q.packed) {
        packing = GLSL_INTERFACE_PACKING_PACKED;
+    } else if (this->layout.flags.q.std430) {
+       packing = GLSL_INTERFACE_PACKING_STD430;
     } else {
        /* The default layout is std140.
         */
                                                 true,
                                                 matrix_layout,
                                                 redeclaring_per_vertex,
-                                                var_mode);
+                                                var_mode,
+                                                &this->layout);
  
     state->struct_specifier_depth--;
  
        else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
           handle_tess_ctrl_shader_output_decl(state, loc, var);
  
+       for (unsigned i = 0; i < num_variables; i++) {
+          if (fields[i].type->is_unsized_array()) {
+             if (var_mode == ir_var_shader_storage) {
+                if (i != (num_variables - 1)) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                    "only last member of a shader storage block "
+                                    "can be defined as unsized array",
+                                    fields[i].name);
+                }
+             } else {
+                /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+                if (state->es_shader) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                  "only last member of a shader storage block "
+                                  "can be defined as unsized array",
+                                  fields[i].name);
+                }
+             }
+          }
+       }
        if (ir_variable *earlier =
            state->symbols->get_variable(this->instance_name)) {
           if (!redeclaring_per_vertex) {
  
           var->data.stream = this->layout.stream;
  
+          if (var->data.mode == ir_var_shader_storage) {
+             var->data.image_read_only = fields[i].image_read_only;
+             var->data.image_write_only = fields[i].image_write_only;
+             var->data.image_coherent = fields[i].image_coherent;
+             var->data.image_volatile = fields[i].image_volatile;
+             var->data.image_restrict = fields[i].image_restrict;
+          }
           /* Examine var name here since var may get deleted in the next call */
           bool var_is_gl_id = is_gl_identifier(var->name);
  
           var->data.explicit_binding = this->layout.flags.q.explicit_binding;
           var->data.binding = this->layout.binding;
  
+          if (var->type->is_unsized_array()) {
+             if (var->is_in_shader_storage_block()) {
+                if (!is_unsized_array_last_element(var)) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                    "only last member of a shader storage block "
+                                    "can be defined as unsized array",
+                                    var->name);
+                }
+                var->data.from_ssbo_unsized_array = true;
+             } else {
+                /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
+                *
+                * "If an array is declared as the last member of a shader storage
+                * block and the size is not specified at compile-time, it is
+                * sized at run-time. In all other cases, arrays are sized only
+                * at compile-time."
+                */
+                if (state->es_shader) {
+                   _mesa_glsl_error(&loc, state, "unsized array `%s' definition: "
+                                  "only last member of a shader storage block "
+                                  "can be defined as unsized array",
+                                  var->name);
+                }
+             }
+          }
           state->symbols->add_variable(var);
           instructions->push_tail(var);
        }
diff --combined src/glsl/glsl_parser_extras.cpp
index 5c8f98b091d96f606e06807a584407a5912705b6,f5542415d4a4e11a02a3d28953d288f603c94156..c02d28c0866dd72d959c9b5f88d685ca4ab5d2b3
@@@ -87,8 -87,6 +87,8 @@@ _mesa_glsl_parse_state::_mesa_glsl_pars
  
     this->extensions = &ctx->Extensions;
  
 +   this->ARB_compute_shader_enable = true;
 +
     this->Const.MaxLights = ctx->Const.MaxLights;
     this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes;
     this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits;
     this->default_uniform_qualifier = new(this) ast_type_qualifier();
     this->default_uniform_qualifier->flags.q.shared = 1;
     this->default_uniform_qualifier->flags.q.column_major = 1;
+    this->default_uniform_qualifier->is_default_qualifier = true;
+    this->default_shader_storage_qualifier = new(this) ast_type_qualifier();
+    this->default_shader_storage_qualifier->flags.q.shared = 1;
+    this->default_shader_storage_qualifier->flags.q.column_major = 1;
+    this->default_shader_storage_qualifier->is_default_qualifier = true;
  
     this->fs_uses_gl_fragcoord = false;
     this->fs_redeclares_gl_fragcoord = false;
@@@ -604,8 -608,9 +610,9 @@@ static const _mesa_glsl_extension _mesa
     EXT(ARB_shader_image_size,            true,  false,     ARB_shader_image_size),
     EXT(ARB_shader_precision,             true,  false,     ARB_shader_precision),
     EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
-    EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
+    EXT(ARB_shader_storage_buffer_object, true,  true,      ARB_shader_storage_buffer_object),
     EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
+    EXT(ARB_shader_texture_image_samples, true,  false,     ARB_shader_texture_image_samples),
     EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
     EXT(ARB_shading_language_420pack,     true,  false,     ARB_shading_language_420pack),
     EXT(ARB_shading_language_packing,     true,  false,     ARB_shading_language_packing),
@@@ -859,6 -864,139 +866,139 @@@ _mesa_ast_set_aggregate_type(const glsl
     }
  }
  
+ void
+ _mesa_ast_process_interface_block(YYLTYPE *locp,
+                                   _mesa_glsl_parse_state *state,
+                                   ast_interface_block *const block,
+                                   const struct ast_type_qualifier q)
+ {
+    if (q.flags.q.buffer) {
+       if (!state->has_shader_storage_buffer_objects()) {
+          _mesa_glsl_error(locp, state,
+                           "#version 430 / GL_ARB_shader_storage_buffer_object "
+                           "required for defining shader storage blocks");
+       } else if (state->ARB_shader_storage_buffer_object_warn) {
+          _mesa_glsl_warning(locp, state,
+                             "#version 430 / GL_ARB_shader_storage_buffer_object "
+                             "required for defining shader storage blocks");
+       }
+    } else if (q.flags.q.uniform) {
+       if (!state->has_uniform_buffer_objects()) {
+          _mesa_glsl_error(locp, state,
+                           "#version 140 / GL_ARB_uniform_buffer_object "
+                           "required for defining uniform blocks");
+       } else if (state->ARB_uniform_buffer_object_warn) {
+          _mesa_glsl_warning(locp, state,
+                             "#version 140 / GL_ARB_uniform_buffer_object "
+                             "required for defining uniform blocks");
+       }
+    } else {
+       if (state->es_shader || state->language_version < 150) {
+          _mesa_glsl_error(locp, state,
+                           "#version 150 required for using "
+                           "interface blocks");
+       }
+    }
+    /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"):
+     * "It is illegal to have an input block in a vertex shader
+     *  or an output block in a fragment shader"
+     */
+    if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) {
+       _mesa_glsl_error(locp, state,
+                        "`in' interface block is not allowed for "
+                        "a vertex shader");
+    } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) {
+       _mesa_glsl_error(locp, state,
+                        "`out' interface block is not allowed for "
+                        "a fragment shader");
+    }
+    /* Since block arrays require names, and both features are added in
+     * the same language versions, we don't have to explicitly
+     * version-check both things.
+     */
+    if (block->instance_name != NULL) {
+       state->check_version(150, 300, locp, "interface blocks with "
+                            "an instance name are not allowed");
+    }
+    uint64_t interface_type_mask;
+    struct ast_type_qualifier temp_type_qualifier;
+    /* Get a bitmask containing only the in/out/uniform/buffer
+     * flags, allowing us to ignore other irrelevant flags like
+     * interpolation qualifiers.
+     */
+    temp_type_qualifier.flags.i = 0;
+    temp_type_qualifier.flags.q.uniform = true;
+    temp_type_qualifier.flags.q.in = true;
+    temp_type_qualifier.flags.q.out = true;
+    temp_type_qualifier.flags.q.buffer = true;
+    interface_type_mask = temp_type_qualifier.flags.i;
+    /* Get the block's interface qualifier.  The interface_qualifier
+     * production rule guarantees that only one bit will be set (and
+     * it will be in/out/uniform).
+     */
+    uint64_t block_interface_qualifier = q.flags.i;
+    block->layout.flags.i |= block_interface_qualifier;
+    if (state->stage == MESA_SHADER_GEOMETRY &&
+        state->has_explicit_attrib_stream()) {
+       /* Assign global layout's stream value. */
+       block->layout.flags.q.stream = 1;
+       block->layout.flags.q.explicit_stream = 0;
+       block->layout.stream = state->out_qualifier->stream;
+    }
+    foreach_list_typed (ast_declarator_list, member, link, &block->declarations) {
+       ast_type_qualifier& qualifier = member->type->qualifier;
+       if ((qualifier.flags.i & interface_type_mask) == 0) {
+          /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+           * "If no optional qualifier is used in a member declaration, the
+           *  qualifier of the variable is just in, out, or uniform as declared
+           *  by interface-qualifier."
+           */
+          qualifier.flags.i |= block_interface_qualifier;
+       } else if ((qualifier.flags.i & interface_type_mask) !=
+                  block_interface_qualifier) {
+          /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks):
+           * "If optional qualifiers are used, they can include interpolation
+           *  and storage qualifiers and they must declare an input, output,
+           *  or uniform variable consistent with the interface qualifier of
+           *  the block."
+           */
+          _mesa_glsl_error(locp, state,
+                           "uniform/in/out qualifier on "
+                           "interface block member does not match "
+                           "the interface block");
+       }
+       /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+        *
+        * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+        * outputs."
+        *
+        * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":.
+        *
+        * "Only variables output from a shader can be candidates for
+        * invariance."
+        *
+        * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+        *
+        * "If optional qualifiers are used, they can include interpolation
+        * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+        * and they must declare an input, output, or uniform member
+        * consistent with the interface qualifier of the block"
+        */
+       if (qualifier.flags.q.invariant)
+          _mesa_glsl_error(locp, state,
+                           "invariant qualifiers cannot be used "
+                           "with interface blocks members");
+    }
+ }
  
  void
  _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
@@@ -1695,6 -1833,8 +1835,8 @@@ _mesa_glsl_compile_shader(struct gl_con
        }
     }
  
+    _mesa_glsl_initialize_derived_variables(shader);
     delete state->symbols;
     ralloc_free(state);
  }
diff --combined src/glsl/glsl_types.cpp
index c737fb6e36639eb838b0d8116ced8e535346e3ed,0ead0f2a32716d7b341519aaccd50c36ee054b1e..15cd45ea708e4adf458536c638eec7293ffdf541
@@@ -32,7 -32,6 +32,7 @@@ mtx_t glsl_type::mutex = _MTX_INITIALIZ
  hash_table *glsl_type::array_types = NULL;
  hash_table *glsl_type::record_types = NULL;
  hash_table *glsl_type::interface_types = NULL;
 +hash_table *glsl_type::function_types = NULL;
  hash_table *glsl_type::subroutine_types = NULL;
  void *glsl_type::mem_ctx = NULL;
  
@@@ -46,8 -45,8 +46,8 @@@ glsl_type::init_ralloc_type_ctx(void
  }
  
  glsl_type::glsl_type(GLenum gl_type,
-                    glsl_base_type base_type, unsigned vector_elements,
-                    unsigned matrix_columns, const char *name) :
+                      glsl_base_type base_type, unsigned vector_elements,
+                      unsigned matrix_columns, const char *name) :
     gl_type(gl_type),
     base_type(base_type),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
@@@ -70,8 -69,8 +70,8 @@@
  }
  
  glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
-                    enum glsl_sampler_dim dim, bool shadow, bool array,
-                    unsigned type, const char *name) :
+                      enum glsl_sampler_dim dim, bool shadow, bool array,
+                      unsigned type, const char *name) :
     gl_type(gl_type),
     base_type(base_type),
     sampler_dimensionality(dim), sampler_shadow(shadow),
@@@ -97,7 -96,7 +97,7 @@@
  }
  
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    const char *name) :
+                      const char *name) :
     gl_type(0),
     base_type(GLSL_TYPE_STRUCT),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
     assert(name != NULL);
     this->name = ralloc_strdup(this->mem_ctx, name);
     this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                          glsl_struct_field, length);
  
     for (i = 0; i < length; i++) {
        this->fields.structure[i].type = fields[i].type;
        this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                      fields[i].name);
        this->fields.structure[i].location = fields[i].location;
        this->fields.structure[i].interpolation = fields[i].interpolation;
        this->fields.structure[i].centroid = fields[i].centroid;
        this->fields.structure[i].sample = fields[i].sample;
        this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
        this->fields.structure[i].patch = fields[i].patch;
+       this->fields.structure[i].image_read_only = fields[i].image_read_only;
+       this->fields.structure[i].image_write_only = fields[i].image_write_only;
+       this->fields.structure[i].image_coherent = fields[i].image_coherent;
+       this->fields.structure[i].image_volatile = fields[i].image_volatile;
+       this->fields.structure[i].image_restrict = fields[i].image_restrict;
     }
  
     mtx_unlock(&glsl_type::mutex);
  }
  
  glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
-                    enum glsl_interface_packing packing, const char *name) :
+                      enum glsl_interface_packing packing, const char *name) :
     gl_type(0),
     base_type(GLSL_TYPE_INTERFACE),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
     assert(name != NULL);
     this->name = ralloc_strdup(this->mem_ctx, name);
     this->fields.structure = ralloc_array(this->mem_ctx,
-                                        glsl_struct_field, length);
+                                          glsl_struct_field, length);
     for (i = 0; i < length; i++) {
        this->fields.structure[i].type = fields[i].type;
        this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
-                                                    fields[i].name);
+                                                      fields[i].name);
        this->fields.structure[i].location = fields[i].location;
        this->fields.structure[i].interpolation = fields[i].interpolation;
        this->fields.structure[i].centroid = fields[i].centroid;
     mtx_unlock(&glsl_type::mutex);
  }
  
 +glsl_type::glsl_type(const glsl_type *return_type,
 +                     const glsl_function_param *params, unsigned num_params) :
 +   gl_type(0),
 +   base_type(GLSL_TYPE_FUNCTION),
 +   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
 +   sampler_type(0), interface_packing(0),
 +   vector_elements(0), matrix_columns(0),
 +   length(num_params)
 +{
 +   unsigned int i;
 +
 +   mtx_lock(&glsl_type::mutex);
 +
 +   init_ralloc_type_ctx();
 +
 +   this->fields.parameters = rzalloc_array(this->mem_ctx,
 +                                           glsl_function_param, num_params + 1);
 +
 +   /* We store the return type as the first parameter */
 +   this->fields.parameters[0].type = return_type;
 +   this->fields.parameters[0].in = false;
 +   this->fields.parameters[0].out = true;
 +
 +   /* We store the i'th parameter in slot i+1 */
 +   for (i = 0; i < length; i++) {
 +      this->fields.parameters[i + 1].type = params[i].type;
 +      this->fields.parameters[i + 1].in = params[i].in;
 +      this->fields.parameters[i + 1].out = params[i].out;
 +   }
 +
 +   mtx_unlock(&glsl_type::mutex);
 +}
 +
  glsl_type::glsl_type(const char *subroutine_name) :
     gl_type(0),
     base_type(GLSL_TYPE_SUBROUTINE),
@@@ -220,8 -191,8 +225,8 @@@ glsl_type::contains_sampler() cons
        return this->fields.array->contains_sampler();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_sampler())
-           return true;
+          if (this->fields.structure[i].type->contains_sampler())
+             return true;
        }
        return false;
     } else {
@@@ -237,8 -208,8 +242,8 @@@ glsl_type::contains_integer() cons
        return this->fields.array->contains_integer();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_integer())
-           return true;
+          if (this->fields.structure[i].type->contains_integer())
+             return true;
        }
        return false;
     } else {
@@@ -253,8 -224,8 +258,8 @@@ glsl_type::contains_double() cons
        return this->fields.array->contains_double();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_double())
-           return true;
+          if (this->fields.structure[i].type->contains_double())
+             return true;
        }
        return false;
     } else {
@@@ -289,8 -260,8 +294,8 @@@ glsl_type::contains_subroutine() cons
        return this->fields.array->contains_subroutine();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_subroutine())
-           return true;
+          if (this->fields.structure[i].type->contains_subroutine())
+             return true;
        }
        return false;
     } else {
@@@ -335,8 -306,8 +340,8 @@@ glsl_type::contains_image() cons
        return this->fields.array->contains_image();
     } else if (this->is_record()) {
        for (unsigned int i = 0; i < this->length; i++) {
-        if (this->fields.structure[i].type->contains_image())
-           return true;
+          if (this->fields.structure[i].type->contains_image())
+             return true;
        }
        return false;
     } else {
@@@ -536,21 -507,21 +541,21 @@@ glsl_type::get_instance(unsigned base_t
     if (columns == 1) {
        switch (base_type) {
        case GLSL_TYPE_UINT:
-        return uvec(rows);
+          return uvec(rows);
        case GLSL_TYPE_INT:
-        return ivec(rows);
+          return ivec(rows);
        case GLSL_TYPE_FLOAT:
-        return vec(rows);
+          return vec(rows);
        case GLSL_TYPE_DOUBLE:
-        return dvec(rows);
+          return dvec(rows);
        case GLSL_TYPE_BOOL:
-        return bvec(rows);
+          return bvec(rows);
        default:
-        return error_type;
+          return error_type;
        }
     } else {
        if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
-        return error_type;
+          return error_type;
  
        /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
         * combinations are valid:
@@@ -772,10 -743,10 +777,10 @@@ glsl_type::record_compare(const glsl_ty
  
     for (unsigned i = 0; i < this->length; i++) {
        if (this->fields.structure[i].type != b->fields.structure[i].type)
-        return false;
+          return false;
        if (strcmp(this->fields.structure[i].name,
-                b->fields.structure[i].name) != 0)
-        return false;
+                  b->fields.structure[i].name) != 0)
+          return false;
        if (this->fields.structure[i].matrix_layout
           != b->fields.structure[i].matrix_layout)
          return false;
        if (this->fields.structure[i].patch
            != b->fields.structure[i].patch)
           return false;
+       if (this->fields.structure[i].image_read_only
+           != b->fields.structure[i].image_read_only)
+          return false;
+       if (this->fields.structure[i].image_write_only
+           != b->fields.structure[i].image_write_only)
+          return false;
+       if (this->fields.structure[i].image_coherent
+           != b->fields.structure[i].image_coherent)
+          return false;
+       if (this->fields.structure[i].image_volatile
+           != b->fields.structure[i].image_volatile)
+          return false;
+       if (this->fields.structure[i].image_restrict
+           != b->fields.structure[i].image_restrict)
+          return false;
     }
  
     return true;
@@@ -836,8 -822,8 +856,8 @@@ glsl_type::record_key_hash(const void *
  
  const glsl_type *
  glsl_type::get_record_instance(const glsl_struct_field *fields,
-                              unsigned num_fields,
-                              const char *name)
+                                unsigned num_fields,
+                                const char *name)
  {
     const glsl_type key(fields, num_fields, name);
  
  
  const glsl_type *
  glsl_type::get_interface_instance(const glsl_struct_field *fields,
-                                 unsigned num_fields,
-                                 enum glsl_interface_packing packing,
-                                 const char *block_name)
+                                   unsigned num_fields,
+                                   enum glsl_interface_packing packing,
+                                   const char *block_name)
  {
     const glsl_type key(fields, num_fields, packing, block_name);
  
@@@ -934,74 -920,6 +954,74 @@@ glsl_type::get_subroutine_instance(cons
  }
  
  
 +static bool
 +function_key_compare(const void *a, const void *b)
 +{
 +   const glsl_type *const key1 = (glsl_type *) a;
 +   const glsl_type *const key2 = (glsl_type *) b;
 +
 +   if (key1->length != key2->length)
 +      return 1;
 +
 +   return memcmp(key1->fields.parameters, key2->fields.parameters,
 +                 (key1->length + 1) * sizeof(*key1->fields.parameters));
 +}
 +
 +
 +static uint32_t
 +function_key_hash(const void *a)
 +{
 +   const glsl_type *const key = (glsl_type *) a;
 +   char hash_key[128];
 +   unsigned size = 0;
 +
 +   size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
 +
 +   for (unsigned i = 0; i < key->length; i++) {
 +      if (size >= sizeof(hash_key))
 +       break;
 +
 +      size += snprintf(& hash_key[size], sizeof(hash_key) - size,
 +                     "%p", (void *) key->fields.structure[i].type);
 +   }
 +
 +   return _mesa_hash_string(hash_key);
 +}
 +
 +const glsl_type *
 +glsl_type::get_function_instance(const glsl_type *return_type,
 +                                 const glsl_function_param *params,
 +                                 unsigned num_params)
 +{
 +   const glsl_type key(return_type, params, num_params);
 +
 +   mtx_lock(&glsl_type::mutex);
 +
 +   if (function_types == NULL) {
 +      function_types = _mesa_hash_table_create(NULL, function_key_hash,
 +                                               function_key_compare);
 +   }
 +
 +   struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
 +   if (entry == NULL) {
 +      mtx_unlock(&glsl_type::mutex);
 +      const glsl_type *t = new glsl_type(return_type, params, num_params);
 +      mtx_lock(&glsl_type::mutex);
 +
 +      entry = _mesa_hash_table_insert(function_types, t, (void *) t);
 +   }
 +
 +   const glsl_type *t = (const glsl_type *)entry->data;
 +
 +   assert(t->base_type == GLSL_TYPE_FUNCTION);
 +   assert(t->length == num_params);
 +
 +   mtx_unlock(&glsl_type::mutex);
 +
 +   return t;
 +}
 +
 +
  const glsl_type *
  glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
  {
@@@ -1078,7 -996,7 +1098,7 @@@ glsl_type::field_type(const char *name
  
     for (unsigned i = 0; i < this->length; i++) {
        if (strcmp(name, this->fields.structure[i].name) == 0)
-        return this->fields.structure[i].type;
+          return this->fields.structure[i].type;
     }
  
     return error_type;
@@@ -1094,7 -1012,7 +1114,7 @@@ glsl_type::field_index(const char *name
  
     for (unsigned i = 0; i < this->length; i++) {
        if (strcmp(name, this->fields.structure[i].name) == 0)
-        return i;
+          return i;
     }
  
     return -1;
@@@ -1119,7 -1037,7 +1139,7 @@@ glsl_type::component_slots() cons
        unsigned size = 0;
  
        for (unsigned i = 0; i < this->length; i++)
-        size += this->fields.structure[i].type->component_slots();
+          size += this->fields.structure[i].type->component_slots();
  
        return size;
     }
        return 1;
     case GLSL_TYPE_SUBROUTINE:
       return 1;
 +
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_ATOMIC_UINT:
     case GLSL_TYPE_VOID:
     return 0;
  }
  
+ unsigned
+ glsl_type::record_location_offset(unsigned length) const
+ {
+    unsigned offset = 0;
+    const glsl_type *t = this->without_array();
+    if (t->is_record()) {
+       assert(length <= t->length);
+       for (unsigned i = 0; i < length; i++) {
+          const glsl_type *st = t->fields.structure[i].type;
+          const glsl_type *wa = st->without_array();
+          if (wa->is_record()) {
+             unsigned r_offset = wa->record_location_offset(wa->length);
+             offset += st->is_array() ? st->length * r_offset : r_offset;
+          } else {
+             /* We don't worry about arrays here because unless the array
+              * contains a structure or another array it only takes up a single
+              * uniform slot.
+              */
+             offset += 1;
+          }
+       }
+    }
+    return offset;
+ }
  unsigned
  glsl_type::uniform_locations() const
  {
@@@ -1231,12 -1173,12 +1277,12 @@@ glsl_type::std140_base_alignment(bool r
     if (this->is_scalar() || this->is_vector()) {
        switch (this->vector_elements) {
        case 1:
-        return N;
+          return N;
        case 2:
-        return 2 * N;
+          return 2 * N;
        case 3:
        case 4:
-        return 4 * N;
+          return 4 * N;
        }
     }
  
      */
     if (this->is_array()) {
        if (this->fields.array->is_scalar() ||
-         this->fields.array->is_vector() ||
-         this->fields.array->is_matrix()) {
-        return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+           this->fields.array->is_vector() ||
+           this->fields.array->is_matrix()) {
+          return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
        } else {
-        assert(this->fields.array->is_record() ||
+          assert(this->fields.array->is_record() ||
                  this->fields.array->is_array());
-        return this->fields.array->std140_base_alignment(row_major);
+          return this->fields.array->std140_base_alignment(row_major);
        }
     }
  
        int r = this->vector_elements;
  
        if (row_major) {
-        vec_type = get_instance(base_type, c, 1);
-        array_type = glsl_type::get_array_instance(vec_type, r);
+          vec_type = get_instance(base_type, c, 1);
+          array_type = glsl_type::get_array_instance(vec_type, r);
        } else {
-        vec_type = get_instance(base_type, r, 1);
-        array_type = glsl_type::get_array_instance(vec_type, c);
+          vec_type = get_instance(base_type, r, 1);
+          array_type = glsl_type::get_array_instance(vec_type, c);
        }
  
        return array_type->std140_base_alignment(false);
              field_row_major = false;
           }
  
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        base_alignment = MAX2(base_alignment,
-                              field_type->std140_base_alignment(field_row_major));
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          base_alignment = MAX2(base_alignment,
+                                field_type->std140_base_alignment(field_row_major));
        }
        return base_alignment;
     }
@@@ -1374,25 -1316,25 +1420,25 @@@ glsl_type::std140_size(bool row_major) 
        unsigned int array_len;
  
        if (this->is_array()) {
-        element_type = this->fields.array;
-        array_len = this->length;
+          element_type = this->fields.array;
+          array_len = this->length;
        } else {
-        element_type = this;
-        array_len = 1;
+          element_type = this;
+          array_len = 1;
        }
  
        if (row_major) {
           vec_type = get_instance(element_type->base_type,
                                   element_type->matrix_columns, 1);
  
-        array_len *= element_type->vector_elements;
+          array_len *= element_type->vector_elements;
        } else {
-        vec_type = get_instance(element_type->base_type,
-                                element_type->vector_elements, 1);
-        array_len *= element_type->matrix_columns;
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->vector_elements, 1);
+          array_len *= element_type->matrix_columns;
        }
        const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-                                                                 array_len);
+                                                                   array_len);
  
        return array_type->std140_size(false);
     }
      */
     if (this->is_array()) {
        if (this->fields.array->is_record()) {
-        return this->length * this->fields.array->std140_size(row_major);
+          return this->length * this->fields.array->std140_size(row_major);
        } else {
-        unsigned element_base_align =
-           this->fields.array->std140_base_alignment(row_major);
-        return this->length * MAX2(element_base_align, 16);
+          unsigned element_base_align =
+             this->fields.array->std140_base_alignment(row_major);
+          return this->length * MAX2(element_base_align, 16);
        }
     }
  
      *     rounded up to the next multiple of the base alignment of the
      *     structure.
      */
-    if (this->is_record()) {
+    if (this->is_record() || this->is_interface()) {
        unsigned size = 0;
        unsigned max_align = 0;
  
              field_row_major = false;
           }
  
-        const struct glsl_type *field_type = this->fields.structure[i].type;
-        unsigned align = field_type->std140_base_alignment(field_row_major);
-        size = glsl_align(size, align);
-        size += field_type->std140_size(field_row_major);
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          unsigned align = field_type->std140_base_alignment(field_row_major);
+          /* Ignore unsized arrays when calculating size */
+          if (field_type->is_unsized_array())
+             continue;
+          size = glsl_align(size, align);
+          size += field_type->std140_size(field_row_major);
  
           max_align = MAX2(align, max_align);
  
     return -1;
  }
  
+ unsigned
+ glsl_type::std430_base_alignment(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* (1) If the member is a scalar consuming <N> basic machine units, the
+     *     base alignment is <N>.
+     *
+     * (2) If the member is a two- or four-component vector with components
+     *     consuming <N> basic machine units, the base alignment is 2<N> or
+     *     4<N>, respectively.
+     *
+     * (3) If the member is a three-component vector with components consuming
+     *     <N> basic machine units, the base alignment is 4<N>.
+     */
+    if (this->is_scalar() || this->is_vector()) {
+       switch (this->vector_elements) {
+       case 1:
+          return N;
+       case 2:
+          return 2 * N;
+       case 3:
+       case 4:
+          return 4 * N;
+       }
+    }
+    /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+     *
+     * "When using the std430 storage layout, shader storage blocks will be
+     * laid out in buffer storage identically to uniform and shader storage
+     * blocks using the std140 layout, except that the base alignment and
+     * stride of arrays of scalars and vectors in rule 4 and of structures
+     * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+     */
+    if (this->is_array())
+       return this->fields.array->std430_base_alignment(row_major);
+    /* (5) If the member is a column-major matrix with <C> columns and
+     *     <R> rows, the matrix is stored identically to an array of
+     *     <C> column vectors with <R> components each, according to
+     *     rule (4).
+     *
+     * (7) If the member is a row-major matrix with <C> columns and <R>
+     *     rows, the matrix is stored identically to an array of <R>
+     *     row vectors with <C> components each, according to rule (4).
+     */
+    if (this->is_matrix()) {
+       const struct glsl_type *vec_type, *array_type;
+       int c = this->matrix_columns;
+       int r = this->vector_elements;
+       if (row_major) {
+          vec_type = get_instance(base_type, c, 1);
+          array_type = glsl_type::get_array_instance(vec_type, r);
+       } else {
+          vec_type = get_instance(base_type, r, 1);
+          array_type = glsl_type::get_array_instance(vec_type, c);
+       }
+       return array_type->std430_base_alignment(false);
+    }
+    /* (9) If the member is a structure, the base alignment of the
+     *     structure is <N>, where <N> is the largest base alignment
+     *     value of any of its members, and rounded up to the base
+     *     alignment of a vec4. The individual members of this
+     *     sub-structure are then assigned offsets by applying this set
+     *     of rules recursively, where the base offset of the first
+     *     member of the sub-structure is equal to the aligned offset
+     *     of the structure. The structure may have padding at the end;
+     *     the base offset of the member following the sub-structure is
+     *     rounded up to the next multiple of the base alignment of the
+     *     structure.
+     */
+    if (this->is_record()) {
+       unsigned base_alignment = 0;
+       for (unsigned i = 0; i < this->length; i++) {
+          bool field_row_major = row_major;
+          const enum glsl_matrix_layout matrix_layout =
+             glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+          if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+             field_row_major = true;
+          } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+             field_row_major = false;
+          }
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          base_alignment = MAX2(base_alignment,
+                                field_type->std430_base_alignment(field_row_major));
+       }
+       return base_alignment;
+    }
+    assert(!"not reached");
+    return -1;
+ }
+ unsigned
+ glsl_type::std430_array_stride(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+     * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+     *
+     * (3) If the member is a three-component vector with components consuming
+     *     <N> basic machine units, the base alignment is 4<N>.
+     */
+    if (this->is_vector() && this->vector_elements == 3)
+       return 4 * N;
+    /* By default use std430_size(row_major) */
+    return this->std430_size(row_major);
+ }
+ unsigned
+ glsl_type::std430_size(bool row_major) const
+ {
+    unsigned N = is_double() ? 8 : 4;
+    /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+     *
+     * "When using the std430 storage layout, shader storage blocks will be
+     * laid out in buffer storage identically to uniform and shader storage
+     * blocks using the std140 layout, except that the base alignment and
+     * stride of arrays of scalars and vectors in rule 4 and of structures
+     * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+     */
+    if (this->is_scalar() || this->is_vector())
+       return this->vector_elements * N;
+    if (this->without_array()->is_matrix()) {
+       const struct glsl_type *element_type;
+       const struct glsl_type *vec_type;
+       unsigned int array_len;
+       if (this->is_array()) {
+          element_type = this->fields.array;
+          array_len = this->length;
+       } else {
+          element_type = this;
+          array_len = 1;
+       }
+       if (row_major) {
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->matrix_columns, 1);
+          array_len *= element_type->vector_elements;
+       } else {
+          vec_type = get_instance(element_type->base_type,
+                                  element_type->vector_elements, 1);
+          array_len *= element_type->matrix_columns;
+       }
+       const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                   array_len);
+       return array_type->std430_size(false);
+    }
+    if (this->is_array()) {
+       if (this->fields.array->is_record())
+          return this->length * this->fields.array->std430_size(row_major);
+       else
+          return this->length * this->fields.array->std430_base_alignment(row_major);
+    }
+    if (this->is_record() || this->is_interface()) {
+       unsigned size = 0;
+       unsigned max_align = 0;
+       for (unsigned i = 0; i < this->length; i++) {
+          bool field_row_major = row_major;
+          const enum glsl_matrix_layout matrix_layout =
+             glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+          if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+             field_row_major = true;
+          } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+             field_row_major = false;
+          }
+          const struct glsl_type *field_type = this->fields.structure[i].type;
+          unsigned align = field_type->std430_base_alignment(field_row_major);
+          size = glsl_align(size, align);
+          size += field_type->std430_size(field_row_major);
+          max_align = MAX2(align, max_align);
+       }
+       size = glsl_align(size, max_align);
+       return size;
+    }
+    assert(!"not reached");
+    return -1;
+ }
  
  unsigned
  glsl_type::count_attribute_slots() const
     case GLSL_TYPE_ARRAY:
        return this->length * this->fields.array->count_attribute_slots();
  
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_ATOMIC_UINT:
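
The std430 helpers added above follow the std140 rules, except that rules 4 and 9 drop the round-up to a vec4's base alignment. A minimal sketch of the practical difference, assuming only the std140_size()/std430_size() methods from this change and the public glsl_type::float_type singleton (variable names are illustrative):

    /* float[4]: the std140 array stride is rounded up to 16 bytes,
     * the std430 stride is just the element's own alignment.
     */
    const glsl_type *arr =
       glsl_type::get_array_instance(glsl_type::float_type, 4);
    unsigned s140 = arr->std140_size(false);   /* 4 * 16 = 64 bytes */
    unsigned s430 = arr->std430_size(false);   /* 4 *  4 = 16 bytes */
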
diff --combined src/glsl/glsl_types.h
index 0a8a0b97dc908cb5d5701bed255a2ae5059f69f9,3ec764219de9fdb4ac7bb597a3b484aa216a2c40..b83e1ca3d2cbca3c2f65a41443171a27e700e7e7
@@@ -56,7 -56,6 +56,7 @@@ enum glsl_base_type 
     GLSL_TYPE_IMAGE,
     GLSL_TYPE_ATOMIC_UINT,
     GLSL_TYPE_STRUCT,
 +   GLSL_TYPE_FUNCTION,
     GLSL_TYPE_INTERFACE,
     GLSL_TYPE_ARRAY,
     GLSL_TYPE_VOID,
@@@ -78,7 -77,8 +78,8 @@@ enum glsl_sampler_dim 
  enum glsl_interface_packing {
     GLSL_INTERFACE_PACKING_STD140,
     GLSL_INTERFACE_PACKING_SHARED,
-    GLSL_INTERFACE_PACKING_PACKED
+    GLSL_INTERFACE_PACKING_PACKED,
+    GLSL_INTERFACE_PACKING_STD430
  };
  
  enum glsl_matrix_layout {
@@@ -180,7 -180,7 +181,7 @@@ struct glsl_type 
      */
     union {
        const struct glsl_type *array;            /**< Type of array elements. */
 -      const struct glsl_type *parameters;       /**< Parameters to function. */
 +      struct glsl_function_param *parameters;   /**< Parameters to function. */
        struct glsl_struct_field *structure;      /**< List of struct fields. */
     } fields;
  
      */
     static const glsl_type *get_subroutine_instance(const char *subroutine_name);
  
 +   /**
 +    * Get the instance of a function type
 +    */
 +   static const glsl_type *get_function_instance(const struct glsl_type *return_type,
 +                                                 const glsl_function_param *parameters,
 +                                                 unsigned num_params);
 +
     /**
      * Get the type resulting from a multiplication of \p type_a * \p type_b
      */
      */
     unsigned component_slots() const;
  
+    /**
+     * Calculate offset between the base location of the struct in
+     * uniform storage and a struct member.
+     * For the initial call, length is the index of the member to find the
+     * offset for.
+     */
+    unsigned record_location_offset(unsigned length) const;
     /**
      * Calculate the number of unique values from glGetUniformLocation for the
      * elements of the type.
      */
     unsigned std140_size(bool row_major) const;
  
+    /**
+     * Alignment in bytes of the start of this type in a std430 shader
+     * storage block.
+     */
+    unsigned std430_base_alignment(bool row_major) const;
+    /**
+     * Calculate array stride in bytes of this type in a std430 shader storage
+     * block.
+     */
+    unsigned std430_array_stride(bool row_major) const;
+    /**
+     * Size in bytes of this type in a std430 shader storage block.
+     *
+     * Note that this is not GL_BUFFER_SIZE
+     */
+    unsigned std430_size(bool row_major) const;
     /**
      * \brief Can this type be implicitly converted to another?
      *
        return t;
     }
  
+    /**
+     * Return the total number of elements in an array including the elements
+     * in arrays of arrays.
+     */
+    unsigned arrays_of_arrays_size() const
+    {
+       if (!is_array())
+          return 0;
+       unsigned size = length;
+       const glsl_type *base_type = fields.array;
+       while (base_type->is_array()) {
+          size = size * base_type->length;
+          base_type = base_type->fields.array;
+       }
+       return size;
+    }
     /**
      * Return the amount of atomic counter storage required for a type.
      */
@@@ -697,10 -736,6 +744,10 @@@ private
     glsl_type(const glsl_struct_field *fields, unsigned num_fields,
             enum glsl_interface_packing packing, const char *name);
  
 +   /** Constructor for function types */
 +   glsl_type(const glsl_type *return_type,
 +             const glsl_function_param *params, unsigned num_params);
 +
     /** Constructor for array types */
     glsl_type(const glsl_type *array, unsigned length);
  
     /** Hash table containing the known subroutine types. */
     static struct hash_table *subroutine_types;
  
 +   /** Hash table containing the known function types. */
 +   static struct hash_table *function_types;
 +
     static bool record_key_compare(const void *a, const void *b);
     static unsigned record_key_hash(const void *key);
  
     /*@}*/
  };
  
 +#undef DECL_TYPE
 +#undef STRUCT_TYPE
 +#endif /* __cplusplus */
 +
  struct glsl_struct_field {
     const struct glsl_type *type;
     const char *name;
      */
     int stream;
  
 -
+    /**
+     * Image qualifiers, applicable to buffer variables defined in shader
+     * storage buffer objects (SSBOs)
+     */
+    unsigned image_read_only:1;
+    unsigned image_write_only:1;
+    unsigned image_coherent:1;
+    unsigned image_volatile:1;
+    unsigned image_restrict:1;
 +#ifdef __cplusplus
     glsl_struct_field(const struct glsl_type *_type, const char *_name)
        : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
          sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
     {
        /* empty */
     }
 +#endif
 +};
 +
 +struct glsl_function_param {
 +   const struct glsl_type *type;
 +
 +   bool in;
 +   bool out;
  };
  
  static inline unsigned int
@@@ -830,4 -860,8 +887,4 @@@ glsl_align(unsigned int a, unsigned in
     return (a + align - 1) / align * align;
  }
  
 -#undef DECL_TYPE
 -#undef STRUCT_TYPE
 -#endif /* __cplusplus */
 -
  #endif /* GLSL_TYPES_H */
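
The new GLSL_TYPE_FUNCTION machinery above is driven by glsl_function_param together with get_function_instance(). A hypothetical usage sketch, assuming only the declarations added in this header (the parameter values are illustrative):

    /* Build a type for something like "void f(in float a, out int b)". */
    glsl_function_param params[2];
    params[0].type = glsl_type::float_type;
    params[0].in = true;
    params[0].out = false;
    params[1].type = glsl_type::int_type;
    params[1].in = false;
    params[1].out = true;
    const glsl_type *fn_type =
       glsl_type::get_function_instance(glsl_type::void_type, params, 2);

Repeated requests with the same return type and parameter list return the same cached glsl_type, mirroring how record and interface instances are deduplicated.
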
diff --combined src/glsl/ir_clone.cpp
index 4edf70dba5d96ea0fb617a12eb8f37bb453c4765,a2cd672d5d61878a6216213119a5f73b553de393..d6b06eeec87e2d2939e0460d0a717e04eeb5be28
@@@ -222,6 -222,7 +222,7 @@@ ir_texture::clone(void *mem_ctx, struc
     case ir_tex:
     case ir_lod:
     case ir_query_levels:
+    case ir_texture_samples:
        break;
     case ir_txb:
        new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
@@@ -363,7 -364,6 +364,7 @@@ ir_constant::clone(void *mem_ctx, struc
        return c;
     }
  
 +   case GLSL_TYPE_FUNCTION:
     case GLSL_TYPE_SAMPLER:
     case GLSL_TYPE_IMAGE:
     case GLSL_TYPE_ATOMIC_UINT:
index f238513f17451b09ad97d4ae6c8fbaf474435e71,34830829b4a63de3a7fba63856b5f1973ed93aae..b0a4ec3f2fbfa93060a3a6d13828af6b47483251
@@@ -48,7 -48,7 +48,7 @@@ static unsigne
  get_uniform_block_index(const gl_shader_program *shProg,
                          const char *uniformBlockName)
  {
-    for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+    for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
        if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName))
         return i;
     }
@@@ -88,7 -88,6 +88,7 @@@ copy_constant_to_storage(union gl_const
        case GLSL_TYPE_IMAGE:
        case GLSL_TYPE_ATOMIC_UINT:
        case GLSL_TYPE_INTERFACE:
 +      case GLSL_TYPE_FUNCTION:
        case GLSL_TYPE_VOID:
        case GLSL_TYPE_SUBROUTINE:
        case GLSL_TYPE_ERROR:
index e3597e57e7331c5dfda9ca4d440217598bc56657,f03a107a9015ca59fb9c9223c83df31236b3f916..5ee6ff1d854ec35a6eb212ded4117f4ca52795cd
@@@ -44,7 -44,7 +44,7 @@@ namespace 
  class nir_visitor : public ir_visitor
  {
  public:
 -   nir_visitor(nir_shader *shader);
 +   nir_visitor(nir_shader *shader, gl_shader *sh);
     ~nir_visitor();
  
     virtual void visit(ir_variable *);
@@@ -84,8 -84,6 +84,8 @@@ private
  
     bool supports_ints;
  
 +   struct gl_shader *sh;
 +
     nir_shader *shader;
     nir_function_impl *impl;
     exec_list *cf_node_list;
@@@ -135,19 -133,23 +135,24 @@@ glsl_to_nir(struct gl_shader *sh, cons
  {
     nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
  
 -   nir_visitor v1(shader);
 +   nir_visitor v1(shader, sh);
     nir_function_visitor v2(&v1);
     v2.run(sh->ir);
     visit_exec_list(sh->ir, &v1);
  
+    nir_lower_outputs_to_temporaries(shader);
+    shader->gs.vertices_out = sh->Geom.VerticesOut;
+    shader->gs.invocations = sh->Geom.Invocations;
     return shader;
  }
  
 -nir_visitor::nir_visitor(nir_shader *shader)
 +nir_visitor::nir_visitor(nir_shader *shader, gl_shader *sh)
  {
     this->supports_ints = shader->options->native_integers;
     this->shader = shader;
 +   this->sh = sh;
     this->is_global = true;
     this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);
@@@ -327,7 -329,6 +332,7 @@@ nir_visitor::visit(ir_variable *ir
     }
  
     var->data.index = ir->data.index;
 +   var->data.descriptor_set = 0;
     var->data.binding = ir->data.binding;
     /* XXX Get rid of buffer_index */
     var->data.atomic.buffer_index = ir->data.binding;
@@@ -646,11 -647,34 +651,34 @@@ nir_visitor::visit(ir_call *ir
           op = nir_intrinsic_memory_barrier;
        } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
           op = nir_intrinsic_image_size;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+          op = nir_intrinsic_image_samples;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+          op = nir_intrinsic_store_ssbo;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+          op = nir_intrinsic_load_ssbo;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_add;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_and;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_or;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_xor;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_min;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_max;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_exchange;
+       } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+          op = nir_intrinsic_ssbo_atomic_comp_swap;
        } else {
           unreachable("not reached");
        }
  
        nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+       nir_dest *dest = &instr->dest;
  
        switch (op) {
        case nir_intrinsic_atomic_counter_read_var:
              (ir_dereference *) ir->actual_parameters.get_head();
           instr->variables[0] = evaluate_deref(&instr->instr, param);
           nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
        }
        case nir_intrinsic_image_load:
        case nir_intrinsic_image_atomic_xor:
        case nir_intrinsic_image_atomic_exchange:
        case nir_intrinsic_image_atomic_comp_swap:
+       case nir_intrinsic_image_samples:
        case nir_intrinsic_image_size: {
           nir_ssa_undef_instr *instr_undef =
              nir_ssa_undef_instr_create(shader, 1);
                                info->dest_components, NULL);
           }
  
-          if (op == nir_intrinsic_image_size)
+          if (op == nir_intrinsic_image_size ||
+              op == nir_intrinsic_image_samples) {
+             nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
              break;
+          }
  
           /* Set the address argument, extending the coordinate vector to four
            * components.
              instr->src[3] = evaluate_rvalue((ir_dereference *)param);
              param = param->get_next();
           }
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
        }
        case nir_intrinsic_memory_barrier:
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
           break;
+       case nir_intrinsic_store_ssbo: {
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+          assert(write_mask);
+          /* Check if we need the indirect version */
+          ir_constant *const_offset = offset->as_constant();
+          if (!const_offset) {
+             op = nir_intrinsic_store_ssbo_indirect;
+             ralloc_free(instr);
+             instr = nir_intrinsic_instr_create(shader, op);
+             instr->src[2] = evaluate_rvalue(offset);
+             instr->const_index[0] = 0;
+             dest = &instr->dest;
+          } else {
+             instr->const_index[0] = const_offset->value.u[0];
+          }
+          instr->const_index[1] = write_mask->value.u[0];
+          instr->src[0] = evaluate_rvalue(val);
+          instr->num_components = val->type->vector_elements;
+          instr->src[1] = evaluate_rvalue(block);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          break;
+       }
+       case nir_intrinsic_load_ssbo: {
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+          param = param->get_next();
+          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+          /* Check if we need the indirect version */
+          ir_constant *const_offset = offset->as_constant();
+          if (!const_offset) {
+             op = nir_intrinsic_load_ssbo_indirect;
+             ralloc_free(instr);
+             instr = nir_intrinsic_instr_create(shader, op);
+             instr->src[1] = evaluate_rvalue(offset);
+             instr->const_index[0] = 0;
+             dest = &instr->dest;
+          } else {
+             instr->const_index[0] = const_offset->value.u[0];
+          }
+          instr->src[0] = evaluate_rvalue(block);
+          const glsl_type *type = ir->return_deref->var->type;
+          instr->num_components = type->vector_elements;
+          /* Setup destination register */
+          nir_ssa_dest_init(&instr->instr, &instr->dest,
+                            type->vector_elements, NULL);
+          /* Insert the created nir instruction now since in the case of boolean
+           * result we will need to emit another instruction after it
+           */
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          /*
+           * In SSBOs and UBOs, a true boolean value is any non-zero value, but we
+           * consider a true boolean to be ~0. Fix this up with a != 0
+           * comparison.
+           */
+          if (type->base_type == GLSL_TYPE_BOOL) {
+             nir_load_const_instr *const_zero =
+                nir_load_const_instr_create(shader, 1);
+             const_zero->value.u[0] = 0;
+             nir_instr_insert_after_cf_list(this->cf_node_list,
+                                            &const_zero->instr);
+             nir_alu_instr *load_ssbo_compare =
+                nir_alu_instr_create(shader, nir_op_ine);
+             load_ssbo_compare->src[0].src.is_ssa = true;
+             load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+             load_ssbo_compare->src[1].src.is_ssa = true;
+             load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+             for (unsigned i = 0; i < type->vector_elements; i++)
+                load_ssbo_compare->src[1].swizzle[i] = 0;
+             nir_ssa_dest_init(&load_ssbo_compare->instr,
+                               &load_ssbo_compare->dest.dest,
+                               type->vector_elements, NULL);
+             load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+             nir_instr_insert_after_cf_list(this->cf_node_list,
+                                            &load_ssbo_compare->instr);
+             dest = &load_ssbo_compare->dest.dest;
+          }
+          break;
+       }
+       case nir_intrinsic_ssbo_atomic_add:
+       case nir_intrinsic_ssbo_atomic_min:
+       case nir_intrinsic_ssbo_atomic_max:
+       case nir_intrinsic_ssbo_atomic_and:
+       case nir_intrinsic_ssbo_atomic_or:
+       case nir_intrinsic_ssbo_atomic_xor:
+       case nir_intrinsic_ssbo_atomic_exchange:
+       case nir_intrinsic_ssbo_atomic_comp_swap: {
+          int param_count = ir->actual_parameters.length();
+          assert(param_count == 3 || param_count == 4);
+          /* Block index */
+          exec_node *param = ir->actual_parameters.get_head();
+          ir_instruction *inst = (ir_instruction *) param;
+          instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+          /* Offset */
+          param = param->get_next();
+          inst = (ir_instruction *) param;
+          instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+          /* data1 parameter (this is always present) */
+          param = param->get_next();
+          inst = (ir_instruction *) param;
+          instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+          /* data2 parameter (only with atomic_comp_swap) */
+          if (param_count == 4) {
+             assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+             param = param->get_next();
+             inst = (ir_instruction *) param;
+             instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+          }
+          /* Atomic result */
+          assert(ir->return_deref);
+          nir_ssa_dest_init(&instr->instr, &instr->dest,
+                            ir->return_deref->type->vector_elements, NULL);
+          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+          break;
+       }
        default:
           unreachable("not reached");
        }
  
-       nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
        if (ir->return_deref) {
           nir_intrinsic_instr *store_instr =
              nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
  
           store_instr->variables[0] =
              evaluate_deref(&store_instr->instr, ir->return_deref);
-          store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa);
+          store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
  
           nir_instr_insert_after_cf_list(this->cf_node_list,
                                          &store_instr->instr);
@@@ -922,7 -1092,8 +1096,8 @@@ nir_visitor::add_instr(nir_instr *instr
  {
     nir_dest *dest = get_instr_dest(instr);
  
-    nir_ssa_dest_init(instr, dest, num_components, NULL);
+    if (dest)
+       nir_ssa_dest_init(instr, dest, num_components, NULL);
  
     nir_instr_insert_after_cf_list(this->cf_node_list, instr);
     this->result = instr;
@@@ -1002,11 -1173,9 +1177,10 @@@ nir_visitor::visit(ir_expression *ir
        } else {
           op = nir_intrinsic_load_ubo_indirect;
        }
 +
        nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
        load->num_components = ir->type->vector_elements;
        load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
-       load->const_index[1] = 1; /* number of vec4's */
        load->src[0] = evaluate_rvalue(ir->operands[0]);
        if (!const_index)
           load->src[1] = evaluate_rvalue(ir->operands[1]);
           unreachable("not reached");
        }
        break;
+    case ir_unop_get_buffer_size: {
+       nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+          this->shader,
+          nir_intrinsic_get_buffer_size);
+       load->num_components = ir->type->vector_elements;
+       load->src[0] = evaluate_rvalue(ir->operands[0]);
+       add_instr(&load->instr, ir->type->vector_elements);
+       return;
+    }
     case ir_binop_add:
     case ir_binop_sub:
     case ir_binop_mul:
@@@ -1722,6 -1901,11 +1906,11 @@@ nir_visitor::visit(ir_texture *ir
        num_srcs = 0;
        break;
  
+    case ir_texture_samples:
+       op = nir_texop_texture_samples;
+       num_srcs = 0;
+       break;
     default:
        unreachable("not reached");
     }
diff --combined src/glsl/nir/nir.h
index 7b188fa8ed19354284792aca1cea1bb9ad16e059,d0c7b04a49f655d464a24d9fceb65e02d5a87cb4..19a4a361a25d287e9e1f69a5fe834d35aa4ce616
@@@ -278,6 -278,7 +278,7 @@@ typedef struct 
         *   - Fragment shader output: one of the values from \c gl_frag_result.
         *   - Uniforms: Per-stage uniform slot number for default uniform block.
         *   - Uniforms: Index within the uniform block definition for UBO members.
+        *   - Non-UBO Uniforms: uniform slot number.
         *   - Other: This field is not currently used.
         *
         * If the variable is a uniform, shader input, or shader output, and the
         */
        int index;
  
 +      /**
 +       * Descriptor set binding for sampler or UBO.
 +       */
 +      int descriptor_set;
 +
        /**
         * Initial binding point for a sampler or UBO.
         *
@@@ -422,6 -418,9 +423,9 @@@ typedef struct nir_instr 
     nir_instr_type type;
     struct nir_block *block;
  
+    /** generic instruction index. */
+    unsigned index;
     /* A temporary for optimization and analysis passes to use for storing
      * flags.  For instance, DCE uses this to store the "dead/live" info.
      */
@@@ -520,11 -519,7 +524,11 @@@ typedef struct nir_src 
     bool is_ssa;
  } nir_src;
  
 -#define NIR_SRC_INIT (nir_src) { { NULL } }
 +#ifdef __cplusplus
 +#  define NIR_SRC_INIT nir_src()
 +#else
 +#  define NIR_SRC_INIT (nir_src) { { NULL } }
 +#endif
  
  #define nir_foreach_use(reg_or_ssa_def, src) \
     list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
@@@ -547,11 -542,7 +551,11 @@@ typedef struct 
     bool is_ssa;
  } nir_dest;
  
 -#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
 +#ifdef __cplusplus
 +#  define NIR_DEST_INIT nir_dest()
 +#else
 +#  define NIR_DEST_INIT (nir_dest) { { { NULL } } }
 +#endif
  
  #define nir_foreach_def(reg, dest) \
     list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
@@@ -593,8 -584,8 +597,8 @@@ nir_dest_for_reg(nir_register *reg
     return dest;
  }
  
- void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
- void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+ void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
  
  typedef struct {
     nir_src src;
@@@ -643,10 -634,6 +647,6 @@@ typedef struct 
     unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
  } nir_alu_dest;
  
- void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
- void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
-                        void *mem_ctx);
  typedef enum {
     nir_type_invalid = 0, /* Not a valid type */
     nir_type_float,
@@@ -715,6 -702,11 +715,11 @@@ typedef struct nir_alu_instr 
     nir_alu_src src[];
  } nir_alu_instr;
  
+ void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                       nir_alu_instr *instr);
+ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                        nir_alu_instr *instr);
  /* is this source channel used? */
  static inline bool
  nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
@@@ -790,15 -782,6 +795,15 @@@ NIR_DEFINE_CAST(nir_deref_as_var, nir_d
  NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
  NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
  
 +/** Returns the tail of a deref chain */
 +static inline nir_deref *
 +nir_deref_tail(nir_deref *deref)
 +{
 +   while (deref->child)
 +      deref = deref->child;
 +   return deref;
 +}
 +
  typedef struct {
     nir_instr instr;
  
@@@ -966,7 -949,8 +971,8 @@@ typedef enum 
     nir_texop_txs,                /**< Texture size */
     nir_texop_lod,                /**< Texture lod query */
     nir_texop_tg4,                /**< Texture gather */
-    nir_texop_query_levels       /**< Texture levels query */
+    nir_texop_query_levels,       /**< Texture levels query */
+    nir_texop_texture_samples,    /**< Texture samples query */
  } nir_texop;
  
  typedef struct {
     /* gather component selector */
     unsigned component : 2;
  
 +   /* The descriptor set containing this texture */
 +   unsigned sampler_set;
 +
     /** The sampler index
      *
      * If this texture instruction has a nir_tex_src_sampler_offset source,
@@@ -1041,6 -1022,7 +1047,7 @@@ nir_tex_instr_dest_size(nir_tex_instr *
     case nir_texop_lod:
        return 2;
  
+    case nir_texop_texture_samples:
     case nir_texop_query_levels:
        return 1;
  
@@@ -1444,7 -1426,6 +1451,7 @@@ typedef struct nir_function 
  
  typedef struct nir_shader_compiler_options {
     bool lower_ffma;
 +   bool lower_fdiv;
     bool lower_flrp;
     bool lower_fpow;
     bool lower_fsat;
     /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
     bool lower_scmp;
  
+    /* Does the native fdot instruction replicate its result for four
+     * components?  If so, then opt_algebraic_late will turn all fdotN
+     * instructions into fdot_replicatedN instructions.
+     */
+    bool fdot_replicates;
+    /** lowers ffract to fsub+ffloor: */
+    bool lower_ffract;
     /**
      * Does the driver support real 32-bit integers?  (Otherwise, integers
      * are simulated by floats.)
  } nir_shader_compiler_options;
  
  typedef struct nir_shader {
-    /** hash table of name -> uniform nir_variable */
+    /** list of uniforms (nir_variable) */
     struct exec_list uniforms;
  
-    /** hash table of name -> input nir_variable */
+    /** list of inputs (nir_variable) */
     struct exec_list inputs;
  
-    /** hash table of name -> output nir_variable */
+    /** list of outputs (nir_variable) */
     struct exec_list outputs;
  
     /** Set of driver-specific options for the shader.
      */
     const struct nir_shader_compiler_options *options;
  
-    /** list of global variables in the shader */
+    /** list of global variables in the shader (nir_variable) */
     struct exec_list globals;
  
-    /** list of system value variables in the shader */
+    /** list of system value variables in the shader (nir_variable) */
     struct exec_list system_values;
  
     struct exec_list functions; /** < list of nir_function */
  
     /** The shader stage, such as MESA_SHADER_VERTEX. */
     gl_shader_stage stage;
+    struct {
+       /** The maximum number of vertices the geometry shader might write. */
+       unsigned vertices_out;
+       /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+       unsigned invocations;
+    } gs;
  } nir_shader;
  
  #define nir_foreach_overload(shader, overload)                        \
@@@ -1597,17 -1595,6 +1621,17 @@@ typedef struct 
     };
  } nir_cursor;
  
 +static inline nir_block *
 +nir_cursor_current_block(nir_cursor cursor)
 +{
 +   if (cursor.option == nir_cursor_before_instr ||
 +       cursor.option == nir_cursor_after_instr) {
 +      return cursor.instr->block;
 +   } else {
 +      return cursor.block;
 +   }
 +}
 +
  static inline nir_cursor
  nir_before_block(nir_block *block)
  {
@@@ -1761,12 -1748,14 +1785,14 @@@ bool nir_srcs_equal(nir_src src1, nir_s
  void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
  void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
  void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+                             nir_dest new_dest);
  
  void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                         unsigned num_components, const char *name);
  void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                        unsigned num_components, const char *name);
- void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
  
  /* visits basic blocks in source-code order */
  typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
@@@ -1774,15 -1763,20 +1800,20 @@@ bool nir_foreach_block(nir_function_imp
                         void *state);
  bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
                                 void *state);
+ bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                                   void *state);
  
  /* If the following CF node is an if, this function returns that if.
   * Otherwise, it returns NULL.
   */
  nir_if *nir_block_get_following_if(nir_block *block);
  
+ nir_loop *nir_block_get_following_loop(nir_block *block);
  void nir_index_local_regs(nir_function_impl *impl);
  void nir_index_global_regs(nir_shader *shader);
  void nir_index_ssa_defs(nir_function_impl *impl);
+ unsigned nir_index_instrs(nir_function_impl *impl);
  
  void nir_index_blocks(nir_function_impl *impl);
  
@@@ -1810,17 -1804,19 +1841,21 @@@ void nir_dump_dom_frontier(nir_shader *
  void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
  void nir_dump_cfg(nir_shader *shader, FILE *fp);
  
- void nir_split_var_copies(nir_shader *shader);
+ int nir_gs_count_vertices(nir_shader *shader);
+ bool nir_split_var_copies(nir_shader *shader);
  
  void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
  void nir_lower_var_copies(nir_shader *shader);
  
- void nir_lower_global_vars_to_local(nir_shader *shader);
+ bool nir_lower_global_vars_to_local(nir_shader *shader);
  
- void nir_lower_locals_to_regs(nir_shader *shader);
+ bool nir_lower_locals_to_regs(nir_shader *shader);
+ void nir_lower_outputs_to_temporaries(nir_shader *shader);
  
 +void nir_lower_outputs_to_temporaries(nir_shader *shader);
 +
  void nir_assign_var_locations(struct exec_list *var_list,
                                unsigned *size,
                                int (*type_size)(const struct glsl_type *));
@@@ -1829,9 -1825,10 +1864,10 @@@ void nir_lower_io(nir_shader *shader
                    int (*type_size)(const struct glsl_type *));
  void nir_lower_vars_to_ssa(nir_shader *shader);
  
- void nir_remove_dead_variables(nir_shader *shader);
+ bool nir_remove_dead_variables(nir_shader *shader);
  
- void nir_lower_vec_to_movs(nir_shader *shader);
+ void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+ bool nir_lower_vec_to_movs(nir_shader *shader);
  void nir_lower_alu_to_scalar(nir_shader *shader);
  void nir_lower_load_const_to_scalar(nir_shader *shader);
  
@@@ -1839,16 -1836,58 +1875,59 @@@ void nir_lower_phis_to_scalar(nir_shade
  
  void nir_lower_samplers(nir_shader *shader,
                          const struct gl_shader_program *shader_program);
 +void nir_lower_samplers_for_vk(nir_shader *shader);
  
- void nir_lower_system_values(nir_shader *shader);
- void nir_lower_tex_projector(nir_shader *shader);
+ bool nir_lower_system_values(nir_shader *shader);
+ typedef struct nir_lower_tex_options {
+    /**
+     * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+     * sampler types a texture projector is lowered.
+     */
+    unsigned lower_txp;
+    /**
+     * If true, lower rect textures to 2D, using txs to fetch the
+     * texture dimensions and dividing the texture coords by the
+     * texture dims to normalize.
+     */
+    bool lower_rect;
+    /**
+     * To emulate certain texture wrap modes, this can be used
+     * to saturate the specified tex coord to [0.0, 1.0].  The
+     * bits are according to sampler #, ie. if, for example:
+     *
+     * bits are according to sampler #, i.e. if, for example:
+     *
+     * is true, then the s coord for sampler n is saturated.
+     *
+     * Note that clamping must happen *after* projector lowering
+     * so any projected texture sample instruction with a clamped
+     * coordinate gets automatically lowered, regardless of the
+     * 'lower_txp' setting.
+     */
+    unsigned saturate_s;
+    unsigned saturate_t;
+    unsigned saturate_r;
+ } nir_lower_tex_options;
+ void nir_lower_tex(nir_shader *shader,
+                    const nir_lower_tex_options *options);
  void nir_lower_idiv(nir_shader *shader);
  
+ void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+ void nir_lower_two_sided_color(nir_shader *shader);
  void nir_lower_atomics(nir_shader *shader);
  void nir_lower_to_source_mods(nir_shader *shader);
  
- void nir_normalize_cubemap_coords(nir_shader *shader);
+ bool nir_lower_gs_intrinsics(nir_shader *shader);
+ bool nir_normalize_cubemap_coords(nir_shader *shader);
  
  void nir_live_variables_impl(nir_function_impl *impl);
  bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@@ -1876,6 -1915,8 +1955,8 @@@ bool nir_opt_cse(nir_shader *shader)
  bool nir_opt_dce_impl(nir_function_impl *impl);
  bool nir_opt_dce(nir_shader *shader);
  
+ bool nir_opt_dead_cf(nir_shader *shader);
  void nir_opt_gcm(nir_shader *shader);
  
  bool nir_opt_peephole_select(nir_shader *shader);
@@@ -1887,6 -1928,9 +1968,9 @@@ bool nir_opt_undef(nir_shader *shader)
  
  void nir_sweep(nir_shader *shader);
  
+ nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+ gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
  #ifdef __cplusplus
  } /* extern "C" */
  #endif
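
Several of the declarations above are new helpers for pass writers. For example, nir_index_instrs() presumably numbers the instructions of an impl via the new nir_instr::index field, so passes can compare instruction order cheaply. A hedged sketch of how a pass might call it, assuming a nir_shader *shader is already in hand:

    /* Assign a running index to every instruction in each implemented
     * overload before running an analysis that needs ordering.
     */
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
          nir_index_instrs(overload->impl);
    }
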
index 295a209b4e653972009a3d457a66a739f22153ae,624329d0a8a6fe244e2af18eb190853f48dd351a..bf45d0373c08a441d43207b76d7359ae60caf01e
@@@ -76,21 -76,36 +76,36 @@@ nir_build_imm(nir_builder *build, unsig
  static inline nir_ssa_def *
  nir_imm_float(nir_builder *build, float x)
  {
-    nir_const_value v = { { .f = {x, 0, 0, 0} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.f[0] = x;
     return nir_build_imm(build, 1, v);
  }
  
  static inline nir_ssa_def *
  nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
  {
-    nir_const_value v = { { .f = {x, y, z, w} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.f[0] = x;
+    v.f[1] = y;
+    v.f[2] = z;
+    v.f[3] = w;
     return nir_build_imm(build, 4, v);
  }
  
  static inline nir_ssa_def *
  nir_imm_int(nir_builder *build, int x)
  {
-    nir_const_value v = { { .i = {x, 0, 0, 0} } };
+    nir_const_value v;
+    memset(&v, 0, sizeof(v));
+    v.i[0] = x;
     return nir_build_imm(build, 1, v);
  }
  
@@@ -173,6 -188,24 +188,24 @@@ nir_##op(nir_builder *build, nir_ssa_de
  
  #include "nir_builder_opcodes.h"
  
+ static inline nir_ssa_def *
+ nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+ {
+    switch (num_components) {
+    case 4:
+       return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+    case 3:
+       return nir_vec3(build, comp[0], comp[1], comp[2]);
+    case 2:
+       return nir_vec2(build, comp[0], comp[1]);
+    case 1:
+       return comp[0];
+    default:
+       unreachable("bad component count");
+       return NULL;
+    }
+ }
  /**
   * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
   */
@@@ -216,23 -249,13 +249,30 @@@ nir_swizzle(nir_builder *build, nir_ssa
                       nir_imov_alu(build, alu_src, num_components);
  }
  
 +/* Selects the right fdot given the number of components in each source. */
 +static inline nir_ssa_def *
 +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
 +{
 +   assert(src0->num_components == src1->num_components);
 +   switch (src0->num_components) {
 +   case 1: return nir_fmul(build, src0, src1);
 +   case 2: return nir_fdot2(build, src0, src1);
 +   case 3: return nir_fdot3(build, src0, src1);
 +   case 4: return nir_fdot4(build, src0, src1);
 +   default:
 +      unreachable("bad component size");
 +   }
 +
 +   return NULL;
 +}
 +
+ static inline nir_ssa_def *
+ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+ {
+    unsigned swizzle[4] = {c, c, c, c};
+    return nir_swizzle(b, def, swizzle, 1, false);
+ }
  /**
   * Turns a nir_src into a nir_ssa_def * so it can be passed to
   * nir_build_alu()-based builder calls.
@@@ -251,4 -274,31 +291,31 @@@ nir_ssa_for_src(nir_builder *build, nir
     return nir_imov_alu(build, alu, num_components);
  }
  
+ static inline nir_ssa_def *
+ nir_load_var(nir_builder *build, nir_variable *var)
+ {
+    const unsigned num_components = glsl_get_vector_elements(var->type);
+    nir_intrinsic_instr *load =
+       nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+    load->num_components = num_components;
+    load->variables[0] = nir_deref_var_create(load, var);
+    nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+    nir_builder_instr_insert(build, &load->instr);
+    return &load->dest.ssa;
+ }
+ static inline void
+ nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value)
+ {
+    const unsigned num_components = glsl_get_vector_elements(var->type);
+    nir_intrinsic_instr *store =
+       nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+    store->num_components = num_components;
+    store->variables[0] = nir_deref_var_create(store, var);
+    store->src[0] = nir_src_for_ssa(value);
+    nir_builder_instr_insert(build, &store->instr);
+ }
  #endif /* NIR_BUILDER_H */
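
The nir_builder additions above (nir_load_var, nir_store_var, nir_channel, nir_vec and the nir_fdot dispatcher) shorten small lowering passes. A hypothetical snippet, assuming an already-initialized nir_builder b plus a vec4 nir_variable *src and a scalar float nir_variable *result:

    nir_ssa_def *v = nir_load_var(&b, src);   /* vec4 load_var            */
    nir_ssa_def *d = nir_fdot(&b, v, v);      /* dispatches to nir_fdot4  */
    nir_store_var(&b, result, d);             /* scalar store_var         */
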
index 1f24f9f677dbed015373256b495e95fa9b97767d,649312fec51e6652065c3540a4d1ac7e0f7ae449..e02779e157bbbe6002857fd645e00102392850b7
@@@ -61,6 -61,13 +61,13 @@@ INTRINSIC(interp_var_at_sample, 1, ARR(
  INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
+ /*
+  * Ask the driver for the size of a given buffer. It takes the buffer index
+  * as source.
+  */
+ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  /*
   * a barrier is an intrinsic with no inputs/outputs but which can't be moved
   * around/optimized in general
@@@ -79,9 -86,30 +86,30 @@@ BARRIER(memory_barrier
  /** A conditional discard, with a single boolean source. */
  INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
  
+ /**
+  * Basic Geometry Shader intrinsics.
+  *
+  * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
+  * index, which is the stream ID to write to.
+  *
+  * end_primitive implements GLSL's EndPrimitive() built-in.
+  */
  INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
  INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
  
+ /**
+  * Geometry Shader intrinsics with a vertex count.
+  *
+  * Alternatively, drivers may implement these intrinsics, and use
+  * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+  *
+  * These maintain a count of the number of vertices emitted, as an additional
+  * unsigned integer source.
+  */
+ INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
  /*
   * Atomic counters
   *
@@@ -125,29 -153,60 +153,61 @@@ INTRINSIC(image_atomic_exchange, 3, ARR
  INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
  INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
            NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+           NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
- #define SYSTEM_VALUE(name, components) \
-    INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+ /*
+  * SSBO atomic intrinsics
+  *
+  * All of the SSBO atomic memory operations read a value from memory,
+  * compute a new value using one of the operations below, write the new
+  * value to memory, and return the original value read.
+  *
+  * All operations take 3 sources except CompSwap that takes 4. These
+  * sources represent:
+  *
+  * 0: The SSBO buffer index.
+  * 1: The offset into the SSBO buffer of the variable that the atomic
+  *    operation will operate on.
+  * 2: The data parameter to the atomic function (i.e. the value to add
+  *    in ssbo_atomic_add, etc).
+  * 3: For CompSwap only: the second data parameter.
+  */
+ INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+ #define SYSTEM_VALUE(name, components, num_indices) \
+    INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
     NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  
- SYSTEM_VALUE(front_face, 1)
- SYSTEM_VALUE(vertex_id, 1)
- SYSTEM_VALUE(vertex_id_zero_base, 1)
- SYSTEM_VALUE(base_vertex, 1)
- SYSTEM_VALUE(instance_id, 1)
- SYSTEM_VALUE(sample_id, 1)
- SYSTEM_VALUE(sample_pos, 2)
- SYSTEM_VALUE(sample_mask_in, 1)
- SYSTEM_VALUE(invocation_id, 1)
+ SYSTEM_VALUE(front_face, 1, 0)
+ SYSTEM_VALUE(vertex_id, 1, 0)
+ SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+ SYSTEM_VALUE(base_vertex, 1, 0)
+ SYSTEM_VALUE(instance_id, 1, 0)
+ SYSTEM_VALUE(sample_id, 1, 0)
+ SYSTEM_VALUE(sample_pos, 2, 0)
+ SYSTEM_VALUE(sample_mask_in, 1, 0)
+ SYSTEM_VALUE(invocation_id, 1, 0)
+ SYSTEM_VALUE(local_invocation_id, 3, 0)
+ SYSTEM_VALUE(work_group_id, 3, 0)
+ SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+ SYSTEM_VALUE(num_work_groups, 3, 0)
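For reference, expanding the SYSTEM_VALUE() macro by hand shows the two shapes used above:

   SYSTEM_VALUE(sample_pos, 2, 0) expands to
      INTRINSIC(load_sample_pos, 0, ARR(), true, 2, 0, 0,
                NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
   i.e. a load with no sources, a 2-component destination and no const indices,
   while SYSTEM_VALUE(user_clip_plane, 4, 1) expands the same way but with a
   4-component destination and one const index (the clip-plane slot noted above).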
  
  /*
   * The format of the indices depends on the type of the load.  For uniforms,
   * the first index is the base address and the second index is an offset that
   * should be added to the base address.  (This way you can determine in the
   * back-end which variable is being accessed even in an array.)  For inputs,
 - * the one and only index corresponds to the attribute slot.  UBO loads also
 - * have a single index which is the base address to load from.
 + * the one and only index corresponds to the attribute slot.  UBO loads
 + * have two indices: the first is the descriptor set and the second is
 + * the base address to load from.
   *
   * UBO loads have a (possibly constant) source which is the UBO buffer index.
   * For each type of load, the _indirect variant has one additional source
               true, 0, 0, indices, flags)
  
  LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
  LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
- /* LOAD(ssbo, 1, 0) */
+ LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
  
  /*
   * Stores work the same way as loads, except now the first register input is
   * the value or array to store and the optional second input is the indirect
-  * offset.
+  * offset. SSBO stores are similar, but they accept an extra source for the
+  * block index and an extra index with the writemask to use.
   */
  
- #define STORE(name, num_indices, flags) \
-    INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
-    INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
-              num_indices, flags) \
+ #define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
+    INTRINSIC(store_##name, 1 + extra_srcs, \
+              ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
+              false, 0, 0, 1 + extra_indices, flags) \
+    INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
+              ARR(0, 1, extra_srcs_size, extra_srcs_size), \
+              false, 0, 0, 1 + extra_indices, flags)
  
- STORE(output, 1, 0)
- /* STORE(ssbo, 2, 0) */
+ STORE(output, 0, 0, 0, 0)
+ STORE(ssbo, 1, 1, 1, 0)
  
- LAST_INTRINSIC(store_output_indirect)
+ LAST_INTRINSIC(store_ssbo_indirect)
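Expanding the reworked STORE() macro by hand shows what the two uses declare:

   STORE(output, 0, 0, 0, 0) expands to
      INTRINSIC(store_output, 1, ARR(0, 0, 0, 0), false, 0, 0, 1, 0)
      INTRINSIC(store_output_indirect, 2, ARR(0, 1, 0, 0), false, 0, 0, 1, 0)
   and STORE(ssbo, 1, 1, 1, 0) expands to
      INTRINSIC(store_ssbo, 2, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)
      INTRINSIC(store_ssbo_indirect, 3, ARR(0, 1, 1, 1), false, 0, 0, 2, 0)
   so an SSBO store carries the stored value plus the extra block-index source
   (plus the indirect offset in the _indirect variant) and a second const index
   holding the writemask described in the comment above.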
index 0000000000000000000000000000000000000000,58ea0db4e0f12c85ffdd62d435c1973f71d4a47d..33cd9c8b0cfc5b62ac66ce7fe21fc8a60da9bde1
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,188 +1,258 @@@
+ /*
+  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+  * Copyright © 2014 Intel Corporation
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice (including the next
+  * paragraph) shall be included in all copies or substantial portions of the
+  * Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+  * DEALINGS IN THE SOFTWARE.
+  */
+ #include "nir.h"
+ #include "nir_builder.h"
+ #include "../program.h"
+ #include "program/hash_table.h"
+ #include "ir_uniform.h"
+ #include "main/compiler.h"
+ #include "main/mtypes.h"
+ #include "program/prog_parameter.h"
+ #include "program/program.h"
++static void
++add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect)
++{
++   /* First, we have to resize the array of texture sources */
++   nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
++                                         instr->num_srcs + 1);
++
++   for (unsigned i = 0; i < instr->num_srcs; i++) {
++      new_srcs[i].src_type = instr->src[i].src_type;
++      nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src);
++   }
++
++   ralloc_free(instr->src);
++   instr->src = new_srcs;
++
++   /* Now we can go ahead and move the source over to being a
++    * first-class texture source.
++    */
++   instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
++   instr->num_srcs++;
++   nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src,
++                         indirect);
++}
++
+ /* Calculate the sampler index based on array indices and also
+  * calculate the base uniform location for struct members.
+  */
+ static void
+ calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+                      unsigned *array_elements, nir_ssa_def **indirect,
+                      nir_builder *b, unsigned *location)
+ {
+    if (tail->child == NULL)
+       return;
+    switch (tail->child->deref_type) {
+    case nir_deref_type_array: {
+       nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+       assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+       calc_sampler_offsets(tail->child, instr, array_elements,
+                            indirect, b, location);
+       instr->sampler_index += deref_array->base_offset * *array_elements;
+       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+          nir_ssa_def *mul =
+             nir_imul(b, nir_imm_int(b, *array_elements),
+                      nir_ssa_for_src(b, deref_array->indirect, 1));
+          nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+                                NIR_SRC_INIT);
+          if (*indirect) {
+             *indirect = nir_iadd(b, *indirect, mul);
+          } else {
+             *indirect = mul;
+          }
+       }
+       *array_elements *= glsl_get_length(tail->type);
+        break;
+    }
+    case nir_deref_type_struct: {
+       nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+       *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+       calc_sampler_offsets(tail->child, instr, array_elements,
+                            indirect, b, location);
+       break;
+    }
+    default:
+       unreachable("Invalid deref type");
+       break;
+    }
+ }
+ static void
+ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+               gl_shader_stage stage, nir_builder *builder)
+ {
+    if (instr->sampler == NULL)
+       return;
+    instr->sampler_index = 0;
+    unsigned location = instr->sampler->var->data.location;
+    unsigned array_elements = 1;
+    nir_ssa_def *indirect = NULL;
+    builder->cursor = nir_before_instr(&instr->instr);
+    calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                         &indirect, builder, &location);
+    if (indirect) {
+       /* First, we have to resize the array of texture sources */
+       nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                             instr->num_srcs + 1);
+       for (unsigned i = 0; i < instr->num_srcs; i++) {
+          new_srcs[i].src_type = instr->src[i].src_type;
+          nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                             &instr->src[i].src);
+       }
+       ralloc_free(instr->src);
+       instr->src = new_srcs;
+       /* Now we can go ahead and move the source over to being a
+        * first-class texture source.
+        */
+       instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+       instr->num_srcs++;
+       nir_instr_rewrite_src(&instr->instr,
+                             &instr->src[instr->num_srcs - 1].src,
+                             nir_src_for_ssa(indirect));
+       instr->sampler_array_size = array_elements;
+    }
+    if (location > shader_program->NumUniformStorage - 1 ||
+        !shader_program->UniformStorage[location].sampler[stage].active) {
+       assert(!"cannot return a sampler");
+       return;
+    }
+    instr->sampler_index +=
+       shader_program->UniformStorage[location].sampler[stage].index;
+    instr->sampler = NULL;
+ }
+ typedef struct {
+    nir_builder builder;
+    const struct gl_shader_program *shader_program;
+    gl_shader_stage stage;
+ } lower_state;
+ static bool
+ lower_block_cb(nir_block *block, void *_state)
+ {
+    lower_state *state = (lower_state *) _state;
+    nir_foreach_instr(block, instr) {
+       if (instr->type == nir_instr_type_tex) {
+          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+          lower_sampler(tex_instr, state->shader_program, state->stage,
+                        &state->builder);
+       }
+    }
+    return true;
+ }
+ static void
+ lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+            gl_shader_stage stage)
+ {
+    lower_state state;
+    nir_builder_init(&state.builder, impl);
+    state.shader_program = shader_program;
+    state.stage = stage;
+    nir_foreach_block(impl, lower_block_cb, &state);
+ }
+ void
+ nir_lower_samplers(nir_shader *shader,
+                    const struct gl_shader_program *shader_program)
+ {
+    nir_foreach_overload(shader, overload) {
+       if (overload->impl)
+          lower_impl(overload->impl, shader_program, shader->stage);
+    }
+ }
++
++static bool
++lower_samplers_for_vk_block(nir_block *block, void *data)
++{
++   nir_foreach_instr(block, instr) {
++      if (instr->type != nir_instr_type_tex)
++         continue;
++
++      nir_tex_instr *tex = nir_instr_as_tex(instr);
++
++      assert(tex->sampler);
++
++      tex->sampler_set = tex->sampler->var->data.descriptor_set;
++      tex->sampler_index = tex->sampler->var->data.binding;
++
++      if (tex->sampler->deref.child) {
++         assert(tex->sampler->deref.child->deref_type == nir_deref_type_array);
++         nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child);
++
++         /* Only one-level arrays are allowed in Vulkan */
++         assert(arr->deref.child == NULL);
++
++         tex->sampler_index += arr->base_offset;
++         if (arr->deref_array_type == nir_deref_array_type_indirect) {
++            add_indirect_to_tex(tex, arr->indirect);
++            nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT);
++
++            tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type);
++         }
++      }
++
++      tex->sampler = NULL;
++   }
++
++   return true;
++}
++
++void
++nir_lower_samplers_for_vk(nir_shader *shader)
++{
++   nir_foreach_overload(shader, overload) {
++      if (overload->impl) {
++         nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL);
++      }
++   }
++}
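To make the Vulkan path concrete, here is a hedged walk-through of what lower_samplers_for_vk_block() does for a hypothetical declaration; the GLSL layout below is an assumption used only for illustration:

/* Assume: layout(set = 1, binding = 3) uniform sampler2D tex[4];
 * sampled as tex[i], where i is not a compile-time constant.
 *
 *   tex->sampler_set   = 1  (from var->data.descriptor_set)
 *   tex->sampler_index = 3 + arr->base_offset  (binding plus the constant
 *                                               part of the array index)
 *   The non-constant part of i becomes an extra nir_tex_src_sampler_offset
 *   source via add_indirect_to_tex(), sampler_array_size is set to 4 so the
 *   backend knows the array length, and tex->sampler is cleared because the
 *   deref has been fully lowered.
 */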
index 880408bc3678c5b0bb4a704b7dd6ca57f16e0e2f,585e5e0ae98cd585861000154a91c64c80a40716..cafbd6d66a531edb47eaf88128cfbc4f8f9b5820
@@@ -66,7 -66,6 +66,7 @@@ optimizations = 
     (('imul', a, 1), a),
     (('fmul', a, -1.0), ('fneg', a)),
     (('imul', a, -1), ('ineg', a)),
 +   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
     (('ffma', 0.0, a, b), b),
     (('ffma', a, 0.0, b), b),
     (('ffma', a, b, 0.0), ('fmul', a, b)),
@@@ -77,6 -76,7 +77,7 @@@
     (('flrp', a, a, b), a),
     (('flrp', 0.0, a, b), ('fmul', a, b)),
     (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+    (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
     (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
     (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
     (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
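For reference, the identities behind the new and neighbouring rules are simple algebra: fdiv(a, b) = a * rcp(b), ffract(a) = a - floor(a), and flrp(a, b, c) = a*(1 - c) + b*c = a + c*(b - a); the last form is why the two fadd patterns above are recognized as flrp when the backend keeps flrp, and why lower_flrp expands it back into fadd/fmul/fsub.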
@@@ -241,6 -241,10 +242,10 @@@ late_optimizations = 
     (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
     (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
     (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+    (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+    (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+    (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+    (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
  ]
  
  print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
index 5c163b59819a182b71f49c58a55ffa4adc17e9f7,d2ea58a8b7c6d9f781d41739d769134c5a72b2e5..f583178caf426dfeb305faf3fbcecedf35085cb8
  struct split_var_copies_state {
     void *mem_ctx;
     void *dead_ctx;
+    bool progress;
  };
  
 -static nir_deref *
 -get_deref_tail(nir_deref *deref)
 -{
 -   while (deref->child != NULL)
 -      deref = deref->child;
 -   return deref;
 -}
 -
  /* Recursively constructs deref chains to split a copy instruction into
   * multiple (if needed) copy instructions with full-length deref chains.
   * External callers of this function should pass the tail and head of the
@@@ -190,6 -199,7 +191,7 @@@ split_var_copy_instr(nir_intrinsic_inst
            * remove the old one later.
            */
           nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+          state->progress = true;
        }
        break;
  
@@@ -217,8 -227,8 +219,8 @@@ split_var_copies_block(nir_block *block
  
        nir_deref *dest_head = &intrinsic->variables[0]->deref;
        nir_deref *src_head = &intrinsic->variables[1]->deref;
 -      nir_deref *dest_tail = get_deref_tail(dest_head);
 -      nir_deref *src_tail = get_deref_tail(src_head);
 +      nir_deref *dest_tail = nir_deref_tail(dest_head);
 +      nir_deref *src_tail = nir_deref_tail(src_head);
  
        switch (glsl_get_base_type(src_tail->type)) {
        case GLSL_TYPE_ARRAY:
     return true;
  }
  
- static void
+ static bool
  split_var_copies_impl(nir_function_impl *impl)
  {
     struct split_var_copies_state state;
  
     state.mem_ctx = ralloc_parent(impl);
     state.dead_ctx = ralloc_context(NULL);
+    state.progress = false;
  
     nir_foreach_block(impl, split_var_copies_block, &state);
  
     ralloc_free(state.dead_ctx);
+    return state.progress;
  }
  
- void
+ bool
  nir_split_var_copies(nir_shader *shader)
  {
+    bool progress = false;
     nir_foreach_overload(shader, overload) {
        if (overload->impl)
-          split_var_copies_impl(overload->impl);
+          progress = split_var_copies_impl(overload->impl) || progress;
     }
+    return progress;
  }
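Since the pass now reports whether it changed anything, callers can fold it into the usual NIR progress loop; a minimal sketch, with the surrounding loop and any other passes assumed:

   bool progress;
   do {
      progress = false;
      progress |= nir_split_var_copies(shader);
      /* ... other progress-reporting passes ... */
   } while (progress);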
index 69cfac18587d461b866f63403305a21c81f5b8e3,da9807f0e629b167af6ebae07c35b3a291b879fe..01f0e9b5abcc6a17bdd7fd47423a8349609f1175
@@@ -70,18 -70,6 +70,18 @@@ glsl_get_struct_field(const glsl_type *
     return type->fields.structure[index].type;
  }
  
 +const glsl_type *
 +glsl_get_function_return_type(const glsl_type *type)
 +{
 +   return type->fields.parameters[0].type;
 +}
 +
 +const glsl_function_param *
 +glsl_get_function_param(const glsl_type *type, unsigned index)
 +{
 +   return &type->fields.parameters[index + 1];
 +}
 +
  const struct glsl_type *
  glsl_get_column_type(const struct glsl_type *type)
  {
@@@ -124,20 -112,13 +124,27 @@@ glsl_get_struct_elem_name(const struct 
     return type->fields.structure[index].name;
  }
  
 +glsl_sampler_dim
 +glsl_get_sampler_dim(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return (glsl_sampler_dim)type->sampler_dimensionality;
 +}
 +
 +glsl_base_type
 +glsl_get_sampler_result_type(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return (glsl_base_type)type->sampler_type;
 +}
 +
+ unsigned
+ glsl_get_record_location_offset(const struct glsl_type *type,
+                                 unsigned length)
+ {
+    return type->record_location_offset(length);
+ }
  bool
  glsl_type_is_void(const glsl_type *type)
  {
@@@ -156,50 -137,12 +163,50 @@@ glsl_type_is_scalar(const struct glsl_t
     return type->is_scalar();
  }
  
 +bool
 +glsl_type_is_vector_or_scalar(const struct glsl_type *type)
 +{
 +   return type->is_vector() || type->is_scalar();
 +}
 +
  bool
  glsl_type_is_matrix(const struct glsl_type *type)
  {
     return type->is_matrix();
  }
  
 +bool
 +glsl_type_is_array(const struct glsl_type *type)
 +{
 +   return type->is_array();
 +}
 +
 +bool
 +glsl_type_is_struct(const struct glsl_type *type)
 +{
 +   return type->is_record() || type->is_interface();
 +}
 +
 +bool
 +glsl_type_is_sampler(const struct glsl_type *type)
 +{
 +   return type->is_sampler();
 +}
 +
 +bool
 +glsl_sampler_type_is_shadow(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return type->sampler_shadow;
 +}
 +
 +bool
 +glsl_sampler_type_is_array(const struct glsl_type *type)
 +{
 +   assert(glsl_type_is_sampler(type));
 +   return type->sampler_array;
 +}
 +
  const glsl_type *
  glsl_void_type(void)
  {
@@@ -213,9 -156,9 +220,9 @@@ glsl_float_type(void
  }
  
  const glsl_type *
 -glsl_vec4_type(void)
 +glsl_int_type(void)
  {
 -   return glsl_type::vec4_type;
 +   return glsl_type::int_type;
  }
  
  const glsl_type *
@@@ -224,68 -167,8 +231,68 @@@ glsl_uint_type(void
     return glsl_type::uint_type;
  }
  
 +const glsl_type *
 +glsl_bool_type(void)
 +{
 +   return glsl_type::bool_type;
 +}
 +
 +const glsl_type *
 +glsl_vec4_type(void)
 +{
 +   return glsl_type::vec4_type;
 +}
 +
 +const glsl_type *
 +glsl_scalar_type(enum glsl_base_type base_type)
 +{
 +   return glsl_type::get_instance(base_type, 1, 1);
 +}
 +
 +const glsl_type *
 +glsl_vector_type(enum glsl_base_type base_type, unsigned components)
 +{
 +   assert(components > 1 && components <= 4);
 +   return glsl_type::get_instance(base_type, components, 1);
 +}
 +
 +const glsl_type *
 +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns)
 +{
 +   assert(rows > 1 && rows <= 4 && columns >= 1 && columns <= 4);
 +   return glsl_type::get_instance(base_type, rows, columns);
 +}
 +
  const glsl_type *
  glsl_array_type(const glsl_type *base, unsigned elements)
  {
     return glsl_type::get_array_instance(base, elements);
  }
 +
 +const glsl_type *
 +glsl_struct_type(const glsl_struct_field *fields,
 +                 unsigned num_fields, const char *name)
 +{
 +   return glsl_type::get_record_instance(fields, num_fields, name);
 +}
 +
 +const struct glsl_type *
 +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array,
 +                  enum glsl_base_type base_type)
 +{
 +   return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type);
 +}
 +
 +const glsl_type *
 +glsl_function_type(const glsl_type *return_type,
 +                   const glsl_function_param *params, unsigned num_params)
 +{
 +   return glsl_type::get_function_instance(return_type, params, num_params);
 +}
 +
 +const glsl_type *
 +glsl_transposed_type(const struct glsl_type *type)
 +{
 +   return glsl_type::get_instance(type->base_type, type->matrix_columns,
 +                                  type->vector_elements);
 +                                  type->vector_elements);
 +}
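A quick illustration of the new C wrappers (caller context assumed, not code from this series):

   /* Build a few glsl_type instances from C, e.g. inside a NIR pass: */
   const struct glsl_type *v3   = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   const struct glsl_type *m3   = glsl_matrix_type(GLSL_TYPE_FLOAT, 3, 3);
   const struct glsl_type *arr8 = glsl_array_type(v3, 8);   /* vec3[8] */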
diff --combined src/glsl/nir/nir_types.h
index a2fa7934e16c5125235004e47daf3d1bb7d61e86,49d6a65e7c4c64ddfd3e1793229365652d972291..1a0cb1fb77482b72f8abb8387f2dad44cc35ed93
@@@ -27,6 -27,8 +27,8 @@@
  
  #pragma once
  
+ #include <stdio.h>
  /* C wrapper around glsl_types.h */
  
  #include "../glsl_types.h"
@@@ -37,8 -39,6 +39,6 @@@ extern "C" 
  struct glsl_type;
  #endif
  
- #include <stdio.h>
  void glsl_print_type(const struct glsl_type *type, FILE *fp);
  void glsl_print_struct(const struct glsl_type *type, FILE *fp);
  
@@@ -49,12 -49,6 +49,12 @@@ const struct glsl_type *glsl_get_array_
  
  const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
  
 +const struct glsl_type *
 +glsl_get_function_return_type(const struct glsl_type *type);
 +
 +const struct glsl_function_param *
 +glsl_get_function_param(const struct glsl_type *type, unsigned index);
 +
  enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
  
  unsigned glsl_get_vector_elements(const struct glsl_type *type);
@@@ -68,44 -62,20 +68,47 @@@ unsigned glsl_get_length(const struct g
  const char *glsl_get_struct_elem_name(const struct glsl_type *type,
                                        unsigned index);
  
 +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type);
 +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
 +
+ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
+                                          unsigned length);
  bool glsl_type_is_void(const struct glsl_type *type);
  bool glsl_type_is_vector(const struct glsl_type *type);
  bool glsl_type_is_scalar(const struct glsl_type *type);
 +bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
  bool glsl_type_is_matrix(const struct glsl_type *type);
 +bool glsl_type_is_array(const struct glsl_type *type);
 +bool glsl_type_is_struct(const struct glsl_type *type);
 +bool glsl_type_is_sampler(const struct glsl_type *type);
 +bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
 +bool glsl_sampler_type_is_array(const struct glsl_type *type);
  
  const struct glsl_type *glsl_void_type(void);
  const struct glsl_type *glsl_float_type(void);
 -const struct glsl_type *glsl_vec4_type(void);
 +const struct glsl_type *glsl_int_type(void);
  const struct glsl_type *glsl_uint_type(void);
 +const struct glsl_type *glsl_bool_type(void);
 +
 +const struct glsl_type *glsl_vec4_type(void);
 +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type);
 +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type,
 +                                         unsigned components);
 +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type,
 +                                         unsigned rows, unsigned columns);
  const struct glsl_type *glsl_array_type(const struct glsl_type *base,
                                          unsigned elements);
 +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields,
 +                                         unsigned num_fields, const char *name);
 +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim,
 +                                          bool is_shadow, bool is_array,
 +                                          enum glsl_base_type base_type);
 +const struct glsl_type * glsl_function_type(const struct glsl_type *return_type,
 +                                            const struct glsl_function_param *params,
 +                                            unsigned num_params);
 +
 +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
  
  #ifdef __cplusplus
  }
index 6ff9553d6fe2f48ad470d611d97e6faaf9908984,ea9334fd7b7f403323ed98c988e83af4ee286027..1af50d6b68de6d0648e1e98788b34fed13b0bcc4
  #include "util/ralloc.h"
  #include "util/strtod.h"
  
 +extern "C" void
 +_mesa_error_no_memory(const char *caller)
 +{
 +   fprintf(stderr, "Mesa error: out of memory in %s\n", caller);
 +}
 +
  void
  _mesa_warning(struct gl_context *ctx, const char *fmt, ...)
  {
@@@ -68,7 -62,7 +68,7 @@@ _mesa_reference_shader(struct gl_contex
  }
  
  void
- _mesa_shader_debug(struct gl_context *, GLenum, GLuint *id,
+ _mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
                     const char *, int)
  {
  }
@@@ -107,7 -101,7 +107,7 @@@ _mesa_clear_shader_program_data(struct 
  
     ralloc_free(shProg->UniformBlocks);
     shProg->UniformBlocks = NULL;
-    shProg->NumUniformBlocks = 0;
+    shProg->NumBufferInterfaceBlocks = 0;
     for (i = 0; i < MESA_SHADER_STAGES; i++) {
        ralloc_free(shProg->UniformBlockStageIndex[i]);
        shProg->UniformBlockStageIndex[i] = NULL;
index 49ff428b13a2eb1cc47a0c6bd53baf427e9b6fc1,8b790fe0bca18fa76c7366d70bf65d455116e3d2..2479182e37049ccafebf061bfcf5de013dba8604
@@@ -194,7 -194,6 +194,6 @@@ enum brw_state_id 
     BRW_STATE_GS_CONSTBUF,
     BRW_STATE_PROGRAM_CACHE,
     BRW_STATE_STATE_BASE_ADDRESS,
-    BRW_STATE_VUE_MAP_VS,
     BRW_STATE_VUE_MAP_GEOM_OUT,
     BRW_STATE_TRANSFORM_FEEDBACK,
     BRW_STATE_RASTERIZER_DISCARD,
     BRW_STATE_SAMPLER_STATE_TABLE,
     BRW_STATE_VS_ATTRIB_WORKAROUNDS,
     BRW_STATE_COMPUTE_PROGRAM,
+    BRW_STATE_CS_WORK_GROUPS,
     BRW_NUM_STATE_BITS
  };
  
  #define BRW_NEW_GS_CONSTBUF             (1ull << BRW_STATE_GS_CONSTBUF)
  #define BRW_NEW_PROGRAM_CACHE           (1ull << BRW_STATE_PROGRAM_CACHE)
  #define BRW_NEW_STATE_BASE_ADDRESS      (1ull << BRW_STATE_STATE_BASE_ADDRESS)
- #define BRW_NEW_VUE_MAP_VS              (1ull << BRW_STATE_VUE_MAP_VS)
  #define BRW_NEW_VUE_MAP_GEOM_OUT        (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
  #define BRW_NEW_TRANSFORM_FEEDBACK      (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
  #define BRW_NEW_RASTERIZER_DISCARD      (1ull << BRW_STATE_RASTERIZER_DISCARD)
  #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
  #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
  #define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+ #define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
  
  struct brw_state_flags {
     /** State update flags signalled by mesa internals */
@@@ -361,12 -361,6 +361,12 @@@ struct brw_stage_prog_data 
        /** @} */
     } binding_table;
  
 +   uint32_t *map_entries;
 +   struct {
 +      uint32_t index_count;
 +      uint32_t *index;
 +   } bind_map[8]; /* MAX_SETS from vulkan/private.h */
 +
     GLuint nr_params;       /**< number of float params/constants */
     GLuint nr_pull_params;
     unsigned nr_image_params;
@@@ -504,6 -498,16 +504,16 @@@ struct brw_cs_prog_data 
     GLuint dispatch_grf_start_reg_16;
     unsigned local_size[3];
     unsigned simd_size;
+    bool uses_barrier;
+    bool uses_num_work_groups;
+    struct {
+       /** @{
+        * surface indices the CS-specific surfaces
+        * surface indices for the CS-specific surfaces
+       uint32_t work_groups_start;
+       /** @} */
+    } binding_table;
  };
  
  /**
@@@ -545,6 -549,17 +555,17 @@@ struct brw_vue_map 
      */
     GLbitfield64 slots_valid;
  
+    /**
+     * Is this VUE map for a separate shader pipeline?
+     *
+     * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+     * without the linker having a chance to dead code eliminate unused varyings.
+     *
+     * This means that we have to use a fixed slot layout, based on the output's
+     * location field, rather than assigning slots in a compact contiguous block.
+     */
+    bool separate;
     /**
      * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
      * not stored in a slot (because they are not written, or because
@@@ -590,7 -605,8 +611,8 @@@ static inline GLuint brw_varying_to_off
  
  void brw_compute_vue_map(const struct brw_device_info *devinfo,
                           struct brw_vue_map *vue_map,
-                          GLbitfield64 slots_valid);
+                          GLbitfield64 slots_valid,
+                          bool separate_shader);
  
  
  /**
@@@ -753,7 -769,8 +775,8 @@@ struct brw_vs_prog_data 
                              12 + /* ubo */                              \
                              BRW_MAX_ABO +                               \
                              BRW_MAX_IMAGES +                            \
-                             2 /* shader time, pull constants */)
+                             2 + /* shader time, pull constants */       \
+                             1 /* cs num work groups */)
  
  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
  
@@@ -787,6 -804,11 +810,11 @@@ struct brw_gs_prog_dat
  
     bool include_primitive_id;
  
+    /**
+     * The number of vertices emitted, if constant - otherwise -1.
+     */
+    int static_vertex_count;
     int invocations;
  
     /**
@@@ -1241,6 -1263,17 +1269,17 @@@ struct brw_contex
        uint32_t draw_params_offset;
     } draw;
  
+    struct {
+       /**
+        * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then this is
+        * an indirect dispatch and num_work_groups_offset is valid. Otherwise,
+        * num_work_groups is set based on glDispatchCompute.
+        */
+       drm_intel_bo *num_work_groups_bo;
+       GLintptr num_work_groups_offset;
+       const GLuint *num_work_groups;
+    } compute;
     struct {
        struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
        struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
        GLuint curbe_offset;
     } curbe;
  
-    /**
-     * Layout of vertex data exiting the vertex shader.
-     *
-     * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
-     */
-    struct brw_vue_map vue_map_vs;
     /**
      * Layout of vertex data exiting the geometry portion of the pipeline.
-     * This comes from the geometry shader if one exists, otherwise from the
-     * vertex shader.
+     * This comes from the last enabled shader stage (GS, DS, or VS).
      *
      * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
      */
  
     int num_atoms[BRW_NUM_PIPELINES];
     const struct brw_tracked_state render_atoms[60];
-    const struct brw_tracked_state compute_atoms[4];
+    const struct brw_tracked_state compute_atoms[7];
  
     /* If (INTEL_DEBUG & DEBUG_BATCH) */
     struct {
@@@ -1784,6 -1809,12 +1815,12 @@@ void brw_create_constant_surface(struc
                                   uint32_t size,
                                   uint32_t *out_offset,
                                   bool dword_pitch);
+ void brw_create_buffer_surface(struct brw_context *brw,
+                                drm_intel_bo *bo,
+                                uint32_t offset,
+                                uint32_t size,
+                                uint32_t *out_offset,
+                                bool dword_pitch);
  void brw_update_buffer_texture_surface(struct gl_context *ctx,
                                         unsigned unit,
                                         uint32_t *surf_offset);
@@@ -2063,11 -2094,6 +2100,6 @@@ void gen8_hiz_exec(struct brw_context *
  
  uint32_t get_hw_prim_for_gl_prim(int mode);
  
- void
- brw_setup_vue_key_clip_info(struct brw_context *brw,
-                             struct brw_vue_prog_key *key,
-                             bool program_uses_clip_distance);
  void
  gen6_upload_push_constants(struct brw_context *brw,
                             const struct gl_program *prog,
index f9dcdc735b338c991e0f094e570c4c83037455b5,393f17ac98c6abb9c8a1e27707d920b3b1798e5a..a8cde20e04583b9f847d577142fb074d92e42016
@@@ -57,7 -57,6 +57,7 @@@
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
  # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
  
 +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */
  #define _3DPRIM_POINTLIST         0x01
  #define _3DPRIM_LINELIST          0x02
  #define _3DPRIM_LINESTRIP         0x03
  #define _3DPRIM_TRIFAN            0x06
  #define _3DPRIM_QUADLIST          0x07
  #define _3DPRIM_QUADSTRIP         0x08
- #define _3DPRIM_LINELIST_ADJ      0x09
- #define _3DPRIM_LINESTRIP_ADJ     0x0A
- #define _3DPRIM_TRILIST_ADJ       0x0B
- #define _3DPRIM_TRISTRIP_ADJ      0x0C
+ #define _3DPRIM_LINELIST_ADJ      0x09 /* G45+ */
+ #define _3DPRIM_LINESTRIP_ADJ     0x0A /* G45+ */
+ #define _3DPRIM_TRILIST_ADJ       0x0B /* G45+ */
+ #define _3DPRIM_TRISTRIP_ADJ      0x0C /* G45+ */
  #define _3DPRIM_TRISTRIP_REVERSE  0x0D
  #define _3DPRIM_POLYGON           0x0E
  #define _3DPRIM_RECTLIST          0x0F
@@@ -78,8 -77,7 +78,8 @@@
  #define _3DPRIM_LINESTRIP_CONT    0x12
  #define _3DPRIM_LINESTRIP_BF      0x13
  #define _3DPRIM_LINESTRIP_CONT_BF 0x14
- #define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+ #define _3DPRIM_TRIFAN_NOSTIPPLE  0x16
 +#endif
  
  /* We use this offset to be able to pass native primitive types in struct
   * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
@@@ -981,6 -979,7 +981,7 @@@ enum opcode 
     SHADER_OPCODE_TG4_LOGICAL,
     SHADER_OPCODE_TG4_OFFSET,
     SHADER_OPCODE_TG4_OFFSET_LOGICAL,
+    SHADER_OPCODE_SAMPLEINFO,
  
     /**
      * Combines multiple sources of size 1 into a larger virtual GRF.
     FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
     FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+    FS_OPCODE_GET_BUFFER_SIZE,
     FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
     FS_OPCODE_DISCARD_JUMP,
     FS_OPCODE_SET_SAMPLE_ID,
     VS_OPCODE_PULL_CONSTANT_LOAD,
     VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
     VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+    VS_OPCODE_GET_BUFFER_SIZE,
     VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
  
     /**
@@@ -1513,6 -1516,7 +1518,7 @@@ enum brw_message_target 
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4      8
  #define GEN5_SAMPLER_MESSAGE_LOD                 9
  #define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+ #define GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO   11
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C    16
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
  #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
  
  #define BRW_URB_OPCODE_WRITE_HWORD  0
  #define BRW_URB_OPCODE_WRITE_OWORD  1
- #define GEN8_URB_OPCODE_SIMD8_WRITE  7
+ #define BRW_URB_OPCODE_READ_HWORD   2
+ #define BRW_URB_OPCODE_READ_OWORD   3
+ #define GEN7_URB_OPCODE_ATOMIC_MOV  4
+ #define GEN7_URB_OPCODE_ATOMIC_INC  5
+ #define GEN8_URB_OPCODE_ATOMIC_ADD  6
+ #define GEN8_URB_OPCODE_SIMD8_WRITE 7
+ #define GEN8_URB_OPCODE_SIMD8_READ  8
  
  #define BRW_URB_SWIZZLE_NONE          0
  #define BRW_URB_SWIZZLE_INTERLEAVE    1
  /* DW3: PS */
  
  #define _3DSTATE_SAMPLER_STATE_POINTERS_VS    0x782B /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_HS    0x782C /* GEN7+ */
+ #define _3DSTATE_SAMPLER_STATE_POINTERS_DS    0x782D /* GEN7+ */
  #define _3DSTATE_SAMPLER_STATE_POINTERS_GS    0x782E /* GEN7+ */
  #define _3DSTATE_SAMPLER_STATE_POINTERS_PS    0x782F /* GEN7+ */
  
  #define GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES                (5*128)
  
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_HS         0x7913 /* GEN7+ */
+ #define _3DSTATE_PUSH_CONSTANT_ALLOC_DS         0x7914 /* GEN7+ */
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
  #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
  # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
  # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK                INTEL_MASK(25, 16)
  # define GEN6_GS_ENABLE                                       (1 << 15)
  
+ /* Gen8+ DW8 */
+ # define GEN8_GS_STATIC_OUTPUT                          (1 << 30)
+ # define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT              16
+ # define GEN8_GS_STATIC_VERTEX_COUNT_MASK               INTEL_MASK(26, 16)
  /* Gen8+ DW9 */
  # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
  # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT                16
  #define GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES          (62*16)
  
  #define _3DSTATE_HS                             0x781B /* GEN7+ */
+ /* DW1 */
+ # define GEN7_HS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+ # define GEN7_HS_SAMPLER_COUNT_SHIFT                    27
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+ # define GEN7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+ # define GEN7_HS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+ # define GEN7_HS_FLOATING_POINT_MODE_ALT                (1 << 16)
+ # define GEN7_HS_MAX_THREADS_SHIFT                      0
+ /* DW2 */
+ # define GEN7_HS_ENABLE                                 (1 << 31)
+ # define GEN7_HS_STATISTICS_ENABLE                      (1 << 29)
+ # define GEN8_HS_MAX_THREADS_SHIFT                      8
+ # define GEN7_HS_INSTANCE_COUNT_MASK                    INTEL_MASK(3, 0)
+ # define GEN7_HS_INSTANCE_COUNT_SHIFT                   0
+ /* DW5 */
+ # define GEN7_HS_SINGLE_PROGRAM_FLOW                    (1 << 27)
+ # define GEN7_HS_VECTOR_MASK_ENABLE                     (1 << 26)
+ # define HSW_HS_ACCESSES_UAV                            (1 << 25)
+ # define GEN7_HS_INCLUDE_VERTEX_HANDLES                 (1 << 24)
+ # define GEN7_HS_DISPATCH_START_GRF_MASK                INTEL_MASK(23, 19)
+ # define GEN7_HS_DISPATCH_START_GRF_SHIFT               19
+ # define GEN7_HS_URB_READ_LENGTH_MASK                   INTEL_MASK(16, 11)
+ # define GEN7_HS_URB_READ_LENGTH_SHIFT                  11
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+ # define GEN7_HS_URB_ENTRY_READ_OFFSET_SHIFT            4
  #define _3DSTATE_TE                             0x781C /* GEN7+ */
+ /* DW1 */
+ # define GEN7_TE_PARTITIONING_SHIFT                     12
+ # define GEN7_TE_OUTPUT_TOPOLOGY_SHIFT                  8
+ # define GEN7_TE_DOMAIN_SHIFT                           4
+ //# define GEN7_TE_MODE_SW                                (1 << 1)
+ # define GEN7_TE_ENABLE                                 (1 << 0)
  #define _3DSTATE_DS                             0x781D /* GEN7+ */
+ /* DW2 */
+ # define GEN7_DS_SINGLE_DOMAIN_POINT_DISPATCH           (1 << 31)
+ # define GEN7_DS_VECTOR_MASK_ENABLE                     (1 << 30)
+ # define GEN7_DS_SAMPLER_COUNT_MASK                     INTEL_MASK(29, 27)
+ # define GEN7_DS_SAMPLER_COUNT_SHIFT                    27
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_MASK         INTEL_MASK(25, 18)
+ # define GEN7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+ # define GEN7_DS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+ # define GEN7_DS_FLOATING_POINT_MODE_ALT                (1 << 16)
+ # define HSW_DS_ACCESSES_UAV                            (1 << 14)
+ /* DW4 */
+ # define GEN7_DS_DISPATCH_START_GRF_MASK                INTEL_MASK(24, 20)
+ # define GEN7_DS_DISPATCH_START_GRF_SHIFT               20
+ # define GEN7_DS_URB_READ_LENGTH_MASK                   INTEL_MASK(17, 11)
+ # define GEN7_DS_URB_READ_LENGTH_SHIFT                  11
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_MASK             INTEL_MASK(9, 4)
+ # define GEN7_DS_URB_ENTRY_READ_OFFSET_SHIFT            4
+ /* DW5 */
+ # define GEN7_DS_MAX_THREADS_SHIFT                      25
+ # define HSW_DS_MAX_THREADS_SHIFT                       21
+ # define GEN7_DS_STATISTICS_ENABLE                      (1 << 10)
+ # define GEN7_DS_SIMD8_DISPATCH_ENABLE                  (1 << 3)
+ # define GEN7_DS_COMPUTE_W_COORDINATE_ENABLE            (1 << 2)
+ # define GEN7_DS_CACHE_DISABLE                          (1 << 1)
+ # define GEN7_DS_ENABLE                                 (1 << 0)
+ /* Gen8+ DW8 */
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK           INTEL_MASK(26, 21)
+ # define GEN8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT          21
+ # define GEN8_DS_URB_OUTPUT_LENGTH_MASK                 INTEL_MASK(20, 16)
+ # define GEN8_DS_URB_OUTPUT_LENGTH_SHIFT                16
+ # define GEN8_DS_USER_CLIP_DISTANCE_MASK                INTEL_MASK(15, 8)
+ # define GEN8_DS_USER_CLIP_DISTANCE_SHIFT               8
+ # define GEN8_DS_USER_CULL_DISTANCE_MASK                INTEL_MASK(7, 0)
+ # define GEN8_DS_USER_CULL_DISTANCE_SHIFT               0
  
  #define _3DSTATE_CLIP                         0x7812 /* GEN6+ */
  /* DW1 */
@@@ -2268,6 -2355,21 +2357,21 @@@ enum brw_pixel_shader_computed_depth_mo
     BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
  };
  
+ enum brw_pixel_shader_coverage_mask_mode {
+    BRW_PSICMS_OFF     = 0, /* PS does not use input coverage masks. */
+    BRW_PSICMS_NORMAL  = 1, /* Input Coverage masks based on outer conservatism
+                             * and factors in SAMPLE_MASK.  If Pixel is
+                             * conservatively covered, all samples are enabled.
+                             */
+    BRW_PSICMS_INNER   = 2, /* Input Coverage masks based on inner conservatism
+                             * and factors in SAMPLE_MASK.  If Pixel is
+                             * conservatively *FULLY* covered, all samples are
+                             * enabled.
+                             */
+    BRW_PCICMS_DEPTH   = 3,
+ };
  #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
  /* DW1 */
  # define GEN8_PSX_PIXEL_SHADER_VALID                    (1 << 31)
  # define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
  # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
  # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)
+ # define GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT     0
  
  enum brw_wm_barycentric_interp_mode {
     BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC               = 0,
  # define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT      0
  # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
  
+ #define MEDIA_CURBE_LOAD                        0x7001
  #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+ /* GEN7 DW4, GEN8+ DW5 */
+ # define MEDIA_CURBE_READ_LENGTH_SHIFT          16
+ # define MEDIA_CURBE_READ_LENGTH_MASK           INTEL_MASK(31, 16)
+ # define MEDIA_CURBE_READ_OFFSET_SHIFT          0
+ # define MEDIA_CURBE_READ_OFFSET_MASK           INTEL_MASK(15, 0)
  /* GEN7 DW5, GEN8+ DW6 */
+ # define MEDIA_BARRIER_ENABLE_SHIFT             21
+ # define MEDIA_BARRIER_ENABLE_MASK              INTEL_MASK(21, 21)
  # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
  # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
  # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
  # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
  #define MEDIA_STATE_FLUSH                       0x7004
  #define GPGPU_WALKER                            0x7105
+ /* GEN7 DW0 */
+ # define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
  /* GEN8+ DW2 */
  # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
  # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
index 16c125d07eebc3e8757a01c344400a8463e699c6,a6a3bb670cae2e4a316a9bcc9b7e51ce26171ad1..65172490da317f34f3898d895589e7047e29e690
@@@ -314,7 -314,7 +314,7 @@@ static const struct brw_device_info brw
     .max_wm_threads = 64 * 6,                        \
     .max_cs_threads = 56,                            \
     .urb = {                                         \
-       .size = 192,                                  \
+       .size = 384,                                  \
        .min_vs_entries = 64,                         \
        .max_vs_entries = 1856,                       \
        .max_hs_entries = 672,                        \
  
  static const struct brw_device_info brw_device_info_skl_gt1 = {
     GEN9_FEATURES, .gt = 1,
+    .urb.size = 192,
  };
  
  static const struct brw_device_info brw_device_info_skl_gt2 = {
@@@ -373,15 -374,3 +374,15 @@@ brw_get_device_info(int devid, int revi
  
     return devinfo;
  }
 +
 +const char *
 +brw_get_device_name(int devid)
 +{
 +   switch (devid) {
 +#undef CHIPSET
 +#define CHIPSET(id, family, name) case id: return name;
 +#include "pci_ids/i965_pci_ids.h"
 +   default:
 +      return NULL;
 +   }
 +}
index 76530a476d6be39a35c562f088c774dfacb592ba,64215ae5a6aa8344111691a9ca7145eec65b9f41..e620301fde7e16f89bfe9995370092cb64649a17
@@@ -42,6 -42,7 +42,7 @@@
  #include "brw_eu.h"
  #include "brw_wm.h"
  #include "brw_fs.h"
+ #include "brw_cs.h"
  #include "brw_cfg.h"
  #include "brw_dead_control_flow.h"
  #include "main/uniforms.h"
@@@ -491,7 -492,6 +492,7 @@@ type_size_scalar(const struct glsl_typ
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_DOUBLE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -797,6 -797,7 +798,7 @@@ fs_inst::regs_read(int arg) cons
        break;
  
     case CS_OPCODE_CS_TERMINATE:
+    case SHADER_OPCODE_BARRIER:
        return 1;
  
     default:
@@@ -878,9 -879,11 +880,11 @@@ fs_visitor::implied_mrf_writes(fs_inst 
     case SHADER_OPCODE_TXL:
     case SHADER_OPCODE_TXS:
     case SHADER_OPCODE_LOD:
+    case SHADER_OPCODE_SAMPLEINFO:
        return 1;
     case FS_OPCODE_FB_WRITE:
        return 2;
+    case FS_OPCODE_GET_BUFFER_SIZE:
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
     case SHADER_OPCODE_GEN4_SCRATCH_READ:
        return 1;
@@@ -1394,6 -1397,9 +1398,9 @@@ fs_visitor::assign_curb_setup(
         }
        }
     }
+    /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
+    this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
  }
  
  void
@@@ -1434,7 -1440,8 +1441,8 @@@ fs_visitor::calculate_urb_setup(
            */
           struct brw_vue_map prev_stage_vue_map;
           brw_compute_vue_map(devinfo, &prev_stage_vue_map,
-                              key->input_slots_valid);
+                              key->input_slots_valid,
+                              shader_prog->SeparateShader);
           int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
           assert(prev_stage_vue_map.num_slots <= first_slot + 32);
           for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@@@ -1508,8 -1515,7 +1516,7 @@@ fs_visitor::assign_urb_setup(
     }
  
     /* Each attribute is 4 setup channels, each of which is half a reg. */
-    this->first_non_payload_grf =
-       urb_start + prog_data->num_varying_inputs * 2;
+    this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
  }
  
  void
@@@ -1524,16 -1530,11 +1531,15 @@@ fs_visitor::assign_vs_urb_setup(
        count++;
  
     /* Each attribute is 4 regs. */
-    this->first_non_payload_grf =
-       payload.num_regs + prog_data->curb_read_length + count * 4;
+    this->first_non_payload_grf += count * 4;
  
     unsigned vue_entries =
        MAX2(count, vs_prog_data->base.vue_map.num_slots);
  
 +   /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
 +    * command).  Each attribute is 16 bytes (4 floats/dwords), so each unit
 +    * fits four attributes.
 +    */
     vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
     vs_prog_data->base.urb_read_length = (count + 1) / 2;
  
  
              inst->src[i].file = HW_REG;
              inst->src[i].fixed_hw_reg =
-                retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+                stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                   inst->src[i].subreg_offset),
+                       inst->exec_size * inst->src[i].stride,
+                       inst->exec_size, inst->src[i].stride);
           }
        }
     }
@@@ -2646,22 -2650,9 +2655,22 @@@ fs_visitor::emit_repclear_shader(
     brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
     int base_mrf = 1;
     int color_mrf = base_mrf + 2;
 +   fs_inst *mov;
  
 -   fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 -                                     fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
 +   if (uniforms == 1) {
 +      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 +                               fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
 +   } else {
 +      struct brw_reg reg =
 +         brw_reg(BRW_GENERAL_REGISTER_FILE,
 +                 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
 +                 BRW_VERTICAL_STRIDE_8,
 +                 BRW_WIDTH_2,
 +                 BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 +
 +      mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
 +                               fs_reg(reg));
 +   }
  
     fs_inst *write;
     if (key->nr_color_regions == 1) {
     assign_curb_setup();
  
     /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
 -   assert(mov->src[0].file == HW_REG);
 -   mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
 +   if (uniforms == 1) {
 +      assert(mov->src[0].file == HW_REG);
 +      mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
 +   }
  }
  
  /**
@@@ -2811,7 -2800,7 +2820,7 @@@ fs_visitor::insert_gen4_pre_send_depend
  {
     int write_len = inst->regs_written;
     int first_write_grf = inst->dst.reg;
-    bool needs_dep[BRW_MAX_MRF];
+    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
     assert(write_len < (int)sizeof(needs_dep) - 1);
  
     memset(needs_dep, false, sizeof(needs_dep));
@@@ -2882,7 -2871,7 +2891,7 @@@ fs_visitor::insert_gen4_post_send_depen
  {
     int write_len = inst->regs_written;
     int first_write_grf = inst->dst.reg;
-    bool needs_dep[BRW_MAX_MRF];
+    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
     assert(write_len < (int)sizeof(needs_dep) - 1);
  
     memset(needs_dep, false, sizeof(needs_dep));
@@@ -3212,7 -3201,8 +3221,8 @@@ fs_visitor::lower_integer_multiplicatio
               * schedule multi-component multiplications much better.
               */
  
-             if (inst->conditional_mod && inst->dst.is_null()) {
+             fs_reg orig_dst = inst->dst;
+             if (orig_dst.is_null() || orig_dst.file == MRF) {
                 inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                    inst->dst.type);
              }
  
              ibld.ADD(dst, low, high);
  
-             if (inst->conditional_mod) {
-                fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+             if (inst->conditional_mod || orig_dst.file == MRF) {
                 set_condmod(inst->conditional_mod,
-                            ibld.MOV(null, inst->dst));
+                            ibld.MOV(orig_dst, inst->dst));
              }
           }
  
@@@ -4749,10 -4738,19 +4758,19 @@@ fs_visitor::setup_cs_payload(
     assert(devinfo->gen >= 7);
  
     payload.num_regs = 1;
+    if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+       const unsigned local_id_dwords =
+          brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+       assert((local_id_dwords & 0x7) == 0);
+       const unsigned local_id_regs = local_id_dwords / 8;
+       payload.local_invocation_id_reg = payload.num_regs;
+       payload.num_regs += local_id_regs;
+    }
  }
  
  void
- fs_visitor::assign_binding_table_offsets()
+ fs_visitor::assign_fs_binding_table_offsets()
  {
     assert(stage == MESA_SHADER_FRAGMENT);
     brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
     assign_common_binding_table_offsets(next_binding_table_offset);
  }
  
+ void
+ fs_visitor::assign_cs_binding_table_offsets()
+ {
+    assert(stage == MESA_SHADER_COMPUTE);
+    brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
+    uint32_t next_binding_table_offset = 0;
+    /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+    prog_data->binding_table.work_groups_start = next_binding_table_offset;
+    next_binding_table_offset++;
+    assign_common_binding_table_offsets(next_binding_table_offset);
+ }
  void
  fs_visitor::calculate_register_pressure()
  {
  void
  fs_visitor::optimize()
  {
+    /* Start by validating the shader we currently have. */
+    validate();
     /* bld is the common builder object pointing at the end of the program we
      * used to translate it into i965 IR.  For the optimization and lowering
      * passes coming next, any code added after the end of the program without
     assign_constant_locations();
     demote_pull_constants();
  
+    validate();
     split_virtual_grfs();
+    validate();
  
  #define OPT(pass, args...) ({                                           \
        pass_num++;                                                       \
           backend_shader::dump_instructions(filename);                   \
        }                                                                 \
                                                                          \
+       validate();                                                       \
+                                                                         \
        progress = progress || this_progress;                             \
        this_progress;                                                    \
     })
     OPT(lower_integer_multiplication);
  
     lower_uniform_pull_constant_loads();
+    validate();
  }
  
  /**
@@@ -4971,8 -4993,7 +5013,8 @@@ fs_visitor::run_vs(gl_clip_plane *clip_
  {
     assert(stage == MESA_SHADER_VERTEX);
  
 -   assign_common_binding_table_offsets(0);
 +   if (prog_data->map_entries == NULL)
 +      assign_common_binding_table_offsets(0);
     setup_vs_payload();
  
     if (shader_time_index >= 0)
@@@ -5011,8 -5032,9 +5053,10 @@@ fs_visitor::run_fs(bool do_rep_send
  
     assert(stage == MESA_SHADER_FRAGMENT);
  
 -   assign_fs_binding_table_offsets();
+    sanity_param_count = prog->Parameters->NumParameters;
-       assign_binding_table_offsets();
 +   if (prog_data->map_entries == NULL)
++      assign_fs_binding_table_offsets();
  
     if (devinfo->gen >= 6)
        setup_payload_gen6();
     else
        wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
  
 -   /* If any state parameters were appended, then ParameterValues could have
 -    * been realloced, in which case the driver uniform storage set up by
 -    * _mesa_associate_uniform_storage() would point to freed memory.  Make
 -    * sure that didn't happen.
 -    */
 -   assert(sanity_param_count == prog->Parameters->NumParameters);
 -
     return !failed;
  }
  
@@@ -5093,7 -5122,7 +5137,7 @@@ fs_visitor::run_cs(
  
     sanity_param_count = prog->Parameters->NumParameters;
  
-    assign_common_binding_table_offsets(0);
+    assign_cs_binding_table_offsets();
  
     setup_cs_payload();
  
@@@ -5141,20 -5170,11 +5185,11 @@@ brw_wm_fs_emit(struct brw_context *brw
                 struct gl_shader_program *prog,
                 unsigned *final_assembly_size)
  {
-    bool start_busy = false;
-    double start_time = 0;
-    if (unlikely(brw->perf_debug)) {
-       start_busy = (brw->batch.last_bo &&
-                     drm_intel_bo_busy(brw->batch.last_bo));
-       start_time = get_time();
-    }
     struct brw_shader *shader = NULL;
     if (prog)
        shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  
 -   if (unlikely(INTEL_DEBUG & DEBUG_WM))
 +   if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir)
        brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
  
     int st_index8 = -1, st_index16 = -1;
     if (simd16_cfg)
        prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
  
-    if (unlikely(brw->perf_debug) && shader) {
-       if (shader->compiled_once)
-          brw_wm_debug_recompile(brw, prog, key);
-       shader->compiled_once = true;
-       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-          perf_debug("FS compile took %.03f ms and stalled the GPU\n",
-                     (get_time() - start_time) * 1000);
-       }
-    }
     return g.get_assembly(final_assembly_size);
  }
  
- extern "C" bool
- brw_fs_precompile(struct gl_context *ctx,
-                   struct gl_shader_program *shader_prog,
-                   struct gl_program *prog)
+ fs_reg *
+ fs_visitor::emit_cs_local_invocation_id_setup()
  {
-    struct brw_context *brw = brw_context(ctx);
-    struct brw_wm_prog_key key;
-    struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
-    struct brw_fragment_program *bfp = brw_fragment_program(fp);
-    bool program_uses_dfdy = fp->UsesDFdy;
-    memset(&key, 0, sizeof(key));
+    assert(stage == MESA_SHADER_COMPUTE);
  
-    if (brw->gen < 6) {
-       if (fp->UsesKill)
-          key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
  
-       if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
-          key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+    struct brw_reg src =
+       brw_vec8_grf(payload.local_invocation_id_reg, 0);
+    src = retype(src, BRW_REGISTER_TYPE_UD);
+    bld.MOV(*reg, src);
+    src.nr += dispatch_width / 8;
+    bld.MOV(offset(*reg, bld, 1), src);
+    src.nr += dispatch_width / 8;
+    bld.MOV(offset(*reg, bld, 2), src);
  
-       /* Just assume depth testing. */
-       key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
-    }
+    return reg;
+ }
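A note on the payload layout this relies on: each component of gl_LocalInvocationID occupies dispatch_width / 8 registers, which is why the source register number is bumped by that amount between the three MOVs. A minimal sketch of the resulting register numbers (illustrative only, with base = payload.local_invocation_id_reg):

     /* SIMD8  (dispatch_width ==  8): x -> g(base), y -> g(base + 1), z -> g(base + 2) */
     /* SIMD16 (dispatch_width == 16): x -> g(base), y -> g(base + 2), z -> g(base + 4) */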
  
-    if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
-                                          BRW_FS_VARYING_INPUT_MASK) > 16)
-       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+ fs_reg *
+ fs_visitor::emit_cs_work_group_id_setup()
+ {
+    assert(stage == MESA_SHADER_COMPUTE);
  
-    brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
  
-    if (fp->Base.InputsRead & VARYING_BIT_POS) {
-       key.drawable_height = ctx->DrawBuffer->Height;
-    }
+    struct brw_reg r0_1(retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+    struct brw_reg r0_6(retype(brw_vec1_grf(0, 6), BRW_REGISTER_TYPE_UD));
+    struct brw_reg r0_7(retype(brw_vec1_grf(0, 7), BRW_REGISTER_TYPE_UD));
  
-    key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
-          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
-          BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+    bld.MOV(*reg, r0_1);
+    bld.MOV(offset(*reg, bld, 1), r0_6);
+    bld.MOV(offset(*reg, bld, 2), r0_7);
  
-    if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-       key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
-                           key.nr_color_regions > 1;
-    }
+    return reg;
+ }
  
-    key.program_string_id = bfp->id;
+ const unsigned *
+ brw_cs_emit(struct brw_context *brw,
+             void *mem_ctx,
+             const struct brw_cs_prog_key *key,
+             struct brw_cs_prog_data *prog_data,
+             struct gl_compute_program *cp,
+             struct gl_shader_program *prog,
+             unsigned *final_assembly_size)
+ {
+    struct brw_shader *shader =
+       (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
  
-    uint32_t old_prog_offset = brw->wm.base.prog_offset;
-    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+    if (unlikely(INTEL_DEBUG & DEBUG_CS))
+       brw_dump_ir("compute", prog, &shader->base, &cp->Base);
  
-    bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+    prog_data->local_size[0] = cp->LocalSize[0];
+    prog_data->local_size[1] = cp->LocalSize[1];
+    prog_data->local_size[2] = cp->LocalSize[2];
+    unsigned local_workgroup_size =
+       cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
  
-    brw->wm.base.prog_offset = old_prog_offset;
-    brw->wm.prog_data = old_prog_data;
+    cfg_t *cfg = NULL;
+    const char *fail_msg = NULL;
  
-    return success;
- }
+    int st_index = -1;
+    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+       st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
  
- void
- brw_setup_tex_for_precompile(struct brw_context *brw,
-                              struct brw_sampler_prog_key_data *tex,
-                              struct gl_program *prog)
- {
-    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
-    unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
-    for (unsigned i = 0; i < sampler_count; i++) {
-       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
-          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
-          tex->swizzles[i] =
-             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+    /* Now the main event: Visit the shader IR and generate our CS IR for it.
+     */
+    fs_visitor v8(brw->intelScreen->compiler, brw,
+                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                  &cp->Base, 8, st_index);
+    if (!v8.run_cs()) {
+       fail_msg = v8.fail_msg;
+    } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+       cfg = v8.cfg;
+       prog_data->simd_size = 8;
+    }
+    fs_visitor v16(brw->intelScreen->compiler, brw,
+                   mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                   &cp->Base, 16, st_index);
+    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
+        !fail_msg && !v8.simd16_unsupported &&
+        local_workgroup_size <= 16 * brw->max_cs_threads) {
+       /* Try a SIMD16 compile */
+       v16.import_uniforms(&v8);
+       if (!v16.run_cs()) {
+          perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+          if (!cfg) {
+             fail_msg =
+                "Couldn't generate SIMD16 program and not "
+                "enough threads for SIMD8";
+          }
        } else {
-          /* Color sampler: assume no swizzling. */
-          tex->swizzles[i] = SWIZZLE_XYZW;
+          cfg = v16.cfg;
+          prog_data->simd_size = 16;
        }
     }
+    if (unlikely(cfg == NULL)) {
+       assert(fail_msg);
+       prog->LinkStatus = false;
+       ralloc_strcat(&prog->InfoLog, fail_msg);
+       _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+                     fail_msg);
+       return NULL;
+    }
+    fs_generator g(brw->intelScreen->compiler, brw,
+                   mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+                   v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+    if (INTEL_DEBUG & DEBUG_CS) {
+       char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
+                                    prog->Label ? prog->Label : "unnamed",
+                                    prog->Name);
+       g.enable_debug(name);
+    }
+    g.generate_code(cfg, prog_data->simd_size);
+    return g.get_assembly(final_assembly_size);
  }
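The SIMD width selected above is gated on whether one full workgroup fits in the available hardware threads: a SIMD-N binary is only usable when local_workgroup_size <= N * max_cs_threads. A quick worked example, assuming a hypothetical device with max_cs_threads = 64:

     /* 16x16x1 workgroup = 256 invocations:                            */
     /*   SIMD8:  256 <= 8 * 64  (512)   -> usable                      */
     /*   SIMD16: 256 <= 16 * 64 (1024)  -> usable, used if it compiles */
     /* 32x32x1 workgroup = 1024 invocations:                           */
     /*   SIMD8:  1024 > 512             -> not enough threads          */
     /*   SIMD16: 1024 <= 1024           -> the only viable width       */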
index da8d47f1c5ec7d5599ddc8898bc5d2770c650b5d,7a965cd5b73328448f900a465f4759f1d1172991..61d0c896b8ea50e9b77076fdf7140ff713106568
  #include "brw_fs.h"
  #include "brw_fs_surface_builder.h"
  #include "brw_nir.h"
  
  using namespace brw;
+ using namespace brw::surface_access;
  
  void
  fs_visitor::emit_nir_code()
@@@ -42,8 -44,7 +44,8 @@@
      */
     nir_setup_inputs(nir);
     nir_setup_outputs(nir);
 -   nir_setup_uniforms(nir);
 +   uniforms = nir->num_uniforms;
 +   //nir_setup_uniforms(nir);
     nir_emit_system_values(nir);
  
     /* get the main function and emit it */
@@@ -338,6 -339,20 +340,20 @@@ emit_system_values_block(nir_block *blo
                                   BRW_REGISTER_TYPE_D));
           break;
  
+       case nir_intrinsic_load_local_invocation_id:
+          assert(v->stage == MESA_SHADER_COMPUTE);
+          reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
+          if (reg->file == BAD_FILE)
+             *reg = *v->emit_cs_local_invocation_id_setup();
+          break;
+       case nir_intrinsic_load_work_group_id:
+          assert(v->stage == MESA_SHADER_COMPUTE);
+          reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
+          if (reg->file == BAD_FILE)
+             *reg = *v->emit_cs_work_group_id_setup();
+          break;
        default:
           break;
        }
@@@ -1437,6 -1452,11 +1453,11 @@@ fs_visitor::nir_emit_intrinsic(const fs
        break;
     }
  
+    case nir_intrinsic_image_samples:
+       /* The driver does not support multi-sampled images. */
+       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
+       break;
     case nir_intrinsic_load_front_face:
        bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
                *emit_frontfacing_interpolation());
     case nir_intrinsic_load_vertex_id:
        unreachable("should be lowered by lower_vertex_id()");
  
-    case nir_intrinsic_load_vertex_id_zero_base: {
-       fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
-       assert(vertex_id.file != BAD_FILE);
-       dest.type = vertex_id.type;
-       bld.MOV(dest, vertex_id);
-       break;
-    }
-    case nir_intrinsic_load_base_vertex: {
-       fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
-       assert(base_vertex.file != BAD_FILE);
-       dest.type = base_vertex.type;
-       bld.MOV(dest, base_vertex);
-       break;
-    }
-    case nir_intrinsic_load_instance_id: {
-       fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
-       assert(instance_id.file != BAD_FILE);
-       dest.type = instance_id.type;
-       bld.MOV(dest, instance_id);
-       break;
-    }
-    case nir_intrinsic_load_sample_mask_in: {
-       fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
-       assert(sample_mask_in.file != BAD_FILE);
-       dest.type = sample_mask_in.type;
-       bld.MOV(dest, sample_mask_in);
+    case nir_intrinsic_load_vertex_id_zero_base:
+    case nir_intrinsic_load_base_vertex:
+    case nir_intrinsic_load_instance_id:
+    case nir_intrinsic_load_sample_mask_in:
+    case nir_intrinsic_load_sample_id: {
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       fs_reg val = nir_system_values[sv];
+       assert(val.file != BAD_FILE);
+       dest.type = val.type;
+       bld.MOV(dest, val);
        break;
     }
  
        break;
     }
  
-    case nir_intrinsic_load_sample_id: {
-       fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
-       assert(sample_id.file != BAD_FILE);
-       dest.type = sample_id.type;
-       bld.MOV(dest, sample_id);
-       break;
-    }
     case nir_intrinsic_load_uniform_indirect:
        has_indirect = true;
        /* fallthrough */
        has_indirect = true;
        /* fallthrough */
     case nir_intrinsic_load_ubo: {
 +      uint32_t set = instr->const_index[0];
        nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
        fs_reg surf_index;
  
        if (const_index) {
 -         surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
 -                             const_index->u[0]);
 +         uint32_t binding = const_index->u[0];
 +
 +         /* FIXME: We should probably assert here, but dota2 seems to hit
 +          * it and we'd like to keep going.
 +          */
 +         if (binding >= stage_prog_data->bind_map[set].index_count)
 +            binding = 0;
 +
 +         surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]);
        } else {
 +         assert(0 && "need more info from the ir for this.");
           /* The block index is not a constant. Evaluate the index expression
            * per-channel and add the base UBO index; we have to select a value
            * from any live channel.
            */
           brw_mark_surface_used(prog_data,
                                 stage_prog_data->binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
        if (has_indirect) {
                                       BRW_REGISTER_TYPE_D),
                   fs_reg(2));
  
 -         unsigned vec4_offset = instr->const_index[0] / 4;
 +         unsigned vec4_offset = instr->const_index[1] / 4;
           for (int i = 0; i < instr->num_components; i++)
              VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
                                         base_offset, vec4_offset + i);
           fs_reg packed_consts = vgrf(glsl_type::float_type);
           packed_consts.type = dest.type;
  
 -         fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
 +         fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15);
           bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
                    surf_index, const_offset_reg);
  
        break;
     }
  
+    case nir_intrinsic_load_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_load_ssbo: {
+       assert(devinfo->gen >= 7);
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[0]);
+       fs_reg surf_index;
+       if (const_uniform_block) {
+          unsigned index = stage_prog_data->binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = fs_reg(index);
+          brw_mark_surface_used(prog_data, index);
+       } else {
+          surf_index = vgrf(glsl_type::uint_type);
+          bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                  fs_reg(stage_prog_data->binding_table.ubo_start));
+          surf_index = bld.emit_uniformize(surf_index);
+          /* Assume this may touch any UBO. It would be nice to provide
+           * a tighter bound, but the array information is already lowered away.
+           */
+          brw_mark_surface_used(prog_data,
+                                stage_prog_data->binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Get the offset to read from */
+       fs_reg offset_reg = vgrf(glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+       }
+       /* Read the vector */
+       for (int i = 0; i < instr->num_components; i++) {
+          fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                                 1 /* dims */, 1 /* size */,
+                                                 BRW_PREDICATE_NONE);
+          read_result.type = dest.type;
+          bld.MOV(dest, read_result);
+          dest = offset(dest, bld, 1);
+          /* Vector components are stored contiguously in memory */
+          if (i < instr->num_components) {
+             if (!has_indirect) {
+                const_offset_bytes += 4;
+                bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+             } else {
+                bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
+             }
+          }
+       }
+       break;
+    }
     case nir_intrinsic_load_input_indirect:
        has_indirect = true;
        /* fallthrough */
        break;
     }
  
+    case nir_intrinsic_store_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_store_ssbo: {
+       assert(devinfo->gen >= 7);
+       /* Block index */
+       fs_reg surf_index;
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[1]);
+       if (const_uniform_block) {
+          unsigned index = stage_prog_data->binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = fs_reg(index);
+          brw_mark_surface_used(prog_data, index);
+       } else {
+          surf_index = vgrf(glsl_type::uint_type);
+          bld.ADD(surf_index, get_nir_src(instr->src[1]),
+                   fs_reg(stage_prog_data->binding_table.ubo_start));
+          surf_index = bld.emit_uniformize(surf_index);
+          brw_mark_surface_used(prog_data,
+                                stage_prog_data->binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Offset */
+       fs_reg offset_reg = vgrf(glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          bld.MOV(offset_reg, get_nir_src(instr->src[2]));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+       }
+       /* Value */
+       fs_reg val_reg = get_nir_src(instr->src[0]);
+       /* Writemask */
+       unsigned writemask = instr->const_index[1];
+       /* Write each component present in the writemask */
+       unsigned skipped_channels = 0;
+       for (int i = 0; i < instr->num_components; i++) {
+          int component_mask = 1 << i;
+          if (writemask & component_mask) {
+             if (skipped_channels) {
+                if (!has_indirect) {
+                   const_offset_bytes += 4 * skipped_channels;
+                   bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+                } else {
+                   bld.ADD(offset_reg, offset_reg,
+                            brw_imm_ud(4 * skipped_channels));
+                }
+                skipped_channels = 0;
+             }
+             emit_untyped_write(bld, surf_index, offset_reg,
+                                offset(val_reg, bld, i),
+                                1 /* dims */, 1 /* size */,
+                                BRW_PREDICATE_NONE);
+          }
+          skipped_channels++;
+       }
+       break;
+    }
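    /* Worked example for the store loop above (illustration only): with a
     * vec4 value and writemask .xyw, one untyped write is emitted per
     * enabled channel.  skipped_channels counts channel slots since the
     * last write, so offset_reg advances by 4 bytes before y and by 8
     * bytes (skipping the disabled z) before w; x, y and w land at byte
     * offsets 0, 4 and 12 respectively. */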
     case nir_intrinsic_store_output_indirect:
        has_indirect = true;
        /* fallthrough */
  
     case nir_intrinsic_barrier:
        emit_barrier();
+       if (stage == MESA_SHADER_COMPUTE)
+          ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
+       break;
+    case nir_intrinsic_load_local_invocation_id:
+    case nir_intrinsic_load_work_group_id: {
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       fs_reg val = nir_system_values[sv];
+       assert(val.file != BAD_FILE);
+       dest.type = val.type;
+       for (unsigned i = 0; i < 3; i++)
+          bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+       break;
+    }
+    case nir_intrinsic_ssbo_atomic_add:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_min:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
+       else
+          nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_max:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr);
+       else
+          nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_and:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_or:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_OR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_xor:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_XOR, instr);
        break;
+    case nir_intrinsic_ssbo_atomic_exchange:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_MOV, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_comp_swap:
+       nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
+       break;
+    case nir_intrinsic_get_buffer_size: {
+       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+       unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+       int reg_width = dispatch_width / 8;
+       assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+       /* Set LOD = 0 */
+       fs_reg source = fs_reg(0);
+       int mlen = 1 * reg_width;
+       fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+                                   BRW_REGISTER_TYPE_UD);
+       bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
+       fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index);
+       fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+                                src_payload, surf_index);
+       inst->header_size = 0;
+       inst->mlen = mlen;
+       bld.emit(inst);
+       break;
+    }
+    case nir_intrinsic_load_num_work_groups: {
+       assert(devinfo->gen >= 7);
+       assert(stage == MESA_SHADER_COMPUTE);
+       struct brw_cs_prog_data *cs_prog_data =
+          (struct brw_cs_prog_data *) prog_data;
+       const unsigned surface =
+          cs_prog_data->binding_table.work_groups_start;
+       cs_prog_data->uses_num_work_groups = true;
+       fs_reg surf_index = fs_reg(surface);
+       brw_mark_surface_used(prog_data, surface);
+       /* Read the 3 GLuint components of gl_NumWorkGroups */
+       for (unsigned i = 0; i < 3; i++) {
+          fs_reg read_result =
+             emit_untyped_read(bld, surf_index,
+                               fs_reg(i << 2),
+                               1 /* dims */, 1 /* size */,
+                               BRW_PREDICATE_NONE);
+          read_result.type = dest.type;
+          bld.MOV(dest, read_result);
+          dest = offset(dest, bld, 1);
+       }
+       break;
+    }
  
     default:
        unreachable("unknown intrinsic");
     }
  }
  
+ void
+ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
+                                  int op, nir_intrinsic_instr *instr)
+ {
+    fs_reg dest;
+    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+       dest = get_nir_dest(instr->dest);
+    fs_reg surface;
+    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+    if (const_surface) {
+       unsigned surf_index = stage_prog_data->binding_table.ubo_start +
+                             const_surface->u[0];
+       surface = fs_reg(surf_index);
+       brw_mark_surface_used(prog_data, surf_index);
+    } else {
+       surface = vgrf(glsl_type::uint_type);
+       bld.ADD(surface, get_nir_src(instr->src[0]),
+               fs_reg(stage_prog_data->binding_table.ubo_start));
+       /* Assume this may touch any UBO. This is the same as we do for other
+        * UBO/SSBO accesses with non-constant surface.
+        */
+       brw_mark_surface_used(prog_data,
+                             stage_prog_data->binding_table.ubo_start +
+                             shader_prog->NumBufferInterfaceBlocks - 1);
+    }
+    fs_reg offset = get_nir_src(instr->src[1]);
+    fs_reg data1 = get_nir_src(instr->src[2]);
+    fs_reg data2;
+    if (op == BRW_AOP_CMPWR)
+       data2 = get_nir_src(instr->src[3]);
+    /* Emit the actual atomic operation */
+    fs_reg atomic_result =
+       surface_access::emit_untyped_atomic(bld, surface, offset,
+                                           data1, data2,
+                                           1 /* dims */, 1 /* rsize */,
+                                           op,
+                                           BRW_PREDICATE_NONE);
+    dest.type = atomic_result.type;
+    bld.MOV(dest, atomic_result);
+ }
  void
  fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
  {
 -   unsigned sampler = instr->sampler_index;
 +   uint32_t set = instr->sampler_set;
 +   uint32_t binding = instr->sampler_index;
 +
 +   assert(binding < stage_prog_data->bind_map[set].index_count);
 +   assert(stage_prog_data->bind_map[set].index[binding] < 1000);
 +
 +   unsigned sampler = stage_prog_data->bind_map[set].index[binding];
     fs_reg sampler_reg(sampler);
  
     /* FINISHME: We're failing to recompile our programs when the sampler is
     case nir_texop_txf_ms: op = ir_txf_ms; break;
     case nir_texop_txl: op = ir_txl; break;
     case nir_texop_txs: op = ir_txs; break;
+    case nir_texop_texture_samples: {
+       fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
+       fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+                                bld.vgrf(BRW_REGISTER_TYPE_D, 1),
+                                sampler_reg);
+       inst->mlen = 1;
+       inst->header_size = 1;
+       inst->base_mrf = -1;
+       return;
+    }
     default:
        unreachable("unknown texture opcode");
     }
@@@ -1913,12 -2174,6 +2190,12 @@@ fs_visitor::nir_emit_jump(const fs_buil
        bld.emit(BRW_OPCODE_CONTINUE);
        break;
     case nir_jump_return:
 +      /* This has to be the last block in the shader.  We don't handle
 +       * early returns.
 +       */
 +      assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL &&
 +             instr->instr.block->cf_node.parent->type == nir_cf_node_function);
 +      break;
     default:
        unreachable("unknown jump");
     }
index 4ad65215756986526d1bc82dbc387584b6fabd4e,0de36cc9af15084945f37511f8d5f3deeba7d9fd..0119a906e9ea1de3c379107bce595b2700443f64
  #include "brw_state.h"
  #include "brw_ff_gs.h"
  
 -
  bool
 -brw_codegen_gs_prog(struct brw_context *brw,
 +brw_compile_gs_prog(struct brw_context *brw,
                      struct gl_shader_program *prog,
                      struct brw_geometry_program *gp,
 -                    struct brw_gs_prog_key *key)
 +                    struct brw_gs_prog_key *key,
 +                    struct brw_gs_compile_output *output)
  {
 -   struct brw_stage_state *stage_state = &brw->gs.base;
     struct brw_gs_compile c;
     memset(&c, 0, sizeof(c));
     c.key = *key;
     c.gp = gp;
  
 +   /* We get the bind map as input in the output struct...*/
 +   c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries;
 +   memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map,
 +          sizeof(c.prog_data.base.base.bind_map));
 +
     c.prog_data.include_primitive_id =
        (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
  
@@@ -66,8 -62,6 +66,6 @@@
     struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
     int param_count = gs->num_uniform_components * 4;
  
-    /* We also upload clip plane data as uniforms */
-    param_count += MAX_CLIP_PLANES * 4;
     param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
  
     c.prog_data.base.base.param =
     c.prog_data.base.base.nr_params = param_count;
     c.prog_data.base.base.nr_image_params = gs->NumImages;
  
+    if (brw->gen >= 8) {
+       c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
+          nir_gs_count_vertices(gp->program.Base.nir);
+    }
     if (brw->gen >= 7) {
        if (gp->program.OutputType == GL_POINTS) {
           /* When the output type is points, the geometry shader may output data
  
     GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
  
-    /* In order for legacy clipping to work, we need to populate the clip
-     * distance varying slots whenever clipping is enabled, even if the vertex
-     * shader doesn't write to gl_ClipDistance.
-     */
-    if (c.key.base.userclip_active) {
-       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
-       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
-    }
     brw_compute_vue_map(brw->intelScreen->devinfo,
-                        &c.prog_data.base.vue_map, outputs_written);
+                        &c.prog_data.base.vue_map, outputs_written,
+                        prog ? prog->SeparateShader : false);
  
     /* Compute the output vertex size.
      *
     c.prog_data.output_topology =
        get_hw_prim_for_gl_prim(gp->program.OutputType);
  
+    /* The GLSL linker will have already matched up GS inputs and the outputs
+     * of prior stages.  The driver does extend VS outputs in some cases, but
+     * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+     * geometry shader support.  So we can safely ignore that.
+     *
+     * For SSO pipelines, we use a fixed VUE map layout based on variable
+     * locations, so we can rely on rendezvous-by-location making this work.
+     *
+     * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+     * written by previous stages and shows up via payload magic.
+     */
+    GLbitfield64 inputs_read =
+       gp->program.Base.InputsRead & ~VARYING_BIT_PRIMITIVE_ID;
     brw_compute_vue_map(brw->intelScreen->devinfo,
-                        &c.input_vue_map, c.key.input_varyings);
+                        &c.input_vue_map, inputs_read,
+                        prog->SeparateShader);
  
     /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
      * need to program a URB read length of ceiling(num_slots / 2).
        return false;
     }
  
 -   /* Scratch space is used for register spilling */
 -   if (c.prog_data.base.base.total_scratch) {
 +   output->mem_ctx = mem_ctx;
 +   output->program = program;
 +   output->program_size = program_size;
 +   memcpy(&output->prog_data, &c.prog_data,
 +          sizeof(output->prog_data));
 +
 +   return true;
 +}
 +
 +bool
 +brw_codegen_gs_prog(struct brw_context *brw,
 +                    struct gl_shader_program *prog,
 +                    struct brw_geometry_program *gp,
 +                    struct brw_gs_prog_key *key)
 +{
 +   struct brw_gs_compile_output output;
 +   struct brw_stage_state *stage_state = &brw->gs.base;
 +
 +   if (!brw_compile_gs_prog(brw, prog, gp, key, &output))
 +      return false;
 +
 +   if (output.prog_data.base.base.total_scratch) {
        brw_get_scratch_bo(brw, &stage_state->scratch_bo,
 -                       c.prog_data.base.base.total_scratch *
 +                       output.prog_data.base.base.total_scratch *
                           brw->max_gs_threads);
     }
  
     brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
 -                    &c.key, sizeof(c.key),
 -                    program, program_size,
 -                    &c.prog_data, sizeof(c.prog_data),
 +                    key, sizeof(*key),
 +                    output.program, output.program_size,
 +                    &output.prog_data, sizeof(output.prog_data),
                      &stage_state->prog_offset, &brw->gs.prog_data);
 -   ralloc_free(mem_ctx);
 +   ralloc_free(output.mem_ctx);
  
     return true;
  }
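With the compile step split out, brw_compile_gs_prog fills a brw_gs_compile_output and returns true on success, so a caller that manages its own program storage can reuse it without the GL cache upload. A minimal sketch of such a caller (hypothetical, mirroring what brw_codegen_gs_prog does above):

     struct brw_gs_compile_output output;
     if (brw_compile_gs_prog(brw, prog, gp, &key, &output)) {
        /* copy output.program / output.program_size and output.prog_data
         * into caller-owned storage, then release the compile memory */
        ralloc_free(output.mem_ctx);
     }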
@@@ -317,8 -302,7 +326,7 @@@ brw_gs_state_dirty(struct brw_context *
     return brw_state_dirty(brw,
                            _NEW_TEXTURE,
                            BRW_NEW_GEOMETRY_PROGRAM |
-                           BRW_NEW_TRANSFORM_FEEDBACK |
-                           BRW_NEW_VUE_MAP_VS);
+                           BRW_NEW_TRANSFORM_FEEDBACK);
  }
  
  static void
@@@ -333,16 -317,11 +341,11 @@@ brw_gs_populate_key(struct brw_context 
  
     memset(key, 0, sizeof(*key));
  
-    key->base.program_string_id = gp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                gp->program.Base.UsesClipDistanceOut);
+    key->program_string_id = gp->id;
  
     /* _NEW_TEXTURE */
     brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
-                                       &key->base.tex);
-    /* BRW_NEW_VUE_MAP_VS */
-    key->input_varyings = brw->vue_map_vs.slots_valid;
+                                       &key->tex);
  }
  
  void
@@@ -361,11 -340,6 +364,6 @@@ brw_upload_gs_prog(struct brw_context *
  
     if (gp == NULL) {
        /* No geometry shader.  Vertex data just passes straight through. */
-       if (brw->ctx.NewDriverState & BRW_NEW_VUE_MAP_VS) {
-          brw->vue_map_geom_out = brw->vue_map_vs;
-          brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-       }
        if (brw->gen == 6 &&
            (brw->ctx.NewDriverState & BRW_NEW_TRANSFORM_FEEDBACK)) {
           gen6_brw_upload_ff_gs_prog(brw);
        (void)success;
     }
     brw->gs.base.prog_data = &brw->gs.prog_data->base.base;
-    if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
-               sizeof(brw->vue_map_geom_out)) != 0) {
-       brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
-       brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
-    }
  }
  
  bool
@@@ -416,12 -384,8 +408,8 @@@ brw_gs_precompile(struct gl_context *ct
  
     memset(&key, 0, sizeof(key));
  
-    brw_vue_setup_prog_key_for_precompile(ctx, &key.base, bgp->id, &gp->Base);
-    /* Assume that the set of varyings coming in from the vertex shader exactly
-     * matches what the geometry shader requires.
-     */
-    key.input_varyings = gp->Base.InputsRead;
+    brw_setup_tex_for_precompile(brw, &key.tex, prog);
+    key.program_string_id = bgp->id;
  
     success = brw_codegen_gs_prog(brw, shader_prog, bgp, &key);
  
index 4c8602a108572265cb1eacedd247599ecaa2b0f8,1d4f6ab2ccd5e257a4c11889f1178d92db1c96c9..35855092dfff4f8cc4f69bd4a96d527d588f4b9f
@@@ -61,6 -61,8 +61,8 @@@ nir_optimize(nir_shader *nir, bool is_s
        nir_validate_shader(nir);
        progress |= nir_opt_constant_folding(nir);
        nir_validate_shader(nir);
+       progress |= nir_opt_dead_cf(nir);
+       nir_validate_shader(nir);
        progress |= nir_opt_remove_phis(nir);
        nir_validate_shader(nir);
        progress |= nir_opt_undef(nir);
@@@ -78,7 -80,11 +80,7 @@@ brw_create_nir(struct brw_context *brw
     struct gl_context *ctx = &brw->ctx;
     const nir_shader_compiler_options *options =
        ctx->Const.ShaderCompilerOptions[stage].NirOptions;
 -   static const nir_lower_tex_options tex_options = {
 -      .lower_txp = ~0,
 -   };
     struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
 -   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
     nir_shader *nir;
  
     /* First, lower the GLSL IR or Mesa IR to NIR */
     }
     nir_validate_shader(nir);
  
 +   brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar);
 +
 +   static GLuint msg_id = 0;
 +   _mesa_gl_debug(&brw->ctx, &msg_id,
 +                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
 +                  MESA_DEBUG_TYPE_OTHER,
 +                  MESA_DEBUG_SEVERITY_NOTIFICATION,
 +                  "%s NIR shader:\n",
 +                  _mesa_shader_stage_to_abbrev(stage));
 +
 +   return nir;
 +}
 +
 +void
 +brw_process_nir(nir_shader *nir,
 +                const struct brw_device_info *devinfo,
 +                const struct gl_shader_program *shader_prog,
 +                gl_shader_stage stage, bool is_scalar)
 +{
 +   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
++   static const nir_lower_tex_options tex_options = {
++      .lower_txp = ~0,
++   };
++
+    if (stage == MESA_SHADER_GEOMETRY) {
+       nir_lower_gs_intrinsics(nir);
+       nir_validate_shader(nir);
+    }
  
     nir_lower_global_vars_to_local(nir);
     nir_validate_shader(nir);
  
-    nir_lower_tex_projector(nir);
+    nir_lower_tex(nir, &tex_options);
     nir_validate_shader(nir);
  
     nir_normalize_cubemap_coords(nir);
  
     if (shader_prog) {
        nir_lower_samplers(nir, shader_prog);
 -      nir_validate_shader(nir);
 +   } else {
 +      nir_lower_samplers_for_vk(nir);
     }
 +   nir_validate_shader(nir);
  
     nir_lower_system_values(nir);
     nir_validate_shader(nir);
  
     nir_optimize(nir, is_scalar);
  
 -   if (brw->gen >= 6) {
 +   if (devinfo->gen >= 6) {
        /* Try and fuse multiply-adds */
        nir_opt_peephole_ffma(nir);
        nir_validate_shader(nir);
        nir_print_shader(nir, stderr);
     }
  
-    nir_convert_from_ssa(nir, is_scalar);
+    nir_convert_from_ssa(nir, true);
     nir_validate_shader(nir);
  
     if (!is_scalar) {
+       nir_move_vec_src_uses_to_dest(nir);
+       nir_validate_shader(nir);
        nir_lower_vec_to_movs(nir);
        nir_validate_shader(nir);
     }
      * run it last because it stashes data in instr->pass_flags and we don't
      * want that to be squashed by other NIR passes.
      */
 -   if (brw->gen <= 5)
 +   if (devinfo->gen <= 5)
        brw_nir_analyze_boolean_resolves(nir);
  
     nir_sweep(nir);
                _mesa_shader_stage_to_string(stage));
        nir_print_shader(nir, stderr);
     }
 -
 -   return nir;
  }
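Splitting this pass pipeline out of brw_create_nir means it can be run on a nir_shader that was built without a gl_shader_program or a brw_context. A minimal usage sketch, assuming a scalar fragment shader with no GL shader program attached (the shader_prog == NULL case takes the nir_lower_samplers_for_vk() path above):

     brw_process_nir(nir, brw->intelScreen->devinfo,
                     NULL /* shader_prog */, MESA_SHADER_FRAGMENT,
                     true /* is_scalar */);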
  
  enum brw_reg_type
index 5a54cd39076163cc3f8918f74c803a1b7565625d,1ac0ed273ef10d9fc2fc81a9acf0771ccb033d3f..fa59338950a55fa62da3f9b7500dd7c8054b4070
@@@ -276,7 -276,7 +276,7 @@@ brw_get_scratch_bo(struct brw_context *
  
  void brwInitFragProgFuncs( struct dd_function_table *functions )
  {
 -   assert(functions->ProgramStringNotify == _tnl_program_string);
 +   /* assert(functions->ProgramStringNotify == _tnl_program_string); */
  
     functions->NewProgram = brwNewProgram;
     functions->DeleteProgram = brwDeleteProgram;
@@@ -588,3 -588,22 +588,22 @@@ brw_dump_ir(const char *stage, struct g
        _mesa_print_program(prog);
     }
  }
+ void
+ brw_setup_tex_for_precompile(struct brw_context *brw,
+                              struct brw_sampler_prog_key_data *tex,
+                              struct gl_program *prog)
+ {
+    const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+    unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+    for (unsigned i = 0; i < sampler_count; i++) {
+       if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+          tex->swizzles[i] =
+             MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+       } else {
+          /* Color sampler: assume no swizzling. */
+          tex->swizzles[i] = SWIZZLE_XYZW;
+       }
+    }
+ }
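In effect, on hardware without shader channel select a precompiled shadow sampler is assumed to behave like the default DEPTH_TEXTURE_MODE: whatever the sampler returns in the X channel is replicated to the first three components and alpha is forced to one, while color samplers keep the identity swizzle. Illustration:

     /* shadow sampler, pre-HSW: swizzles[i] == (X, X, X, ONE)  ->  (d, d, d, 1.0) */
     /* color sampler:           swizzles[i] == XYZW            ->  unchanged      */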
index 4ef2777559f36e2a23952bdce2d0f78c04d3dbea,785cb2722feee68d0ddadc382249d555415f6e34..1060d93ae6ba328e984db9bb55bd5e54a69f5609
@@@ -96,8 -96,15 +96,17 @@@ brw_compiler_create(void *mem_ctx, cons
      */
     nir_options->lower_ffma = true;
     nir_options->lower_sub = true;
 +   nir_options->lower_fdiv = true;
 +
+    /* In the vec4 backend, our dpN instruction replicates its result to all
+     * the components of a vec4.  We would like NIR to give us replicated fdot
+     * instructions because it can optimize better for us.
+     *
+     * For the FS backend, it should be lowered away by the scalarizing pass so
+     * we should never see fdot anyway.
+     */
+    nir_options->fdot_replicates = true;
     /* We want the GLSL compiler to emit code that uses condition codes */
     for (int i = 0; i < MESA_SHADER_STAGES; i++) {
        compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
        compiler->glsl_compiler_options[i].EmitNoNoise = true;
        compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
        compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
-       compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
-        (i == MESA_SHADER_FRAGMENT);
-       compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
-        (i == MESA_SHADER_FRAGMENT);
        compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
        compiler->glsl_compiler_options[i].LowerClipDistance = true;
  
+       bool is_scalar;
+       switch (i) {
+       case MESA_SHADER_FRAGMENT:
+       case MESA_SHADER_COMPUTE:
+          is_scalar = true;
+          break;
+       case MESA_SHADER_VERTEX:
+          is_scalar = compiler->scalar_vs;
+          break;
+       default:
+          is_scalar = false;
+          break;
+       }
+       compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
+       compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
+       compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
        /* !ARB_gpu_shader5 */
        if (devinfo->gen < 7)
           compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
-    }
  
-    compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-    compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-    if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-       if (compiler->scalar_vs) {
-          /* If we're using the scalar backend for vertex shaders, we need to
-           * configure these accordingly.
-           */
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-          compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-       }
-       compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
-    }
-    if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
-       compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+       if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
+          compiler->glsl_compiler_options[i].NirOptions = nir_options;
     }
  
-    compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
-    compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
     return compiler;
  }
  
@@@ -196,6 -197,7 +199,7 @@@ is_scalar_shader_stage(struct brw_conte
  {
     switch (stage) {
     case MESA_SHADER_FRAGMENT:
+    case MESA_SHADER_COMPUTE:
        return true;
     case MESA_SHADER_VERTEX:
        return brw->intelScreen->compiler->scalar_vs;
@@@ -323,9 -325,6 +327,6 @@@ process_glsl_ir(gl_shader_stage stage
                                          options, ctx->Const.NativeIntegers) || progress;
     } while (progress);
  
-    if (options->NirOptions != NULL)
-       lower_output_reads(stage, shader->ir);
     validate_ir_tree(shader->ir);
  
     /* Now that we've finished altering the linked IR, reparent any live IR back
@@@ -460,7 -459,6 +461,7 @@@ brw_type_for_base_type(const struct gls
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
     case GLSL_TYPE_DOUBLE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -623,6 -621,8 +624,8 @@@ brw_instruction_name(enum opcode op
        return "tg4_offset";
     case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
        return "tg4_offset_logical";
+    case SHADER_OPCODE_SAMPLEINFO:
+       return "sampleinfo";
  
     case SHADER_OPCODE_SHADER_TIME_ADD:
        return "shader_time_add";
     case FS_OPCODE_PIXEL_Y:
        return "pixel_y";
  
+    case FS_OPCODE_GET_BUFFER_SIZE:
+       return "fs_get_buffer_size";
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
        return "uniform_pull_const";
     case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
     case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
        return "set_simd4x2_header_gen9";
  
+    case VS_OPCODE_GET_BUFFER_SIZE:
+       return "vs_get_buffer_size";
     case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
        return "unpack_flags_simd4x2";
  
index 5e528b5c5a19c5d3cc3dd5b1ee6e2f7fdb27f7b5,c61b38548f7f10dfd754f9cc413d35c9fcef1cc5..1d62f2f6a75f609abd901cf54776086d5a7d4ef0
@@@ -61,6 -61,8 +61,8 @@@ src_reg::src_reg(register_file file, in
        this->swizzle = brw_swizzle_for_size(type->vector_elements);
     else
        this->swizzle = BRW_SWIZZLE_XYZW;
+    if (type)
+       this->type = brw_type_for_base_type(type);
  }
  
  /** Generic unset register constructor. */
@@@ -329,6 -331,8 +331,8 @@@ vec4_visitor::implied_mrf_writes(vec4_i
     case SHADER_OPCODE_TXS:
     case SHADER_OPCODE_TG4:
     case SHADER_OPCODE_TG4_OFFSET:
+    case SHADER_OPCODE_SAMPLEINFO:
+    case VS_OPCODE_GET_BUFFER_SIZE:
        return inst->header_size;
     default:
        unreachable("not reached");
@@@ -938,10 -942,18 +942,18 @@@ vec4_visitor::opt_set_dependency_contro
  }
  
  bool
- vec4_instruction::can_reswizzle(int dst_writemask,
+ vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
+                                 int dst_writemask,
                                  int swizzle,
                                  int swizzle_mask)
  {
+    /* Gen6 MATH instructions can not execute in align16 mode, so swizzles
+     * or writemasking are not allowed.
+     */
+    if (devinfo->gen == 6 && is_math() &&
+        (swizzle != BRW_SWIZZLE_XYZW || dst_writemask != WRITEMASK_XYZW))
+       return false;
     /* If this instruction sets anything not referenced by swizzle, then we'd
      * totally break it when we reswizzle.
      */
     if (mlen > 0)
        return false;
  
+    /* We can't use swizzles on the accumulator and that's really the only
+     * HW_REG we would care to reswizzle so just disallow them all.
+     */
+    for (int i = 0; i < 3; i++) {
+       if (src[i].file == HW_REG)
+          return false;
+    }
     return true;
  }
  
@@@ -1010,6 -1030,28 +1030,28 @@@ vec4_visitor::opt_register_coalesce(
          inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
         continue;
  
+       /* Remove no-op MOVs */
+       if (inst->dst.file == inst->src[0].file &&
+           inst->dst.reg == inst->src[0].reg &&
+           inst->dst.reg_offset == inst->src[0].reg_offset) {
+          bool is_nop_mov = true;
+          for (unsigned c = 0; c < 4; c++) {
+             if ((inst->dst.writemask & (1 << c)) == 0)
+                continue;
+             if (BRW_GET_SWZ(inst->src[0].swizzle, c) != c) {
+                is_nop_mov = false;
+                break;
+             }
+          }
+          if (is_nop_mov) {
+             inst->remove(block);
+             continue;
+          }
+       }
        bool to_mrf = (inst->dst.file == MRF);
  
        /* Can't coalesce this GRF if someone else was going to
                 }
              }
  
+             /* This doesn't handle saturation on the instruction we
+              * want to coalesce away if the register types do not match.
+              * But if scan_inst is a non type-converting 'mov', we can fix
+              * the types later.
+              */
+             if (inst->saturate &&
+                 inst->dst.type != scan_inst->dst.type &&
+                 !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                   scan_inst->dst.type == scan_inst->src[0].type))
+                break;
              /* If we can't handle the swizzle, bail. */
-             if (!scan_inst->can_reswizzle(inst->dst.writemask,
+             if (!scan_inst->can_reswizzle(devinfo, inst->dst.writemask,
                                            inst->src[0].swizzle,
                                            chans_needed)) {
                 break;
         if (interfered)
            break;
  
-          /* If somebody else writes our destination here, we can't coalesce
-           * before that.
+          /* If somebody else writes the same channels of our destination here,
+           * we can't coalesce before that.
            */
-          if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-           break;
+          if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+              (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+             break;
+          }
  
           /* Check for reads of the register we're trying to coalesce into.  We
            * can't go rewriting instructions above that to put some other value
               scan_inst->dst.file = inst->dst.file;
               scan_inst->dst.reg = inst->dst.reg;
               scan_inst->dst.reg_offset = inst->dst.reg_offset;
+                if (inst->saturate &&
+                    inst->dst.type != scan_inst->dst.type) {
+                   /* If we have reached this point, scan_inst is a non
+                    * type-converting 'mov' and we can modify its register types
+                    * to match the ones in inst. Otherwise, we could have an
+                    * incorrect saturation result.
+                    */
+                   scan_inst->dst.type = inst->dst.type;
+                   scan_inst->src[0].type = inst->src[0].type;
+                }
               scan_inst->saturate |= inst->saturate;
            }
            scan_inst = (vec4_instruction *)scan_inst->next;
@@@ -1719,7 -1784,7 +1784,7 @@@ vec4_visitor::emit_shader_time_write(in
  }
  
  bool
- vec4_visitor::run(gl_clip_plane *clip_planes)
+ vec4_visitor::run()
  {
     bool use_vec4_nir =
        compiler->glsl_compiler_options[stage].NirOptions != NULL;
     }
     base_ir = NULL;
  
-    if (key->userclip_active && !prog->UsesClipDistanceOut)
-       setup_uniform_clipplane_values(clip_planes);
     emit_thread_end();
  
     calculate_cfg();
  
     setup_payload();
  
-    if (false) {
+    if (unlikely(INTEL_DEBUG & DEBUG_SPILL_VEC4)) {
        /* Debug of register spilling: Go spill everything. */
        const int grf_count = alloc.count;
        float spill_costs[alloc.count];
@@@ -1899,16 -1961,8 +1961,8 @@@ brw_vs_emit(struct brw_context *brw
              struct gl_shader_program *prog,
              unsigned *final_assembly_size)
  {
-    bool start_busy = false;
-    double start_time = 0;
     const unsigned *assembly = NULL;
  
-    if (unlikely(brw->perf_debug)) {
-       start_busy = (brw->batch.last_bo &&
-                     drm_intel_bo_busy(brw->batch.last_bo));
-       start_time = get_time();
-    }
     struct brw_shader *shader = NULL;
     if (prog)
        shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
        st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
  
 -   if (unlikely(INTEL_DEBUG & DEBUG_VS))
 +   if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
        brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
  
     if (!vp->Base.nir &&
        prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
  
        vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
-                         vp, prog, mem_ctx, st_index,
+                         vp, prog, brw_select_clip_planes(&brw->ctx),
+                         mem_ctx, st_index,
                          !_mesa_is_gles3(&brw->ctx));
-       if (!v.run(brw_select_clip_planes(&brw->ctx))) {
+       if (!v.run()) {
           if (prog) {
              prog->LinkStatus = false;
              ralloc_strcat(&prog->InfoLog, v.fail_msg);
        assembly = g.generate_assembly(v.cfg, final_assembly_size);
     }
  
-    if (unlikely(brw->perf_debug) && shader) {
-       if (shader->compiled_once) {
-          brw_vs_debug_recompile(brw, prog, key);
-       }
-       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
-          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
-                     (get_time() - start_time) * 1000);
-       }
-       shader->compiled_once = true;
-    }
     return assembly;
  }
  
- void
- brw_vue_setup_prog_key_for_precompile(struct gl_context *ctx,
-                                       struct brw_vue_prog_key *key,
-                                       GLuint id, struct gl_program *prog)
- {
-    struct brw_context *brw = brw_context(ctx);
-    key->program_string_id = id;
-    brw_setup_tex_for_precompile(brw, &key->tex, prog);
- }
  } /* extern "C" */
index d5a24d823b5232c952d5d87ca2116488745ae5b5,94906d2e70505d6d34d1221703edad9194531fce..9d56f9a1ae0d3a00ddcb0bc9768369f4aad60442
  
  #include "brw_nir.h"
  #include "brw_vec4.h"
+ #include "brw_vec4_builder.h"
+ #include "brw_vec4_surface_builder.h"
  #include "glsl/ir_uniform.h"
  
+ using namespace brw;
+ using namespace brw::surface_access;
  namespace brw {
  
  void
@@@ -58,25 -63,24 +63,24 @@@ vec4_visitor::nir_setup_system_value_in
        unreachable("should be lowered by lower_vertex_id().");
  
     case nir_intrinsic_load_vertex_id_zero_base:
-       reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+       reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
        if (reg->file == BAD_FILE)
-          *reg =
-             *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
-                                              glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+                                            glsl_type::int_type);
        break;
  
     case nir_intrinsic_load_base_vertex:
-       reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+       reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
        if (reg->file == BAD_FILE)
-          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
-                                                  glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+                                            glsl_type::int_type);
        break;
  
     case nir_intrinsic_load_instance_id:
-       reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+       reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
        if (reg->file == BAD_FILE)
-          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
-                                                  glsl_type::int_type);
+          *reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+                                            glsl_type::int_type);
        break;
  
     default:
@@@ -142,7 -146,7 +146,7 @@@ vec4_visitor::nir_setup_uniforms(nir_sh
           }
  
           assert(uniforms < uniform_array_size);
-          this->uniform_size[uniforms] = type_size_vec4(var->type);
+          uniform_size[uniforms] = type_size_vec4(var->type);
  
           if (strncmp(var->name, "gl_", 3) == 0)
              nir_setup_builtin_uniform(var);
               strcmp(var->name, "parameters") == 0);
  
        assert(uniforms < uniform_array_size);
-       this->uniform_size[uniforms] = type_size_vec4(var->type);
+       uniform_size[uniforms] = type_size_vec4(var->type);
  
        struct gl_program_parameter_list *plist = prog->Parameters;
        for (unsigned p = 0; p < plist->NumParameters; p++) {
@@@ -243,10 -247,10 +247,10 @@@ vec4_visitor::nir_setup_builtin_uniform
         * ParameterValues directly, since unlike brw_fs.cpp, we never
         * add new state references during compile.
         */
-       int index = _mesa_add_state_reference(this->prog->Parameters,
+       int index = _mesa_add_state_reference(prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);
        gl_constant_value *values =
-          &this->prog->Parameters->ParameterValues[index][0];
+          &prog->Parameters->ParameterValues[index][0];
  
        assert(uniforms < uniform_array_size);
  
           stage_prog_data->param[uniforms * 4 + j] =
              &values[GET_SWZ(slots[i].swizzle, j)];
  
-       this->uniform_vector_size[uniforms] =
+       uniform_vector_size[uniforms] =
           (var->type->is_scalar() || var->type->is_vector() ||
            var->type->is_matrix() ? var->type->vector_elements : 4);
  
@@@ -344,7 -348,7 +348,7 @@@ vec4_visitor::nir_emit_block(nir_block 
  void
  vec4_visitor::nir_emit_instr(nir_instr *instr)
  {
-    this->base_ir = instr;
+    base_ir = instr;
  
     switch (instr->type) {
     case nir_instr_type_load_const:
        nir_emit_texture(nir_instr_as_tex(instr));
        break;
  
+    case nir_instr_type_ssa_undef:
+       nir_emit_undef(nir_instr_as_ssa_undef(instr));
+       break;
     default:
        fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
        break;
@@@ -393,9 -401,14 +401,14 @@@ dst_reg_for_nir_reg(vec4_visitor *v, ni
  dst_reg
  vec4_visitor::get_nir_dest(nir_dest dest)
  {
-    assert(!dest.is_ssa);
-    return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
-                               dest.reg.indirect);
+    if (dest.is_ssa) {
+       dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+       nir_ssa_values[dest.ssa.index] = dst;
+       return dst;
+    } else {
+       return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                                  dest.reg.indirect);
+    }
  }
  
  dst_reg
@@@ -450,7 -463,7 +463,7 @@@ voi
  vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
  {
     dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-    reg.type =  BRW_REGISTER_TYPE_F;
+    reg.type =  BRW_REGISTER_TYPE_D;
  
     unsigned remaining = brw_writemask_for_size(instr->def.num_components);
  
        }
  
        reg.writemask = writemask;
-       emit(MOV(reg, src_reg(instr->value.f[i])));
+       emit(MOV(reg, src_reg(instr->value.i[i])));
  
        remaining &= ~writemask;
     }
@@@ -530,33 -543,271 +543,271 @@@ vec4_visitor::nir_emit_intrinsic(nir_in
        break;
     }
  
-    case nir_intrinsic_load_vertex_id:
-       unreachable("should be lowered by lower_vertex_id()");
+    case nir_intrinsic_get_buffer_size: {
+       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
+       unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+       assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+       src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                    ubo_index);
+       dst_reg result_dst = get_nir_dest(instr->dest);
+       vec4_instruction *inst = new(mem_ctx)
+          vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
+       inst->base_mrf = 2;
+       inst->mlen = 1; /* always at least one */
+       inst->src[1] = src_reg(surf_index);
+       /* MRF for the first parameter */
+       src_reg lod = src_reg(0);
+       int param_base = inst->base_mrf;
+       int writemask = WRITEMASK_X;
+       emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+       emit(inst);
+       break;
+    }
+    case nir_intrinsic_store_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_store_ssbo: {
+       assert(devinfo->gen >= 7);
+       /* Block index */
+       src_reg surf_index;
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[1]);
+       if (const_uniform_block) {
+          unsigned index = prog_data->base.binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = src_reg(index);
+          brw_mark_surface_used(&prog_data->base, index);
+       } else {
+          surf_index = src_reg(this, glsl_type::uint_type);
+          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1),
+                   src_reg(prog_data->base.binding_table.ubo_start)));
+          surf_index = emit_uniformize(surf_index);
+          brw_mark_surface_used(&prog_data->base,
+                                prog_data->base.binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       /* Offset */
+       src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1)));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+       }
+       /* Value */
+       src_reg val_reg = get_nir_src(instr->src[0], 4);
+       /* Writemask */
+       unsigned write_mask = instr->const_index[1];
+       /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped
+        * writes will use SIMD8 mode. In order to hide this and keep symmetry across
+        * typed and untyped messages and across hardware platforms, the
+        * current implementation of the untyped messages will transparently convert
+        * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it
+        * and enabling only channel X on the SEND instruction.
+        *
+        * The above works well for full vector writes, but not for partial writes
+        * where we want to write some channels and not others, like when we have
+        * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are
+        * quite restrictive with regards to the channel enables we can configure in
+        * the message descriptor (not all combinations are allowed) we cannot simply
+        * implement these scenarios with a single message while keeping the
+        * aforementioned symmetry in the implementation. For now we have decided that
+        * it is better to keep the symmetry to reduce complexity, so in situations
+        * such as the one described we end up emitting two untyped write messages
+        * (one for xy and another for w).
+        *
+        * The code below packs consecutive channels into a single write message,
+        * detects gaps in the vector write and if needed, sends a second message
+        * with the remaining channels. If in the future we decide that we want to
+        * emit a single message at the expense of losing the symmetry in the
+        * implementation we can:
+        *
+        * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8
+        *    message payload. In this mode we can write up to 8 offsets and dwords
+        *    to the red channel only (for the two vec4s in the SIMD4x2 execution)
+        *    and select which of the 8 channels carry data to write by setting the
+        *    appropriate writemask in the dst register of the SEND instruction.
+        *    It would require writing a new generator opcode specifically for
+        *    IvyBridge since we would need to prepare a SIMD8 payload that could
+        *    use any channel, not just X.
+        *
+        * 2) For Haswell+: Simply send a single write message but set the writemask
+        *    on the dst of the SEND instruction to select the channels we want to
+        *    write. It would require modifying the current messages to receive
+        *    and honor the writemask provided.
+        */
+       const vec4_builder bld = vec4_builder(this).at_end()
+                                .annotate(current_annotation, base_ir);
+       int swizzle[4] = { 0, 0, 0, 0};
+       int num_channels = 0;
+       unsigned skipped_channels = 0;
+       int num_components = instr->num_components;
+       for (int i = 0; i < num_components; i++) {
+          /* Check if this channel needs to be written. If so, record the
+           * channel we need to take the data from in the swizzle array
+           */
+          int component_mask = 1 << i;
+          int write_test = write_mask & component_mask;
+          if (write_test)
+             swizzle[num_channels++] = i;
+          /* If we don't have to write this channel it means we have a gap in the
+           * vector, so write the channels we accumulated until now, if any. Do
+           * the same if this was the last component in the vector.
+           */
+          if (!write_test || i == num_components - 1) {
+             if (num_channels > 0) {
+                /* We have channels to write, so update the offset we need to
+                 * write at to skip the channels we skipped, if any.
+                 */
+                if (skipped_channels > 0) {
+                   if (!has_indirect) {
+                      const_offset_bytes += 4 * skipped_channels;
+                      offset_reg = src_reg(const_offset_bytes);
+                   } else {
+                      emit(ADD(dst_reg(offset_reg), offset_reg,
+                               brw_imm_ud(4 * skipped_channels)));
+                   }
+                }
+                /* Swizzle the data register so we take the data from the channels
+                 * we need to write and send the write message. This will write
+                 * num_channels consecutive dwords starting at offset.
+                 */
+                val_reg.swizzle =
+                   BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+                emit_untyped_write(bld, surf_index, offset_reg, val_reg,
+                                   1 /* dims */, num_channels /* size */,
+                                   BRW_PREDICATE_NONE);
+                /* If we have to do a second write we will have to update the
+                 * offset so that we jump over the channels we have just written
+                 * now.
+                 */
+                skipped_channels = num_channels;
+                /* Restart the count for the next write message */
+                num_channels = 0;
+             }
+             /* We did not write the current channel, so increase skipped count */
+             skipped_channels++;
+          }
+       }
  
-    case nir_intrinsic_load_vertex_id_zero_base: {
-       src_reg vertex_id =
-          src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
-       assert(vertex_id.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, vertex_id.type);
-       emit(MOV(dest, vertex_id));
        break;
     }
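The packing strategy documented in the comment above can be illustrated in isolation. The helper below is a hypothetical sketch, not part of this patch: it shows how a writemask such as the one for v.xyw = vec3(1,2,4) splits into two runs (.xy and .w), each of which becomes one untyped write message.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative sketch only: split a 4-bit writemask into runs of
     * consecutive channels; each run corresponds to one emit_untyped_write().
     */
    static void
    print_write_runs(unsigned write_mask, int num_components)
    {
       int run_start = -1;
       for (int i = 0; i <= num_components; i++) {
          bool set = i < num_components && (write_mask & (1 << i));
          if (set && run_start < 0) {
             run_start = i;                      /* a new run begins */
          } else if (!set && run_start >= 0) {
             printf("message: %d channel(s) at dword offset %d\n",
                    i - run_start, run_start);   /* the run ends: one message */
             run_start = -1;
          }
       }
    }

Calling print_write_runs(0xb, 4) for the .xyw case prints two messages: two channels at offset 0 and one channel at offset 3.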
  
-    case nir_intrinsic_load_base_vertex: {
-       src_reg base_vertex =
-          src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
-       assert(base_vertex.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, base_vertex.type);
-       emit(MOV(dest, base_vertex));
+    case nir_intrinsic_load_ssbo_indirect:
+       has_indirect = true;
+       /* fallthrough */
+    case nir_intrinsic_load_ssbo: {
+       assert(devinfo->gen >= 7);
+       nir_const_value *const_uniform_block =
+          nir_src_as_const_value(instr->src[0]);
+       src_reg surf_index;
+       if (const_uniform_block) {
+          unsigned index = prog_data->base.binding_table.ubo_start +
+                           const_uniform_block->u[0];
+          surf_index = src_reg(index);
+          brw_mark_surface_used(&prog_data->base, index);
+       } else {
+          surf_index = src_reg(this, glsl_type::uint_type);
+          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1),
+                   src_reg(prog_data->base.binding_table.ubo_start)));
+          surf_index = emit_uniformize(surf_index);
+          /* Assume this may touch any UBO. It would be nice to provide
+           * a tighter bound, but the array information is already lowered away.
+           */
+          brw_mark_surface_used(&prog_data->base,
+                                prog_data->base.binding_table.ubo_start +
+                                shader_prog->NumBufferInterfaceBlocks - 1);
+       }
+       src_reg offset_reg = src_reg(this, glsl_type::uint_type);
+       unsigned const_offset_bytes = 0;
+       if (has_indirect) {
+          emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1)));
+       } else {
+          const_offset_bytes = instr->const_index[0];
+          emit(MOV(dst_reg(offset_reg), src_reg(const_offset_bytes)));
+       }
+       /* Read the vector */
+       const vec4_builder bld = vec4_builder(this).at_end()
+          .annotate(current_annotation, base_ir);
+       src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+                                               1 /* dims */, 4 /* size*/,
+                                               BRW_PREDICATE_NONE);
+       dst_reg dest = get_nir_dest(instr->dest);
+       read_result.type = dest.type;
+       read_result.swizzle = brw_swizzle_for_size(instr->num_components);
+       emit(MOV(dest, read_result));
        break;
     }
  
+    case nir_intrinsic_ssbo_atomic_add:
+       nir_emit_ssbo_atomic(BRW_AOP_ADD, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_min:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr);
+       else
+          nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_max:
+       if (dest.type == BRW_REGISTER_TYPE_D)
+          nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr);
+       else
+          nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_and:
+       nir_emit_ssbo_atomic(BRW_AOP_AND, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_or:
+       nir_emit_ssbo_atomic(BRW_AOP_OR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_xor:
+       nir_emit_ssbo_atomic(BRW_AOP_XOR, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_exchange:
+       nir_emit_ssbo_atomic(BRW_AOP_MOV, instr);
+       break;
+    case nir_intrinsic_ssbo_atomic_comp_swap:
+       nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr);
+       break;
+    case nir_intrinsic_load_vertex_id:
+       unreachable("should be lowered by lower_vertex_id()");
+    case nir_intrinsic_load_vertex_id_zero_base:
+    case nir_intrinsic_load_base_vertex:
     case nir_intrinsic_load_instance_id: {
-       src_reg instance_id =
-          src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
-       assert(instance_id.file != BAD_FILE);
-       dest = get_nir_dest(instr->dest, instance_id.type);
-       emit(MOV(dest, instance_id));
+       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+       src_reg val = src_reg(nir_system_values[sv]);
+       assert(val.file != BAD_FILE);
+       dest = get_nir_dest(instr->dest, val.type);
+       emit(MOV(dest, val));
        break;
     }
  
        has_indirect = true;
        /* fallthrough */
     case nir_intrinsic_load_ubo: {
 +      const uint32_t set = instr->const_index[0];
        nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
        src_reg surf_index;
  
        dest = get_nir_dest(instr->dest);
  
        if (const_block_index) {
 +         uint32_t binding = const_block_index->u[0];
 +
           /* The block index is a constant, so just emit the binding table entry
            * as an immediate.
            */
 -         surf_index = src_reg(prog_data->base.binding_table.ubo_start +
 -                              const_block_index->u[0]);
 +         surf_index = src_reg(stage_prog_data->bind_map[set].index[binding]);
        } else {
           /* The block index is not a constant. Evaluate the index expression
            * per-channel and add the base UBO index; we have to select a value
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
 -      unsigned const_offset = instr->const_index[0];
 +      unsigned const_offset = instr->const_index[1];
        src_reg offset;
  
        if (!has_indirect)  {
     }
  }
  
+ void
+ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
+ {
+    dst_reg dest;
+    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+       dest = get_nir_dest(instr->dest);
+    src_reg surface;
+    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
+    if (const_surface) {
+       unsigned surf_index = prog_data->base.binding_table.ubo_start +
+                             const_surface->u[0];
+       surface = src_reg(surf_index);
+       brw_mark_surface_used(&prog_data->base, surf_index);
+    } else {
+       surface = src_reg(this, glsl_type::uint_type);
+       emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]),
+                src_reg(prog_data->base.binding_table.ubo_start)));
+       /* Assume this may touch any UBO. This is the same as we do for other
+        * UBO/SSBO accesses with a non-constant surface.
+        */
+       brw_mark_surface_used(&prog_data->base,
+                             prog_data->base.binding_table.ubo_start +
+                             shader_prog->NumBufferInterfaceBlocks - 1);
+    }
+    src_reg offset = get_nir_src(instr->src[1], 1);
+    src_reg data1 = get_nir_src(instr->src[2], 1);
+    src_reg data2;
+    if (op == BRW_AOP_CMPWR)
+       data2 = get_nir_src(instr->src[3], 1);
+    /* Emit the actual atomic operation */
+    const vec4_builder bld =
+       vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+    src_reg atomic_result =
+       surface_access::emit_untyped_atomic(bld, surface, offset,
+                                           data1, data2,
+                                           1 /* dims */, 1 /* rsize */,
+                                           op,
+                                           BRW_PREDICATE_NONE);
+    dest.type = atomic_result.type;
+    bld.MOV(dest, atomic_result);
+ }
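For reference, the op selection performed by the SSBO atomic cases in nir_emit_intrinsic() above boils down to the mapping sketched below. This is not part of the patch and assumes the nir.h and brw_defines.h headers from this tree; min/max pick the signed or unsigned hardware op from the destination type, the rest map one-to-one.

    /* Sketch of the NIR intrinsic to BRW_AOP_* mapping used above. */
    static int
    brw_aop_for_ssbo_atomic(nir_intrinsic_op op, bool dest_is_signed)
    {
       switch (op) {
       case nir_intrinsic_ssbo_atomic_add:       return BRW_AOP_ADD;
       case nir_intrinsic_ssbo_atomic_min:       return dest_is_signed ? BRW_AOP_IMIN
                                                                       : BRW_AOP_UMIN;
       case nir_intrinsic_ssbo_atomic_max:       return dest_is_signed ? BRW_AOP_IMAX
                                                                       : BRW_AOP_UMAX;
       case nir_intrinsic_ssbo_atomic_and:       return BRW_AOP_AND;
       case nir_intrinsic_ssbo_atomic_or:        return BRW_AOP_OR;
       case nir_intrinsic_ssbo_atomic_xor:       return BRW_AOP_XOR;
       case nir_intrinsic_ssbo_atomic_exchange:  return BRW_AOP_MOV;
       case nir_intrinsic_ssbo_atomic_comp_swap: return BRW_AOP_CMPWR;
       default:
          unreachable("not an SSBO atomic intrinsic");
       }
    }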
  static unsigned
  brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
  {
@@@ -1263,21 -1559,26 +1561,26 @@@ vec4_visitor::nir_emit_alu(nir_alu_inst
        inst->predicate = BRW_PREDICATE_NORMAL;
        break;
  
-    case nir_op_fdot2:
+    case nir_op_fdot_replicated2:
        inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
-    case nir_op_fdot3:
+    case nir_op_fdot_replicated3:
        inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
-    case nir_op_fdot4:
+    case nir_op_fdot_replicated4:
        inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
        inst->saturate = instr->dest.saturate;
        break;
  
+    case nir_op_fdph_replicated:
+       inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]);
+       inst->saturate = instr->dest.saturate;
+       break;
     case nir_op_bany2:
     case nir_op_bany3:
     case nir_op_bany4: {
@@@ -1355,6 -1656,7 +1658,7 @@@ ir_texture_opcode_for_nir_texop(nir_tex
     switch (texop) {
     case nir_texop_lod: op = ir_lod; break;
     case nir_texop_query_levels: op = ir_query_levels; break;
+    case nir_texop_texture_samples: op = ir_texture_samples; break;
     case nir_texop_tex: op = ir_tex; break;
     case nir_texop_tg4: op = ir_tg4; break;
     case nir_texop_txb: op = ir_txb; break;
@@@ -1411,7 -1713,7 +1715,7 @@@ vec4_visitor::nir_emit_texture(nir_tex_
      * emitting anything other than setting up the constant result.
      */
     if (instr->op == nir_texop_tg4) {
-       int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+       int swiz = GET_SWZ(key_tex->swizzles[sampler], instr->component);
        if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
           emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
           return;
           sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
           assert(coord_type != NULL);
           if (devinfo->gen >= 7 &&
-              key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+              key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
              mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
           } else {
              mcs = src_reg(0u);
                  mcs, is_cube_array, sampler, sampler_reg);
  }
  
+ void
+ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
+ {
+    nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+ }
  }
index 499f6288aeec1295a583ecde6f02951cf215ee99,3cb783ece1c675c447028469d90323730436d591..6d61112056ca2bd3dc81b7f204cdee9aeda665d7
@@@ -26,6 -26,8 +26,8 @@@
  #include "glsl/ir_uniform.h"
  #include "program/sampler.h"
  
+ #define FIRST_SPILL_MRF(gen) (gen == 6 ? 21 : 13)
  namespace brw {
  
  vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
@@@ -256,7 -258,7 +258,7 @@@ vec4_visitor::SCRATCH_READ(const dst_re
  
     inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
                                        dst, index);
-    inst->base_mrf = 14;
+    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
     inst->mlen = 2;
  
     return inst;
@@@ -270,7 -272,7 +272,7 @@@ vec4_visitor::SCRATCH_WRITE(const dst_r
  
     inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
                                        dst, src, index);
-    inst->base_mrf = 13;
+    inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen);
     inst->mlen = 3;
  
     return inst;
@@@ -643,7 -645,6 +645,7 @@@ type_size_vec4(const struct glsl_type *
     case GLSL_TYPE_DOUBLE:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
        unreachable("not reached");
     }
  
@@@ -756,22 -757,6 +758,6 @@@ vec4_visitor::setup_uniform_values(ir_v
     }
  }
  
- void
- vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
- {
-    for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
-       assert(this->uniforms < uniform_array_size);
-       this->uniform_vector_size[this->uniforms] = 4;
-       this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
-       this->userplane[i].type = BRW_REGISTER_TYPE_F;
-       for (int j = 0; j < 4; ++j) {
-          stage_prog_data->param[this->uniforms * 4 + j] =
-             (gl_constant_value *) &clip_planes[i][j];
-       }
-       ++this->uniforms;
-    }
- }
  /* Our support for builtin uniforms is even scarier than non-builtin.
   * It sits on top of the PROG_STATE_VAR parameters that are
   * automatically updated from GL context state.
@@@ -1089,11 -1074,12 +1075,12 @@@ vec4_visitor::visit(ir_variable *ir
        break;
  
     case ir_var_uniform:
+    case ir_var_shader_storage:
        reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
  
        /* Thanks to the lower_ubo_reference pass, we will see only
-        * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-        * variables, so no need for them to be in variable_ht.
+        * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
+        * for UBO/SSBO variables, so no need for them to be in variable_ht.
         *
         * Some uniforms, such as samplers and atomic counters, have no actual
         * storage, so we should ignore them.
@@@ -1401,7 -1387,7 +1388,7 @@@ vec4_visitor::emit_pull_constant_load_r
                                             dst,
                                             surf_index,
                                             offset_reg);
-       pull->base_mrf = 14;
+       pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
        pull->mlen = 1;
     }
  
@@@ -1599,6 -1585,10 +1586,10 @@@ vec4_visitor::visit(ir_expression *ir
        emit(MOV(result_dst, op[0]));
        break;
  
+    case ir_unop_ssbo_unsized_array_length:
+       unreachable("not reached: should be handled by lower_ubo_reference");
+       break;
     case ir_binop_add:
        emit(ADD(result_dst, op[0], op[1]));
        break;
        emit(RNDE(result_dst, op[0]));
        break;
  
+    case ir_unop_get_buffer_size:
+       unreachable("not reached: not implemented");
+       break;
     case ir_binop_min:
        emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
        break;
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                                shader_prog->NumUniformBlocks - 1);
+                                shader_prog->NumBufferInterfaceBlocks - 1);
        }
  
        if (const_offset_ir) {
@@@ -2567,6 -2561,7 +2562,7 @@@ vec4_visitor::emit_texture(ir_texture_o
     case ir_tg4: opcode = offset_value.file != BAD_FILE
                           ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
     case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+    case ir_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO; break;
     case ir_txb:
        unreachable("TXB is not valid for vertex shaders.");
     case ir_lod:
      * - Texel offsets
      * - Gather channel selection
      * - Sampler indices too large to fit in a 4-bit value.
+     * - Sampleinfo message - takes no parameters, but mlen = 0 is illegal
      */
     inst->header_size =
        (devinfo->gen < 5 || devinfo->gen >= 9 ||
         inst->offset != 0 || op == ir_tg4 ||
+        op == ir_texture_samples ||
         is_high_sampler(sampler_reg)) ? 1 : 0;
     inst->base_mrf = 2;
-    inst->mlen = inst->header_size + 1; /* always at least one */
+    inst->mlen = inst->header_size;
     inst->dst.writemask = WRITEMASK_XYZW;
     inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
  
     if (op == ir_txs || op == ir_query_levels) {
        int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
        emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+       inst->mlen++;
+    } else if (op == ir_texture_samples) {
+       inst->dst.writemask = WRITEMASK_X;
     } else {
        /* Load the coordinate */
        /* FINISHME: gl_clamp_mask and saturate */
  
        emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
                 coordinate));
+       inst->mlen++;
  
        if (zero_mask != 0) {
           emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
            mrf = param_base;
            writemask = WRITEMASK_W;
         }
-          lod.swizzle = BRW_SWIZZLE_XXXX;
         emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
        } else if (op == ir_txf) {
           emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
     }
  
     if (devinfo->gen == 6 && op == ir_tg4) {
-       emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+       emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
     }
  
     swizzle_result(op, dest,
@@@ -2762,7 -2762,7 +2763,7 @@@ vec4_visitor::visit(ir_texture *ir
      */
     if (ir->op == ir_tg4) {
        ir_constant *chan = ir->lod_info.component->as_constant();
-       int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+       int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
        if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
           dst_reg result(this, ir->type);
           this->result = src_reg(result);
        ir->lod_info.sample_index->accept(this);
        sample_index = this->result;
  
-       if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+       if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
           mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
        else
           mcs = src_reg(0u);
     case ir_txb:
     case ir_lod:
     case ir_tg4:
+    case ir_texture_samples:
        break;
     }
  
@@@ -2898,14 -2899,14 +2900,14 @@@ vec4_visitor::emit_gen6_gather_wa(uint8
  uint32_t
  vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
  {
-    int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
+    int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
     switch (swiz) {
        case SWIZZLE_X: return 0;
        case SWIZZLE_Y:
           /* gather4 sampler is broken for green channel on RG32F --
            * we must ask for blue instead.
            */
-          if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+          if (key_tex->gather_channel_quirk_mask & (1 << sampler))
              return 2;
           return 1;
        case SWIZZLE_Z: return 2;
@@@ -2920,7 -2921,7 +2922,7 @@@ vec4_visitor::swizzle_result(ir_texture
                               src_reg orig_val, uint32_t sampler,
                               const glsl_type *dest_type)
  {
-    int s = key->tex.swizzles[sampler];
+    int s = key_tex->swizzles[sampler];
  
     dst_reg swizzled_result = dest;
  
@@@ -3122,7 -3123,8 +3124,8 @@@ vec4_visitor::emit_psiz_and_flags(dst_r
  {
     if (devinfo->gen < 6 &&
         ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
-         key->userclip_active || devinfo->has_negative_rhw_bug)) {
+         output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+         devinfo->has_negative_rhw_bug)) {
        dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
        dst_reg header1_w = header1;
        header1_w.writemask = WRITEMASK_W;
         emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
        }
  
-       if (key->userclip_active) {
+       if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
           current_annotation = "Clipping flags";
           dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
           dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
     }
  }
  
- void
- vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
- {
-    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
-     *
-     *     "If a linked set of shaders forming the vertex stage contains no
-     *     static write to gl_ClipVertex or gl_ClipDistance, but the
-     *     application has requested clipping against user clip planes through
-     *     the API, then the coordinate written to gl_Position is used for
-     *     comparison against the user clip planes."
-     *
-     * This function is only called if the shader didn't write to
-     * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
-     * if the user wrote to it; otherwise we use gl_Position.
-     */
-    gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
-    if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
-       clip_vertex = VARYING_SLOT_POS;
-    }
-    for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
-         ++i) {
-       reg.writemask = 1 << i;
-       emit(DP4(reg,
-                src_reg(output_reg[clip_vertex]),
-                src_reg(this->userplane[i + offset])));
-    }
- }
  vec4_instruction *
  vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
  {
@@@ -3278,21 -3251,6 +3252,6 @@@ vec4_visitor::emit_urb_slot(dst_reg reg
     case BRW_VARYING_SLOT_PAD:
        /* No need to write to this slot */
        break;
-    case VARYING_SLOT_COL0:
-    case VARYING_SLOT_COL1:
-    case VARYING_SLOT_BFC0:
-    case VARYING_SLOT_BFC1: {
-       /* These built-in varyings are only supported in compatibility mode,
-        * and we only support GS in core profile.  So, this must be a vertex
-        * shader.
-        */
-       assert(stage == MESA_SHADER_VERTEX);
-       vec4_instruction *inst = emit_generic_urb_slot(reg, varying);
-       if (((struct brw_vs_prog_key *) key)->clamp_vertex_color)
-          inst->saturate = true;
-       break;
-    }
     default:
        emit_generic_urb_slot(reg, varying);
        break;
@@@ -3337,7 -3295,7 +3296,7 @@@ vec4_visitor::emit_vertex(
      * may need to unspill a register or load from an array.  Those
      * reads would use MRFs 14-15.
      */
-    int max_usable_mrf = 13;
+    int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
  
     /* The following assertion verifies that max_usable_mrf causes an
      * even-numbered amount of URB write data, which will meet gen6's
        emit_ndc_computation();
     }
  
-    /* Lower legacy ff and ClipVertex clipping to clip distances */
-    if (key->userclip_active && !prog->UsesClipDistanceOut) {
-       current_annotation = "user clip distances";
-       output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
-       output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type);
-       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0);
-       emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
-    }
     /* We may need to split this up into several URB writes, so do them in a
      * loop.
      */
                         prog_data->vue_map.slot_to_varying[slot]);
  
           /* If this was max_usable_mrf, we can't fit anything more into this
-           * URB WRITE.
+           * URB WRITE. Same thing if we reached the maximum length available.
            */
-          if (mrf > max_usable_mrf) {
+          if (mrf > max_usable_mrf ||
+              align_interleaved_urb_mlen(devinfo, mrf - base_mrf + 1) > BRW_MAX_MSG_LENGTH) {
              slot++;
              break;
           }
@@@ -3763,7 -3711,7 +3712,7 @@@ vec4_visitor::resolve_bool_comparison(i
  vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                             void *log_data,
                             struct gl_program *prog,
-                            const struct brw_vue_prog_key *key,
+                            const struct brw_sampler_prog_key_data *key_tex,
                             struct brw_vue_prog_data *prog_data,
                           struct gl_shader_program *shader_prog,
                             gl_shader_stage stage,
                             int shader_time_index)
     : backend_shader(compiler, log_data, mem_ctx,
                      shader_prog, prog, &prog_data->base, stage),
-      key(key),
+      key_tex(key_tex),
       prog_data(prog_data),
       sanity_param_count(0),
       fail_msg(NULL),
index 41266f57560cb660accb37f29da6008ff745be67,35c0908324ffadda1e69e3f0005e08dd2d49e9ce..1faf2eaa346cd9e1ae32d1098bba97260eaa8481
@@@ -26,6 -26,7 +26,7 @@@
  #include "brw_context.h"
  #include "brw_wm.h"
  #include "brw_state.h"
+ #include "brw_shader.h"
  #include "main/enums.h"
  #include "main/formats.h"
  #include "main/fbobject.h"
@@@ -41,7 -42,7 +42,7 @@@
   * Return a bitfield where bit n is set if barycentric interpolation mode n
   * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
   */
 -static unsigned
 +unsigned
  brw_compute_barycentric_interp_modes(struct brw_context *brw,
                                       bool shade_model_flat,
                                       bool persample_shading,
@@@ -164,11 -165,13 +165,13 @@@ brw_codegen_wm_prog(struct brw_context 
     void *mem_ctx = ralloc_context(NULL);
     struct brw_wm_prog_data prog_data;
     const GLuint *program;
-    struct gl_shader *fs = NULL;
+    struct brw_shader *fs = NULL;
     GLuint program_size;
+    bool start_busy = false;
+    double start_time = 0;
  
     if (prog)
-       fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+       fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  
     memset(&prog_data, 0, sizeof(prog_data));
     /* key->alpha_test_func means simulating alpha testing via discards,
        fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
     prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
  
-    prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+    prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests;
  
     /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
     if (!prog)
      */
     int param_count;
     if (fs) {
-       param_count = fs->num_uniform_components +
-                     fs->NumImages * BRW_IMAGE_PARAM_SIZE;
-       prog_data.base.nr_image_params = fs->NumImages;
+       param_count = fs->base.num_uniform_components +
+                     fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+       prog_data.base.nr_image_params = fs->base.NumImages;
     } else {
        param_count = fp->program.Base.Parameters->NumParameters * 4;
     }
                                             key->persample_shading,
                                             &fp->program);
  
+    if (unlikely(brw->perf_debug)) {
+       start_busy = (brw->batch.last_bo &&
+                     drm_intel_bo_busy(brw->batch.last_bo));
+       start_time = get_time();
+    }
     program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data,
                              &fp->program, prog, &program_size);
     if (program == NULL) {
        return false;
     }
  
+    if (unlikely(brw->perf_debug) && fs) {
+       if (fs->compiled_once)
+          brw_wm_debug_recompile(brw, prog, key);
+       fs->compiled_once = true;
+       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+          perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+                     (get_time() - start_time) * 1000);
+       }
+    }
     if (prog_data.base.total_scratch) {
        brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo,
                         prog_data.base.total_scratch * brw->max_wm_threads);
@@@ -642,3 -662,61 +662,61 @@@ brw_upload_wm_prog(struct brw_context *
     }
     brw->wm.base.prog_data = &brw->wm.prog_data->base;
  }
+ bool
+ brw_fs_precompile(struct gl_context *ctx,
+                   struct gl_shader_program *shader_prog,
+                   struct gl_program *prog)
+ {
+    struct brw_context *brw = brw_context(ctx);
+    struct brw_wm_prog_key key;
+    struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+    struct brw_fragment_program *bfp = brw_fragment_program(fp);
+    bool program_uses_dfdy = fp->UsesDFdy;
+    memset(&key, 0, sizeof(key));
+    if (brw->gen < 6) {
+       if (fp->UsesKill)
+          key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+       if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+          key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+       /* Just assume depth testing. */
+       key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+    }
+    if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+                                          BRW_FS_VARYING_INPUT_MASK) > 16)
+       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+    brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+    if (fp->Base.InputsRead & VARYING_BIT_POS) {
+       key.drawable_height = ctx->DrawBuffer->Height;
+    }
+    key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+          BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+    if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+       key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+                           key.nr_color_regions > 1;
+    }
+    key.program_string_id = bfp->id;
+    uint32_t old_prog_offset = brw->wm.base.prog_offset;
+    struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+    bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
+    brw->wm.base.prog_offset = old_prog_offset;
+    brw->wm.prog_data = old_prog_data;
+    return success;
+ }
diff --combined src/mesa/main/mtypes.h
index 4e88494c387ab7bac542571dc7f7611717aa96fa,288d75790a496a93d4b5160e35503b6daf710e4a..cbfb15522f0b4e1aae08825d99f42592e73c2bac
@@@ -944,6 -944,7 +944,7 @@@ typedef enu
   */
  struct gl_sampler_object
  {
+    mtx_t Mutex;
     GLuint Name;
     GLint RefCount;
     GLchar *Label;               /**< GL_KHR_debug */
@@@ -1887,6 -1888,7 +1888,7 @@@ enum gl_frag_depth_layou
   */
  struct gl_program
  {
+    mtx_t Mutex;
     GLuint Id;
     GLint RefCount;
     GLubyte *String;  /**< Null-terminated program text */
@@@ -2292,6 -2294,7 +2294,7 @@@ struct gl_shade
     struct gl_uniform_block *UniformBlocks;
  
     struct exec_list *ir;
+    struct exec_list *packed_varyings;
     struct glsl_symbol_table *symbols;
  
     bool uses_builtin_functions;
@@@ -2453,7 -2456,8 +2456,8 @@@ enum gl_uniform_block_packin
  {
     ubo_packing_std140,
     ubo_packing_shared,
-    ubo_packing_packed
+    ubo_packing_packed,
+    ubo_packing_std430
  };
  
  
@@@ -2473,11 -2477,6 +2477,11 @@@ struct gl_uniform_bloc
      */
     GLuint Binding;
  
 +   /**
 +    * Vulkan descriptor set qualifier for this block.
 +    */
 +   GLuint Set;
 +
     /**
      * Minimum size (in bytes) of a buffer object to back this uniform buffer
      * (GL_UNIFORM_BLOCK_DATA_SIZE).
@@@ -2690,7 -2689,7 +2694,7 @@@ struct gl_shader_progra
      */
     unsigned LastClipDistanceArraySize;
  
-    unsigned NumUniformBlocks;
+    unsigned NumBufferInterfaceBlocks;
     struct gl_uniform_block *UniformBlocks;
  
     /**
@@@ -3663,6 -3662,7 +3667,7 @@@ struct gl_extension
     GLboolean ARB_shader_stencil_export;
     GLboolean ARB_shader_storage_buffer_object;
     GLboolean ARB_shader_subroutine;
+    GLboolean ARB_shader_texture_image_samples;
     GLboolean ARB_shader_texture_lod;
     GLboolean ARB_shading_language_packing;
     GLboolean ARB_shading_language_420pack;
@@@ -4292,6 -4292,7 +4297,7 @@@ struct gl_contex
     struct gl_perf_monitor_state PerfMonitor;
  
     struct gl_buffer_object *DrawIndirectBuffer; /** < GL_ARB_draw_indirect */
+    struct gl_buffer_object *DispatchIndirectBuffer; /** < GL_ARB_compute_shader */
  
     struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
     struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
index b8b082e2a5905041761d5d91bd392f691c81041d,4201a80cf620440e6683f0bb7d518fb2cb3a70b0..1cfcf9119f97c2508219a0fc7508b826a05d1b92
@@@ -543,7 -543,6 +543,7 @@@ type_size(const struct glsl_type *type
     case GLSL_TYPE_VOID:
     case GLSL_TYPE_ERROR:
     case GLSL_TYPE_INTERFACE:
 +   case GLSL_TYPE_FUNCTION:
        assert(!"Invalid type in type_size");
        break;
     }
@@@ -1345,9 -1344,11 +1345,11 @@@ ir_to_mesa_visitor::visit(ir_expressio
     case ir_unop_dFdy_coarse:
     case ir_unop_dFdy_fine:
     case ir_unop_subroutine_to_int:
+    case ir_unop_get_buffer_size:
        assert(!"not supported");
        break;
  
+    case ir_unop_ssbo_unsized_array_length:
     case ir_quadop_vector:
        /* This operation should have already been handled.
         */
@@@ -1920,6 -1921,8 +1922,8 @@@ ir_to_mesa_visitor::visit(ir_texture *i
     case ir_query_levels:
        assert(!"Unexpected ir_query_levels opcode");
        break;
+    case ir_texture_samples:
+       unreachable("Unexpected ir_texture_samples opcode");
     }
  
     const glsl_type *sampler_type = ir->sampler->type;
@@@ -2464,7 -2467,6 +2468,7 @@@ _mesa_associate_uniform_storage(struct 
           case GLSL_TYPE_STRUCT:
           case GLSL_TYPE_ERROR:
           case GLSL_TYPE_INTERFACE:
 +         case GLSL_TYPE_FUNCTION:
            assert(!"Should not get here.");
            break;
         }
@@@ -2981,7 -2983,7 +2985,7 @@@ _mesa_glsl_link_shader(struct gl_contex
        if (!ctx->Driver.LinkShader(ctx, prog)) {
         prog->LinkStatus = GL_FALSE;
        } else {
-          build_program_resource_list(ctx, prog);
+          build_program_resource_list(prog);
        }
     }
  
index dcd5581f9574003cc3e1e1cd23bd6797c35559d2,0000000000000000000000000000000000000000..8acfc068e90ac3f0a4f993c228a38c40b89fa88a
mode 100644,000000..100644
--- /dev/null
@@@ -1,1128 -1,0 +1,1120 @@@
-    key->base.program_string_id = vp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                vp->program.Base.UsesClipDistanceOut);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 + * DEALINGS IN THE SOFTWARE.
 + */
 +
 +#include <sys/stat.h>
 +#include <unistd.h>
 +#include <fcntl.h>
 +
 +#include "anv_private.h"
 +#include "anv_nir.h"
 +
 +#include <brw_context.h>
 +#include <brw_wm.h> /* brw_new_shader_program is here */
 +#include <brw_nir.h>
 +
 +#include <brw_vs.h>
 +#include <brw_gs.h>
 +#include <brw_cs.h>
 +
 +#include <mesa/main/shaderobj.h>
 +#include <mesa/main/fbobject.h>
 +#include <mesa/main/context.h>
 +#include <mesa/program/program.h>
 +#include <glsl/program.h>
 +
 +/* XXX: We need this to keep symbols in nir.h from conflicting with the
 + * generated GEN command packing headers.  We need to fix *both* to not
 + * define something as generic as LOAD.
 + */
 +#undef LOAD
 +
 +#include <glsl/nir/nir_spirv.h>
 +
 +#define SPIR_V_MAGIC_NUMBER 0x07230203
 +
 +static void
 +fail_if(int cond, const char *format, ...)
 +{
 +   va_list args;
 +
 +   if (!cond)
 +      return;
 +
 +   va_start(args, format);
 +   vfprintf(stderr, format, args);
 +   va_end(args);
 +
 +   exit(1);
 +}
 +
 +static VkResult
 +set_binding_table_layout(struct brw_stage_prog_data *prog_data,
 +                         struct anv_pipeline *pipeline, uint32_t stage)
 +{
 +   uint32_t bias, count, k, *map;
 +   struct anv_pipeline_layout *layout = pipeline->layout;
 +
 +   /* Having no layout is valid for shaders that don't bind any resources. */
 +   if (pipeline->layout == NULL)
 +      return VK_SUCCESS;
 +
 +   if (stage == VK_SHADER_STAGE_FRAGMENT)
 +      bias = MAX_RTS;
 +   else
 +      bias = 0;
 +
 +   count = layout->stage[stage].surface_count;
 +   prog_data->map_entries =
 +      (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
 +   if (prog_data->map_entries == NULL)
 +      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 +
 +   k = bias;
 +   map = prog_data->map_entries;
 +   for (uint32_t i = 0; i < layout->num_sets; i++) {
 +      prog_data->bind_map[i].index = map;
 +      for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
 +         *map++ = k++;
 +
 +      prog_data->bind_map[i].index_count =
 +         layout->set[i].layout->stage[stage].surface_count;
 +   }
 +
 +   return VK_SUCCESS;
 +}
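To make the flattening above concrete, here is a minimal standalone sketch with made-up counts (no Vulkan or Mesa dependencies): a bias of 8 render-target slots, as used for the fragment stage, followed by two descriptor sets with 3 and 2 surfaces.

    #include <stdio.h>

    int main(void)
    {
       /* Hypothetical numbers: the binding table reserves `bias` slots for
        * render targets, then packs the surfaces of each set densely.
        */
       const unsigned bias = 8;
       const unsigned surface_count[2] = { 3, 2 };
       unsigned k = bias;

       for (unsigned set = 0; set < 2; set++) {
          printf("set %u ->", set);
          for (unsigned binding = 0; binding < surface_count[set]; binding++)
             printf(" %u", k++);   /* binding table index for (set, binding) */
          printf("\n");
       }
       return 0;                   /* prints: set 0 -> 8 9 10, set 1 -> 11 12 */
    }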
 +
 +static uint32_t
 +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
 +{
 +   struct anv_state state =
 +      anv_state_stream_alloc(&pipeline->program_stream, size, 64);
 +
 +   assert(size < pipeline->program_stream.block_pool->block_size);
 +
 +   memcpy(state.map, data, size);
 +
 +   return state.offset;
 +}
 +
 +static void
 +create_params_array(struct anv_pipeline *pipeline,
 +                    struct gl_shader *shader,
 +                    struct brw_stage_prog_data *prog_data)
 +{
 +   VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage);
 +   unsigned num_params = 0;
 +
 +   if (shader->num_uniform_components) {
 +      /* If the shader uses any push constants at all, we'll just give
 +       * them the maximum possible number
 +       */
 +      num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
 +   }
 +
 +   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
 +      num_params += MAX_DYNAMIC_BUFFERS;
 +
 +   if (num_params == 0)
 +      return;
 +
 +   prog_data->param = (const gl_constant_value **)
 +      anv_device_alloc(pipeline->device,
 +                       num_params * sizeof(gl_constant_value *),
 +                       8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
 +
 +   /* We now set the param values to be offsets into an
 +    * anv_push_constants structure.  Since the compiler doesn't
 +    * actually dereference any of the gl_constant_value pointers in the
 +    * params array, it doesn't really matter what we put here.
 +    */
 +   struct anv_push_constants *null_data = NULL;
 +   for (unsigned i = 0; i < num_params; i++)
 +      prog_data->param[i] =
 +         (const gl_constant_value *)&null_data->client_data[i * sizeof(float)];
 +}
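A minimal sketch of the consumer side of the null-pointer trick described above, assuming the client data has already been copied into a real push constant buffer; the helper name and exact layout are illustrative, not taken from this patch.

    #include <stdint.h>

    /* Because param[i] was set to &null_data->client_data[i * sizeof(float)]
     * with null_data == NULL, the stored pointer value is just the byte
     * offset of that constant within struct anv_push_constants.
     */
    static float
    fetch_push_constant(const void *push_constant_buffer, const void *param_entry)
    {
       uintptr_t offset = (uintptr_t)param_entry;
       return *(const float *)((const char *)push_constant_buffer + offset);
    }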
 +
 +static void
 +brw_vs_populate_key(struct brw_context *brw,
 +                    struct brw_vertex_program *vp,
 +                    struct brw_vs_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   /* BRW_NEW_VERTEX_PROGRAM */
 +   struct gl_program *prog = (struct gl_program *) vp;
 +
 +   memset(key, 0, sizeof(*key));
 +
 +   /* Just upload the program verbatim for now.  Always send it all
 +    * the inputs it asks for, whether they are varying or not.
 +    */
-                                       &key->base.tex);
++   key->program_string_id = vp->id;
 +
 +   /* _NEW_POLYGON */
 +   if (brw->gen < 6) {
 +      key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
 +                           ctx->Polygon.BackMode != GL_FILL);
 +   }
 +
 +   if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
 +                               VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
 +      /* _NEW_LIGHT | _NEW_BUFFERS */
 +      key->clamp_vertex_color = ctx->Light._ClampVertexColor;
 +   }
 +
 +   /* _NEW_POINT */
 +   if (brw->gen < 6 && ctx->Point.PointSprite) {
 +      for (int i = 0; i < 8; i++) {
 +         if (ctx->Point.CoordReplace[i])
 +            key->point_coord_replace |= (1 << i);
 +      }
 +   }
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
-    if (key->base.userclip_active) {
++                                      &key->tex);
 +}
 +
 +static bool
 +really_do_vs_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_vertex_program *vp,
 +                  struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   GLuint program_size;
 +   const GLuint *program;
 +   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
 +   void *mem_ctx;
 +   struct gl_shader *vs = NULL;
 +
 +   if (prog)
 +      vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   mem_ctx = ralloc_context(NULL);
 +
 +   create_params_array(pipeline, vs, &prog_data->base.base);
 +   anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir,
 +                                 &prog_data->base.base);
 +
 +   GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
 +   prog_data->inputs_read = vp->program.Base.InputsRead;
 +
 +   if (key->copy_edgeflag) {
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
 +      prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
 +   }
 +
 +   if (brw->gen < 6) {
 +      /* Put dummy slots into the VUE for the SF to put the replaced
 +       * point sprite coords in.  We shouldn't need these dummy slots,
 +       * which take up precious URB space, but it would mean that the SF
 +       * doesn't get nice aligned pairs of input coords into output
 +       * coords, which would be a pain to handle.
 +       */
 +      for (int i = 0; i < 8; i++) {
 +         if (key->point_coord_replace & (1 << i))
 +            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
 +      }
 +
 +      /* if back colors are written, allocate slots for front colors too */
 +      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
 +         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
 +      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
 +         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
 +   }
 +
 +   /* In order for legacy clipping to work, we need to populate the clip
 +    * distance varying slots whenever clipping is enabled, even if the vertex
 +    * shader doesn't write to gl_ClipDistance.
 +    */
-                        &prog_data->base.vue_map, outputs_written);
- \
++   if (key->nr_userclip_plane_consts) {
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
 +      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
 +   }
 +
 +   brw_compute_vue_map(brw->intelScreen->devinfo,
-    key->base.program_string_id = gp->id;
-    brw_setup_vue_key_clip_info(brw, &key->base,
-                                gp->program.Base.UsesClipDistanceOut);
++                       &prog_data->base.vue_map, outputs_written,
++                       prog ? prog->SeparateShader : false);
++
 +   set_binding_table_layout(&prog_data->base.base, pipeline,
 +                            VK_SHADER_STAGE_VERTEX);
 +
 +   /* Emit GEN4 code.
 +    */
 +   program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
 +                         prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   const uint32_t offset = upload_kernel(pipeline, program, program_size);
 +   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
 +      pipeline->vs_simd8 = offset;
 +      pipeline->vs_vec4 = NO_KERNEL;
 +   } else {
 +      pipeline->vs_simd8 = NO_KERNEL;
 +      pipeline->vs_vec4 = offset;
 +   }
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
 +
 +void brw_wm_populate_key(struct brw_context *brw,
 +                         struct brw_fragment_program *fp,
 +                         struct brw_wm_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   struct gl_program *prog = (struct gl_program *) brw->fragment_program;
 +   GLuint lookup = 0;
 +   GLuint line_aa;
 +   bool program_uses_dfdy = fp->program.UsesDFdy;
 +   struct gl_framebuffer draw_buffer;
 +   bool multisample_fbo;
 +
 +   memset(key, 0, sizeof(*key));
 +
 +   for (int i = 0; i < MAX_SAMPLERS; i++) {
 +      /* Assume color sampler, no swizzling. */
 +      key->tex.swizzles[i] = SWIZZLE_XYZW;
 +   }
 +
 +   /* A non-zero framebuffer name indicates that the framebuffer was created by
 +    * the user rather than the window system. */
 +   draw_buffer.Name = 1;
 +   draw_buffer.Visual.samples = 1;
 +   draw_buffer._NumColorDrawBuffers = 1;
 +   draw_buffer.Width = 400;
 +   draw_buffer.Height = 400;
 +   ctx->DrawBuffer = &draw_buffer;
 +
 +   multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
 +
 +   /* Build the index for table lookup
 +    */
 +   if (brw->gen < 6) {
 +      /* _NEW_COLOR */
 +      if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
 +         lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 +
 +      if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
 +         lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 +
 +      /* _NEW_DEPTH */
 +      if (ctx->Depth.Test)
 +         lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 +
 +      if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
 +         lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 +
 +      /* _NEW_STENCIL | _NEW_BUFFERS */
 +      if (ctx->Stencil._Enabled) {
 +         lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 +
 +         if (ctx->Stencil.WriteMask[0] ||
 +             ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
 +            lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
 +      }
 +      key->iz_lookup = lookup;
 +   }
 +
 +   line_aa = AA_NEVER;
 +
 +   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
 +   if (ctx->Line.SmoothFlag) {
 +      if (brw->reduced_primitive == GL_LINES) {
 +         line_aa = AA_ALWAYS;
 +      }
 +      else if (brw->reduced_primitive == GL_TRIANGLES) {
 +         if (ctx->Polygon.FrontMode == GL_LINE) {
 +            line_aa = AA_SOMETIMES;
 +
 +            if (ctx->Polygon.BackMode == GL_LINE ||
 +                (ctx->Polygon.CullFlag &&
 +                 ctx->Polygon.CullFaceMode == GL_BACK))
 +               line_aa = AA_ALWAYS;
 +         }
 +         else if (ctx->Polygon.BackMode == GL_LINE) {
 +            line_aa = AA_SOMETIMES;
 +
 +            if ((ctx->Polygon.CullFlag &&
 +                 ctx->Polygon.CullFaceMode == GL_FRONT))
 +               line_aa = AA_ALWAYS;
 +         }
 +      }
 +   }
 +
 +   key->line_aa = line_aa;
 +
 +   /* _NEW_HINT */
 +   key->high_quality_derivatives =
 +      ctx->Hint.FragmentShaderDerivative == GL_NICEST;
 +
 +   if (brw->gen < 6)
 +      key->stats_wm = brw->stats_wm;
 +
 +   /* _NEW_LIGHT */
 +   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 +
 +   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
 +   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
 +                                      &key->tex);
 +
 +   /* _NEW_BUFFERS */
 +   /*
 +    * Include the draw buffer origin and height so that we can calculate
 +    * fragment position values relative to the bottom left of the drawable,
 +    * from the incoming screen origin relative position we get as part of our
 +    * payload.
 +    *
 +    * This is only needed for the WM_WPOSXY opcode when the fragment program
 +    * uses the gl_FragCoord input.
 +    *
 +    * We could avoid recompiling by including this as a constant referenced by
 +    * our program, but if we were to do that it would also be nice to handle
 +    * getting that constant updated at batchbuffer submit time (when we
 +    * hold the lock and know where the buffer really is) rather than at emit
 +    * time when we don't hold the lock and are just guessing.  We could also
 +    * just avoid using this as key data if the program doesn't use
 +    * fragment.position.
 +    *
 +    * For DRI2 the origin_x/y will always be (0,0) but we still need the
 +    * drawable height in order to invert the Y axis.
 +    */
 +   if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
 +      key->drawable_height = ctx->DrawBuffer->Height;
 +   }
 +
 +   if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
 +      key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 +   }
 +
 +   /* _NEW_BUFFERS */
 +   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
 +
 +   /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
 +   key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
 +      (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
 +
 +   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
 +   /* Ignore sample qualifier while computing this flag. */
 +   key->persample_shading =
 +      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
 +   if (key->persample_shading)
 +      key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
 +
 +   key->compute_pos_offset =
 +      _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
 +      fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
 +
 +   key->compute_sample_id =
 +      multisample_fbo &&
 +      ctx->Multisample.Enabled &&
 +      (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
 +
 +   /* BRW_NEW_VUE_MAP_GEOM_OUT */
 +   if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
 +                                         BRW_FS_VARYING_INPUT_MASK) > 16)
 +      key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 +
 +   /* _NEW_COLOR | _NEW_BUFFERS */
 +   /* Pre-gen6, the hardware alpha test always used each render
 +    * target's alpha to do alpha test, as opposed to render target 0's alpha
 +    * like GL requires.  Fix that by building the alpha test into the
 +    * shader, and we'll skip enabling the fixed function alpha test.
 +    */
 +   if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
 +       ctx->Color.AlphaEnabled) {
 +      key->alpha_test_func = ctx->Color.AlphaFunc;
 +      key->alpha_test_ref = ctx->Color.AlphaRef;
 +   }
 +
 +   /* The unique fragment program ID */
 +   key->program_string_id = fp->id;
 +
 +   ctx->DrawBuffer = NULL;
 +}
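
The long comment above reduces to one flip of the fragment Y coordinate when drawing to a window-system drawable. A minimal standalone sketch of that flip, assuming a top-left screen origin; the function and its names are illustrative, not the driver's actual WM_WPOSXY lowering:

#include <stdio.h>

/* Convert a screen-origin (top-left) fragment Y to GL's bottom-left
 * origin.  User FBOs already use a bottom-left origin, so no flip. */
static float
flip_frag_y(float screen_y, float drawable_height, int render_to_fbo)
{
   if (render_to_fbo)
      return screen_y;
   return drawable_height - screen_y;
}

int
main(void)
{
   printf("%.1f\n", flip_frag_y(10.0f, 480.0f, 0));   /* 470.0 */
   printf("%.1f\n", flip_frag_y(10.0f, 480.0f, 1));   /* 10.0  */
   return 0;
}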
 +
 +static uint8_t
 +computed_depth_mode(struct gl_fragment_program *fp)
 +{
 +   if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 +      switch (fp->FragDepthLayout) {
 +      case FRAG_DEPTH_LAYOUT_NONE:
 +      case FRAG_DEPTH_LAYOUT_ANY:
 +         return BRW_PSCDEPTH_ON;
 +      case FRAG_DEPTH_LAYOUT_GREATER:
 +         return BRW_PSCDEPTH_ON_GE;
 +      case FRAG_DEPTH_LAYOUT_LESS:
 +         return BRW_PSCDEPTH_ON_LE;
 +      case FRAG_DEPTH_LAYOUT_UNCHANGED:
 +         return BRW_PSCDEPTH_OFF;
 +      }
 +   }
 +   return BRW_PSCDEPTH_OFF;
 +}
 +
 +static bool
 +really_do_wm_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_fragment_program *fp,
 +                  struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   void *mem_ctx = ralloc_context(NULL);
 +   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
 +   struct gl_shader *fs = NULL;
 +   unsigned int program_size;
 +   const uint32_t *program;
 +
 +   if (prog)
 +      fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   /* key->alpha_test_func means simulating alpha testing via discards,
 +    * so the shader definitely kills pixels.
 +    */
 +   prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
 +
 +   prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
 +
 +   create_params_array(pipeline, fs, &prog_data->base);
 +   anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base);
 +
 +   prog_data->barycentric_interp_modes =
 +      brw_compute_barycentric_interp_modes(brw, key->flat_shade,
 +                                           key->persample_shading,
 +                                           &fp->program);
 +
 +   set_binding_table_layout(&prog_data->base, pipeline,
 +                            VK_SHADER_STAGE_FRAGMENT);
 +   /* This needs to come after shader time and pull constant entries, but we
 +    * don't have those set up now, so just put it after the layout entries.
 +    */
 +   prog_data->binding_table.render_target_start = 0;
 +
 +   program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
 +                            &fp->program, prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   uint32_t offset = upload_kernel(pipeline, program, program_size);
 +
 +   if (prog_data->no_8)
 +      pipeline->ps_simd8 = NO_KERNEL;
 +   else
 +      pipeline->ps_simd8 = offset;
 +
 +   if (prog_data->no_8 || prog_data->prog_offset_16) {
 +      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
 +   } else {
 +      pipeline->ps_simd16 = NO_KERNEL;
 +   }
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
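
The SIMD8/SIMD16 bookkeeping at the end of really_do_wm_prog can be read as follows: the compiled blob starts with the SIMD8 program unless no_8 is set, and the SIMD16 program, if present, begins prog_offset_16 bytes in. This is a hedged restatement with invented names and an illustrative NO_KERNEL value, not driver code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NO_KERNEL (~0u)   /* sentinel; value chosen for illustration */

struct ps_kernels {
   uint32_t simd8;
   uint32_t simd16;
};

static struct ps_kernels
pick_ps_kernels(uint32_t base, bool no_8, uint32_t prog_offset_16)
{
   struct ps_kernels k;
   k.simd8  = no_8 ? NO_KERNEL : base;
   k.simd16 = (no_8 || prog_offset_16) ? base + prog_offset_16 : NO_KERNEL;
   return k;
}

int
main(void)
{
   struct ps_kernels k = pick_ps_kernels(4096, false, 512);
   printf("simd8 at %u, simd16 at %u\n", k.simd8, k.simd16);
   return 0;
}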
 +
 +static void
 +brw_gs_populate_key(struct brw_context *brw,
 +                    struct anv_pipeline *pipeline,
 +                    struct brw_geometry_program *gp,
 +                    struct brw_gs_prog_key *key)
 +{
 +   struct gl_context *ctx = &brw->ctx;
 +   struct brw_stage_state *stage_state = &brw->gs.base;
 +   struct gl_program *prog = &gp->program.Base;
 +
 +   memset(key, 0, sizeof(*key));
 +
-                                       &key->base.tex);
-    struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
-    /* BRW_NEW_VUE_MAP_VS */
-    key->input_varyings = prog_data->base.vue_map.slots_valid;
++   key->program_string_id = gp->id;
 +
 +   /* _NEW_TEXTURE */
 +   brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
++                                      &key->tex);
 +}
 +
 +static bool
 +really_do_gs_prog(struct brw_context *brw,
 +                  struct gl_shader_program *prog,
 +                  struct brw_geometry_program *gp,
 +                  struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   struct brw_gs_compile_output output;
 +
 +   /* FIXME: We pass the bind map to the compile in the output struct. Need
 +    * something better. */
 +   set_binding_table_layout(&output.prog_data.base.base,
 +                            pipeline, VK_SHADER_STAGE_GEOMETRY);
 +
 +   brw_compile_gs_prog(brw, prog, gp, key, &output);
 +
 +   pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
 +   pipeline->gs_vertex_count = gp->program.VerticesIn;
 +
 +   ralloc_free(output.mem_ctx);
 +
 +   return true;
 +}
 +
 +static bool
 +brw_codegen_cs_prog(struct brw_context *brw,
 +                    struct gl_shader_program *prog,
 +                    struct brw_compute_program *cp,
 +                    struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
 +{
 +   const GLuint *program;
 +   void *mem_ctx = ralloc_context(NULL);
 +   GLuint program_size;
 +   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 +
 +   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
 +   assert(cs);
 +
 +   memset(prog_data, 0, sizeof(*prog_data));
 +
 +   set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
 +
 +   create_params_array(pipeline, cs, &prog_data->base);
 +   anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base);
 +
 +   program = brw_cs_emit(brw, mem_ctx, key, prog_data,
 +                         &cp->program, prog, &program_size);
 +   if (program == NULL) {
 +      ralloc_free(mem_ctx);
 +      return false;
 +   }
 +
 +   if (unlikely(INTEL_DEBUG & DEBUG_CS))
 +      fprintf(stderr, "\n");
 +
 +   pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
 +
 +   ralloc_free(mem_ctx);
 +
 +   return true;
 +}
 +
 +static void
 +brw_cs_populate_key(struct brw_context *brw,
 +                    struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
 +{
 +   memset(key, 0, sizeof(*key));
 +
 +   /* The unique compute program ID */
 +   key->program_string_id = bcp->id;
 +}
 +
 +struct anv_compiler {
 +   struct anv_device *device;
 +   struct intel_screen *screen;
 +   struct brw_context *brw;
 +   struct gl_pipeline_object pipeline;
 +};
 +
 +extern "C" {
 +
 +struct anv_compiler *
 +anv_compiler_create(struct anv_device *device)
 +{
 +   const struct brw_device_info *devinfo = &device->info;
 +   struct anv_compiler *compiler;
 +   struct gl_context *ctx;
 +
 +   compiler = rzalloc(NULL, struct anv_compiler);
 +   if (compiler == NULL)
 +      return NULL;
 +
 +   compiler->screen = rzalloc(compiler, struct intel_screen);
 +   if (compiler->screen == NULL)
 +      goto fail;
 +
 +   compiler->brw = rzalloc(compiler, struct brw_context);
 +   if (compiler->brw == NULL)
 +      goto fail;
 +
 +   compiler->device = device;
 +
 +   compiler->brw->gen = devinfo->gen;
 +   compiler->brw->is_g4x = devinfo->is_g4x;
 +   compiler->brw->is_baytrail = devinfo->is_baytrail;
 +   compiler->brw->is_haswell = devinfo->is_haswell;
 +   compiler->brw->is_cherryview = devinfo->is_cherryview;
 +
 +   /* We need this at least for CS, which will check brw->max_cs_threads
 +    * against the work group size. */
 +   compiler->brw->max_vs_threads = devinfo->max_vs_threads;
 +   compiler->brw->max_hs_threads = devinfo->max_hs_threads;
 +   compiler->brw->max_ds_threads = devinfo->max_ds_threads;
 +   compiler->brw->max_gs_threads = devinfo->max_gs_threads;
 +   compiler->brw->max_wm_threads = devinfo->max_wm_threads;
 +   compiler->brw->max_cs_threads = devinfo->max_cs_threads;
 +   compiler->brw->urb.size = devinfo->urb.size;
 +   compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
 +   compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
 +   compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
 +   compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
 +   compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 +
 +   compiler->brw->intelScreen = compiler->screen;
 +   compiler->screen->devinfo = &device->info;
 +
 +   brw_process_intel_debug_variable(compiler->screen);
 +
 +   compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
 +
 +   ctx = &compiler->brw->ctx;
 +   _mesa_init_shader_object_functions(&ctx->Driver);
 +
 +   /* brw_select_clip_planes() needs this for bogus reasons. */
 +   ctx->_Shader = &compiler->pipeline;
 +
 +   return compiler;
 +
 + fail:
 +   ralloc_free(compiler);
 +   return NULL;
 +}
 +
 +void
 +anv_compiler_destroy(struct anv_compiler *compiler)
 +{
 +   _mesa_free_errors_data(&compiler->brw->ctx);
 +   ralloc_free(compiler);
 +}
 +
 +/* From gen7_urb.c */
 +
 +/* FIXME: Add to struct intel_device_info */
 +
 +static const int gen8_push_size = 32 * 1024;
 +
 +static void
 +gen7_compute_urb_partition(struct anv_pipeline *pipeline)
 +{
 +   const struct brw_device_info *devinfo = &pipeline->device->info;
 +   bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
 +   unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
 +   unsigned vs_entry_size_bytes = vs_size * 64;
 +   bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
 +   unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
 +   unsigned gs_entry_size_bytes = gs_size * 64;
 +
 +   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
 +    *
 +    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
 +    *     Allocation Size is less than 9 512-bit URB entries.
 +    *
 +    * Similar text exists for GS.
 +    */
 +   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
 +   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
 +
 +   /* URB allocations must be done in 8k chunks. */
 +   unsigned chunk_size_bytes = 8192;
 +
 +   /* Determine the size of the URB in chunks. */
 +   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
 +
 +   /* Reserve space for push constants */
 +   unsigned push_constant_bytes = gen8_push_size;
 +   unsigned push_constant_chunks =
 +      push_constant_bytes / chunk_size_bytes;
 +
 +   /* Initially, assign each stage the minimum amount of URB space it needs,
 +    * and make a note of how much additional space it "wants" (the amount of
 +    * additional space it could actually make use of).
 +    */
 +
 +   /* VS has a lower limit on the number of URB entries */
 +   unsigned vs_chunks =
 +      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
 +            chunk_size_bytes) / chunk_size_bytes;
 +   unsigned vs_wants =
 +      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
 +            chunk_size_bytes) / chunk_size_bytes - vs_chunks;
 +
 +   unsigned gs_chunks = 0;
 +   unsigned gs_wants = 0;
 +   if (gs_present) {
 +      /* There are two constraints on the minimum amount of URB space we can
 +       * allocate:
 +       *
 +       * (1) We need room for at least 2 URB entries, since we always operate
 +       * the GS in DUAL_OBJECT mode.
 +       *
 +       * (2) We can't allocate fewer entries than gs_granularity.
 +       */
 +      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
 +                        chunk_size_bytes) / chunk_size_bytes;
 +      gs_wants =
 +         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
 +               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
 +   }
 +
 +   /* There should always be enough URB space to satisfy the minimum
 +    * requirements of each stage.
 +    */
 +   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
 +   assert(total_needs <= urb_chunks);
 +
 +   /* Mete out remaining space (if any) in proportion to "wants". */
 +   unsigned total_wants = vs_wants + gs_wants;
 +   unsigned remaining_space = urb_chunks - total_needs;
 +   if (remaining_space > total_wants)
 +      remaining_space = total_wants;
 +   if (remaining_space > 0) {
 +      unsigned vs_additional = (unsigned)
 +         round(vs_wants * (((double) remaining_space) / total_wants));
 +      vs_chunks += vs_additional;
 +      remaining_space -= vs_additional;
 +      gs_chunks += remaining_space;
 +   }
 +
 +   /* Sanity check that we haven't over-allocated. */
 +   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
 +
 +   /* Finally, compute the number of entries that can fit in the space
 +    * allocated to each stage.
 +    */
 +   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
 +   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
 +
 +   /* Since we rounded up when computing *_wants, this may be slightly more
 +    * than the maximum allowed amount, so correct for that.
 +    */
 +   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
 +   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
 +
 +   /* Ensure that we program a multiple of the granularity. */
 +   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
 +   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
 +
 +   /* Finally, sanity check to make sure we have at least the minimum number
 +    * of entries needed for each stage.
 +    */
 +   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
 +   if (gs_present)
 +      assert(nr_gs_entries >= 2);
 +
 +   /* Lay out the URB in the following order:
 +    * - push constants
 +    * - VS
 +    * - GS
 +    */
 +   pipeline->urb.vs_start = push_constant_chunks;
 +   pipeline->urb.vs_size = vs_size;
 +   pipeline->urb.nr_vs_entries = nr_vs_entries;
 +
 +   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
 +   pipeline->urb.gs_size = gs_size;
 +   pipeline->urb.nr_gs_entries = nr_gs_entries;
 +}
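
A worked example of the partitioning above, using made-up numbers rather than any particular gen's limits: 128 KB of URB, 8 KB chunks, 32 KB reserved for push constants, VS entry size 2 (128 bytes), 64 to 640 VS entries, and no GS.

#include <stdio.h>

int
main(void)
{
   unsigned chunk = 8192;
   unsigned urb_chunks     = 128 * 1024 / chunk;                      /* 16 */
   unsigned push_chunks    =  32 * 1024 / chunk;                      /*  4 */
   unsigned vs_entry_bytes = 2 * 64;                                  /* 128 */
   unsigned vs_chunks = (64 * vs_entry_bytes + chunk - 1) / chunk;    /*  1 */
   unsigned vs_wants  = (640 * vs_entry_bytes + chunk - 1) / chunk
                        - vs_chunks;                                  /*  9 */

   /* With no GS, any leftover space (capped at vs_wants) goes to the VS. */
   unsigned remaining = urb_chunks - push_chunks - vs_chunks;         /* 11 */
   if (remaining > vs_wants)
      remaining = vs_wants;
   vs_chunks += remaining;                                            /* 10 */

   printf("nr_vs_entries = %u\n", vs_chunks * chunk / vs_entry_bytes); /* 640 */
   return 0;
}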
 +
 +static const struct {
 +   uint32_t token;
 +   gl_shader_stage stage;
 +   const char *name;
 +} stage_info[] = {
 +   { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
 +   { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1, "tess control" },
 +   { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
 +   { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
 +   { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
 +   { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
 +};
 +
 +struct spirv_header {
 +   uint32_t magic;
 +   uint32_t version;
 +   uint32_t gen_magic;
 +};
 +
 +static void
 +setup_nir_io(struct gl_shader *mesa_shader,
 +             nir_shader *shader)
 +{
 +   struct gl_program *prog = mesa_shader->Program;
 +   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 +      prog->InputsRead |= BITFIELD64_BIT(var->data.location);
 +      if (shader->stage == MESA_SHADER_FRAGMENT) {
 +         struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog;
 +
 +         fprog->InterpQualifier[var->data.location] =
 +            (glsl_interp_qualifier)var->data.interpolation;
 +         if (var->data.centroid)
 +            fprog->IsCentroid |= BITFIELD64_BIT(var->data.location);
 +         if (var->data.sample)
 +            fprog->IsSample |= BITFIELD64_BIT(var->data.location);
 +      }
 +   }
 +
 +   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
 +      prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
 +   }
 +
 +   mesa_shader->num_uniform_components = shader->num_uniforms;
 +}
 +
 +static void
 +anv_compile_shader_spirv(struct anv_compiler *compiler,
 +                         struct gl_shader_program *program,
 +                         struct anv_pipeline *pipeline, uint32_t stage)
 +{
 +   struct brw_context *brw = compiler->brw;
 +   struct anv_shader *shader = pipeline->shaders[stage];
 +   struct gl_shader *mesa_shader;
 +   int name = 0;
 +   uint32_t *spirv;
 +
 +   mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
 +   fail_if(mesa_shader == NULL,
 +           "failed to create %s shader\n", stage_info[stage].name);
 +
 +#define CREATE_PROGRAM(stage) \
 +   _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0)
 +
 +   bool is_scalar;
 +   struct gl_program *prog;
 +   switch (stage) {
 +   case VK_SHADER_STAGE_VERTEX:
 +      prog = CREATE_PROGRAM(vertex);
 +      is_scalar = compiler->screen->compiler->scalar_vs;
 +      break;
 +   case VK_SHADER_STAGE_GEOMETRY:
 +      prog = CREATE_PROGRAM(geometry);
 +      is_scalar = false;
 +      break;
 +   case VK_SHADER_STAGE_FRAGMENT:
 +      prog = CREATE_PROGRAM(fragment);
 +      is_scalar = true;
 +      break;
 +   case VK_SHADER_STAGE_COMPUTE:
 +      prog = CREATE_PROGRAM(compute);
 +      is_scalar = true;
 +      break;
 +   default:
 +      unreachable("Unsupported shader stage");
 +   }
 +   _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog);
 +
 +   mesa_shader->Program->Parameters =
 +      rzalloc(mesa_shader, struct gl_program_parameter_list);
 +
 +   mesa_shader->Type = stage_info[stage].token;
 +   mesa_shader->Stage = stage_info[stage].stage;
 +
 +   struct gl_shader_compiler_options *glsl_options =
 +      &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
 +
 +   spirv = (uint32_t *) shader->module->data;
 +   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
 +   assert(shader->module->size % 4 == 0);
 +
 +   mesa_shader->Program->nir =
 +      spirv_to_nir(spirv, shader->module->size / 4,
 +                   stage_info[stage].stage, glsl_options->NirOptions);
 +   fail_if(mesa_shader->Program->nir == NULL,
 +           "failed to translate SPIR-V to NIR\n");
 +
 +   nir_validate_shader(mesa_shader->Program->nir);
 +
 +   brw_process_nir(mesa_shader->Program->nir,
 +                   compiler->screen->devinfo,
 +                   NULL, mesa_shader->Stage, is_scalar);
 +
 +   setup_nir_io(mesa_shader, mesa_shader->Program->nir);
 +
 +   _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders],
 +                          mesa_shader);
 +   program->NumShaders++;
 +}
 +
 +static void
 +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
 +                   struct brw_stage_prog_data *prog_data)
 +{
 +   struct brw_device_info *devinfo = &pipeline->device->info;
 +   uint32_t max_threads[] = {
 +      [VK_SHADER_STAGE_VERTEX]                  = devinfo->max_vs_threads,
 +      [VK_SHADER_STAGE_TESS_CONTROL]            = 0,
 +      [VK_SHADER_STAGE_TESS_EVALUATION]         = 0,
 +      [VK_SHADER_STAGE_GEOMETRY]                = devinfo->max_gs_threads,
 +      [VK_SHADER_STAGE_FRAGMENT]                = devinfo->max_wm_threads,
 +      [VK_SHADER_STAGE_COMPUTE]                 = devinfo->max_cs_threads,
 +   };
 +
 +   pipeline->prog_data[stage] = prog_data;
 +   pipeline->active_stages |= 1 << stage;
 +   pipeline->scratch_start[stage] = pipeline->total_scratch;
 +   pipeline->total_scratch =
 +      align_u32(pipeline->total_scratch, 1024) +
 +      prog_data->total_scratch * max_threads[stage];
 +}
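
Put plainly, each stage records the current running total as its scratch start, and the pool then grows by the stage's per-thread scratch times its maximum thread count, with 1 KB alignment between slices. A small self-contained illustration with made-up sizes and thread counts; align_1k() stands in for align_u32():

#include <stdio.h>

static unsigned
align_1k(unsigned v)
{
   return (v + 1023) & ~1023u;
}

int
main(void)
{
   unsigned total = 0;

   /* e.g. VS: 2 KB of scratch per thread, 280 threads */
   unsigned vs_start = total;
   total = align_1k(total) + 2048 * 280;

   /* e.g. FS: 1 KB of scratch per thread, 408 threads */
   unsigned fs_start = total;
   total = align_1k(total) + 1024 * 408;

   printf("vs at %u, fs at %u, %u bytes total\n", vs_start, fs_start, total);
   return 0;
}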
 +
 +int
 +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
 +{
 +   struct gl_shader_program *program;
 +   int name = 0;
 +   struct brw_context *brw = compiler->brw;
 +
 +   pipeline->writes_point_size = false;
 +
 +   /* When we free the pipeline, we detect stages based on the NULL status
 +    * of various prog_data pointers.  Make them NULL by default.
 +    */
 +   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
 +   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
 +
 +   brw->use_rep_send = pipeline->use_repclear;
 +   brw->no_simd8 = pipeline->use_repclear;
 +
 +   program = brw->ctx.Driver.NewShaderProgram(name);
 +   program->Shaders = (struct gl_shader **)
 +      calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
 +   fail_if(program == NULL || program->Shaders == NULL,
 +           "failed to create program\n");
 +
 +   for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
 +      if (pipeline->shaders[i])
 +         anv_compile_shader_spirv(compiler, program, pipeline, i);
 +   }
 +
 +   for (unsigned i = 0; i < program->NumShaders; i++) {
 +      struct gl_shader *shader = program->Shaders[i];
 +      program->_LinkedShaders[shader->Stage] = shader;
 +   }
 +
 +   bool success;
 +   pipeline->active_stages = 0;
 +   pipeline->total_scratch = 0;
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
 +      struct brw_vs_prog_key vs_key;
 +      struct gl_vertex_program *vp = (struct gl_vertex_program *)
 +         program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
 +      struct brw_vertex_program *bvp = brw_vertex_program(vp);
 +
 +      brw_vs_populate_key(brw, bvp, &vs_key);
 +
 +      success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
 +      fail_if(!success, "do_wm_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
 +                         &pipeline->vs_prog_data.base.base);
 +
 +      if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
 +         pipeline->writes_point_size = true;
 +   } else {
 +      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
 +      pipeline->vs_simd8 = NO_KERNEL;
 +      pipeline->vs_vec4 = NO_KERNEL;
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
 +      struct brw_gs_prog_key gs_key;
 +      struct gl_geometry_program *gp = (struct gl_geometry_program *)
 +         program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
 +      struct brw_geometry_program *bgp = brw_geometry_program(gp);
 +
 +      brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
 +
 +      success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
 +      fail_if(!success, "do_gs_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
 +                         &pipeline->gs_prog_data.base.base);
 +
 +      if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ)
 +         pipeline->writes_point_size = true;
 +   } else {
 +      pipeline->gs_vec4 = NO_KERNEL;
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
 +      struct brw_wm_prog_key wm_key;
 +      struct gl_fragment_program *fp = (struct gl_fragment_program *)
 +         program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
 +      struct brw_fragment_program *bfp = brw_fragment_program(fp);
 +
 +      brw_wm_populate_key(brw, bfp, &wm_key);
 +
 +      success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
 +      fail_if(!success, "do_wm_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
 +                         &pipeline->wm_prog_data.base);
 +   }
 +
 +   if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
 +      struct brw_cs_prog_key cs_key;
 +      struct gl_compute_program *cp = (struct gl_compute_program *)
 +         program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
 +      struct brw_compute_program *bcp = brw_compute_program(cp);
 +
 +      brw_cs_populate_key(brw, bcp, &cs_key);
 +
 +      success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
 +      fail_if(!success, "brw_codegen_cs_prog failed\n");
 +      add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
 +                         &pipeline->cs_prog_data.base);
 +   }
 +
 +   brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
 +
 +   struct anv_device *device = compiler->device;
 +   while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
 +      anv_block_pool_alloc(&device->scratch_block_pool);
 +
 +   gen7_compute_urb_partition(pipeline);
 +
 +   return 0;
 +}
 +
 +/* This badly named function frees the struct anv_pipeline data that the compiler
 + * allocates.  Currently just the prog_data structs.
 + */
 +void
 +anv_compiler_free(struct anv_pipeline *pipeline)
 +{
 +   for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
 +      if (pipeline->prog_data[stage]) {
 +         free(pipeline->prog_data[stage]->map_entries);
 +         /* We only ever set up the params array because we don't do
 +          * non-UBO pull constants
 +          */
 +         anv_device_free(pipeline->device, pipeline->prog_data[stage]->param);
 +      }
 +   }
 +}
 +
 +}
diff --combined src/vulkan/anv_nir_apply_dynamic_offsets.c
index 367c4f82d64e504f1df7a03a2000d0215b942fb3,0000000000000000000000000000000000000000..1f6c64a9e02507ef115c177f56f7af8d4167be19
mode 100644,000000..100644
--- /dev/null
@@@ -1,150 -1,0 +1,149 @@@
-                                nir_src_for_ssa(&new_load->dest.ssa),
-                                state->shader);
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + */
 +
 +#include "anv_nir.h"
 +#include "glsl/nir/nir_builder.h"
 +
 +struct apply_dynamic_offsets_state {
 +   nir_shader *shader;
 +   nir_builder builder;
 +
 +   VkShaderStage stage;
 +   struct anv_pipeline_layout *layout;
 +
 +   uint32_t indices_start;
 +};
 +
 +static bool
 +apply_dynamic_offsets_block(nir_block *block, void *void_state)
 +{
 +   struct apply_dynamic_offsets_state *state = void_state;
 +   struct anv_descriptor_set_layout *set_layout;
 +   const struct anv_descriptor_slot *slot;
 +
 +   nir_foreach_instr_safe(block, instr) {
 +      if (instr->type != nir_instr_type_intrinsic)
 +         continue;
 +
 +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 +
 +      bool has_indirect = false;
 +      uint32_t set, binding;
 +      switch (intrin->intrinsic) {
 +      case nir_intrinsic_load_ubo_indirect:
 +         has_indirect = true;
 +         /* fallthrough */
 +      case nir_intrinsic_load_ubo: {
 +         set = intrin->const_index[0];
 +
 +         nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]);
 +         if (const_binding) {
 +            binding = const_binding->u[0];
 +         } else {
 +            assert(0 && "need more info from the ir for this.");
 +         }
 +         break;
 +      }
 +      default:
 +         continue; /* the loop */
 +      }
 +
 +      set_layout = state->layout->set[set].layout;
 +      slot = &set_layout->stage[state->stage].surface_start[binding];
 +      if (slot->dynamic_slot < 0)
 +         continue;
 +
 +      uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start +
 +                               slot->dynamic_slot;
 +
 +      state->builder.cursor = nir_before_instr(&intrin->instr);
 +
 +      nir_intrinsic_instr *offset_load =
 +         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
 +      offset_load->num_components = 1;
 +      offset_load->const_index[0] = state->indices_start + dynamic_index;
 +      offset_load->const_index[1] = 0;
 +      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL);
 +      nir_builder_instr_insert(&state->builder, &offset_load->instr);
 +
 +      nir_ssa_def *offset = &offset_load->dest.ssa;
 +      if (has_indirect) {
 +         assert(intrin->src[1].is_ssa);
 +         offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset);
 +      }
 +
 +      assert(intrin->dest.is_ssa);
 +
 +      nir_intrinsic_instr *new_load =
 +         nir_intrinsic_instr_create(state->shader,
 +                                    nir_intrinsic_load_ubo_indirect);
 +      new_load->num_components = intrin->num_components;
 +      new_load->const_index[0] = intrin->const_index[0];
 +      new_load->const_index[1] = intrin->const_index[1];
 +      nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr);
 +      new_load->src[1] = nir_src_for_ssa(offset);
 +      nir_ssa_dest_init(&new_load->instr, &new_load->dest,
 +                        intrin->dest.ssa.num_components,
 +                        intrin->dest.ssa.name);
 +      nir_builder_instr_insert(&state->builder, &new_load->instr);
 +
 +      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
++                               nir_src_for_ssa(&new_load->dest.ssa));
 +
 +      nir_instr_remove(&intrin->instr);
 +   }
 +
 +   return true;
 +}
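
The net effect of the rewrite above, stated in plain C: a UBO load through a dynamic descriptor ends up reading at its original (possibly indirect) offset plus a per-binding dynamic offset that the driver pushes as an extra uniform right after the shader's own uniforms. The function below is an illustration with invented names, not part of the pass:

#include <stdint.h>
#include <stdio.h>

static uint32_t
rewritten_ubo_offset(uint32_t original_offset,      /* const or indirect src  */
                     const uint32_t *push_uniforms, /* driver-pushed uniforms */
                     uint32_t indices_start,        /* first dynamic slot     */
                     uint32_t dynamic_index)        /* slot for this binding  */
{
   return original_offset + push_uniforms[indices_start + dynamic_index];
}

int
main(void)
{
   uint32_t push[8] = { 0, 0, 0, 0, 256, 512, 0, 0 };
   /* e.g. indices_start = 4, dynamic_index = 1, original offset 16 */
   printf("%u\n", rewritten_ubo_offset(16, push, 4, 1));   /* 528 */
   return 0;
}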
 +
 +void
 +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
 +                              nir_shader *shader,
 +                              struct brw_stage_prog_data *prog_data)
 +{
 +   struct apply_dynamic_offsets_state state = {
 +      .shader = shader,
 +      .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage),
 +      .layout = pipeline->layout,
 +      .indices_start = shader->num_uniforms,
 +   };
 +
 +   if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets)
 +      return;
 +
 +   nir_foreach_overload(shader, overload) {
 +      if (overload->impl) {
 +         nir_builder_init(&state.builder, overload->impl);
 +         nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state);
 +         nir_metadata_preserve(overload->impl, nir_metadata_block_index |
 +                                               nir_metadata_dominance);
 +      }
 +   }
 +
 +   struct anv_push_constants *null_data = NULL;
 +   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
 +      prog_data->param[i + shader->num_uniforms] =
 +         (const gl_constant_value *)&null_data->dynamic_offsets[i];
 +
 +   shader->num_uniforms += MAX_DYNAMIC_BUFFERS;
 +}
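
The NULL-pointer arithmetic in the final loop encodes "offset of dynamic_offsets[i] within the push constant struct" directly in the param pointer, to be resolved once a real push constant block exists. The same idiom in isolation, with an illustrative struct layout rather than the real anv_push_constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct push_block {                 /* stand-in layout, not the driver's */
   uint32_t other[4];
   uint32_t dynamic_offsets[8];
};

int
main(void)
{
   struct push_block *null_data = NULL;

   /* Same trick as above: taking the address through a NULL base yields
    * the member's offset, i.e. offsetof(struct push_block,
    * dynamic_offsets[2]). */
   uintptr_t encoded = (uintptr_t)&null_data->dynamic_offsets[2];

   printf("%zu == %zu\n", (size_t)encoded,
          offsetof(struct push_block, dynamic_offsets[2]));
   return 0;
}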